* [PATCH net-next v3 3/3] samples: bpf: add userspace example for modifying sk_bound_dev_if
From: David Ahern @ 2016-11-28 15:48 UTC (permalink / raw)
To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480348130-31354-1-git-send-email-dsa@cumulusnetworks.com>
Add a simple program to demonstrate the ability to attach a bpf program
to a cgroup that sets sk_bound_dev_if for AF_INET{6} sockets when they
are created.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v3
- revert to BPF_PROG_TYPE_CGROUP_SOCK prog type
v2
- removed bpf_sock_store_u32 references
- changed BPF_CGROUP_INET_SOCK_CREATE to BPF_CGROUP_INET_SOCK
- remove BPF_PROG_TYPE_CGROUP_SOCK prog type and add prog_subtype
samples/bpf/Makefile | 2 ++
samples/bpf/test_cgrp2_sock.c | 83 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+)
create mode 100644 samples/bpf/test_cgrp2_sock.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index fb17206ddb57..7de5cd9849c2 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -23,6 +23,7 @@ hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
hostprogs-y += test_cgrp2_attach
+hostprogs-y += test_cgrp2_sock
hostprogs-y += xdp1
hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup
@@ -51,6 +52,7 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
+test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
new file mode 100644
index 000000000000..2831e5f41f86
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -0,0 +1,83 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
+ * sockets opened by processes in the cgroup.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+static int prog_load(int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
+ "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+ printf("Usage: %s cg-path device-index\n", argv0);
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ int cg_fd, prog_fd, ret;
+ int idx = 0;
+
+ if (argc < 2)
+ return usage(argv[0]);
+
+ idx = atoi(argv[2]);
+ if (!idx) {
+ printf("Invalid device index\n");
+ return EXIT_FAILURE;
+ }
+
+ cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ prog_fd = prog_load(idx);
+ printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+ if (prog_fd < 0) {
+ printf("Failed to load prog: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK);
+ ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
--
2.1.4
^ permalink raw reply related
* [PATCH net-next v3 2/3] bpf: Add new cgroup attach type to enable sock modifications
From: David Ahern @ 2016-11-28 15:48 UTC (permalink / raw)
To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480348130-31354-1-git-send-email-dsa@cumulusnetworks.com>
Add new cgroup based program type, BPF_PROG_TYPE_CGROUP_SOCK. Similar to
BPF_PROG_TYPE_CGROUP_SKB programs can be attached to a cgroup and run
any time a process in the cgroup opens an AF_INET or AF_INET6 socket.
Currently only sk_bound_dev_if is exported to userspace for modification
by a bpf program.
This allows a cgroup to be configured such that AF_INET{6} sockets opened
by processes are automatically bound to a specific device. In turn, this
enables the running of programs that do not support SO_BINDTODEVICE in a
specific VRF context / L3 domain.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v3
- reverted to new prog type BPF_PROG_TYPE_CGROUP_SOCK
- dropped the subtype
v2
- dropped the bpf_sock_store_u32 helper
- dropped the new prog type BPF_PROG_TYPE_CGROUP_SOCK
- moved valid access and context conversion to use subtype
- dropped CREATE from BPF_CGROUP_INET_SOCK and related function names
- moved running of filter from sk_alloc to inet{6}_create
include/linux/bpf-cgroup.h | 11 ++++++++
include/linux/filter.h | 2 +-
include/uapi/linux/bpf.h | 6 +++++
kernel/bpf/cgroup.c | 9 +++++++
kernel/bpf/syscall.c | 5 +++-
net/core/filter.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/af_inet.c | 12 ++++++++-
net/ipv6/af_inet6.c | 8 ++++++
8 files changed, 115 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ec80d0c0953e..2ca529664c8b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -64,6 +64,16 @@ int __cgroup_bpf_run_filter(struct sock *sk,
__ret; \
})
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && sk) { \
+ __ret = __cgroup_bpf_run_filter(sk, NULL, \
+ BPF_CGROUP_INET_SOCK); \
+ } \
+ __ret; \
+})
+
#else
struct cgroup_bpf {};
@@ -73,6 +83,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1f09c521adfe..808e158742a2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -408,7 +408,7 @@ struct bpf_prog {
enum bpf_prog_type type; /* Type of BPF program */
struct bpf_prog_aux *aux; /* Auxiliary fields */
struct sock_fprog_kern *orig_prog; /* Original BPF program */
- unsigned int (*bpf_func)(const struct sk_buff *skb,
+ unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *filter);
/* Instructions for interpreter */
union {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1370a9d1456f..8f410ecdaac4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -101,11 +101,13 @@ enum bpf_prog_type {
BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
};
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK,
__MAX_BPF_ATTACH_TYPE
};
@@ -537,6 +539,10 @@ struct bpf_tunnel_key {
__u32 tunnel_label;
};
+struct bpf_sock {
+ __u32 bound_dev_if;
+};
+
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index d5746aec8f34..796e39aa28f5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -117,6 +117,12 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
}
}
+static int __cgroup_bpf_run_filter_sock(struct sock *sk,
+ struct bpf_prog *prog)
+{
+ return prog->bpf_func(sk, prog->insnsi) == 1 ? 0 : -EPERM;
+}
+
static int __cgroup_bpf_run_filter_skb(struct sk_buff *skb,
struct bpf_prog *prog)
{
@@ -171,6 +177,9 @@ int __cgroup_bpf_run_filter(struct sock *sk,
case BPF_CGROUP_INET_EGRESS:
ret = __cgroup_bpf_run_filter_skb(skb, prog);
break;
+ case BPF_CGROUP_INET_SOCK:
+ ret = __cgroup_bpf_run_filter_sock(sk, prog);
+ break;
/* make gcc happy else complains about missing enum value */
default:
return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c2bce596e842..f5247901a4cc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -856,7 +856,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET_EGRESS:
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
-
+ case BPF_CGROUP_INET_SOCK:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+ break;
default:
return -EINVAL;
}
@@ -892,6 +894,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
+ case BPF_CGROUP_INET_SOCK:
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
diff --git a/net/core/filter.c b/net/core/filter.c
index ea315af56511..593b6b664c0c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2645,6 +2645,12 @@ cg_skb_func_proto(enum bpf_func_id func_id)
}
}
+static const struct bpf_func_proto *
+cg_sock_func_proto(enum bpf_func_id func_id)
+{
+ return NULL;
+}
+
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
@@ -2682,6 +2688,29 @@ static bool sk_filter_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return false;
+
+ /* The verifier guarantees that size > 0. */
+ if (off % size != 0)
+ return false;
+
+ return true;
+}
+
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -2940,6 +2969,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+ int dst_reg, int src_reg,
+ int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -3013,6 +3066,12 @@ static const struct bpf_verifier_ops cg_skb_ops = {
.convert_ctx_access = sk_filter_convert_ctx_access,
};
+static const struct bpf_verifier_ops cg_sock_ops = {
+ .get_func_proto = cg_sock_func_proto,
+ .is_valid_access = sock_filter_is_valid_access,
+ .convert_ctx_access = sock_filter_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3038,6 +3097,11 @@ static struct bpf_prog_type_list cg_skb_type __read_mostly = {
.type = BPF_PROG_TYPE_CGROUP_SKB,
};
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+ .ops = &cg_sock_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SOCK
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
@@ -3045,6 +3109,7 @@ static int __init register_sk_filter_ops(void)
bpf_register_prog_type(&sched_act_type);
bpf_register_prog_type(&xdp_type);
bpf_register_prog_type(&cg_skb_type);
+ bpf_register_prog_type(&cg_sock_type);
return 0;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5ddf5cda07f4..24d2550492ee 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -374,8 +374,18 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err)
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
sk_common_release(sk);
+ goto out;
+ }
}
out:
return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d424f3a3737a..237e654ba717 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -258,6 +258,14 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
goto out;
}
}
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
out:
return err;
out_rcu_unlock:
--
2.1.4
^ permalink raw reply related
* [PATCH net-next v3 0/3] net: Add bpf support to set sk_bound_dev_if
From: David Ahern @ 2016-11-28 15:48 UTC (permalink / raw)
To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
The recently added VRF support in Linux leverages the bind-to-device
API for programs to specify an L3 domain for a socket. While
SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable
program has support for it. Even for those programs that do support it,
the API requires processes to be started as root (CAP_NET_RAW) which
is not desirable from a general security perspective.
This patch set leverages Daniel Mack's work to attach bpf programs to
a cgroup to provide a capability to set sk_bound_dev_if for all
AF_INET{6} sockets opened by a process in a cgroup when the sockets
are allocated.
For example:
1. configure vrf (e.g., using ifupdown2)
auto eth0
iface eth0 inet dhcp
vrf mgmt
auto mgmt
iface mgmt
vrf-table auto
2. configure cgroup
mount -t cgroup2 none /tmp/cgroupv2
mkdir /tmp/cgroupv2/mgmt
test_cgrp2_sock /tmp/cgroupv2/mgmt 15
3. set shell into cgroup (e.g., can be done at login using pam)
echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs
At this point all commands run in the shell (e.g, apt) have sockets
automatically bound to the VRF (see output of ss -ap 'dev == <vrf>'),
including processes not running as root.
This capability enables running any program in a VRF context and is key
to deploying Management VRF, a fundamental configuration for networking
gear, with any Linux OS installation.
David Ahern (3):
bpf: Refactor cgroups code in prep for new type
bpf: Add new cgroup attach type to enable sock modifications
samples: bpf: add userspace example for modifying sk_bound_dev_if
include/linux/bpf-cgroup.h | 11 ++++++
include/linux/filter.h | 2 +-
include/uapi/linux/bpf.h | 6 ++++
kernel/bpf/cgroup.c | 36 ++++++++++++++++---
kernel/bpf/syscall.c | 33 +++++++++--------
net/core/filter.c | 65 +++++++++++++++++++++++++++++++++
net/ipv4/af_inet.c | 12 ++++++-
net/ipv6/af_inet6.c | 8 +++++
samples/bpf/Makefile | 2 ++
samples/bpf/test_cgrp2_sock.c | 83 +++++++++++++++++++++++++++++++++++++++++++
10 files changed, 237 insertions(+), 21 deletions(-)
create mode 100644 samples/bpf/test_cgrp2_sock.c
--
2.1.4
^ permalink raw reply
* [PATCH net-next v3 1/3] bpf: Refactor cgroups code in prep for new type
From: David Ahern @ 2016-11-28 15:48 UTC (permalink / raw)
To: netdev; +Cc: daniel, ast, daniel, maheshb, tgraf, David Ahern
In-Reply-To: <1480348130-31354-1-git-send-email-dsa@cumulusnetworks.com>
Code move only; no functional change intended.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
v3
- dropped the rename
v2
- fix bpf_prog_run_clear_cb to bpf_prog_run_save_cb as caught by Daniel
- rename BPF_PROG_TYPE_CGROUP_SKB and its cg_skb functions to
BPF_PROG_TYPE_CGROUP and cgroup
kernel/bpf/cgroup.c | 27 ++++++++++++++++++++++-----
kernel/bpf/syscall.c | 28 +++++++++++++++-------------
2 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index a0ab43f264b0..d5746aec8f34 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -117,6 +117,19 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
}
}
+static int __cgroup_bpf_run_filter_skb(struct sk_buff *skb,
+ struct bpf_prog *prog)
+{
+ unsigned int offset = skb->data - skb_network_header(skb);
+ int ret;
+
+ __skb_push(skb, offset);
+ ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+ __skb_pull(skb, offset);
+
+ return ret;
+}
+
/**
* __cgroup_bpf_run_filter() - Run a program for packet filtering
* @sk: The socken sending or receiving traffic
@@ -153,11 +166,15 @@ int __cgroup_bpf_run_filter(struct sock *sk,
prog = rcu_dereference(cgrp->bpf.effective[type]);
if (prog) {
- unsigned int offset = skb->data - skb_network_header(skb);
-
- __skb_push(skb, offset);
- ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
- __skb_pull(skb, offset);
+ switch (type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ ret = __cgroup_bpf_run_filter_skb(skb, prog);
+ break;
+ /* make gcc happy else complains about missing enum value */
+ default:
+ return 0;
+ }
}
rcu_read_unlock();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1090d16a31c1..c2bce596e842 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -843,6 +843,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
{
struct bpf_prog *prog;
struct cgroup *cgrp;
+ enum bpf_prog_type ptype;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -853,25 +854,26 @@ static int bpf_prog_attach(const union bpf_attr *attr)
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
- prog = bpf_prog_get_type(attr->attach_bpf_fd,
- BPF_PROG_TYPE_CGROUP_SKB);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
-
- cgrp = cgroup_get_from_fd(attr->target_fd);
- if (IS_ERR(cgrp)) {
- bpf_prog_put(prog);
- return PTR_ERR(cgrp);
- }
-
- cgroup_bpf_update(cgrp, prog, attr->attach_type);
- cgroup_put(cgrp);
+ ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
default:
return -EINVAL;
}
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+
+ cgroup_bpf_update(cgrp, prog, attr->attach_type);
+ cgroup_put(cgrp);
+
return 0;
}
--
2.1.4
^ permalink raw reply related
* Re: [PATCH 2/2] net: dsa: mv88e6xxx: Add 88E6176 device tree support
From: Uwe Kleine-König @ 2016-11-28 15:44 UTC (permalink / raw)
To: Andrew Lunn
Cc: Rob Herring, Frank Rowand, Andreas Färber,
netdev-u79uwXL29TY76Z2rM5mHXA,
linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
Michal Hrusecki, Tomas Hlavacek, Bed??icha Ko??atu,
Vivien Didelot, Florian Fainelli,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
devicetree-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20161128131735.GA4379-g2DYL2Zd6BY@public.gmane.org>
[-- Attachment #1.1: Type: text/plain, Size: 2968 bytes --]
On 11/28/2016 02:17 PM, Andrew Lunn wrote:
>> I still wonder (and didn't get an answer back when I asked about this)
>> why a comment is preferred here. For other devices I know it's usual and
>> requested by the maintainers to use:
>>
>> compatible = "exact name", "earlyer device to match driver";
>>
>> . This is more robust, documents the situation more formally and makes
>> it better greppable. The price to pay is only a few bytes in the dtb
>> which IMO is ok.
>
> We did discuss this a while back. The information is useless and
> should to be ignored if present.
Who is "we"?
> The switch has a register which contains its model and revision. Each
> port has a set of registers, and register 3 contains the
> model/version. For all devices compatible with the 6085, the port
> registers start at address 0x10. For the 6190, the port registers
> start at 0x0. So given one of these two compatible strings, we can
> find the model of the device, from something which is burned into the
> silicon.
>
> Now, say we did add per device compatible strings. We look up the
> model burned into the silicon, find it is different to what the device
> tree is and do what? Fail the probe? Or just keep going using the
I'd say fail to probe is the right thing to do. Of course that doesn't
work for already supported models because it will break compatibility.
I'd value the advantages (i.e. easily find machines with a given
hardware) higher than making broken dtbs work, so being a bit silly is
fine for me.
> value in the silicon? It seems silly to fail the probe if the driver
> does support the model, but that means the device tree is never
> verified and hence probably wrong. Why have wrong information in the
> device tree, especially wrong information which we never use. It is
> better to not have that information in the device tree.
At least we'd have a canonical way to specify the type of switch. If
it's not verified it's as good and bad as a dts comment. But the latter
isn't available in the dtb, which I consider a small disadvantage.
Also it seems wrong to write "marvell,mv88e6085" (only) if I know the
hardware is really a "marvell,mv88e6176".
> Linus has said he does not like ARM devices because of all the busses
> which are not enumerable. Here we have a device which with a little
> bit of help we can enumerate. So we should.
If you write
compatible = "marvell,mv88e6176", "marvell,mv88e6085";
you can still enumerate in the same way as before.
There are several more instances where the device tree specifies
something that could be probed instead. Some examples:
compatible = "ethernet-phy-id0141.0DD1", "ethernet-phy-ieee802.3-c22";
compatible = "spansion,s25fl164k", "jedec,spi-nor";
compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
compatible = "arm,pl011", "arm,primecell";
So you think they are all doing it wrong?
Best regards
Uwe
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* RE: [PATCH net-next 01/10] net/mlx4_core: Make each VF manage its own mac table
From: Mintz, Yuval @ 2016-11-28 15:37 UTC (permalink / raw)
To: Tariq Toukan, David S. Miller
Cc: netdev@vger.kernel.org, Eran Ben Elisha, Erez Shitrit,
Gal Pressman
In-Reply-To: <1480261877-19720-2-git-send-email-tariqt@mellanox.com>
> Each VF can catch up to the max number of MACs in the MAC table (128) on
> the base of "first asks first gets".
> The VF should know the total free number of MACs from the PF.
While this might be a reasonable default policy given the alternatives
[or more precisely, the lack of ability for hyper-user to configure such],
it still sounds a bit crude - a user would have an available MAC only if
other users were kind enough not to consume all of them?
Have you thought of adding some configurable method through which
this could be controlled?
^ permalink raw reply
* [PATCH 1/1] GSO: Reload iph after pskb_may_pull
From: Arnaldo Carvalho de Melo @ 2016-11-28 15:36 UTC (permalink / raw)
To: David Miller
Cc: Eric Dumazet, Alexander Duyck, Andrey Konovalov,
Linux Networking Development Mailing List
As it may get stale and lead to use after free.
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Alexander Duyck <aduyck@mirantis.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Fixes: cbc53e08a793 ("GSO: Add GSO type for fixed IPv4 ID")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
net/ipv4/af_inet.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5ddf5cda07f4..215143246e4b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1233,7 +1233,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
/* fixed ID is invalid if DF bit is not set */
- if (fixedid && !(iph->frag_off & htons(IP_DF)))
+ if (fixedid && !(ip_hdr(skb)->frag_off & htons(IP_DF)))
goto out;
}
--
2.9.3
^ permalink raw reply related
* Re: [PATCH net] net/dccp: fix use-after-free in dccp_invalid_packet
From: Arnaldo Carvalho de Melo @ 2016-11-28 15:36 UTC (permalink / raw)
To: Eric Dumazet
Cc: Alexander Duyck, Arnaldo Carvalho de Melo, Andrey Konovalov,
Gerrit Renker, David S. Miller, dccp, netdev, LKML, Dmitry Vyukov,
Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <1480346458.18162.54.camel@edumazet-glaptop3.roam.corp.google.com>
Em Mon, Nov 28, 2016 at 07:20:58AM -0800, Eric Dumazet escreveu:
> On Mon, 2016-11-28 at 12:05 -0300, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Nov 28, 2016 at 06:47:14AM -0800, Eric Dumazet escreveu:
> > > On Mon, 2016-11-28 at 11:40 -0300, Arnaldo Carvalho de Melo wrote:
> > > > Em Mon, Nov 28, 2016 at 06:26:49AM -0800, Eric Dumazet escreveu:
> > > > > From: Eric Dumazet <edumazet@google.com>
> > > > >
> > > > > pskb_may_pull() can reallocate skb->head, we need to reload dh pointer
> > > > > in dccp_invalid_packet() or risk use after free.
> > > > >
> > > > > Bug found by Andrey Konovalov using syzkaller.
> > > > >
> > > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > > Reported-by: Andrey Konovalov <andreyknvl@google.com>
> > > >
> > > > Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> > > >
> > > > I was about to send exactly this patch, and while looking at it I think
> > > > the patch below needs to go in as well, no? To follow the advice of that
> > > > Warning line there :-)
> > > >
> > > > From: Arnaldo Carvalho de Melo <acme@redhat.com>
> > > >
> > > > pskb_may_pull() can reallocate skb->head, so we can't access
> > > > iph->frag_off or risk use after free, save it to a variable and us that
> > > > later.
> > > >
> > > > Cc: Andrey Konovalov <andreyknvl@google.com>
> > > > Cc: Eric Dumazet <edumazet@google.com>
> > > > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> > > >
> > > > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > > > index 5ddf5cda07f4..9462070561a3 100644
> > > > --- a/net/ipv4/af_inet.c
> > > > +++ b/net/ipv4/af_inet.c
> > > > @@ -1198,6 +1198,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > > > struct iphdr *iph;
> > > > int proto, tot_len;
> > > > int nhoff;
> > > > + u16 frag_off;
> > > > int ihl;
> > > > int id;
> > > >
> > > > @@ -1213,6 +1214,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > > >
> > > > id = ntohs(iph->id);
> > > > proto = iph->protocol;
> > > > + frag_off = iph->frag_off;
> > > >
> > > > /* Warning: after this point, iph might be no longer valid */
> > > > if (unlikely(!pskb_may_pull(skb, ihl)))
> > > > @@ -1233,7 +1235,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > > > fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
> > > >
> > > > /* fixed ID is invalid if DF bit is not set */
> > > > - if (fixedid && !(iph->frag_off & htons(IP_DF)))
> > > > + if (fixedid && !(frag_off & htons(IP_DF)))
> > > > goto out;
> > > > }
> > > >
> > >
> > >
> > > I do not see why this patch would be needed ?
> >
> > Where is iph being reloaded after that pskb_may_pull() and thus at line 1236 we
> > could use after free? The warning at line 1217?
> >
> > 1209 iph = ip_hdr(skb);
> > 1210 ihl = iph->ihl * 4;
> > 1211 if (ihl < sizeof(*iph))
> > 1212 goto out;
> > 1213
> > 1214 id = ntohs(iph->id);
> > 1215 proto = iph->protocol;
> > 1216
> > 1217 /* Warning: after this point, iph might be no longer valid */
> > 1218 if (unlikely(!pskb_may_pull(skb, ihl)))
> > 1219 goto out;
> > 1220 __skb_pull(skb, ihl);
> > 1221
> > 1222 encap = SKB_GSO_CB(skb)->encap_level > 0;
> > 1223 if (encap)
> > 1224 features &= skb->dev->hw_enc_features;
> > 1225 SKB_GSO_CB(skb)->encap_level += ihl;
> > 1226
> > 1227 skb_reset_transport_header(skb);
> > 1228
> > 1229 segs = ERR_PTR(-EPROTONOSUPPORT);
> > 1230
> > 1231 if (!skb->encapsulation || encap) {
> > 1232 udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
> > 1233 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
> > 1234
> > 1235 /* fixed ID is invalid if DF bit is not set */
> > 1236 if (fixedid && !(iph->frag_off & htons(IP_DF)))
> > 1237 goto out;
> > 1238 }
> >
>
> Arg, I was looking at an old tree.
>
> Please then add
>
> Fixes: cbc53e08a793b ("GSO: Add GSO type for fixed IPv4 ID")
>
> To ease stable backports.
>
> Also, it looks comments are not read, we might kill this one and reload
> iph.
>
> ( Saving 3 fields is now more expensive than simply reloading iph )
Ok, I instead used ip_hdr(skb)->frag_off at that place, as it is the
only use after the pskb_may_pull(), right after that use it will reload
iph in another fashion anyway, sending the patch in another message,
holler if you disagree, i.e. nacking that ack 8-)
- Arnaldo
^ permalink raw reply
* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
From: Lino Sanfilippo @ 2016-11-28 15:33 UTC (permalink / raw)
To: David Miller; +Cc: pavel, peppe.cavallaro, netdev, linux-kernel
In-Reply-To: <20161128.095431.856183735501262965.davem@davemloft.net>
Hi,
On 28.11.2016 15:54, David Miller wrote:
> From: Lino Sanfilippo <lsanfil@marvell.com>
> Date: Mon, 28 Nov 2016 14:07:51 +0100
>
>> Calling skb_orphan() in the xmit handler made this issue disappear.
>
> This is not the way to handle this problem.
>
I agree, its not a clean solution. But if the use of skb_orphan makes the delays
disappear, it can at least be a hint that socket queue limits are involved.
Regards,
Lino
^ permalink raw reply
* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
From: Eric Dumazet @ 2016-11-28 15:31 UTC (permalink / raw)
To: David Miller; +Cc: lsanfil, pavel, peppe.cavallaro, netdev, linux-kernel
In-Reply-To: <20161128.095431.856183735501262965.davem@davemloft.net>
On Mon, 2016-11-28 at 09:54 -0500, David Miller wrote:
> From: Lino Sanfilippo <lsanfil@marvell.com>
> Date: Mon, 28 Nov 2016 14:07:51 +0100
>
> > Calling skb_orphan() in the xmit handler made this issue disappear.
>
> This is not the way to handle this problem.
>
> The solution is to free the SKBs in a timely manner after the
> chip has transmitted the frame.
Note that the 'pauses' described by Pavel are also caused by a too small
SO_SNDBUF value on the UDP socket.
An immediate fix, with no kernel change is to increase it.
echo 1000000 >/proc/sys/net/core/wmem_default
or
val = 1000000;
setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
^ permalink raw reply
* RE: [PATCH net-next 06/10] net/mlx4_core: Dynamically allocate structs at mlx4_slave_cap
From: David Laight @ 2016-11-28 15:28 UTC (permalink / raw)
To: 'Tariq Toukan', David S. Miller
Cc: netdev@vger.kernel.org, Eran Ben Elisha, Tal Alon
In-Reply-To: <1480261877-19720-7-git-send-email-tariqt@mellanox.com>
From: Tariq Toukan
> Sent: 27 November 2016 15:51
> From: Eran Ben Elisha <eranbe@mellanox.com>
>
> In order to avoid temporary large structs on the stack,
> allocate them dynamically.
>
> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
> Signed-off-by: Tal Alon <talal@mellanox.com>
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/main.c | 244 +++++++++++++++++-------------
> 1 file changed, 142 insertions(+), 102 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
> index 4a9497e9778d..65502df9fd96 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/main.c
> @@ -799,40 +799,117 @@ static void slave_adjust_steering_mode(struct mlx4_dev *dev,
...
> +static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
> +{
> + struct mlx4_func_cap *func_cap = NULL;
> + int i, err = 0;
> +
> + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
> + dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports,
> + sizeof(u32), GFP_KERNEL);
> + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports,
> + sizeof(u32), GFP_KERNEL);
> + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports,
> + sizeof(u32), GFP_KERNEL);
> + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports,
> + sizeof(u32), GFP_KERNEL);
> + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports,
> + sizeof(u32), GFP_KERNEL);
...
It has to be better to allocate a single piece of memory.
Potentially using a structure something like:
struct fubar {
struct mlx4_func_cap func_cap;
struct {
u32 qp0_qkey;
u32 qp0_tunnel;
u32 qp0_proxy;
u32 qp1_tunnel;
u32 qp1_proxy;
} port_info[];
};
David
^ permalink raw reply
* Re: [PATCH net-next] virtio-net: enable multiqueue by default
From: Neil Horman @ 2016-11-28 15:26 UTC (permalink / raw)
To: Jason Wang
Cc: virtualization, netdev, linux-kernel, Hannes Frederic Sowa,
Michael S . Tsirkin, Jeremy Eder, Marko Myllynen, Maxime Coquelin
In-Reply-To: <1480048646-17536-1-git-send-email-jasowang@redhat.com>
On Fri, Nov 25, 2016 at 12:37:26PM +0800, Jason Wang wrote:
> We use single queue even if multiqueue is enabled and let admin to
> enable it through ethtool later. This is used to avoid possible
> regression (small packet TCP stream transmission). But looks like an
> overkill since:
>
> - single queue user can disable multiqueue when launching qemu
> - brings extra troubles for the management since it needs extra admin
> tool in guest to enable multiqueue
> - multiqueue performs much better than single queue in most of the
> cases
>
> So this patch enables multiqueue by default: if #queues is less than or
> equal to #vcpu, enable as much as queue pairs; if #queues is greater
> than #vcpu, enable #vcpu queue pairs.
>
> Cc: Hannes Frederic Sowa <hannes@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Neil Horman <nhorman@redhat.com>
> Cc: Jeremy Eder <jeder@redhat.com>
> Cc: Marko Myllynen <myllynen@redhat.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> drivers/net/virtio_net.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index d4ac7a6..a21d93a 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1886,8 +1886,11 @@ static int virtnet_probe(struct virtio_device *vdev)
> if (vi->any_header_sg)
> dev->needed_headroom = vi->hdr_len;
>
> - /* Use single tx/rx queue pair as default */
> - vi->curr_queue_pairs = 1;
> + /* Enable multiqueue by default */
> + if (num_online_cpus() >= max_queue_pairs)
> + vi->curr_queue_pairs = max_queue_pairs;
> + else
> + vi->curr_queue_pairs = num_online_cpus();
> vi->max_queue_pairs = max_queue_pairs;
>
> /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
> @@ -1918,6 +1921,8 @@ static int virtnet_probe(struct virtio_device *vdev)
> goto free_unregister_netdev;
> }
>
> + virtnet_set_affinity(vi);
> +
> /* Assume link up if device can't report link status,
> otherwise get link status from config. */
> if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
> --
> 2.7.4
>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
^ permalink raw reply
* Re: [PATCH] cpsw: ethtool: add support for nway reset
From: Yegor Yefremov @ 2016-11-28 15:25 UTC (permalink / raw)
To: netdev
Cc: linux-omap@vger.kernel.org, Grygorii Strashko, N, Mugunthan V,
David Miller, Yegor Yefremov
In-Reply-To: <1480326472-5849-1-git-send-email-yegorslists@googlemail.com>
On Mon, Nov 28, 2016 at 10:47 AM, <yegorslists@googlemail.com> wrote:
> From: Yegor Yefremov <yegorslists@googlemail.com>
>
> This patch adds support for ethtool's '-r' command. Restarting
> N-WAY negotiation can be useful to activate newly changed EEE
> settings etc.
>
> Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
This patch applies on top of
http://marc.info/?l=linux-netdev&m=148032251729251&w=2
Yegor
^ permalink raw reply
* Re: [PATCH] stmmac: reduce code duplication getting basic descriptors
From: kbuild test robot @ 2016-11-28 15:25 UTC (permalink / raw)
To: Pavel Machek
Cc: kbuild-all, David Miller, Andrew Morton, alexandre.torgue,
peppe.cavallaro, netdev, linux-kernel
In-Reply-To: <20161128121736.GD15034@amd>
[-- Attachment #1: Type: text/plain, Size: 1828 bytes --]
Hi Pavel,
[auto build test WARNING on net-next/master]
[also build test WARNING on v4.9-rc7 next-20161128]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Pavel-Machek/stmmac-reduce-code-duplication-getting-basic-descriptors/20161128-204339
config: x86_64-randconfig-s1-11282147 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All warnings (new ones prefixed by >>):
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c: In function 'dma_free_tx_skbufs':
>> drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:1137: warning: unused variable 'p'
drivers/net/ethernet/stmicro/stmmac/stmmac_main.o: warning: objtool: stmmac_resume()+0x125: function has unreachable instruction
vim +/p +1137 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
1121 return ret;
1122 }
1123
1124 static void dma_free_rx_skbufs(struct stmmac_priv *priv)
1125 {
1126 int i;
1127
1128 for (i = 0; i < DMA_RX_SIZE; i++)
1129 stmmac_free_rx_buffers(priv, i);
1130 }
1131
1132 static void dma_free_tx_skbufs(struct stmmac_priv *priv)
1133 {
1134 int i;
1135
1136 for (i = 0; i < DMA_TX_SIZE; i++) {
> 1137 struct dma_desc *p = stmmac_tx_desc(priv, i);
1138
1139 if (priv->tx_skbuff_dma[i].buf) {
1140 if (priv->tx_skbuff_dma[i].map_as_page)
1141 dma_unmap_page(priv->device,
1142 priv->tx_skbuff_dma[i].buf,
1143 priv->tx_skbuff_dma[i].len,
1144 DMA_TO_DEVICE);
1145 else
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31122 bytes --]
^ permalink raw reply
* Re: [PATCH net-next] virtio-net: enable multiqueue by default
From: Neil Horman @ 2016-11-28 15:25 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Jason Wang, virtualization, netdev, linux-kernel,
Hannes Frederic Sowa, Jeremy Eder, Marko Myllynen,
Maxime Coquelin
In-Reply-To: <20161125064201-mutt-send-email-mst@kernel.org>
On Fri, Nov 25, 2016 at 06:43:08AM +0200, Michael S. Tsirkin wrote:
> On Fri, Nov 25, 2016 at 12:37:26PM +0800, Jason Wang wrote:
> > We use single queue even if multiqueue is enabled and let admin to
> > enable it through ethtool later. This is used to avoid possible
> > regression (small packet TCP stream transmission). But looks like an
> > overkill since:
> >
> > - single queue user can disable multiqueue when launching qemu
> > - brings extra troubles for the management since it needs extra admin
> > tool in guest to enable multiqueue
> > - multiqueue performs much better than single queue in most of the
> > cases
> >
> > So this patch enables multiqueue by default: if #queues is less than or
> > equal to #vcpu, enable as much as queue pairs; if #queues is greater
> > than #vcpu, enable #vcpu queue pairs.
> >
> > Cc: Hannes Frederic Sowa <hannes@redhat.com>
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Neil Horman <nhorman@redhat.com>
> > Cc: Jeremy Eder <jeder@redhat.com>
> > Cc: Marko Myllynen <myllynen@redhat.com>
> > Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
>
> OK at some level but all uses of num_online_cpus()
> like this are racy versus hotplug.
> I know we already have this bug but shouldn't we fix it
> before we add more?
>
Isn't the fix orthogonal to this use though? That is to say, you shoudl
register a hotplug notifier first, and use the handler to adjust the number of
queues on hotplug add/remove?
Neil
>
> > ---
> > drivers/net/virtio_net.c | 9 +++++++--
> > 1 file changed, 7 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index d4ac7a6..a21d93a 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -1886,8 +1886,11 @@ static int virtnet_probe(struct virtio_device *vdev)
> > if (vi->any_header_sg)
> > dev->needed_headroom = vi->hdr_len;
> >
> > - /* Use single tx/rx queue pair as default */
> > - vi->curr_queue_pairs = 1;
> > + /* Enable multiqueue by default */
> > + if (num_online_cpus() >= max_queue_pairs)
> > + vi->curr_queue_pairs = max_queue_pairs;
> > + else
> > + vi->curr_queue_pairs = num_online_cpus();
> > vi->max_queue_pairs = max_queue_pairs;
> >
> > /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
> > @@ -1918,6 +1921,8 @@ static int virtnet_probe(struct virtio_device *vdev)
> > goto free_unregister_netdev;
> > }
> >
> > + virtnet_set_affinity(vi);
> > +
> > /* Assume link up if device can't report link status,
> > otherwise get link status from config. */
> > if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
> > --
> > 2.7.4
^ permalink raw reply
* Re: [PATCH net] net/dccp: fix use-after-free in dccp_invalid_packet
From: Eric Dumazet @ 2016-11-28 15:20 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Alexander Duyck
Cc: Arnaldo Carvalho de Melo, Andrey Konovalov, Gerrit Renker,
David S. Miller, dccp, netdev, LKML, Dmitry Vyukov,
Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <20161128150518.GA17913@kernel.org>
On Mon, 2016-11-28 at 12:05 -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Nov 28, 2016 at 06:47:14AM -0800, Eric Dumazet escreveu:
> > On Mon, 2016-11-28 at 11:40 -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Mon, Nov 28, 2016 at 06:26:49AM -0800, Eric Dumazet escreveu:
> > > > From: Eric Dumazet <edumazet@google.com>
> > > >
> > > > pskb_may_pull() can reallocate skb->head, we need to reload dh pointer
> > > > in dccp_invalid_packet() or risk use after free.
> > > >
> > > > Bug found by Andrey Konovalov using syzkaller.
> > > >
> > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > Reported-by: Andrey Konovalov <andreyknvl@google.com>
> > >
> > > Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> > >
> > > I was about to send exactly this patch, and while looking at it I think
> > > the patch below needs to go in as well, no? To follow the advice of that
> > > Warning line there :-)
> > >
> > > From: Arnaldo Carvalho de Melo <acme@redhat.com>
> > >
> > > pskb_may_pull() can reallocate skb->head, so we can't access
> > > iph->frag_off or risk use after free, save it to a variable and us that
> > > later.
> > >
> > > Cc: Andrey Konovalov <andreyknvl@google.com>
> > > Cc: Eric Dumazet <edumazet@google.com>
> > > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> > >
> > > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > > index 5ddf5cda07f4..9462070561a3 100644
> > > --- a/net/ipv4/af_inet.c
> > > +++ b/net/ipv4/af_inet.c
> > > @@ -1198,6 +1198,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > > struct iphdr *iph;
> > > int proto, tot_len;
> > > int nhoff;
> > > + u16 frag_off;
> > > int ihl;
> > > int id;
> > >
> > > @@ -1213,6 +1214,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > >
> > > id = ntohs(iph->id);
> > > proto = iph->protocol;
> > > + frag_off = iph->frag_off;
> > >
> > > /* Warning: after this point, iph might be no longer valid */
> > > if (unlikely(!pskb_may_pull(skb, ihl)))
> > > @@ -1233,7 +1235,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > > fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
> > >
> > > /* fixed ID is invalid if DF bit is not set */
> > > - if (fixedid && !(iph->frag_off & htons(IP_DF)))
> > > + if (fixedid && !(frag_off & htons(IP_DF)))
> > > goto out;
> > > }
> > >
> >
> >
> > I do not see why this patch would be needed ?
>
> Where is iph being reloaded after that pskb_may_pull() and thus at line 1236 we
> could use after free? The warning at line 1217?
>
> 1209 iph = ip_hdr(skb);
> 1210 ihl = iph->ihl * 4;
> 1211 if (ihl < sizeof(*iph))
> 1212 goto out;
> 1213
> 1214 id = ntohs(iph->id);
> 1215 proto = iph->protocol;
> 1216
> 1217 /* Warning: after this point, iph might be no longer valid */
> 1218 if (unlikely(!pskb_may_pull(skb, ihl)))
> 1219 goto out;
> 1220 __skb_pull(skb, ihl);
> 1221
> 1222 encap = SKB_GSO_CB(skb)->encap_level > 0;
> 1223 if (encap)
> 1224 features &= skb->dev->hw_enc_features;
> 1225 SKB_GSO_CB(skb)->encap_level += ihl;
> 1226
> 1227 skb_reset_transport_header(skb);
> 1228
> 1229 segs = ERR_PTR(-EPROTONOSUPPORT);
> 1230
> 1231 if (!skb->encapsulation || encap) {
> 1232 udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
> 1233 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
> 1234
> 1235 /* fixed ID is invalid if DF bit is not set */
> 1236 if (fixedid && !(iph->frag_off & htons(IP_DF)))
> 1237 goto out;
> 1238 }
>
Arg, I was looking at an old tree.
Please then add
Fixes: cbc53e08a793b ("GSO: Add GSO type for fixed IPv4 ID")
To ease stable backports.
Also, it looks comments are not read, we might kill this one and reload
iph.
( Saving 3 fields is now more expensive than simply reloading iph )
Thanks.
^ permalink raw reply
* Re: [patch net] sched: cls_flower: remove from hashtable only in case skip sw flag is not set
From: Amir Vadai" @ 2016-11-28 15:16 UTC (permalink / raw)
To: Jiri Pirko; +Cc: netdev, davem, jhs, idosch, eladr, ogerlitz, hadarh
In-Reply-To: <1480344013-4812-1-git-send-email-jiri@resnulli.us>
On Mon, Nov 28, 2016 at 03:40:13PM +0100, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
>
> Be symmetric to hashtable insert and remove filter from hashtable only
> in case skip sw flag is not set.
>
> Fixes: e69985c67c33 ("net/sched: cls_flower: Introduce support in SKIP SW flag")
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
Reviewed-by: Amir Vadai <amir@vadai.me>
^ permalink raw reply
* Re: net/sctp: vmalloc allocation failure in sctp_setsockopt/xt_alloc_table_info
From: Marcelo Ricardo Leitner @ 2016-11-28 15:13 UTC (permalink / raw)
To: Neil Horman
Cc: Andrey Konovalov, Vlad Yasevich, linux-sctp, netdev, LKML,
Pablo Neira Ayuso, Patrick McHardy, Jozsef Kadlecsik,
David S. Miller, netfilter-devel, coreteam, Dmitry Vyukov,
Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <20161128143931.GB29839@hmsreliant.think-freely.org>
On Mon, Nov 28, 2016 at 09:39:31AM -0500, Neil Horman wrote:
> On Mon, Nov 28, 2016 at 03:33:40PM +0100, Andrey Konovalov wrote:
> > On Mon, Nov 28, 2016 at 3:13 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > > On Mon, Nov 28, 2016 at 02:00:19PM +0100, Andrey Konovalov wrote:
> > >> Hi!
> > >>
> > >> I've got the following error report while running the syzkaller fuzzer.
> > >>
> > >> On commit d8e435f3ab6fea2ea324dce72b51dd7761747523 (Nov 26).
> > >>
> > >> A reproducer is attached.
> > >>
> > >> a.out: vmalloc: allocation failure, allocated 823562240 of 1427091456
> > >> bytes, mode:0x24000c2(GFP_KERNEL|__GFP_HIGHMEM)
> > >>
> > > How much total ram do you have in this system? The call appears to be
> > > attempting to allocate 1.3 Gb of data. Even using vmalloc to allow
> > > discontiguous allocation, thats alot of memory, and if enough is in use already,
> > > I could make the argument that this might be expected behavior.
> >
> > Hi Neail,
> >
> > I have 2 Gb.
> >
> That would be why. Allocating 65% of the available system memory will almost
> certainly lead to OOM failures quickly.
>
> > Just tested with 4 Gb, everything seems to be working fine.
> > So I guess this is not actually a bug and allocating 1.3 Gb is OK.
Still we probably should avoid the warn triggered by an userspace
application: (untested)
--8<--
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index fc4977456c30..b56a0e128fc3 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -958,7 +958,8 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!info) {
- info = vmalloc(sz);
+ info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_HIGHMEM,
+ PAGE_KERNEL);
if (!info)
return NULL;
}
^ permalink raw reply related
* Re: [PATCH net] net/dccp: fix use-after-free in dccp_invalid_packet
From: Arnaldo Carvalho de Melo @ 2016-11-28 15:05 UTC (permalink / raw)
To: Eric Dumazet
Cc: Arnaldo Carvalho de Melo, Andrey Konovalov, Gerrit Renker,
David S. Miller, dccp, netdev, LKML, Dmitry Vyukov,
Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <1480344434.18162.51.camel@edumazet-glaptop3.roam.corp.google.com>
Em Mon, Nov 28, 2016 at 06:47:14AM -0800, Eric Dumazet escreveu:
> On Mon, 2016-11-28 at 11:40 -0300, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Nov 28, 2016 at 06:26:49AM -0800, Eric Dumazet escreveu:
> > > From: Eric Dumazet <edumazet@google.com>
> > >
> > > pskb_may_pull() can reallocate skb->head, we need to reload dh pointer
> > > in dccp_invalid_packet() or risk use after free.
> > >
> > > Bug found by Andrey Konovalov using syzkaller.
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > Reported-by: Andrey Konovalov <andreyknvl@google.com>
> >
> > Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> >
> > I was about to send exactly this patch, and while looking at it I think
> > the patch below needs to go in as well, no? To follow the advice of that
> > Warning line there :-)
> >
> > From: Arnaldo Carvalho de Melo <acme@redhat.com>
> >
> > pskb_may_pull() can reallocate skb->head, so we can't access
> > iph->frag_off or risk use after free, save it to a variable and us that
> > later.
> >
> > Cc: Andrey Konovalov <andreyknvl@google.com>
> > Cc: Eric Dumazet <edumazet@google.com>
> > Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
> >
> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > index 5ddf5cda07f4..9462070561a3 100644
> > --- a/net/ipv4/af_inet.c
> > +++ b/net/ipv4/af_inet.c
> > @@ -1198,6 +1198,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > struct iphdr *iph;
> > int proto, tot_len;
> > int nhoff;
> > + u16 frag_off;
> > int ihl;
> > int id;
> >
> > @@ -1213,6 +1214,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> >
> > id = ntohs(iph->id);
> > proto = iph->protocol;
> > + frag_off = iph->frag_off;
> >
> > /* Warning: after this point, iph might be no longer valid */
> > if (unlikely(!pskb_may_pull(skb, ihl)))
> > @@ -1233,7 +1235,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> > fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
> >
> > /* fixed ID is invalid if DF bit is not set */
> > - if (fixedid && !(iph->frag_off & htons(IP_DF)))
> > + if (fixedid && !(frag_off & htons(IP_DF)))
> > goto out;
> > }
> >
>
>
> I do not see why this patch would be needed ?
Where is iph being reloaded after that pskb_may_pull() and thus at line 1236 we
could use after free? The warning at line 1217?
1209 iph = ip_hdr(skb);
1210 ihl = iph->ihl * 4;
1211 if (ihl < sizeof(*iph))
1212 goto out;
1213
1214 id = ntohs(iph->id);
1215 proto = iph->protocol;
1216
1217 /* Warning: after this point, iph might be no longer valid */
1218 if (unlikely(!pskb_may_pull(skb, ihl)))
1219 goto out;
1220 __skb_pull(skb, ihl);
1221
1222 encap = SKB_GSO_CB(skb)->encap_level > 0;
1223 if (encap)
1224 features &= skb->dev->hw_enc_features;
1225 SKB_GSO_CB(skb)->encap_level += ihl;
1226
1227 skb_reset_transport_header(skb);
1228
1229 segs = ERR_PTR(-EPROTONOSUPPORT);
1230
1231 if (!skb->encapsulation || encap) {
1232 udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
1233 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
1234
1235 /* fixed ID is invalid if DF bit is not set */
1236 if (fixedid && !(iph->frag_off & htons(IP_DF)))
1237 goto out;
1238 }
^ permalink raw reply
* Re: stmmac ethernet in kernel 4.4: coalescing related pauses?
From: David Miller @ 2016-11-28 14:54 UTC (permalink / raw)
To: lsanfil; +Cc: pavel, peppe.cavallaro, netdev, linux-kernel
In-Reply-To: <f14255ee-7fa8-afef-23ec-fc4f9a74eec8@marvell.com>
From: Lino Sanfilippo <lsanfil@marvell.com>
Date: Mon, 28 Nov 2016 14:07:51 +0100
> Calling skb_orphan() in the xmit handler made this issue disappear.
This is not the way to handle this problem.
The solution is to free the SKBs in a timely manner after the
chip has transmitted the frame.
^ permalink raw reply
* Re: [PATCH net] net/dccp: fix use-after-free in dccp_invalid_packet
From: Eric Dumazet @ 2016-11-28 14:47 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Andrey Konovalov, Gerrit Renker, David S. Miller, dccp, netdev,
LKML, Dmitry Vyukov, Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <20161128144001.GE16426@kernel.org>
On Mon, 2016-11-28 at 11:40 -0300, Arnaldo Carvalho de Melo wrote:
> Em Mon, Nov 28, 2016 at 06:26:49AM -0800, Eric Dumazet escreveu:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > pskb_may_pull() can reallocate skb->head, we need to reload dh pointer
> > in dccp_invalid_packet() or risk use after free.
> >
> > Bug found by Andrey Konovalov using syzkaller.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Reported-by: Andrey Konovalov <andreyknvl@google.com>
>
> Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
>
> I was about to send exactly this patch, and while looking at it I think
> the patch below needs to go in as well, no? To follow the advice of that
> Warning line there :-)
>
> From: Arnaldo Carvalho de Melo <acme@redhat.com>
>
> pskb_may_pull() can reallocate skb->head, so we can't access
> iph->frag_off or risk use after free, save it to a variable and us that
> later.
>
> Cc: Andrey Konovalov <andreyknvl@google.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
>
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 5ddf5cda07f4..9462070561a3 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1198,6 +1198,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> struct iphdr *iph;
> int proto, tot_len;
> int nhoff;
> + u16 frag_off;
> int ihl;
> int id;
>
> @@ -1213,6 +1214,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
>
> id = ntohs(iph->id);
> proto = iph->protocol;
> + frag_off = iph->frag_off;
>
> /* Warning: after this point, iph might be no longer valid */
> if (unlikely(!pskb_may_pull(skb, ihl)))
> @@ -1233,7 +1235,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
> fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
>
> /* fixed ID is invalid if DF bit is not set */
> - if (fixedid && !(iph->frag_off & htons(IP_DF)))
> + if (fixedid && !(frag_off & htons(IP_DF)))
> goto out;
> }
>
I do not see why this patch would be needed ?
^ permalink raw reply
* [patch net] sched: cls_flower: remove from hashtable only in case skip sw flag is not set
From: Jiri Pirko @ 2016-11-28 14:40 UTC (permalink / raw)
To: netdev; +Cc: davem, jhs, idosch, eladr, ogerlitz, hadarh, amir
From: Jiri Pirko <jiri@mellanox.com>
Be symmetric to hashtable insert and remove filter from hashtable only
in case skip sw flag is not set.
Fixes: e69985c67c33 ("net/sched: cls_flower: Introduce support in SKIP SW flag")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
net/sched/cls_flower.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f6f40fb..641c44c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -711,8 +711,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (fold) {
- rhashtable_remove_fast(&head->ht, &fold->ht_node,
- head->ht_params);
+ if (!tc_skip_sw(fold->flags))
+ rhashtable_remove_fast(&head->ht, &fold->ht_node,
+ head->ht_params);
fl_hw_destroy_filter(tp, (unsigned long)fold);
}
@@ -739,8 +740,9 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
- rhashtable_remove_fast(&head->ht, &f->ht_node,
- head->ht_params);
+ if (!tc_skip_sw(f->flags))
+ rhashtable_remove_fast(&head->ht, &f->ht_node,
+ head->ht_params);
list_del_rcu(&f->list);
fl_hw_destroy_filter(tp, (unsigned long)f);
tcf_unbind_filter(tp, &f->res);
--
2.7.4
^ permalink raw reply related
* Re: [PATCH net] net/dccp: fix use-after-free in dccp_invalid_packet
From: Arnaldo Carvalho de Melo @ 2016-11-28 14:40 UTC (permalink / raw)
To: Eric Dumazet
Cc: Andrey Konovalov, Gerrit Renker, David S. Miller, dccp, netdev,
LKML, Dmitry Vyukov, Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <1480343209.18162.50.camel@edumazet-glaptop3.roam.corp.google.com>
Em Mon, Nov 28, 2016 at 06:26:49AM -0800, Eric Dumazet escreveu:
> From: Eric Dumazet <edumazet@google.com>
>
> pskb_may_pull() can reallocate skb->head, we need to reload dh pointer
> in dccp_invalid_packet() or risk use after free.
>
> Bug found by Andrey Konovalov using syzkaller.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
I was about to send exactly this patch, and while looking at it I think
the patch below needs to go in as well, no? To follow the advice of that
Warning line there :-)
From: Arnaldo Carvalho de Melo <acme@redhat.com>
pskb_may_pull() can reallocate skb->head, so we can't access
iph->frag_off or risk use after free, save it to a variable and us that
later.
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5ddf5cda07f4..9462070561a3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1198,6 +1198,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
struct iphdr *iph;
int proto, tot_len;
int nhoff;
+ u16 frag_off;
int ihl;
int id;
@@ -1213,6 +1214,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
id = ntohs(iph->id);
proto = iph->protocol;
+ frag_off = iph->frag_off;
/* Warning: after this point, iph might be no longer valid */
if (unlikely(!pskb_may_pull(skb, ihl)))
@@ -1233,7 +1235,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
/* fixed ID is invalid if DF bit is not set */
- if (fixedid && !(iph->frag_off & htons(IP_DF)))
+ if (fixedid && !(frag_off & htons(IP_DF)))
goto out;
}
^ permalink raw reply related
* Re: net/sctp: vmalloc allocation failure in sctp_setsockopt/xt_alloc_table_info
From: Neil Horman @ 2016-11-28 14:39 UTC (permalink / raw)
To: Andrey Konovalov
Cc: Vlad Yasevich, linux-sctp, netdev, LKML, Pablo Neira Ayuso,
Patrick McHardy, Jozsef Kadlecsik, David S. Miller,
netfilter-devel, coreteam, Dmitry Vyukov, Kostya Serebryany,
Eric Dumazet, syzkaller
In-Reply-To: <CAAeHK+xJmwF6hddS-sO-y3HQkRicwQN=ZceFWTZT2M_0h4E2hQ@mail.gmail.com>
On Mon, Nov 28, 2016 at 03:33:40PM +0100, Andrey Konovalov wrote:
> On Mon, Nov 28, 2016 at 3:13 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> > On Mon, Nov 28, 2016 at 02:00:19PM +0100, Andrey Konovalov wrote:
> >> Hi!
> >>
> >> I've got the following error report while running the syzkaller fuzzer.
> >>
> >> On commit d8e435f3ab6fea2ea324dce72b51dd7761747523 (Nov 26).
> >>
> >> A reproducer is attached.
> >>
> >> a.out: vmalloc: allocation failure, allocated 823562240 of 1427091456
> >> bytes, mode:0x24000c2(GFP_KERNEL|__GFP_HIGHMEM)
> >>
> > How much total ram do you have in this system? The call appears to be
> > attempting to allocate 1.3 Gb of data. Even using vmalloc to allow
> > discontiguous allocation, thats alot of memory, and if enough is in use already,
> > I could make the argument that this might be expected behavior.
>
> Hi Neail,
>
> I have 2 Gb.
>
That would be why. Allocating 65% of the available system memory will almost
certainly lead to OOM failures quickly.
> Just tested with 4 Gb, everything seems to be working fine.
> So I guess this is not actually a bug and allocating 1.3 Gb is OK.
>
> Thanks!
>
No problem.
Neil
> >
> > Neil
> >
> >> oom_reaper: reaped process 3810 (a.out), now anon-rss:0kB,
> >> file-rss:0kB, shmem-rss:0kB
> >> a.out invoked oom-killer:
> >> gfp_mask=0x24002c2(GFP_KERNEL|__GFP_HIGHMEM|__GFP_NOWARN), nodemask=0,
> >> order=0, oom_score_adj=0
> >> a.out cpuset=/ mems_allowed=0
> >> CPU: 0 PID: 3814 Comm: a.out Not tainted 4.9.0-rc6+ #457
> >> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> >> ffff880068667380 ffffffff81c73b14 ffff880068667710 ffff88006b469018
> >> ffff880068667718 0000000000000000 ffff880068667400 ffffffff81641a87
> >> 0000000000000000 0000000000000000 0000000000000297 ffffffff84d37280
> >> Call Trace:
> >> [< inline >] __dump_stack lib/dump_stack.c:15
> >> [<ffffffff81c73b14>] dump_stack+0xb3/0x10f lib/dump_stack.c:51
> >> [<ffffffff81641a87>] dump_header.isra.21+0x16f/0x5f5 mm/oom_kill.c:416
> >> [<ffffffff8154bad8>] oom_kill_process+0x4d8/0xab0 mm/oom_kill.c:835
> >> [<ffffffff8154c77c>] out_of_memory+0x2dc/0x1790 mm/oom_kill.c:1044
> >> [< inline >] __alloc_pages_may_oom mm/page_alloc.c:3086
> >> [<ffffffff8155afb6>] __alloc_pages_slowpath+0x1886/0x1bf0 mm/page_alloc.c:3683
> >> [<ffffffff8155b8e2>] __alloc_pages_nodemask+0x5c2/0x710 mm/page_alloc.c:3781
> >> [<ffffffff816236a4>] alloc_pages_current+0xf4/0x400 mm/mempolicy.c:2072
> >> [< inline >] alloc_pages ./include/linux/gfp.h:469
> >> [< inline >] __vmalloc_area_node mm/vmalloc.c:1631
> >> [<ffffffff815f8eab>] __vmalloc_node_range+0x33b/0x690 mm/vmalloc.c:1691
> >> [< inline >] __vmalloc_node mm/vmalloc.c:1734
> >> [< inline >] __vmalloc_node_flags mm/vmalloc.c:1748
> >> [<ffffffff815f92cb>] vmalloc+0x5b/0x70 mm/vmalloc.c:1763
> >> [<ffffffff82fd0893>] xt_alloc_table_info+0x83/0x120
> >> net/netfilter/x_tables.c:961
> >> [< inline >] do_replace net/ipv4/netfilter/ip_tables.c:1140
> >> [<ffffffff8335b420>] do_ipt_set_ctl+0x210/0x420
> >> net/ipv4/netfilter/ip_tables.c:1687
> >> [< inline >] nf_sockopt net/netfilter/nf_sockopt.c:105
> >> [<ffffffff82efdab7>] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:114
> >> [<ffffffff831be741>] ip_setsockopt+0xa1/0xb0 net/ipv4/ip_sockglue.c:1231
> >> [<ffffffff832700d5>] udp_setsockopt+0x45/0x80 net/ipv4/udp.c:2085
> >> [<ffffffff8346b31f>] ipv6_setsockopt+0x11f/0x140 net/ipv6/ipv6_sockglue.c:892
> >> [<ffffffff83a6cd5d>] sctp_setsockopt+0x15d/0x3d70 net/sctp/socket.c:3788
> >> [<ffffffff82ca40e6>] sock_common_setsockopt+0x96/0xd0 net/core/sock.c:2690
> >> [< inline >] SYSC_setsockopt net/socket.c:1757
> >> [<ffffffff82ca10c4>] SyS_setsockopt+0x154/0x240 net/socket.c:1736
> >> [<ffffffff840f2c41>] entry_SYSCALL_64_fastpath+0x1f/0xc2
> >> arch/x86/entry/entry_64.S:209
> >> CPU: 1 PID: 3810 Comm: a.out Not tainted 4.9.0-rc6+ #457
> >> Mem-Info:
> >> active_anon:1938 inactive_anon:75 isolated_anon:0
> >> active_file:14 inactive_file:30 isolated_file:4
> >> unevictable:0 dirty:0 writeback:0 unstable:0
> >> slab_reclaimable:3316 slab_unreclaimable:9767
> >> mapped:21 shmem:81 pagetables:309 bounce:0
> >> free:1 free_pcp:75 free_cma:0
> >> Node 0 active_anon:7752kB inactive_anon:300kB active_file:56kB
> >> inactive_file:120kB unevictable:0kB isolated(anon):0kB
> >> isolated(file):16kB mapped:84kB dirty:0kB writeback:0kB shmem:324kB
> >> writeback_tmp:0kB unstable:0kB pages_scanned:134 all_unreclaimable? no
> >> Node 0 DMA free:4kB min:48kB low:60kB high:72kB active_anon:0kB
> >> inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB
> >> writepending:0kB present:15992kB managed:15908kB mlocked:0kB
> >> slab_reclaimable:0kB slab_unreclaimable:8kB kernel_stack:0kB
> >> pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
> >> lowmem_reserve[]: 0 1641 1641 1641
> >> Node 0 DMA32 free:0kB min:5156kB low:6836kB high:8516kB
> >> active_anon:7752kB inactive_anon:300kB active_file:56kB
> >> inactive_file:120kB unevictable:0kB writepending:0kB present:2080760kB
> >> managed:1684640kB mlocked:0kB slab_reclaimable:13264kB
> >> slab_unreclaimable:39060kB kernel_stack:2944kB pagetables:1236kB
> >> bounce:0kB free_pcp:300kB local_pcp:120kB free_cma:0kB
> >> lowmem_reserve[]: 0 0 0 0
> >> Node 0 DMA: 0*4kB 0*8kB 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB
> >> 0*1024kB 0*2048kB 0*4096kB = 0kB
> >> Node 0 DMA32: 0*4kB 0*8kB 0*16kB 0*32kB 0*64kB 0*128kB 0*256kB 0*512kB
> >> 0*1024kB 0*2048kB 0*4096kB = 0kB
> >> Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
> >> 148 total pagecache pages
> >> 0 pages in swap cache
> >> Swap cache stats: add 0, delete 0, find 0/0
> >> Free swap = 0kB
> >> Total swap = 0kB
> >> 524188 pages RAM
> >> 0 pages HighMem/MovableOnly
> >> 99051 pages reserved
> >> [ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents
> >> oom_score_adj name
> >> 0 1767 5346 133 16 3 0 -1000 udevd
> >> 0 1876 5315 122 15 3 0 -1000 udevd
> >> 0 1877 5315 122 15 3 0 -1000 udevd
> >> 0 3541 2493 573 8 3 0 0 dhclient
> >> 0 3676 13231 171 22 3 0 0 rsyslogd
> >> 0 3725 4725 52 15 3 0 0 cron
> >> 0 3751 12490 155 28 3 0 -1000 sshd
> >> 0 3775 3694 43 13 3 0 0 getty
> >> 0 3776 3694 43 13 3 0 0 getty
> >> 0 3777 3694 42 13 3 0 0 getty
> >> 0 3778 3694 41 13 3 0 0 getty
> >> 0 3779 3694 44 13 3 0 0 getty
> >> 0 3780 3694 43 13 3 0 0 getty
> >> 0 3785 3649 44 12 3 0 0 getty
> >> 0 3797 17818 205 39 3 0 0 sshd
> >> 0 3800 4474 126 15 3 0 0 bash
> >> 0 3804 2053 22 9 3 0 0 a.out
> >> 0 3805 2053 26 9 3 0 0 a.out
> >> 0 3806 18488 0 18 3 0 0 a.out
> >
> >> // autogenerated by syzkaller (http://github.com/google/syzkaller)
> >>
> >> #ifndef __NR_mmap
> >> #define __NR_mmap 9
> >> #endif
> >> #ifndef __NR_setsockopt
> >> #define __NR_setsockopt 54
> >> #endif
> >> #ifndef __NR_syz_fuse_mount
> >> #define __NR_syz_fuse_mount 1000004
> >> #endif
> >> #ifndef __NR_socket
> >> #define __NR_socket 41
> >> #endif
> >> #ifndef __NR_syz_emit_ethernet
> >> #define __NR_syz_emit_ethernet 1000006
> >> #endif
> >> #ifndef __NR_syz_fuseblk_mount
> >> #define __NR_syz_fuseblk_mount 1000005
> >> #endif
> >> #ifndef __NR_syz_open_dev
> >> #define __NR_syz_open_dev 1000002
> >> #endif
> >> #ifndef __NR_syz_open_pts
> >> #define __NR_syz_open_pts 1000003
> >> #endif
> >> #ifndef __NR_syz_test
> >> #define __NR_syz_test 1000001
> >> #endif
> >>
> >> #define SYZ_SANDBOX_NONE 1
> >> #define SYZ_REPEAT 1
> >>
> >> #define _GNU_SOURCE
> >>
> >> #include <sys/ioctl.h>
> >> #include <sys/mount.h>
> >> #include <sys/prctl.h>
> >> #include <sys/resource.h>
> >> #include <sys/socket.h>
> >> #include <sys/stat.h>
> >> #include <sys/syscall.h>
> >> #include <sys/time.h>
> >> #include <sys/types.h>
> >> #include <sys/wait.h>
> >>
> >> #include <linux/capability.h>
> >> #include <linux/if.h>
> >> #include <linux/if_tun.h>
> >> #include <linux/sched.h>
> >> #include <net/if_arp.h>
> >>
> >> #include <assert.h>
> >> #include <dirent.h>
> >> #include <errno.h>
> >> #include <fcntl.h>
> >> #include <grp.h>
> >> #include <pthread.h>
> >> #include <setjmp.h>
> >> #include <signal.h>
> >> #include <stdarg.h>
> >> #include <stddef.h>
> >> #include <stdint.h>
> >> #include <stdio.h>
> >> #include <stdlib.h>
> >> #include <string.h>
> >> #include <unistd.h>
> >>
> >> const int kFailStatus = 67;
> >> const int kErrorStatus = 68;
> >> const int kRetryStatus = 69;
> >>
> >> __attribute__((noreturn)) void fail(const char* msg, ...)
> >> {
> >> int e = errno;
> >> fflush(stdout);
> >> va_list args;
> >> va_start(args, msg);
> >> vfprintf(stderr, msg, args);
> >> va_end(args);
> >> fprintf(stderr, " (errno %d)\n", e);
> >> exit(kFailStatus);
> >> }
> >>
> >> __attribute__((noreturn)) void exitf(const char* msg, ...)
> >> {
> >> int e = errno;
> >> fflush(stdout);
> >> va_list args;
> >> va_start(args, msg);
> >> vfprintf(stderr, msg, args);
> >> va_end(args);
> >> fprintf(stderr, " (errno %d)\n", e);
> >> exit(kRetryStatus);
> >> }
> >>
> >> static int flag_debug;
> >>
> >> void debug(const char* msg, ...)
> >> {
> >> if (!flag_debug)
> >> return;
> >> va_list args;
> >> va_start(args, msg);
> >> vfprintf(stdout, msg, args);
> >> va_end(args);
> >> fflush(stdout);
> >> }
> >>
> >> __thread int skip_segv;
> >> __thread jmp_buf segv_env;
> >>
> >> static void segv_handler(int sig, siginfo_t* info, void* uctx)
> >> {
> >> if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED))
> >> _longjmp(segv_env, 1);
> >> exit(sig);
> >> }
> >>
> >> static void install_segv_handler()
> >> {
> >> struct sigaction sa;
> >> memset(&sa, 0, sizeof(sa));
> >> sa.sa_sigaction = segv_handler;
> >> sa.sa_flags = SA_NODEFER | SA_SIGINFO;
> >> sigaction(SIGSEGV, &sa, NULL);
> >> sigaction(SIGBUS, &sa, NULL);
> >> }
> >>
> >> #define NONFAILING(...) \
> >> { \
> >> __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
> >> if (_setjmp(segv_env) == 0) { \
> >> __VA_ARGS__; \
> >> } \
> >> __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
> >> }
> >>
> >> static uintptr_t syz_open_dev(uintptr_t a0, uintptr_t a1, uintptr_t a2)
> >> {
> >> if (a0 == 0xc || a0 == 0xb) {
> >> char buf[128];
> >> sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block",
> >> (uint8_t)a1, (uint8_t)a2);
> >> return open(buf, O_RDWR, 0);
> >> } else {
> >> char buf[1024];
> >> char* hash;
> >> strncpy(buf, (char*)a0, sizeof(buf));
> >> buf[sizeof(buf) - 1] = 0;
> >> while ((hash = strchr(buf, '#'))) {
> >> *hash = '0' + (char)(a1 % 10);
> >> a1 /= 10;
> >> }
> >> return open(buf, a2, 0);
> >> }
> >> }
> >>
> >> static uintptr_t syz_open_pts(uintptr_t a0, uintptr_t a1)
> >> {
> >> int ptyno = 0;
> >> if (ioctl(a0, TIOCGPTN, &ptyno))
> >> return -1;
> >> char buf[128];
> >> sprintf(buf, "/dev/pts/%d", ptyno);
> >> return open(buf, a1, 0);
> >> }
> >>
> >> static uintptr_t syz_fuse_mount(uintptr_t a0, uintptr_t a1,
> >> uintptr_t a2, uintptr_t a3,
> >> uintptr_t a4, uintptr_t a5)
> >> {
> >> uint64_t target = a0;
> >> uint64_t mode = a1;
> >> uint64_t uid = a2;
> >> uint64_t gid = a3;
> >> uint64_t maxread = a4;
> >> uint64_t flags = a5;
> >>
> >> int fd = open("/dev/fuse", O_RDWR);
> >> if (fd == -1)
> >> return fd;
> >> char buf[1024];
> >> sprintf(buf, "fd=%d,user_id=%ld,group_id=%ld,rootmode=0%o", fd,
> >> (long)uid, (long)gid, (unsigned)mode & ~3u);
> >> if (maxread != 0)
> >> sprintf(buf + strlen(buf), ",max_read=%ld", (long)maxread);
> >> if (mode & 1)
> >> strcat(buf, ",default_permissions");
> >> if (mode & 2)
> >> strcat(buf, ",allow_other");
> >> syscall(SYS_mount, "", target, "fuse", flags, buf);
> >> return fd;
> >> }
> >>
> >> static uintptr_t syz_fuseblk_mount(uintptr_t a0, uintptr_t a1,
> >> uintptr_t a2, uintptr_t a3,
> >> uintptr_t a4, uintptr_t a5,
> >> uintptr_t a6, uintptr_t a7)
> >> {
> >> uint64_t target = a0;
> >> uint64_t blkdev = a1;
> >> uint64_t mode = a2;
> >> uint64_t uid = a3;
> >> uint64_t gid = a4;
> >> uint64_t maxread = a5;
> >> uint64_t blksize = a6;
> >> uint64_t flags = a7;
> >>
> >> int fd = open("/dev/fuse", O_RDWR);
> >> if (fd == -1)
> >> return fd;
> >> if (syscall(SYS_mknodat, AT_FDCWD, blkdev, S_IFBLK, makedev(7, 199)))
> >> return fd;
> >> char buf[256];
> >> sprintf(buf, "fd=%d,user_id=%ld,group_id=%ld,rootmode=0%o", fd,
> >> (long)uid, (long)gid, (unsigned)mode & ~3u);
> >> if (maxread != 0)
> >> sprintf(buf + strlen(buf), ",max_read=%ld", (long)maxread);
> >> if (blksize != 0)
> >> sprintf(buf + strlen(buf), ",blksize=%ld", (long)blksize);
> >> if (mode & 1)
> >> strcat(buf, ",default_permissions");
> >> if (mode & 2)
> >> strcat(buf, ",allow_other");
> >> syscall(SYS_mount, blkdev, target, "fuseblk", flags, buf);
> >> return fd;
> >> }
> >>
> >> static uintptr_t execute_syscall(int nr, uintptr_t a0, uintptr_t a1,
> >> uintptr_t a2, uintptr_t a3,
> >> uintptr_t a4, uintptr_t a5,
> >> uintptr_t a6, uintptr_t a7,
> >> uintptr_t a8)
> >> {
> >> switch (nr) {
> >> default:
> >> return syscall(nr, a0, a1, a2, a3, a4, a5);
> >> case __NR_syz_test:
> >> return 0;
> >> case __NR_syz_open_dev:
> >> return syz_open_dev(a0, a1, a2);
> >> case __NR_syz_open_pts:
> >> return syz_open_pts(a0, a1);
> >> case __NR_syz_fuse_mount:
> >> return syz_fuse_mount(a0, a1, a2, a3, a4, a5);
> >> case __NR_syz_fuseblk_mount:
> >> return syz_fuseblk_mount(a0, a1, a2, a3, a4, a5, a6, a7);
> >> }
> >> }
> >>
> >> static void setup_main_process()
> >> {
> >> struct sigaction sa;
> >> memset(&sa, 0, sizeof(sa));
> >> sa.sa_handler = SIG_IGN;
> >> syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
> >> syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
> >> install_segv_handler();
> >>
> >> char tmpdir_template[] = "./syzkaller.XXXXXX";
> >> char* tmpdir = mkdtemp(tmpdir_template);
> >> if (!tmpdir)
> >> fail("failed to mkdtemp");
> >> if (chmod(tmpdir, 0777))
> >> fail("failed to chmod");
> >> if (chdir(tmpdir))
> >> fail("failed to chdir");
> >> }
> >>
> >> static void loop();
> >>
> >> static void sandbox_common()
> >> {
> >> prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> >> setpgrp();
> >> setsid();
> >>
> >> struct rlimit rlim;
> >> rlim.rlim_cur = rlim.rlim_max = 128 << 20;
> >> setrlimit(RLIMIT_AS, &rlim);
> >> rlim.rlim_cur = rlim.rlim_max = 1 << 20;
> >> setrlimit(RLIMIT_FSIZE, &rlim);
> >> rlim.rlim_cur = rlim.rlim_max = 1 << 20;
> >> setrlimit(RLIMIT_STACK, &rlim);
> >> rlim.rlim_cur = rlim.rlim_max = 0;
> >> setrlimit(RLIMIT_CORE, &rlim);
> >>
> >> unshare(CLONE_NEWNS);
> >> unshare(CLONE_NEWIPC);
> >> unshare(CLONE_IO);
> >> }
> >>
> >> static int do_sandbox_none()
> >> {
> >> int pid = fork();
> >> if (pid)
> >> return pid;
> >> sandbox_common();
> >> loop();
> >> exit(1);
> >> }
> >>
> >> static void remove_dir(const char* dir)
> >> {
> >> DIR* dp;
> >> struct dirent* ep;
> >> int iter = 0;
> >> int i;
> >> retry:
> >> dp = opendir(dir);
> >> if (dp == NULL) {
> >> if (errno == EMFILE) {
> >> exitf("opendir(%s) failed due to NOFILE, exiting");
> >> }
> >> exitf("opendir(%s) failed", dir);
> >> }
> >> while ((ep = readdir(dp))) {
> >> if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
> >> continue;
> >> char filename[FILENAME_MAX];
> >> snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
> >> struct stat st;
> >> if (lstat(filename, &st))
> >> exitf("lstat(%s) failed", filename);
> >> if (S_ISDIR(st.st_mode)) {
> >> remove_dir(filename);
> >> continue;
> >> }
> >> for (i = 0;; i++) {
> >> debug("unlink(%s)\n", filename);
> >> if (unlink(filename) == 0)
> >> break;
> >> if (errno == EROFS) {
> >> debug("ignoring EROFS\n");
> >> break;
> >> }
> >> if (errno != EBUSY || i > 100)
> >> exitf("unlink(%s) failed", filename);
> >> debug("umount(%s)\n", filename);
> >> if (umount2(filename, MNT_DETACH))
> >> exitf("umount(%s) failed", filename);
> >> }
> >> }
> >> closedir(dp);
> >> for (i = 0;; i++) {
> >> debug("rmdir(%s)\n", dir);
> >> if (rmdir(dir) == 0)
> >> break;
> >> if (i < 100) {
> >> if (errno == EROFS) {
> >> debug("ignoring EROFS\n");
> >> break;
> >> }
> >> if (errno == EBUSY) {
> >> debug("umount(%s)\n", dir);
> >> if (umount2(dir, MNT_DETACH))
> >> exitf("umount(%s) failed", dir);
> >> continue;
> >> }
> >> if (errno == ENOTEMPTY) {
> >> if (iter < 100) {
> >> iter++;
> >> goto retry;
> >> }
> >> }
> >> }
> >> exitf("rmdir(%s) failed", dir);
> >> }
> >> }
> >>
> >> static uint64_t current_time_ms()
> >> {
> >> struct timespec ts;
> >>
> >> if (clock_gettime(CLOCK_MONOTONIC, &ts))
> >> fail("clock_gettime failed");
> >> return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
> >> }
> >>
> >> static void test();
> >>
> >> void loop()
> >> {
> >> int iter;
> >> for (iter = 0;; iter++) {
> >> char cwdbuf[256];
> >> sprintf(cwdbuf, "./%d", iter);
> >> if (mkdir(cwdbuf, 0777))
> >> fail("failed to mkdir");
> >> int pid = fork();
> >> if (pid < 0)
> >> fail("clone failed");
> >> if (pid == 0) {
> >> prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> >> setpgrp();
> >> if (chdir(cwdbuf))
> >> fail("failed to chdir");
> >> test();
> >> exit(0);
> >> }
> >> int status = 0;
> >> uint64_t start = current_time_ms();
> >> for (;;) {
> >> int res = waitpid(pid, &status, __WALL | WNOHANG);
> >> int errno0 = errno;
> >> if (res == pid)
> >> break;
> >> usleep(1000);
> >> if (current_time_ms() - start > 5 * 1000) {
> >> kill(-pid, SIGKILL);
> >> kill(pid, SIGKILL);
> >> waitpid(pid, &status, __WALL);
> >> break;
> >> }
> >> }
> >> remove_dir(cwdbuf);
> >> }
> >> }
> >>
> >> long r[5];
> >> void* thr(void* arg)
> >> {
> >> switch ((long)arg) {
> >> case 0:
> >> r[0] =
> >> execute_syscall(__NR_mmap, 0x20000000ul, 0xa000ul, 0x3ul,
> >> 0x32ul, 0xfffffffffffffffful, 0x0ul, 0, 0, 0);
> >> break;
> >> case 1:
> >> r[1] = execute_syscall(__NR_socket, 0xaul, 0x5ul, 0x84ul, 0, 0, 0,
> >> 0, 0, 0);
> >> break;
> >> case 2:
> >> r[2] = execute_syscall(__NR_socket, 0x1ful, 0x3ul, 0x6ul, 0, 0, 0,
> >> 0, 0, 0);
> >> break;
> >> case 3:
> >> NONFAILING(memcpy(
> >> (void*)0x20009000,
> >> "\x83\x15\xf6\xdb\x47\x14\xae\xe2\x8d\xb8\x4d\xb9\x0f\x32\xe7"
> >> "\xf5\xbc\xa6\xae\x9a\x2f\x19\xed\xf0\x75\x6a\x0b\xf0\x00\xe9"
> >> "\xe1\x0e\xb4\xa5\x19\x08\x88\xfc\x8b\x2d\xe2\x9a\x0f\x55\x00"
> >> "\x00\x00\x00\x00\x08\x27\xab\x8e\x7d\xcb\xcc\x15\x4e\x79\xe2"
> >> "\xd9\xca\x15\xc3\x66\xbd\x44\xa8\x53\x1f\xda\xab\xce\x98\x39"
> >> "\x40\x4e\x75\x57\xfd\x57\xc0\x01\x0b\xb0",
> >> 85));
> >> r[4] = execute_syscall(__NR_setsockopt, r[1], 0x0ul, 0x40ul,
> >> 0x20009000ul, 0x55ul, 0, 0, 0, 0);
> >> break;
> >> }
> >> return 0;
> >> }
> >>
> >> void test()
> >> {
> >> long i;
> >> pthread_t th[8];
> >>
> >> memset(r, -1, sizeof(r));
> >> srand(getpid());
> >> for (i = 0; i < 4; i++) {
> >> pthread_create(&th[i], 0, thr, (void*)i);
> >> usleep(10000);
> >> }
> >> for (i = 0; i < 4; i++) {
> >> pthread_create(&th[4 + i], 0, thr, (void*)i);
> >> if (rand() % 2)
> >> usleep(rand() % 10000);
> >> }
> >> usleep(100000);
> >> }
> >>
> >> int main()
> >> {
> >> setup_main_process();
> >> int pid = do_sandbox_none();
> >> int status = 0;
> >> while (waitpid(pid, &status, __WALL) != pid) {
> >> }
> >> return 0;
> >> }
> >
>
^ permalink raw reply
* Re: [PATCH] stmmac ethernet: remove cut & paste code
From: Pavel Machek @ 2016-11-28 14:35 UTC (permalink / raw)
To: Joe Perches
Cc: peppe.cavallaro, netdev, kernel list, ezequiel, sonic.zhang,
fabrice.gasnier
In-Reply-To: <1480343068.14294.5.camel@perches.com>
[-- Attachment #1: Type: text/plain, Size: 2695 bytes --]
On Mon 2016-11-28 06:24:28, Joe Perches wrote:
> On Mon, 2016-11-28 at 12:50 +0100, Pavel Machek wrote:
> > On Thu 2016-11-24 14:27:13, Joe Perches wrote:
> > > On Thu, 2016-11-24 at 22:44 +0100, Pavel Machek wrote:
> > > > On Thu 2016-11-24 12:05:25, Joe Perches wrote:
> > > > > On Thu, 2016-11-24 at 12:05 +0100, Pavel Machek wrote:
> > > > > > Remove duplicate code from _tx routines.
> > > > >
> > > > > trivia:
> > > > >
> > > > > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> > > > >
> > > > > []
> > > > > > @@ -1960,6 +1960,38 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
> > > > > > }
> > > > > > }
> > > > > >
> > > > > > +static void stmmac_xmit_common(struct sk_buff *skb, struct net_device *dev, int nfrags, struct dma_desc *desc)
> > > > > > +{
> > > > > > + struct stmmac_priv *priv = netdev_priv(dev);
> > > > > > +
> > > > > > + if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
> > > > > > + if (netif_msg_hw(priv))
> > > > > > + pr_debug("%s: stop transmitted packets\n", __func__);
> > > > >
> > > > > netif_dbg(priv, hw, dev, "%s: stop transmitted packets\n",
> > > > > __func__);
> > > >
> > > > Not now. Modifying the code while de-duplicating would be bad idea.
> > >
> > > Too many people think overly granular patches are the
> > > best and only way to make changes.
> > > Deduplication and consolidation can happen simultaneously.
> >
> > Can, but should not at this point. Please take a look at the driver in
> > question before commenting on trivial printk style.
>
> I had.
>
> It's perfectly acceptable and already uses netif_<level> properly.
>
> This consolidation now introduces the _only_ instance where it is
> now improperly using a netif_msg_<type> then single pr_<level>
> function sequence that should be consolidated into netif_dbg.
> Every other use of netif_msg_<level> then either emits multiple
> lines or is used in an if/else.
Are you looking at right driver? I don't see single use of
netif_msg_<level>, but see this at stmmac_main.c:756. Code is actually
pretty consistent using pr_*.
if (netif_msg_link(priv))
pr_warn("%s: Speed (%d) not 10/100\n",
dev->name, phydev->speed);
Anyway, I'm moving code around, if you want to do trivial cleanups, do
them yourself.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox