Netdev List
 help / color / mirror / Atom feed
* [PATCH 1/1] net: rds: add service level support in rds-info
From: Zhu Yanjun @ 2019-08-20  0:52 UTC (permalink / raw)
  To: davem, yanjun.zhu, netdev, linux-rdma, rds-devel

From IB specification 7.6.5 SERVICE LEVEL, Service Level (SL)
is used to identify different flows within an IBA subnet.
It is carried in the local route header of the packet.

Before this commit, run "rds-info -I". The output is as
below:
"
RDS IB Connections:
 LocalAddr  RemoteAddr Tos SL  LocalDev               RemoteDev
192.2.95.3  192.2.95.1  2   0  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  1   0  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  0   0  fe80::21:28:1a:39  fe80::21:28:10:b9
"
After this commit, the output is as below:
"
RDS IB Connections:
 LocalAddr  RemoteAddr Tos SL  LocalDev               RemoteDev
192.2.95.3  192.2.95.1  2   2  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  1   1  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  0   0  fe80::21:28:1a:39  fe80::21:28:10:b9
"

The commit fe3475af3bdf ("net: rds: add per rds connection cache
statistics") adds cache_allocs in struct rds_info_rdma_connection
as below:
struct rds_info_rdma_connection {
...
        __u32           rdma_mr_max;
        __u32           rdma_mr_size;
        __u8            tos;
        __u32           cache_allocs;
 };
The peer struct in rds-tools of struct rds_info_rdma_connection is as
below:
struct rds_info_rdma_connection {
...
        uint32_t        rdma_mr_max;
        uint32_t        rdma_mr_size;
        uint8_t         tos;
        uint8_t         sl;
        uint32_t        cache_allocs;
};
The difference between userspace and kernel is the member variable sl.
In kernel struct, the member variable sl is missing. This will introduce
risks. So it is necessary to use this commit to avoid this risk.

Fixes: fe3475af3bdf ("net: rds: add per rds connection cache statistics")
CC: Joe Jin <joe.jin@oracle.com>
CC: JUNXIAO_BI <junxiao.bi@oracle.com>
Suggested-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
---
 include/uapi/linux/rds.h |    2 ++
 net/rds/ib.c             |   16 ++++++++++------
 net/rds/ib.h             |    1 +
 net/rds/ib_cm.c          |    3 +++
 net/rds/rdma_transport.c |   10 ++++++++--
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index fd6b5f6..cba368e 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -250,6 +250,7 @@ struct rds_info_rdma_connection {
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
 	__u8		tos;
+	__u8		sl;
 	__u32		cache_allocs;
 };
 
@@ -265,6 +266,7 @@ struct rds6_info_rdma_connection {
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
 	__u8		tos;
+	__u8		sl;
 	__u32		cache_allocs;
 };
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ec05d91..45acab2 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -291,7 +291,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 				    void *buffer)
 {
 	struct rds_info_rdma_connection *iinfo = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -301,15 +301,16 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
-	iinfo->tos = conn->c_tos;
+	if (ic) {
+		iinfo->tos = conn->c_tos;
+		iinfo->sl = ic->i_sl;
+	}
 
 	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
 	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
-
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
 			       (union ib_gid *)&iinfo->dst_gid);
 
@@ -329,7 +330,7 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 				     void *buffer)
 {
 	struct rds6_info_rdma_connection *iinfo6 = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -337,6 +338,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo6->src_addr = conn->c_laddr;
 	iinfo6->dst_addr = conn->c_faddr;
+	if (ic) {
+		iinfo6->tos = conn->c_tos;
+		iinfo6->sl = ic->i_sl;
+	}
 
 	memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
 	memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
@@ -344,7 +349,6 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
 			       (union ib_gid *)&iinfo6->dst_gid);
 		rds_ibdev = ic->rds_ibdev;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 303c6ee..f2b558e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -220,6 +220,7 @@ struct rds_ib_connection {
 	/* Send/Recv vectors */
 	int			i_scq_vector;
 	int			i_rcq_vector;
+	u8			i_sl;
 };
 
 /* This assumes that atomic_t is at least 32 bits */
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index fddaa09..233f136 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -152,6 +152,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 		  RDS_PROTOCOL_MINOR(conn->c_version),
 		  ic->i_flowctl ? ", flow control" : "");
 
+	/* receive sl from the peer */
+	ic->i_sl = ic->i_cm_id->route.path_rec->sl;
+
 	atomic_set(&ic->i_cq_quiesce, 0);
 
 	/* Init rings and fill recv. this needs to wait until protocol
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index ff74c4b..28668ad 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -43,6 +43,9 @@
 static struct rdma_cm_id *rds6_rdma_listen_id;
 #endif
 
+/* Per IB specification 7.7.3, service level is a 4-bit field. */
+#define TOS_TO_SL(tos)		((tos) & 0xF)
+
 static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 					 struct rdma_cm_event *event,
 					 bool isv6)
@@ -97,10 +100,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 			struct rds_ib_connection *ibic;
 
 			ibic = conn->c_transport_data;
-			if (ibic && ibic->i_cm_id == cm_id)
+			if (ibic && ibic->i_cm_id == cm_id) {
+				cm_id->route.path_rec[0].sl =
+					TOS_TO_SL(conn->c_tos);
 				ret = trans->cm_initiate_connect(cm_id, isv6);
-			else
+			} else {
 				rds_conn_drop(conn);
+			}
 		}
 		break;
 
-- 
1.7.1


^ permalink raw reply related

* [PATCH 1/1] netfilter: nf_tables: fib: Drop IPV6 packages if IPv6 is disabled on boot
From: Leonardo Bras @ 2019-08-20  0:58 UTC (permalink / raw)
  To: netfilter-devel, coreteam, netdev, linux-kernel
  Cc: Leonardo Bras, Pablo Neira Ayuso, Jozsef Kadlecsik,
	Florian Westphal, David S. Miller

If IPv6 is disabled on boot (ipv6.disable=1), but nft_fib_inet ends up
dealing with an IPv6 packet, it causes a kernel panic in
fib6_node_lookup_1(), crashing in bad_page_fault.

The panic is caused by trying to dereference a very low address (0x38
in ppc64le), due to ipv6.fib6_main_tbl = NULL.
BUG: Kernel NULL pointer dereference at 0x00000038

Fix this behavior by dropping IPv6 packets if !ipv6_mod_enabled().

Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
---
 net/netfilter/nft_fib_inet.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
index 465432e0531b..0017afab3c51 100644
--- a/net/netfilter/nft_fib_inet.c
+++ b/net/netfilter/nft_fib_inet.c
@@ -2,6 +2,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/ipv6.h>
 #include <linux/module.h>
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
@@ -28,6 +29,8 @@ static void nft_fib_inet_eval(const struct nft_expr *expr,
 		}
 		break;
 	case NFPROTO_IPV6:
+		if (!ipv6_mod_enabled())
+			break;
 		switch (priv->result) {
 		case NFT_FIB_RESULT_OIF:
 		case NFT_FIB_RESULT_OIFNAME:
-- 
2.20.1


^ permalink raw reply related

* [PATCH] ipvs: change type of delta and previous_delta in ip_vs_seq.
From: zhang kai @ 2019-08-20  0:37 UTC (permalink / raw)
  To: wensong, horms, ja, davem, kuznet, yoshfuji; +Cc: lvs-devel, netdev

In NAT forwarding mode, applications may decrease the size of packets,
and TCP sequences will get smaller, so both of these variables will be
negative values in this case.

Signed-off-by: zhang kai <zhangkaiheb@126.com>
---
 include/net/ip_vs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3759167f91f5..de7e75063c7c 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -346,8 +346,8 @@ enum ip_vs_sctp_states {
  */
 struct ip_vs_seq {
 	__u32			init_seq;	/* Add delta from this seq */
-	__u32			delta;		/* Delta in sequence numbers */
-	__u32			previous_delta;	/* Delta in sequence numbers
+	__s32			delta;		/* Delta in sequence numbers */
+	__s32			previous_delta;	/* Delta in sequence numbers
 						 * before last resized pkt */
 };
 
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH] sock: fix potential memory leak in proto_register()
From: David Miller @ 2019-08-20  1:12 UTC (permalink / raw)
  To: zhang.lin16
  Cc: ast, daniel, kafai, songliubraving, yhs, willemb, edumazet,
	deepa.kernel, arnd, dh.herrmann, gnault, netdev, linux-kernel,
	bpf, xue.zhihong, wang.yi59, jiang.xuexin
In-Reply-To: <1566178556-46071-1-git-send-email-zhang.lin16@zte.com.cn>

From: zhanglin <zhang.lin16@zte.com.cn>
Date: Mon, 19 Aug 2019 09:35:56 +0800

> If protocols registered exceeded PROTO_INUSE_NR, prot will be
> added to proto_list, but no available bit left for prot in
> proto_inuse_idx.
> 
> Signed-off-by: zhanglin <zhang.lin16@zte.com.cn>

This won't build with CONFIG_PROC_FS disabled.

^ permalink raw reply

* Re: [PATCH net-next] r8152: fix accessing skb after napi_gro_receive
From: David Miller @ 2019-08-20  1:13 UTC (permalink / raw)
  To: hayeswang; +Cc: netdev, nic_swsd, linux-kernel
In-Reply-To: <1394712342-15778-302-Taiwan-albertk@realtek.com>

From: Hayes Wang <hayeswang@realtek.com>
Date: Mon, 19 Aug 2019 11:15:19 +0800

> Fix accessing skb after napi_gro_receive which is caused by
> commit 47922fcde536 ("r8152: support skb_add_rx_frag").
> 
> Fixes: 47922fcde536 ("r8152: support skb_add_rx_frag")
> Signed-off-by: Hayes Wang <hayeswang@realtek.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH] Kconfig: Fix the reference to the IDT77105 Phy driver in the description of ATM_NICSTAR_USE_IDT77105
From: David Miller @ 2019-08-20  1:15 UTC (permalink / raw)
  To: christophe.jaillet
  Cc: 3chas3, linux-atm-general, netdev, linux-kernel, kernel-janitors
In-Reply-To: <20190819050425.6119-1-christophe.jaillet@wanadoo.fr>

From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 19 Aug 2019 07:04:25 +0200

> This should be IDT77105, not IDT77015.
> 
> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>

Applied.

^ permalink raw reply

* Re: [PATCH net] nfp: flower: verify that block cb is not busy before binding
From: David Miller @ 2019-08-20  1:16 UTC (permalink / raw)
  To: vladbu; +Cc: netdev, jhs, xiyou.wangcong, jiri, jakub.kicinski, pablo
In-Reply-To: <20190819073304.9419-1-vladbu@mellanox.com>

From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 19 Aug 2019 10:33:04 +0300

> When processing FLOW_BLOCK_BIND command on indirect block, check that flow
> block cb is not busy.
> 
> Fixes: 0d4fd02e7199 ("net: flow_offload: add flow_block_cb_is_busy() and use it")
> Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>

Applied.

^ permalink raw reply

* general protection fault in xsk_poll
From: syzbot @ 2019-08-20  1:18 UTC (permalink / raw)
  To: ast, bjorn.topel, bpf, daniel, davem, hawk, jakub.kicinski,
	john.fastabend, jonathan.lemon, kafai, linux-kernel,
	magnus.karlsson, netdev, songliubraving, syzkaller-bugs,
	xdp-newbies, yhs

Hello,

syzbot found the following crash on:

HEAD commit:    da657043 Add linux-next specific files for 20190819
git tree:       linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=16af124c600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=739a9b3ab3d8c770
dashboard link: https://syzkaller.appspot.com/bug?extid=c82697e3043781e08802
compiler:       gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=109e1922600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1445bf02600000

The bug was bisected to:

commit 77cd0d7b3f257fd0e3096b4fdcff1a7d38e99e10
Author: Magnus Karlsson <magnus.karlsson@intel.com>
Date:   Wed Aug 14 07:27:17 2019 +0000

     xsk: add support for need_wakeup flag in AF_XDP rings

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=15e1ea4c600000
final crash:    https://syzkaller.appspot.com/x/report.txt?x=17e1ea4c600000
console output: https://syzkaller.appspot.com/x/log.txt?x=13e1ea4c600000

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com
Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP  
rings")

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 7959 Comm: syz-executor611 Not tainted 5.3.0-rc5-next-20190819  
#68
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
RIP: 0010:xsk_poll+0x95/0x540 net/xdp/xsk.c:386
Code: 80 3c 02 00 0f 85 70 04 00 00 4c 8b a3 88 04 00 00 48 b8 00 00 00 00  
00 fc ff df 49 8d bc 24 96 00 00 00 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48  
89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 bf 03 00 00
RSP: 0018:ffff8880926f7850 EFLAGS: 00010207
RAX: dffffc0000000000 RBX: ffff88809a141700 RCX: ffffffff859b07aa
RDX: 0000000000000012 RSI: ffffffff859b07c4 RDI: 0000000000000096
RBP: ffff8880926f7880 R08: ffff88809698a580 R09: ffffed1013428329
R10: ffffed1013428328 R11: ffff88809a141947 R12: 0000000000000000
R13: 0000000000000304 R14: ffff888095d4d840 R15: ffff888092bdd020
FS:  0000555557529880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000280 CR3: 0000000098281000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  sock_poll+0x15e/0x480 net/socket.c:1256
  vfs_poll include/linux/poll.h:90 [inline]
  do_pollfd fs/select.c:859 [inline]
  do_poll fs/select.c:907 [inline]
  do_sys_poll+0x7c2/0xde0 fs/select.c:1001
  __do_sys_ppoll fs/select.c:1101 [inline]
  __se_sys_ppoll fs/select.c:1081 [inline]
  __x64_sys_ppoll+0x259/0x310 fs/select.c:1081
  do_syscall_64+0xfa/0x760 arch/x86/entry/common.c:290
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x440159
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffd9fbd16e8 EFLAGS: 00000246 ORIG_RAX: 000000000000010f
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440159
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000020000280
RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004019e0
R13: 0000000000401a70 R14: 0000000000000000 R15: 0000000000000000
Modules linked in:
---[ end trace da907175426b4065 ]---
RIP: 0010:xsk_poll+0x95/0x540 net/xdp/xsk.c:386
Code: 80 3c 02 00 0f 85 70 04 00 00 4c 8b a3 88 04 00 00 48 b8 00 00 00 00  
00 fc ff df 49 8d bc 24 96 00 00 00 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48  
89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 bf 03 00 00
RSP: 0018:ffff8880926f7850 EFLAGS: 00010207
RAX: dffffc0000000000 RBX: ffff88809a141700 RCX: ffffffff859b07aa
RDX: 0000000000000012 RSI: ffffffff859b07c4 RDI: 0000000000000096
RBP: ffff8880926f7880 R08: ffff88809698a580 R09: ffffed1013428329
R10: ffffed1013428328 R11: ffff88809a141947 R12: 0000000000000000
R13: 0000000000000304 R14: ffff888095d4d840 R15: ffff888092bdd020
FS:  0000555557529880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000280 CR3: 0000000098281000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches

^ permalink raw reply

* Re: [PATCH net-next 0/2] Fix problems with using ns plugin
From: David Miller @ 2019-08-20  1:20 UTC (permalink / raw)
  To: vladbu
  Cc: netdev, jhs, lucasb, mrv, shuah, batuhanosmantaskaya, dcaratti,
	marcelo.leitner
In-Reply-To: <20190819075208.12240-1-vladbu@mellanox.com>

From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 19 Aug 2019 10:52:06 +0300

> Recent changes to plugin architecture broke some of the tests when running tdc
> without specifying a test group. Fix tests incompatible with ns plugin and
> modify tests to not reuse interface name of ns veth interface for dummy
> interface.

Series applied.

^ permalink raw reply

* Re: [PATCH bpf-next 4/5] libbpf: add bpf_btf_get_next_id() to cycle through BTF objects
From: Alexei Starovoitov @ 2019-08-20  1:21 UTC (permalink / raw)
  To: Quentin Monnet
  Cc: Alexei Starovoitov, Daniel Borkmann, bpf, netdev, oss-drivers
In-Reply-To: <20190815150019.8523-5-quentin.monnet@netronome.com>

On Thu, Aug 15, 2019 at 04:00:18PM +0100, Quentin Monnet wrote:
> Add an API function taking a BTF object id and providing the id of the
> next BTF object in the kernel. This can be used to list all BTF objects
> loaded on the system.
> 
> Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
..
> +
> +LIBBPF_0.0.5 {
> +	global:
> +		bpf_btf_get_next_id;
> +} LIBBPF_0.0.4;

please rebase.
The rest looks great.


^ permalink raw reply

* Re: [PATCH][net-next] net: remove empty inet_exit_net
From: David Miller @ 2019-08-20  1:23 UTC (permalink / raw)
  To: lirongqing; +Cc: netdev
In-Reply-To: <1566216315-18506-1-git-send-email-lirongqing@baidu.com>

From: Li RongQing <lirongqing@baidu.com>
Date: Mon, 19 Aug 2019 20:05:15 +0800

> Pointer members of an object with static storage duration, if not
> explicitly initialized, will be initialized to a NULL pointer. The
> net namespace API checks if this pointer is not NULL before using it,
> so it is safe to remove the function.
> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>

Applied.

^ permalink raw reply

* Re: [PATCH v3] tun: fix use-after-free when register netdev failed
From: David Miller @ 2019-08-20  1:25 UTC (permalink / raw)
  To: yangyingliang; +Cc: netdev, jasowang, eric.dumazet, xiyou.wangcong, weiyongjun1
In-Reply-To: <1566221479-16094-1-git-send-email-yangyingliang@huawei.com>

From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 19 Aug 2019 21:31:19 +0800

> Call tun_attach() after register_netdevice() to make sure tfile->tun
> is not published until the netdevice is registered. So the read/write
> thread can not use the tun pointer that may be freed by free_netdev().
> (The tun and dev pointer are allocated by alloc_netdev_mqs(), they can
> be freed by netdev_freemem().)

register_netdevice() must always be the last operation in the order of
network device setup.

At the point register_netdevice() is called, the device is visible globally
and therefore all of its software state must be fully initialized and
ready for use.

You're going to have to find another solution to these problems.

^ permalink raw reply

* Re: [PATCH net-next 0/8] sctp: support per endpoint auth and asconf flags
From: David Miller @ 2019-08-20  1:27 UTC (permalink / raw)
  To: lucien.xin; +Cc: netdev, linux-sctp, marcelo.leitner, nhorman
In-Reply-To: <cover.1566223325.git.lucien.xin@gmail.com>

From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 19 Aug 2019 22:02:42 +0800

> This patchset mostly does 3 things:
> 
>   1. add per endpoint asconf flag and use asconf flag properly
>      and add SCTP_ASCONF_SUPPORTED sockopt.
>   2. use auth flag properly and add SCTP_AUTH_SUPPORTED sockopt.
>   3. remove the 'global feature switch' to discard chunks.

Series applied, thanks.

^ permalink raw reply

* Re: [net-next v2 04/14] ice: fix set pause param autoneg check
From: David Miller @ 2019-08-20  1:31 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: paul.greenwalt, netdev, nhorman, sassmann, andrewx.bowers
In-Reply-To: <20190819161708.3763-5-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Mon, 19 Aug 2019 09:16:58 -0700

> +	/* Get pause param reports configured and negotiated flow control pause
> +	 * when ETHTOOL_GLINKSETTINGS is defined. Since ETHTOOL_GLINKSETTINGS is
> +	 * defined get pause param pause->autoneg reports SW configured setting,
> +	 * so compare pause->autoneg with SW configured to prevent the user from
> +	 * using set pause param to change autoneg.
> +	 */
> +	pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps),
> +			     GFP_KERNEL);

Just in case it isn't clear, please use plain kzalloc/kfree in this code.

Thank you.

^ permalink raw reply

* [PATCH -next] bpf: Use PTR_ERR_OR_ZERO in xsk_map_inc()
From: YueHaibing @ 2019-08-20  1:36 UTC (permalink / raw)
  To: bjorn.topel, magnus.karlsson, jonathan.lemon, ast, daniel, kafai,
	songliubraving, yhs, john.fastabend
  Cc: YueHaibing, netdev, bpf, kernel-janitors

Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 kernel/bpf/xskmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 4cc28e226398..942c662e2eed 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -21,7 +21,7 @@ int xsk_map_inc(struct xsk_map *map)
 	struct bpf_map *m = &map->map;
 
 	m = bpf_map_inc(m, false);
-	return IS_ERR(m) ? PTR_ERR(m) : 0;
+	return PTR_ERR_OR_ZERO(m);
 }
 
 void xsk_map_put(struct xsk_map *map)




^ permalink raw reply related

* Re: pull-request: wireless-drivers-next 2019-08-19
From: David Miller @ 2019-08-20  1:34 UTC (permalink / raw)
  To: kvalo; +Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <87tvad9l1v.fsf@kamboji.qca.qualcomm.com>

From: Kalle Valo <kvalo@codeaurora.org>
Date: Mon, 19 Aug 2019 19:28:28 +0300

> here's a pull request to net-next for v5.4, more info below. Please let
> me know if there are any problems.

Pulled, thanks Kalle.

^ permalink raw reply

* Re: [PATCH net-next 1/1] fec: add C45 MDIO read/write support
From: David Miller @ 2019-08-20  1:35 UTC (permalink / raw)
  To: marco.hartmann; +Cc: fugang.duan, netdev, linux-kernel, christian.herber
In-Reply-To: <1566234659-7164-2-git-send-email-marco.hartmann@nxp.com>

From: Marco Hartmann <marco.hartmann@nxp.com>
Date: Mon, 19 Aug 2019 17:11:14 +0000

> @@ -1767,7 +1770,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
>  	struct fec_enet_private *fep = bus->priv;
>  	struct device *dev = &fep->pdev->dev;
>  	unsigned long time_left;
> -	int ret = 0;
> +	int ret = 0, frame_start, frame_addr, frame_op;
>  

Please retain the reverse christmas tree ordering of local variables
here, thank you.

^ permalink raw reply

* Re: [PATCH 1/2] bpf: fix 'struct pt_reg' typo in documentation
From: Alexei Starovoitov @ 2019-08-20  1:44 UTC (permalink / raw)
  To: Peter Wu; +Cc: Alexei Starovoitov, Daniel Borkmann, Network Development, bpf
In-Reply-To: <20190819212122.10286-2-peter@lekensteyn.nl>

On Mon, Aug 19, 2019 at 2:21 PM Peter Wu <peter@lekensteyn.nl> wrote:
>
> There is no 'struct pt_reg'.
>
> Signed-off-by: Peter Wu <peter@lekensteyn.nl>
> ---
>  include/uapi/linux/bpf.h       | 6 +++---
>  tools/include/uapi/linux/bpf.h | 6 +++---
>  2 files changed, 6 insertions(+), 6 deletions(-)

please split it into two patches. One for kernel and one for user.
We need tools/* to be updated separately due to auto-sync
of libbpf into github.

^ permalink raw reply

* Re: regression in ath10k dma allocation
From: Nicolin Chen @ 2019-08-20  1:58 UTC (permalink / raw)
  To: Hillf Danton, Tobias Klausmann
  Cc: Christoph Hellwig, kvalo, davem, ath10k, linux-wireless, netdev,
	linux-kernel, m.szyprowski, robin.murphy, iommu, tobias.klausmann
In-Reply-To: <acd7a4b0-fde8-1aa2-af07-2b469e5d5ca7@mni.thm.de>

Hello Hillf,

On Mon, Aug 19, 2019 at 12:38:38AM +0200, Tobias Klausmann wrote:
> 
> On 18.08.19 05:13, Hillf Danton wrote:
> > On Sat, 17 Aug 2019 00:42:48 +0200 Tobias Klausmann wrote:
> > > Hi Nicolin,
> > > 
> > > On 17.08.19 00:25, Nicolin Chen wrote:
> > > > Hi Tobias
> > > > 
> > > > On Fri, Aug 16, 2019 at 10:16:45PM +0200, Tobias Klausmann wrote:
> > > > > > do you have CONFIG_DMA_CMA set in your config?  If not please make sure
> > > > > > you have this commit in your testing tree, and if the problem still
> > > > > > persists it would be a little odd and we'd have to dig deeper:
> > > > > > 
> > > > > > commit dd3dcede9fa0a0b661ac1f24843f4a1b1317fdb6
> > > > > > Author: Nicolin Chen <nicoleotsuka@gmail.com>
> > > > > > Date:   Wed May 29 17:54:25 2019 -0700
> > > > > > 
> > > > > >        dma-contiguous: fix !CONFIG_DMA_CMA version of dma_{alloc, free}_contiguous()
> > > > > yes CONFIG_DMA_CMA is set (=y, see attached config), the commit you mention
> > > > > above is included, if you have any hints how to go forward, please let me
> > > > > know!
> > > > For CONFIG_DMA_CMA=y, by judging the log with error code -12, I
> > > > feel this one should work for you. Would you please check if it
> > > > is included or try it out otherwise?
> > > > 
> > > > dma-contiguous: do not overwrite align in dma_alloc_contiguous()
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=c6622a425acd1d2f3a443cd39b490a8777b622d7
> > > 
> > > Thanks for the hint, yet the commit is included and does not fix the
> > > problem!
> > > 
> Hi Hillf,
> 
> I just tested your first hunk (which comes from kernel/dma/direct.c if I'm
> not mistaken), it did not compile on its own, yet with a tiny bit of work it
> did, and it does indeed solve the regression. But if using that is the
> "right" way to do it, not sure, but its not on me to decide.
> 
> Anyway: Thanks for the hint,
> 
> Tobias
> 
> 
> > Hi Tobias
> > 
> > Two minor diffs below in hope that they might make sense.
> > 
> > 1, fallback unless dma coherent ok.
> > 
> > --- a/kernel/dma/contiguous.c
> > +++ b/kernel/dma/contiguous.c
> > @@ -246,6 +246,10 @@ struct page *dma_alloc_contiguous(struct
> >   		size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
> >   		page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN);
> > +		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
> > +			dma_free_contiguous(dev, page, size);
> > +			page = NULL;
> > +		}

Right...the condition was in-between. However, not every caller
of dma_alloc_contiguous() is supposed to have a coherent check.
So we either add a 'bool coherent_ok' to the API or revert the
dma-direct part back to the original. Probably former option is
better?

Thank you for the debugging. I have been a bit distracted, may
not be able to submit a fix very soon. Would you like to help?

Thanks!
Nicolin

> >   	}
> >   	/* Fallback allocation of normal pages */
> > --
> > 
> > 2, cleanup: cma unless contiguous
> > 
> > --- a/kernel/dma/contiguous.c
> > +++ b/kernel/dma/contiguous.c
> > @@ -234,18 +234,13 @@ struct page *dma_alloc_contiguous(struct
> >   	size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> >   	size_t align = get_order(PAGE_ALIGN(size));
> >   	struct page *page = NULL;
> > -	struct cma *cma = NULL;
> > -
> > -	if (dev && dev->cma_area)
> > -		cma = dev->cma_area;
> > -	else if (count > 1)
> > -		cma = dma_contiguous_default_area;
> >   	/* CMA can be used only in the context which permits sleeping */
> > -	if (cma && gfpflags_allow_blocking(gfp)) {
> > +	if (count > 1 && gfpflags_allow_blocking(gfp)) {
> >   		size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
> > -		page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN);
> > +		page = cma_alloc(dev_get_cma_area(dev), count, cma_align,
> > +							gfp & __GFP_NOWARN);
> >   		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
> >   			dma_free_contiguous(dev, page, size);
> >   			page = NULL;
> > --
> > 

^ permalink raw reply

* RE: [PATCH net-next 1/1] fec: add C45 MDIO read/write support
From: Andy Duan @ 2019-08-20  2:08 UTC (permalink / raw)
  To: Marco Hartmann, davem@davemloft.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, Christian Herber
In-Reply-To: <1566234659-7164-2-git-send-email-marco.hartmann@nxp.com>

From: Marco Hartmann Sent: Tuesday, August 20, 2019 1:11 AM
> IEEE 802.3ae clause 45 defines a modified MDIO protocol that uses a two
> staged access model in order to increase the address space.
> 
> This patch adds support for C45 MDIO read and write accesses, which are
> used whenever the MII_ADDR_C45 flag in the regnum argument is set.
> In case it is not set, C22 accesses are used as before.
> 
> Co-developed-by: Christian Herber <christian.herber@nxp.com>
> Signed-off-by: Christian Herber <christian.herber@nxp.com>
> Signed-off-by: Marco Hartmann <marco.hartmann@nxp.com>
> ---
>  drivers/net/ethernet/freescale/fec_main.c | 65
> ++++++++++++++++++++++++++++---
>  1 file changed, 59 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index c01d3ec3e9af..73f8f9a149a1 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -208,8 +208,11 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet
> MAC address");
> 
>  /* FEC MII MMFR bits definition */
>  #define FEC_MMFR_ST		(1 << 30)
> +#define FEC_MMFR_ST_C45		(0)
>  #define FEC_MMFR_OP_READ	(2 << 28)
> +#define FEC_MMFR_OP_READ_C45	(3 << 28)
>  #define FEC_MMFR_OP_WRITE	(1 << 28)
> +#define FEC_MMFR_OP_ADDR_WRITE	(0)
>  #define FEC_MMFR_PA(v)		((v & 0x1f) << 23)
>  #define FEC_MMFR_RA(v)		((v & 0x1f) << 18)
>  #define FEC_MMFR_TA		(2 << 16)
> @@ -1767,7 +1770,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus,
> int mii_id, int regnum)
>  	struct fec_enet_private *fep = bus->priv;
>  	struct device *dev = &fep->pdev->dev;
>  	unsigned long time_left;
> -	int ret = 0;
> +	int ret = 0, frame_start, frame_addr, frame_op;

Add bool variable:

bool is_c45 = !!(regnum & MII_ADDR_C45);
> 
>  	ret = pm_runtime_get_sync(dev);
>  	if (ret < 0)
> @@ -1775,9 +1778,36 @@ static int fec_enet_mdio_read(struct mii_bus
> *bus, int mii_id, int regnum)
> 
>  	reinit_completion(&fep->mdio_done);
> 
> +	if (MII_ADDR_C45 & regnum) {
if (is_c45)

> +		frame_start = FEC_MMFR_ST_C45;
> +
> +		/* write address */
> +		frame_addr = (regnum >> 16);
> +		writel(frame_start | FEC_MMFR_OP_ADDR_WRITE |
> +		       FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
> +		       FEC_MMFR_TA | (regnum & 0xFFFF),
> +		       fep->hwp + FEC_MII_DATA);
> +
> +		/* wait for end of transfer */
> +		time_left = wait_for_completion_timeout(&fep->mdio_done,
> +				usecs_to_jiffies(FEC_MII_TIMEOUT));
> +		if (time_left == 0) {
> +			netdev_err(fep->netdev, "MDIO address write timeout\n");
> +			ret  = -ETIMEDOUT;

Should be:
goto out;
> +		}
> +
> +		frame_op = FEC_MMFR_OP_READ_C45;
> +
> +	} else {
> +		/* C22 read */
> +		frame_op = FEC_MMFR_OP_READ;
> +		frame_start = FEC_MMFR_ST;
> +		frame_addr = regnum;
> +	}
> +
>  	/* start a read op */
> -	writel(FEC_MMFR_ST | FEC_MMFR_OP_READ |
> -		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
> +	writel(frame_start | frame_op |
> +		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
>  		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
> 
>  	/* wait for end of transfer */
> @@ -1804,7 +1834,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus,
> int mii_id, int regnum,
>  	struct fec_enet_private *fep = bus->priv;
>  	struct device *dev = &fep->pdev->dev;
>  	unsigned long time_left;
> -	int ret;
> +	int ret, frame_start, frame_addr;
> 
>  	ret = pm_runtime_get_sync(dev);
>  	if (ret < 0)
> @@ -1814,9 +1844,32 @@ static int fec_enet_mdio_write(struct mii_bus
> *bus, int mii_id, int regnum,

bool is_c45 = !!(regnum & MII_ADDR_C45);
> 
>  	reinit_completion(&fep->mdio_done);
> 
> +	if (MII_ADDR_C45 & regnum) {

if (is_c45) {
> +		frame_start = FEC_MMFR_ST_C45;
> +
> +		/* write address */
> +		frame_addr = (regnum >> 16);
> +		writel(frame_start | FEC_MMFR_OP_ADDR_WRITE |
> +		       FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
> +		       FEC_MMFR_TA | (regnum & 0xFFFF),
> +		       fep->hwp + FEC_MII_DATA);
> +
> +		/* wait for end of transfer */
> +		time_left = wait_for_completion_timeout(&fep->mdio_done,
> +			usecs_to_jiffies(FEC_MII_TIMEOUT));
> +		if (time_left == 0) {
> +			netdev_err(fep->netdev, "MDIO address write timeout\n");
> +			ret  = -ETIMEDOUT;
Like mdio read, it should be:
goto out; 
> +		}
> +	} else {
> +		/* C22 write */
> +		frame_start = FEC_MMFR_ST;
> +		frame_addr = regnum;
> +	}
> +
>  	/* start a write op */
> -	writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE |
> -		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
> +	writel(frame_start | FEC_MMFR_OP_WRITE |
> +		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) |
>  		FEC_MMFR_TA | FEC_MMFR_DATA(value),
>  		fep->hwp + FEC_MII_DATA);
> 
> --
> 2.7.4


^ permalink raw reply

* Re: regression in ath10k dma allocation
From: Christoph Hellwig @ 2019-08-20  2:14 UTC (permalink / raw)
  To: Nicolin Chen
  Cc: Hillf Danton, Tobias Klausmann, Christoph Hellwig, kvalo, davem,
	ath10k, linux-wireless, netdev, linux-kernel, m.szyprowski,
	robin.murphy, iommu, tobias.klausmann
In-Reply-To: <20190820015852.GA15830@Asurada-Nvidia.nvidia.com>

On Mon, Aug 19, 2019 at 06:58:52PM -0700, Nicolin Chen wrote:
> Right...the condition was in-between. However, not every caller
> of dma_alloc_contiguous() is supposed to have a coherent check.
> So we either add a 'bool coherent_ok' to the API or revert the
> dma-direct part back to the original. Probably former option is
> better?
> 
> Thank you for the debugging. I have been a bit distracted, may
> not be able to submit a fix very soon. Would you like to help?

Yeah, it turns out that while the idea for the dma_alloc_contiguous
helper was neat it didn't work out at all, and me pushing Nicolin
down that route was not a very smart idea.  Sorry for causing this
mess.

I think we'll just need to open code it for dma-direct for 5.3.
Hillf do you want to cook up a patch or should I do it?

^ permalink raw reply

* [PATCHv2 net] ipv6/addrconf: allow adding multicast addr if IFA_F_MCAUTOJOIN is set
From: Hangbin Liu @ 2019-08-20  2:19 UTC (permalink / raw)
  To: netdev
  Cc: Madhu Challa, David Ahern, David S . Miller, Jianlin Shi,
	Hangbin Liu
In-Reply-To: <20190813135232.27146-1-liuhangbin@gmail.com>

In commit 93a714d6b53d ("multicast: Extend ip address command to enable
multicast group join/leave on") we added a new flag IFA_F_MCAUTOJOIN
to let users add a multicast address on an ethernet interface.

This works for IPv4, but not for IPv6. See the inet6_addr_add code.

static int inet6_addr_add()
{
	...
	if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
		ipv6_mc_config(net->ipv6.mc_autojoin_sk, true...)
	}

	ifp = ipv6_add_addr(idev, cfg, true, extack); <- always fail with maddr
	if (!IS_ERR(ifp)) {
		...
	} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
		ipv6_mc_config(net->ipv6.mc_autojoin_sk, false...)
	}
}

But ipv6_add_addr() checks the address type and rejects multicast
addresses directly. So this feature has never worked for IPv6.

We should not remove the multicast address check from ipv6_add_addr() entirely,
but we can accept a multicast address when the IFA_F_MCAUTOJOIN flag is supplied.

v2: update commit description

Reported-by: Jianlin Shi <jishi@redhat.com>
Fixes: 93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 net/ipv6/addrconf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dc73888c7859..ced995f3fec4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1045,7 +1045,8 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
 	int err = 0;
 
 	if (addr_type == IPV6_ADDR_ANY ||
-	    addr_type & IPV6_ADDR_MULTICAST ||
+	    (addr_type & IPV6_ADDR_MULTICAST &&
+	     !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
 	    (!(idev->dev->flags & IFF_LOOPBACK) &&
 	     !netif_is_l3_master(idev->dev) &&
 	     addr_type & IPV6_ADDR_LOOPBACK))
-- 
2.19.2


^ permalink raw reply related

* [PATCH] net: Fix __ip_mc_inc_group argument 3 input
From: Li RongQing @ 2019-08-20  2:25 UTC (permalink / raw)
  To: netdev

It expects gfp_t, but got unsigned int mode

Fixes: 6e2059b53f98 ("ipv4/igmp: init group mode as INCLUDE when join source group")
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
---
 net/ipv4/igmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 180f6896b98b..b8352d716253 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(__ip_mc_inc_group);
 
 void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 {
-	__ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+	__ip_mc_inc_group(in_dev, addr, GFP_KERNEL);
 }
 EXPORT_SYMBOL(ip_mc_inc_group);
 
@@ -2197,7 +2197,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
 	iml->sflist = NULL;
 	iml->sfmode = mode;
 	rcu_assign_pointer(inet->mc_list, iml);
-	__ip_mc_inc_group(in_dev, addr, mode);
+	__ip_mc_inc_group(in_dev, addr, GFP_KERNEL);
 	err = 0;
 done:
 	return err;
-- 
2.16.2


^ permalink raw reply related

* Re: [PATCH v3] tun: fix use-after-free when register netdev failed
From: Jason Wang @ 2019-08-20  2:28 UTC (permalink / raw)
  To: David Miller, yangyingliang
  Cc: netdev, eric.dumazet, xiyou.wangcong, weiyongjun1
In-Reply-To: <20190819.182522.414877916903078544.davem@davemloft.net>


On 2019/8/20 上午9:25, David Miller wrote:
> From: Yang Yingliang <yangyingliang@huawei.com>
> Date: Mon, 19 Aug 2019 21:31:19 +0800
>
>> Call tun_attach() after register_netdevice() to make sure tfile->tun
>> is not published until the netdevice is registered. So the read/write
>> thread can not use the tun pointer that may be freed by free_netdev().
>> (The tun and dev pointer are allocated by alloc_netdev_mqs(), they can
>> be freed by netdev_freemem().)
> register_netdevice() must always be the last operation in the order of
> network device setup.
>
> At the point register_netdevice() is called, the device is visible globally
> and therefore all of its software state must be fully initialized and
> ready for use.
>
> You're going to have to find another solution to these problems.


The device is loosely coupled with sockets/queues. Each side is allowed
to go away without caring about the other side. So in this case, there's a
small window in which the network stack thinks the device has one queue
when it actually has none; the code can then safely drop them. Maybe it's
ok here with some comments?

Or if not, we can try to hold the device before tun_attach and drop it 
after register_netdevice().

Thanks


^ permalink raw reply

* Re: [PATCH V5 0/9] Fixes for vhost metadata acceleration
From: Jason Wang @ 2019-08-20  2:29 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kvm, virtualization, netdev, linux-kernel, linux-mm, jgg
In-Reply-To: <20190819162733-mutt-send-email-mst@kernel.org>


On 2019/8/20 上午5:08, Michael S. Tsirkin wrote:
> On Tue, Aug 13, 2019 at 04:12:49PM +0800, Jason Wang wrote:
>> On 2019/8/12 下午5:49, Michael S. Tsirkin wrote:
>>> On Mon, Aug 12, 2019 at 10:44:51AM +0800, Jason Wang wrote:
>>>> On 2019/8/11 上午1:52, Michael S. Tsirkin wrote:
>>>>> On Fri, Aug 09, 2019 at 01:48:42AM -0400, Jason Wang wrote:
>>>>>> Hi all:
>>>>>>
>>>>>> This series tries to fix several issues introduced by the metadata
>>>>>> acceleration series. Please review.
>>>>>>
>>>>>> Changes from V4:
>>>>>> - switch to using a spinlock to synchronize MMU notifier with accessors
>>>>>>
>>>>>> Changes from V3:
>>>>>> - remove the unnecessary patch
>>>>>>
>>>>>> Changes from V2:
>>>>>> - use seqlock helper to synchronize MMU notifier with vhost worker
>>>>>>
>>>>>> Changes from V1:
>>>>>> - try not to use RCU to synchronize MMU notifier with vhost worker
>>>>>> - set dirty pages after no readers
>>>>>> - return -EAGAIN only when we find the range is overlapped with
>>>>>>      metadata
>>>>>>
>>>>>> Jason Wang (9):
>>>>>>      vhost: don't set uaddr for invalid address
>>>>>>      vhost: validate MMU notifier registration
>>>>>>      vhost: fix vhost map leak
>>>>>>      vhost: reset invalidate_count in vhost_set_vring_num_addr()
>>>>>>      vhost: mark dirty pages during map uninit
>>>>>>      vhost: don't do synchronize_rcu() in vhost_uninit_vq_maps()
>>>>>>      vhost: do not use RCU to synchronize MMU notifier with worker
>>>>>>      vhost: correctly set dirty pages in MMU notifiers callback
>>>>>>      vhost: do not return -EAGAIN for non blocking invalidation too early
>>>>>>
>>>>>>     drivers/vhost/vhost.c | 202 +++++++++++++++++++++++++-----------------
>>>>>>     drivers/vhost/vhost.h |   6 +-
>>>>>>     2 files changed, 122 insertions(+), 86 deletions(-)
>>>>> This generally looks more solid.
>>>>>
>>>>> But this amounts to a significant overhaul of the code.
>>>>>
>>>>> At this point how about we revert 7f466032dc9e5a61217f22ea34b2df932786bbfc
>>>>> for this release, and then re-apply a corrected version
>>>>> for the next one?
>>>> Considering that we've actually disabled the feature, how about just
>>>> queuing those patches for the next release, if possible?
>>>>
>>>> Thanks
>>> Sorry if I was unclear. My idea is that
>>> 1. I revert the disabled code
>>> 2. You send a patch readding it with all the fixes squashed
>>> 3. Maybe optimizations on top right away?
>>> 4. We queue *that* for next and see what happens.
>>>
>>> And the advantage over the patchy approach is that the current patches
>>> are hard to review. E.g.  it's not reasonable to ask RCU guys to review
>>> the whole of vhost for RCU usage but it's much more reasonable to ask
>>> about a specific patch.
>>
>> Ok. Then I agree to revert.
>>
>> Thanks
> Great, so please send the following:
> - revert
> - squashed and fixed patch


Just to confirm, do you want me to send a single series or two?

Thanks



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox