Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH iproute2-next] tunnel: factorize printout of GRE key and flags
From: Andrea Claudi @ 2019-07-12 17:02 UTC (permalink / raw)
  To: netdev; +Cc: stephen, dsahern

print_tunnel() functions in ip6tunnel.c and iptunnel.c contains
the same code to print out GRE key and flags

This commit factorize the code in a helper function in tunnel.c

Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
 ip/ip6tunnel.c | 22 ++--------------------
 ip/iptunnel.c  | 19 ++-----------------
 ip/tunnel.c    | 26 ++++++++++++++++++++++++++
 ip/tunnel.h    |  3 +++
 4 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c
index 2e0f099cd7ced..d7684a673fdc4 100644
--- a/ip/ip6tunnel.c
+++ b/ip/ip6tunnel.c
@@ -120,26 +120,8 @@ static void print_tunnel(const void *t)
 	if (p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE)
 		printf(" allow-localremote");
 
-	if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) &&
-	    p->o_key == p->i_key)
-		printf(" key %u", ntohl(p->i_key));
-	else {
-		if (p->i_flags & GRE_KEY)
-			printf(" ikey %u", ntohl(p->i_key));
-		if (p->o_flags & GRE_KEY)
-			printf(" okey %u", ntohl(p->o_key));
-	}
-
-	if (p->proto == IPPROTO_GRE) {
-		if (p->i_flags & GRE_SEQ)
-			printf("%s  Drop packets out of sequence.", _SL_);
-		if (p->i_flags & GRE_CSUM)
-			printf("%s  Checksum in received packet is required.", _SL_);
-		if (p->o_flags & GRE_SEQ)
-			printf("%s  Sequence packets on output.", _SL_);
-		if (p->o_flags & GRE_CSUM)
-			printf("%s  Checksum output packets.", _SL_);
-	}
+	tnl_print_gre_flags(p->proto, p->i_flags, p->o_flags,
+			    p->i_key, p->o_key);
 }
 
 static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 92a5cb923b6b0..66929e75c7448 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -354,23 +354,8 @@ static void print_tunnel(const void *t)
 		}
 	}
 
-	if ((p->i_flags & GRE_KEY) && (p->o_flags & GRE_KEY) && p->o_key == p->i_key)
-		printf(" key %u", ntohl(p->i_key));
-	else if ((p->i_flags | p->o_flags) & GRE_KEY) {
-		if (p->i_flags & GRE_KEY)
-			printf(" ikey %u", ntohl(p->i_key));
-		if (p->o_flags & GRE_KEY)
-			printf(" okey %u", ntohl(p->o_key));
-	}
-
-	if (p->i_flags & GRE_SEQ)
-		printf("%s  Drop packets out of sequence.", _SL_);
-	if (p->i_flags & GRE_CSUM)
-		printf("%s  Checksum in received packet is required.", _SL_);
-	if (p->o_flags & GRE_SEQ)
-		printf("%s  Sequence packets on output.", _SL_);
-	if (p->o_flags & GRE_CSUM)
-		printf("%s  Checksum output packets.", _SL_);
+	tnl_print_gre_flags(p->iph.protocol, p->i_flags, p->o_flags,
+			    p->i_key, p->o_key);
 }
 
 
diff --git a/ip/tunnel.c b/ip/tunnel.c
index d0d55f37169e9..41b0ef3165fdc 100644
--- a/ip/tunnel.c
+++ b/ip/tunnel.c
@@ -308,6 +308,32 @@ void tnl_print_endpoint(const char *name, const struct rtattr *rta, int family)
 	}
 }
 
+void tnl_print_gre_flags(__u8 proto,
+			 __be16 i_flags, __be16 o_flags,
+			 __be32 i_key, __be32 o_key)
+{
+	if ((i_flags & GRE_KEY) && (o_flags & GRE_KEY) &&
+	    o_key == i_key) {
+		printf(" key %u", ntohl(i_key));
+	} else {
+		if (i_flags & GRE_KEY)
+			printf(" ikey %u", ntohl(i_key));
+		if (o_flags & GRE_KEY)
+			printf(" okey %u", ntohl(o_key));
+	}
+
+	if (proto == IPPROTO_GRE) {
+		if (i_flags & GRE_SEQ)
+			printf("%s  Drop packets out of sequence.", _SL_);
+		if (i_flags & GRE_CSUM)
+			printf("%s  Checksum in received packet is required.", _SL_);
+		if (o_flags & GRE_SEQ)
+			printf("%s  Sequence packets on output.", _SL_);
+		if (o_flags & GRE_CSUM)
+			printf("%s  Checksum output packets.", _SL_);
+	}
+}
+
 static void tnl_print_stats(const struct rtnl_link_stats64 *s)
 {
 	printf("%s", _SL_);
diff --git a/ip/tunnel.h b/ip/tunnel.h
index e530d07cbf6a2..604f8cbfd6db2 100644
--- a/ip/tunnel.h
+++ b/ip/tunnel.h
@@ -55,5 +55,8 @@ void tnl_print_encap(struct rtattr *tb[],
 		     int encap_sport, int encap_dport);
 void tnl_print_endpoint(const char *name,
 			const struct rtattr *rta, int family);
+void tnl_print_gre_flags(__u8 proto,
+			 __be16 i_flags, __be16 o_flags,
+			 __be32 i_key, __be32 o_key);
 
 #endif
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2 1/9] rcu/update: Remove useless check for debug_locks
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

In rcu_read_lock_sched_held(), debug_locks can never be true at the
point we check it because we already check debug_locks in
debug_lockdep_rcu_enabled() in the beginning. Remove the check.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/update.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c3bf44ba42e5..bb961cd89e76 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -93,17 +93,13 @@ module_param(rcu_normal_after_boot, int, 0);
  */
 int rcu_read_lock_sched_held(void)
 {
-	int lockdep_opinion = 0;
-
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
 	if (!rcu_is_watching())
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
-	if (debug_locks)
-		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || !preemptible();
+	return lock_is_held(&rcu_sched_lock_map) || !preemptible();
 }
 EXPORT_SYMBOL(rcu_read_lock_sched_held);
 #endif
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 2/9] rcu: Add support for consolidated-RCU reader checking
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

This patch adds support for checking RCU reader sections in list
traversal macros. Optionally, if the list macro is called under SRCU or
other lock/mutex protection, then appropriate lockdep expressions can be
passed to make the checks pass.

Existing list_for_each_entry_rcu() invocations don't need to pass the
optional fourth argument (cond) unless they are under some non-RCU
protection and needs to make lockdep check pass.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/linux/rculist.h  | 28 +++++++++++++++++++++++-----
 include/linux/rcupdate.h |  7 +++++++
 kernel/rcu/Kconfig.debug | 11 +++++++++++
 kernel/rcu/update.c      | 14 ++++++++++++++
 4 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e91ec9ddcd30..1048160625bb 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -40,6 +40,20 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
  */
 #define list_next_rcu(list)	(*((struct list_head __rcu **)(&(list)->next)))
 
+/*
+ * Check during list traversal that we are within an RCU reader
+ */
+
+#ifdef CONFIG_PROVE_RCU_LIST
+#define __list_check_rcu(dummy, cond, ...)				\
+	({								\
+	RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(),		\
+			 "RCU-list traversed in non-reader section!");	\
+	 })
+#else
+#define __list_check_rcu(dummy, cond, ...) ({})
+#endif
+
 /*
  * Insert a new entry between two known consecutive entries.
  *
@@ -343,14 +357,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the list_head within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as list_add_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
-		&pos->member != (head); \
+#define list_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = list_entry_rcu((head)->next, typeof(*pos), member);	\
+		&pos->member != (head);					\
 		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 /**
@@ -616,13 +632,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as hlist_add_head_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define hlist_for_each_entry_rcu(pos, head, member)			\
-	for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+#define hlist_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
 		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 922bb6848813..712b464ab960 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -223,6 +223,7 @@ int debug_lockdep_rcu_enabled(void);
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
 int rcu_read_lock_sched_held(void);
+int rcu_read_lock_any_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -243,6 +244,12 @@ static inline int rcu_read_lock_sched_held(void)
 {
 	return !preemptible();
 }
+
+static inline int rcu_read_lock_any_held(void)
+{
+	return !preemptible();
+}
+
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 0ec7d1d33a14..b20d0e2903d1 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -7,6 +7,17 @@ menu "RCU Debugging"
 config PROVE_RCU
 	def_bool PROVE_LOCKING
 
+config PROVE_RCU_LIST
+	bool "RCU list lockdep debugging"
+	depends on PROVE_RCU
+	default n
+	help
+	  Enable RCU lockdep checking for list usages. By default it is
+	  turned off since there are several list RCU users that still
+	  need to be converted to pass a lockdep expression. To prevent
+	  false-positive splats, we keep it default disabled but once all
+	  users are converted, we can remove this config option.
+
 config TORTURE_TEST
 	tristate
 	default n
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index bb961cd89e76..0cc7be0fb6b5 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -294,6 +294,20 @@ int rcu_read_lock_bh_held(void)
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
+int rcu_read_lock_any_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	if (lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map))
+		return 1;
+	return !preemptible();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);
+
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /**
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 4/9] ipv4: add lockdep condition to fix for_each_entry
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 net/ipv4/fib_frontend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b298255f6fdb..ef7c9f8e8682 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -127,7 +127,8 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 	h = id & (FIB_TABLE_HASHSZ - 1);
 
 	head = &net->ipv4.fib_table_hash[h];
-	hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+	hlist_for_each_entry_rcu(tb, head, tb_hlist,
+				 lockdep_rtnl_is_held()) {
 		if (tb->tb_id == id)
 			return tb;
 	}
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 3/9] rcu/sync: Remove custom check for reader-section
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Oleg Nesterov, Alexey Kuznetsov,
	Bjorn Helgaas, Borislav Petkov, c0d1n61at3, David S. Miller,
	edumazet, Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Paul E. McKenney, Pavel Machek, peterz,
	Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

The rcu/sync code was doing its own check whether we are in a reader
section. With RCU consolidating flavors and the generic helper added in
this series, this is no longer need. We can just use the generic helper
and it results in a nice cleanup.

Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
Please note: Only build and boot tested this particular patch so far.

 include/linux/rcu_sync.h |  5 ++---
 kernel/rcu/sync.c        | 22 ----------------------
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 6fc53a1345b3..c954f1efc919 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -39,9 +39,8 @@ extern void rcu_sync_lockdep_assert(struct rcu_sync *);
  */
 static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
 {
-#ifdef CONFIG_PROVE_RCU
-	rcu_sync_lockdep_assert(rsp);
-#endif
+	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
+			 "suspicious rcu_sync_is_idle() usage");
 	return !rsp->gp_state; /* GP_IDLE */
 }
 
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index a8304d90573f..535e02601f56 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -10,37 +10,25 @@
 #include <linux/rcu_sync.h>
 #include <linux/sched.h>
 
-#ifdef CONFIG_PROVE_RCU
-#define __INIT_HELD(func)	.held = func,
-#else
-#define __INIT_HELD(func)
-#endif
-
 static const struct {
 	void (*sync)(void);
 	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
 	void (*wait)(void);
-#ifdef CONFIG_PROVE_RCU
-	int  (*held)(void);
-#endif
 } gp_ops[] = {
 	[RCU_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
 		.wait = rcu_barrier,
-		__INIT_HELD(rcu_read_lock_held)
 	},
 	[RCU_SCHED_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
 		.wait = rcu_barrier,
-		__INIT_HELD(rcu_read_lock_sched_held)
 	},
 	[RCU_BH_SYNC] = {
 		.sync = synchronize_rcu,
 		.call = call_rcu,
 		.wait = rcu_barrier,
-		__INIT_HELD(rcu_read_lock_bh_held)
 	},
 };
 
@@ -49,16 +37,6 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
 #define	rss_lock	gp_wait.lock
 
-#ifdef CONFIG_PROVE_RCU
-void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
-{
-	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
-			 "suspicious rcu_sync_is_idle() usage");
-}
-
-EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
-#endif
-
 /**
  * rcu_sync_init() - Initialize an rcu_sync structure
  * @rsp: Pointer to rcu_sync structure to be initialized
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 7/9] x86/pci: Pass lockdep condition to pcm_mmcfg_list iterator
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

The pcm_mmcfg_list is traversed with list_for_each_entry_rcu without a
reader-lock held, because the pci_mmcfg_lock is already held. Make this
known to the list macro so that it fixes new lockdep warnings that
trigger due to lockdep checks added to list_for_each_entry_rcu().

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 arch/x86/pci/mmconfig-shared.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 7389db538c30..6fa42e9c4e6f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,7 @@
 static bool pci_mmcfg_running_state;
 static bool pci_mmcfg_arch_init_failed;
 static DEFINE_MUTEX(pci_mmcfg_lock);
+#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)
 
 LIST_HEAD(pci_mmcfg_list);
 
@@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
 	struct pci_mmcfg_region *cfg;
 
 	/* keep list sorted by segment and starting bus number */
-	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
 		if (cfg->segment > new->segment ||
 		    (cfg->segment == new->segment &&
 		     cfg->start_bus >= new->start_bus)) {
@@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
 {
 	struct pci_mmcfg_region *cfg;
 
-	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+	list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held())
 		if (cfg->segment == segment &&
 		    cfg->start_bus <= bus && bus <= cfg->end_bus)
 			return cfg;
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 9/9] doc: Update documentation about list_for_each_entry_rcu
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

This patch updates the documentation with information about
usage of lockdep with list_for_each_entry_rcu().

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 Documentation/RCU/lockdep.txt   | 15 +++++++++++----
 Documentation/RCU/whatisRCU.txt |  9 ++++++++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index da51d3068850..3d967df3a801 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -96,7 +96,14 @@ other flavors of rcu_dereference().  On the other hand, it is illegal
 to use rcu_dereference_protected() if either the RCU-protected pointer
 or the RCU-protected data that it points to can change concurrently.
 
-There are currently only "universal" versions of the rcu_assign_pointer()
-and RCU list-/tree-traversal primitives, which do not (yet) check for
-being in an RCU read-side critical section.  In the future, separate
-versions of these primitives might be created.
+Similar to rcu_dereference_protected, The RCU list and hlist traversal
+primitives also check for whether there are called from within a reader
+section. However, an optional lockdep expression can be passed to them as
+the last argument in case they are called under other non-RCU protection.
+
+For example, the workqueue for_each_pwq() macro is implemented as follows.
+It is safe to call for_each_pwq() outside a reader section but under protection
+of wq->mutex:
+#define for_each_pwq(pwq, wq)
+	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,
+				lock_is_held(&(wq->mutex).dep_map))
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 981651a8b65d..a08c03735963 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -290,7 +290,7 @@ rcu_dereference()
 	at any time, including immediately after the rcu_dereference().
 	And, again like rcu_assign_pointer(), rcu_dereference() is
 	typically used indirectly, via the _rcu list-manipulation
-	primitives, such as list_for_each_entry_rcu().
+	primitives, such as list_for_each_entry_rcu() [2].
 
 	[1] The variant rcu_dereference_protected() can be used outside
 	of an RCU read-side critical section as long as the usage is
@@ -305,6 +305,13 @@ rcu_dereference()
 	a lockdep splat is emitted.  See RCU/Design/Requirements/Requirements.html
 	and the API's code comments for more details and example usage.
 
+	[2] In case the list_for_each_entry_rcu() primitive is intended
+	to be used outside of an RCU reader section such as when
+	protected by a lock, then an additional lockdep expression can be
+	passed as the last argument to it so that RCU lockdep checking code
+	knows that the dereference of the list pointers are safe. If the
+	indicated protection is not provided, a lockdep splat is emitted.
+
 The following diagram shows how each API communicates among the
 reader, updater, and reclaimer.
 
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 8/9] acpi: Use built-in RCU list checking for acpi_ioremaps list
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

list_for_each_entry_rcu has built-in RCU and lock checking. Make use of
it for acpi_ioremaps list traversal.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 drivers/acpi/osl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index f29e427d0d1d..c8b5d712c7ae 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/lockdep.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/kmod.h>
@@ -94,6 +95,7 @@ struct acpi_ioremap {
 
 static LIST_HEAD(acpi_ioremaps);
 static DEFINE_MUTEX(acpi_ioremap_lock);
+#define acpi_ioremap_lock_held() lock_is_held(&acpi_ioremap_lock.dep_map)
 
 static void __init acpi_request_region (struct acpi_generic_address *gas,
 	unsigned int length, char *desc)
@@ -220,7 +222,7 @@ acpi_map_lookup(acpi_physical_address phys, acpi_size size)
 {
 	struct acpi_ioremap *map;
 
-	list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+	list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
 		if (map->phys <= phys &&
 		    phys + size <= map->phys + map->size)
 			return map;
@@ -263,7 +265,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
 {
 	struct acpi_ioremap *map;
 
-	list_for_each_entry_rcu(map, &acpi_ioremaps, list)
+	list_for_each_entry_rcu(map, &acpi_ioremaps, list, acpi_ioremap_lock_held())
 		if (map->virt <= virt &&
 		    virt + size <= map->virt + map->size)
 			return map;
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 6/9] workqueue: Convert for_each_wq to use built-in list check
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

list_for_each_entry_rcu now has support to check for RCU reader sections
as well as lock. Just use the support in it, instead of explictly
checking in the caller.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/workqueue.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9657315405de..5e88449bdd83 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -363,11 +363,6 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 			 !lockdep_is_held(&wq_pool_mutex),		\
 			 "RCU or wq_pool_mutex should be held")
 
-#define assert_rcu_or_wq_mutex(wq)					\
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
-			 !lockdep_is_held(&wq->mutex),			\
-			 "RCU or wq->mutex should be held")
-
 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
 			 !lockdep_is_held(&wq->mutex) &&		\
@@ -424,9 +419,8 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
  * ignored.
  */
 #define for_each_pwq(pwq, wq)						\
-	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
-		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
-		else
+	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,		\
+				 lock_is_held(&(wq->mutex).dep_map))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 5/9] driver/core: Convert to use built-in RCU list checking
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Greg Kroah-Hartman, Alexey Kuznetsov,
	Bjorn Helgaas, Borislav Petkov, c0d1n61at3, David S. Miller,
	edumazet, Hideaki YOSHIFUJI, H. Peter Anvin, Ingo Molnar,
	Jonathan Corbet, Josh Triplett, keescook, kernel-hardening,
	kernel-team, Lai Jiangshan, Len Brown, linux-acpi, linux-doc,
	linux-pci, linux-pm, Mathieu Desnoyers, neilb, netdev,
	Oleg Nesterov, Paul E. McKenney, Pavel Machek, peterz,
	Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712170024.111093-1-joel@joelfernandes.org>

list_for_each_entry_rcu has built-in RCU and lock checking. Make use of
it in driver core.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 drivers/base/base.h          |  1 +
 drivers/base/core.c          | 10 ++++++++++
 drivers/base/power/runtime.c | 15 ++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/base/base.h b/drivers/base/base.h
index b405436ee28e..0d32544b6f91 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -165,6 +165,7 @@ static inline int devtmpfs_init(void) { return 0; }
 /* Device links support */
 extern int device_links_read_lock(void);
 extern void device_links_read_unlock(int idx);
+extern int device_links_read_lock_held(void);
 extern int device_links_check_suppliers(struct device *dev);
 extern void device_links_driver_bound(struct device *dev);
 extern void device_links_driver_cleanup(struct device *dev);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index fd7511e04e62..6c5ca9685647 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -68,6 +68,11 @@ void device_links_read_unlock(int idx)
 {
 	srcu_read_unlock(&device_links_srcu, idx);
 }
+
+int device_links_read_lock_held(void)
+{
+	return srcu_read_lock_held(&device_links_srcu);
+}
 #else /* !CONFIG_SRCU */
 static DECLARE_RWSEM(device_links_lock);
 
@@ -91,6 +96,11 @@ void device_links_read_unlock(int not_used)
 {
 	up_read(&device_links_lock);
 }
+
+int device_links_read_lock_held(void)
+{
+	return lock_is_held(&device_links_lock);
+}
 #endif /* !CONFIG_SRCU */
 
 /**
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 952a1e7057c7..7a10e8379a70 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -287,7 +287,8 @@ static int rpm_get_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held()) {
 		int retval;
 
 		if (!(link->flags & DL_FLAG_PM_RUNTIME) ||
@@ -309,7 +310,8 @@ static void rpm_put_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held()) {
 		if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
 			continue;
 
@@ -1640,7 +1642,8 @@ void pm_runtime_clean_up_links(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
+	list_for_each_entry_rcu(link, &dev->links.consumers, s_node,
+				device_links_read_lock_held()) {
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
@@ -1662,7 +1665,8 @@ void pm_runtime_get_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
 		if (link->flags & DL_FLAG_PM_RUNTIME) {
 			link->supplier_preactivated = true;
 			refcount_inc(&link->rpm_active);
@@ -1683,7 +1687,8 @@ void pm_runtime_put_suppliers(struct device *dev)
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
 		if (link->supplier_preactivated) {
 			link->supplier_preactivated = false;
 			if (refcount_dec_not_one(&link->rpm_active))
-- 
2.22.0.510.g264f2c817a-goog


^ permalink raw reply related

* [PATCH v2 0/9] Harden list_for_each_entry_rcu() and family
From: Joel Fernandes (Google) @ 2019-07-12 17:00 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google), Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Jonathan Corbet, Josh Triplett, keescook,
	kernel-hardening, kernel-team, Lai Jiangshan, Len Brown,
	linux-acpi, linux-doc, linux-pci, linux-pm, Mathieu Desnoyers,
	neilb, netdev, Oleg Nesterov, Paul E. McKenney, Pavel Machek,
	peterz, Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)

Hi,
This series aims to provide lockdep checking to RCU list macros for additional
kernel hardening.

RCU has a number of primitives for "consumption" of an RCU protected pointer.
Most of the time, these consumers make sure that such accesses are under a RCU
reader-section (such as rcu_dereference{,sched,bh} or under a lock, such as
with rcu_dereference_protected()).

However, there are other ways to consume RCU pointers, such as by
list_for_each_entry_rcu or hlist_for_each_enry_rcu. Unlike the rcu_dereference
family, these consumers do no lockdep checking at all. And with the growing
number of RCU list uses (1000+), it is possible for bugs to creep in and go
unnoticed which lockdep checks can catch.

Since RCU consolidation efforts last year, the different traditional RCU
flavors (preempt, bh, sched) are all consolidated. In other words, any of these
flavors can cause a reader section to occur and all of them must cease before
the reader section is considered to be unlocked. Thanks to this, we can
generically check if we are in an RCU reader. This is what patch 1 does. Note
that the list_for_each_entry_rcu and family are different from the
rcu_dereference family in that, there is no _bh or _sched version of this
macro. They are used under many different RCU reader flavors, and also SRCU.
Patch 1 adds a new internal function rcu_read_lock_any_held() which checks
if any reader section is active at all, when these macros are called. If no
reader section exists, then the optional fourth argument to
list_for_each_entry_rcu() can be a lockdep expression which is evaluated
(similar to how rcu_dereference_check() works). If no lockdep expression is
passed, and we are not in a reader, then a splat occurs. Just take off the
lockdep expression after applying the patches, by using the following diff and
see what happens:

+++ b/arch/x86/pci/mmconfig-shared.c
@@ -55,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
        struct pci_mmcfg_region *cfg;

        /* keep list sorted by segment and starting bus number */
-       list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
+       list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {

The optional argument trick to list_for_each_entry_rcu() can also be used in
the future to possibly remove rcu_dereference_{,bh,sched}_protected() API and
we can pass an optional lockdep expression to rcu_dereference() itself. Thus
eliminating 3 more RCU APIs.

Note that some list macro wrappers already do their own lockdep checking in the
caller side. These can be eliminated in favor of the built-in lockdep checking
in the list macro that this series adds. For example, workqueue code has a
assert_rcu_or_wq_mutex() function which is called in for_each_wq().  This
series replaces that in favor of the built-in check.

Also in the future, we can extend these checks to list_entry_rcu() and other
list macros as well, if needed.

Please note that I have kept this option default-disabled under a new config:
CONFIG_PROVE_RCU_LIST. This is so that until all users are converted to pass
the optional argument, we should keep the check disabled. There are about a
1000 or so users and it is not possible to pass in the optional lockdep
expression in a single series since it is done on a case-by-case basis. I did
convert a few users in this series itself.

v1->v2: Have assert_rcu_or_wq_mutex deleted (Daniel Jordan)
	Simplify rcu_read_lock_any_held()   (Peter Zijlstra)
	Simplified rcu-sync logic	    (Oleg Nesterov)
	Updated documentation and rculist comments.
	Added GregKH ack.

RFC->v1: 
	Simplify list checking macro (Rasmus Villemoes)

Joel Fernandes (Google) (9):
rcu/update: Remove useless check for debug_locks
rcu: Add support for consolidated-RCU reader checking
rcu/sync: Remove custom check for reader-section
ipv4: add lockdep condition to fix for_each_entry
driver/core: Convert to use built-in RCU list checking
workqueue: Convert for_each_wq to use built-in list check
x86/pci: Pass lockdep condition to pcm_mmcfg_list iterator
acpi: Use built-in RCU list checking for acpi_ioremaps list
doc: Update documentation about list_for_each_entry_rcu

Documentation/RCU/lockdep.txt   | 15 +++++++++++----
Documentation/RCU/whatisRCU.txt |  9 ++++++++-
arch/x86/pci/mmconfig-shared.c  |  5 +++--
drivers/acpi/osl.c              |  6 ++++--
drivers/base/base.h             |  1 +
drivers/base/core.c             | 10 ++++++++++
drivers/base/power/runtime.c    | 15 ++++++++++-----
include/linux/rcu_sync.h        |  5 ++---
include/linux/rculist.h         | 28 +++++++++++++++++++++++-----
include/linux/rcupdate.h        |  7 +++++++
kernel/rcu/Kconfig.debug        | 11 +++++++++++
kernel/rcu/sync.c               | 22 ----------------------
kernel/rcu/update.c             | 20 +++++++++++++++-----
kernel/workqueue.c              | 10 ++--------
net/ipv4/fib_frontend.c         |  3 ++-
15 files changed, 109 insertions(+), 58 deletions(-)

--
2.22.0.510.g264f2c817a-goog

^ permalink raw reply

* Re: [RFC PATCH net-next 0/3] net: batched receive in GRO path
From: Eric Dumazet @ 2019-07-12 16:48 UTC (permalink / raw)
  To: Edward Cree, Eric Dumazet, Paolo Abeni, David Miller; +Cc: netdev
In-Reply-To: <927da9ee-c2fc-8556-fbeb-e26ea1c98d1e@solarflare.com>



On 7/12/19 5:59 PM, Edward Cree wrote:
> On 10/07/2019 18:39, Eric Dumazet wrote:
>> Holding a small packet in the list up to the point we call busy_poll_stop()
>> will basically make busypoll non working anymore.
>>
>> napi_complete_done() has special behavior when busy polling is active.
> Yep, I get it now, sorry for being dumb :)
> Essentially we're saying that things coalesced by GRO are 'bulk' traffic and
>  can wait around, 


GRO can still be beneficial even when busypolling, since TCP stack
will send a single ACK back, and a read()/recv() will copy the whole train
instead of a single MSS.

I should have mentioned that we have a patch that I forgot to upstream adding
the PSH flag to all TSO packets, meaning the receiver can automatically learn
the boundary of a GRO packet and not have to wait for the napi->poll() end
(busypolling or not)

> but the rest is the stuff we're polling for for low latency.
> I'm putting a gro_normal_list() call after the trace_napi_poll() in
>  napi_busy_loop() and testing that, let's see how it goes...
> 

^ permalink raw reply

* Re: [PATCH v1 1/6] rcu: Add support for consolidated-RCU reader checking
From: Paul E. McKenney @ 2019-07-12 16:45 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Peter Zijlstra, linux-kernel, Alexey Kuznetsov, Bjorn Helgaas,
	Borislav Petkov, c0d1n61at3, David S. Miller, edumazet,
	Greg Kroah-Hartman, Hideaki YOSHIFUJI, H. Peter Anvin,
	Ingo Molnar, Josh Triplett, keescook, kernel-hardening,
	Lai Jiangshan, Len Brown, linux-acpi, linux-pci, linux-pm,
	Mathieu Desnoyers, neilb, netdev, oleg, Pavel Machek,
	Rafael J. Wysocki, Rasmus Villemoes, rcu, Steven Rostedt,
	Tejun Heo, Thomas Gleixner, will,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712151051.GB235410@google.com>

On Fri, Jul 12, 2019 at 11:10:51AM -0400, Joel Fernandes wrote:
> On Fri, Jul 12, 2019 at 01:11:25PM +0200, Peter Zijlstra wrote:
> > On Thu, Jul 11, 2019 at 07:43:56PM -0400, Joel Fernandes (Google) wrote:
> > > +int rcu_read_lock_any_held(void)
> > > +{
> > > +	int lockdep_opinion = 0;
> > > +
> > > +	if (!debug_lockdep_rcu_enabled())
> > > +		return 1;
> > > +	if (!rcu_is_watching())
> > > +		return 0;
> > > +	if (!rcu_lockdep_current_cpu_online())
> > > +		return 0;
> > > +
> > > +	/* Preemptible RCU flavor */
> > > +	if (lock_is_held(&rcu_lock_map))
> > 
> > you forgot debug_locks here.
> 
> Actually, it turns out debug_locks checking is not even needed. If
> debug_locks == 0, then debug_lockdep_rcu_enabled() returns 0 and we would not
> get to this point.
> 
> > > +		return 1;
> > > +
> > > +	/* BH flavor */
> > > +	if (in_softirq() || irqs_disabled())
> > 
> > I'm not sure I'd put irqs_disabled() under BH, also this entire
> > condition is superfluous, see below.
> > 
> > > +		return 1;
> > > +
> > > +	/* Sched flavor */
> > > +	if (debug_locks)
> > > +		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
> > > +	return lockdep_opinion || !preemptible();
> > 
> > that !preemptible() turns into:
> > 
> >   !(preempt_count()==0 && !irqs_disabled())
> > 
> > which is:
> > 
> >   preempt_count() != 0 || irqs_disabled()
> > 
> > and already includes irqs_disabled() and in_softirq().
> > 
> > > +}
> > 
> > So maybe something lke:
> > 
> > 	if (debug_locks && (lock_is_held(&rcu_lock_map) ||
> > 			    lock_is_held(&rcu_sched_lock_map)))
> > 		return true;
> 
> Agreed, I will do it this way (without the debug_locks) like:
> 
> ---8<-----------------------
> 
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index ba861d1716d3..339aebc330db 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -296,27 +296,15 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
>  
>  int rcu_read_lock_any_held(void)
>  {
> -	int lockdep_opinion = 0;
> -
>  	if (!debug_lockdep_rcu_enabled())
>  		return 1;
>  	if (!rcu_is_watching())
>  		return 0;
>  	if (!rcu_lockdep_current_cpu_online())
>  		return 0;
> -
> -	/* Preemptible RCU flavor */
> -	if (lock_is_held(&rcu_lock_map))
> -		return 1;
> -
> -	/* BH flavor */
> -	if (in_softirq() || irqs_disabled())
> -		return 1;
> -
> -	/* Sched flavor */
> -	if (debug_locks)
> -		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
> -	return lockdep_opinion || !preemptible();
> +	if (lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map))

OK, I will bite...  Why not also lock_is_held(&rcu_bh_lock_map)?

							Thanx, Paul

> +		return 1;
> +	return !preemptible();
>  }
>  EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);
>  
> 

^ permalink raw reply

* Re: [PATCH] net: dsa: qca8k: replace legacy gpio include
From: Florian Fainelli @ 2019-07-12 16:43 UTC (permalink / raw)
  To: Christian Lamparter, netdev
  Cc: David S . Miller, Vivien Didelot, Andrew Lunn, kbuild test robot
In-Reply-To: <20190712153336.5018-1-chunkeey@gmail.com>

On 7/12/19 8:33 AM, Christian Lamparter wrote:
> This patch replaces the legacy bulk gpio.h include
> with the proper gpio/consumer.h variant. This was
> caught by the kbuild test robot that was running
> into an error because of this.
> 
> For more information why linux/gpio.h is bad can be found in:
> commit 56a46b6144e7 ("gpio: Clarify that <linux/gpio.h> is legacy")
> 
> Reported-by: kbuild test robot <lkp@intel.com>
> Link: https://www.spinics.net/lists/netdev/msg584447.html
> Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature")
> Signed-off-by: Christian Lamparter <chunkeey@gmail.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply

* Re: [PATCH] net: dsa: qca8k: replace legacy gpio include
From: Vivien Didelot @ 2019-07-12 16:42 UTC (permalink / raw)
  To: Christian Lamparter
  Cc: netdev, David S . Miller, Florian Fainelli, Andrew Lunn,
	kbuild test robot
In-Reply-To: <20190712153336.5018-1-chunkeey@gmail.com>

On Fri, 12 Jul 2019 17:33:36 +0200, Christian Lamparter <chunkeey@gmail.com> wrote:
> This patch replaces the legacy bulk gpio.h include
> with the proper gpio/consumer.h variant. This was
> caught by the kbuild test robot that was running
> into an error because of this.
> 
> For more information why linux/gpio.h is bad can be found in:
> commit 56a46b6144e7 ("gpio: Clarify that <linux/gpio.h> is legacy")
> 
> Reported-by: kbuild test robot <lkp@intel.com>
> Link: https://www.spinics.net/lists/netdev/msg584447.html
> Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature")
> Signed-off-by: Christian Lamparter <chunkeey@gmail.com>

Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>

^ permalink raw reply

* Re: [ovs-dev] [PATCH net-next] net: openvswitch: do not update max_headroom if new headroom is equal to old headroom
From: Gregory Rose @ 2019-07-12 16:18 UTC (permalink / raw)
  To: Taehee Yoo, davem, pshelar, netdev, dev
In-Reply-To: <20190705160809.5202-1-ap420073@gmail.com>


On 7/5/2019 9:08 AM, Taehee Yoo wrote:
> When a vport is deleted, the maximum headroom size would be changed.
> If the vport which has the largest headroom is deleted,
> the new max_headroom would be set.
> But, if the new headroom size is equal to the old headroom size,
> updating routine is unnecessary.
>
> Signed-off-by: Taehee Yoo <ap420073@gmail.com>
> ---
>   net/openvswitch/datapath.c | 39 +++++++++++++++++++++++++++-----------
>   1 file changed, 28 insertions(+), 11 deletions(-)
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 33b388103741..892287d06c17 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -1958,10 +1958,9 @@ static struct vport *lookup_vport(struct net *net,
>   
>   }
>   
> -/* Called with ovs_mutex */
> -static void update_headroom(struct datapath *dp)
> +static unsigned int ovs_get_max_headroom(struct datapath *dp)
>   {
> -	unsigned dev_headroom, max_headroom = 0;
> +	unsigned int dev_headroom, max_headroom = 0;
>   	struct net_device *dev;
>   	struct vport *vport;
>   	int i;
> @@ -1975,10 +1974,19 @@ static void update_headroom(struct datapath *dp)
>   		}
>   	}
>   
> -	dp->max_headroom = max_headroom;
> +	return max_headroom;
> +}
> +
> +/* Called with ovs_mutex */
> +static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
> +{
> +	struct vport *vport;
> +	int i;
> +
> +	dp->max_headroom = new_headroom;
>   	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
>   		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
> -			netdev_set_rx_headroom(vport->dev, max_headroom);
> +			netdev_set_rx_headroom(vport->dev, new_headroom);
>   }
>   
>   static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
> @@ -1989,6 +1997,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>   	struct sk_buff *reply;
>   	struct vport *vport;
>   	struct datapath *dp;
> +	unsigned int new_headroom;
>   	u32 port_no;
>   	int err;
>   
> @@ -2050,8 +2059,10 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>   				      info->snd_portid, info->snd_seq, 0,
>   				      OVS_VPORT_CMD_NEW);
>   
> -	if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
> -		update_headroom(dp);
> +	new_headroom = netdev_get_fwd_headroom(vport->dev);
> +
> +	if (new_headroom > dp->max_headroom)
> +		ovs_update_headroom(dp, new_headroom);
>   	else
>   		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
>   
> @@ -2122,11 +2133,12 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
>   
>   static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
>   {
> -	bool must_update_headroom = false;
> +	bool update_headroom = false;
>   	struct nlattr **a = info->attrs;
>   	struct sk_buff *reply;
>   	struct datapath *dp;
>   	struct vport *vport;
> +	unsigned int new_headroom;
>   	int err;
>   
>   	reply = ovs_vport_cmd_alloc_info();
> @@ -2152,12 +2164,17 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
>   	/* the vport deletion may trigger dp headroom update */
>   	dp = vport->dp;
>   	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
> -		must_update_headroom = true;
> +		update_headroom = true;
> +
>   	netdev_reset_rx_headroom(vport->dev);
>   	ovs_dp_detach_port(vport);
>   
> -	if (must_update_headroom)
> -		update_headroom(dp);
> +	if (update_headroom) {
> +		new_headroom = ovs_get_max_headroom(dp);
> +
> +		if (new_headroom < dp->max_headroom)
> +			ovs_update_headroom(dp, new_headroom);
> +	}
>   	ovs_unlock();
>   
>   	ovs_notify(&dp_vport_genl_family, reply, info);

I did a compile test and ran the OVS kernel self-test suite.  Looks OK 
to me.
Tested-by: Greg Rose <gvrose8192@gmail.com>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>


^ permalink raw reply

* Re: [PATCH v1 1/6] rcu: Add support for consolidated-RCU reader checking
From: Peter Zijlstra @ 2019-07-12 15:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Alexey Kuznetsov, Bjorn Helgaas, Borislav Petkov,
	c0d1n61at3, David S. Miller, edumazet, Greg Kroah-Hartman,
	Hideaki YOSHIFUJI, H. Peter Anvin, Ingo Molnar, Josh Triplett,
	keescook, kernel-hardening, Lai Jiangshan, Len Brown, linux-acpi,
	linux-pci, linux-pm, Mathieu Desnoyers, neilb, netdev, oleg,
	Paul E. McKenney, Pavel Machek, Rafael J. Wysocki,
	Rasmus Villemoes, rcu, Steven Rostedt, Tejun Heo, Thomas Gleixner,
	will, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)
In-Reply-To: <20190712151051.GB235410@google.com>

On Fri, Jul 12, 2019 at 11:10:51AM -0400, Joel Fernandes wrote:
> Agreed, I will do it this way (without the debug_locks) like:
> 
> ---8<-----------------------
> 
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index ba861d1716d3..339aebc330db 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -296,27 +296,15 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
>  
>  int rcu_read_lock_any_held(void)
>  {
>  	if (!debug_lockdep_rcu_enabled())
>  		return 1;
>  	if (!rcu_is_watching())
>  		return 0;
>  	if (!rcu_lockdep_current_cpu_online())
>  		return 0;
> +	if (lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map))
> +		return 1;
> +	return !preemptible();
>  }
>  EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);

OK, that looks sane. Thanks!

^ permalink raw reply

* Re: [RFC PATCH net-next 0/3] net: batched receive in GRO path
From: Edward Cree @ 2019-07-12 15:59 UTC (permalink / raw)
  To: Eric Dumazet, Paolo Abeni, David Miller; +Cc: netdev
In-Reply-To: <38ff0ce0-7e26-1683-90f0-adc9c0ac9abe@gmail.com>

On 10/07/2019 18:39, Eric Dumazet wrote:
> Holding a small packet in the list up to the point we call busy_poll_stop()
> will basically make busypoll non working anymore.
>
> napi_complete_done() has special behavior when busy polling is active.
Yep, I get it now, sorry for being dumb :)
Essentially we're saying that things coalesced by GRO are 'bulk' traffic and
 can wait around, but the rest is the stuff we're polling for for low latency.
I'm putting a gro_normal_list() call after the trace_napi_poll() in
 napi_busy_loop() and testing that, let's see how it goes...

^ permalink raw reply

* Re: [PATCH v2 bpf-next 0/3] fix BTF verification size resolution
From: Daniel Borkmann @ 2019-07-12 15:51 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Yonghong Song, Andrii Nakryiko, bpf@vger.kernel.org,
	netdev@vger.kernel.org, Alexei Starovoitov, Kernel Team
In-Reply-To: <CAEf4BzY-7MLt8hvBqMMdhpAq3ih_KFjgWitN3TSf74FypeAPRw@mail.gmail.com>

On 07/12/2019 05:42 PM, Andrii Nakryiko wrote:
> On Fri, Jul 12, 2019 at 5:59 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 07/12/2019 08:03 AM, Yonghong Song wrote:
>>> On 7/10/19 11:53 PM, Andrii Nakryiko wrote:
>>>> BTF size resolution logic isn't always resolving type size correctly, leading
>>>> to erroneous map creation failures due to value size mismatch.
>>>>
>>>> This patch set:
>>>> 1. fixes the issue (patch #1);
>>>> 2. adds tests for trickier cases (patch #2);
>>>> 3. and converts few test cases utilizing BTF-defined maps, that previously
>>>>     couldn't use typedef'ed arrays due to kernel bug (patch #3).
>>>>
>>>> Patch #1 can be applied against bpf tree, but selftest ones (#2 and #3) have
>>>> to go against bpf-next for now.
>>>
>>> Why #2 and #3 have to go to bpf-next? bpf tree also accepts tests,
>>> AFAIK. Maybe leave for Daniel and Alexei to decide in this particular case.
>>
>> Yes, corresponding test cases for fixes are also accepted for bpf tree, thanks.
> 
> Thanks for merging, Daniel! My thinking was that at the time I posted
> patch set, BTF-defined map tests weren't yet merged into bpf, so I
> assumed it has to go against bpf-next.

Not yet merged given the minor change needed resulting from Yonghong's feedback.

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/3] bpf: fix BTF verifier size resolution logic
From: Yonghong Song @ 2019-07-12 15:47 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Andrii Nakryiko, bpf@vger.kernel.org, netdev@vger.kernel.org,
	Alexei Starovoitov, daniel@iogearbox.net, Kernel Team, Martin Lau
In-Reply-To: <CAEf4Bzb4vzwRVPegF51Kv6oqTXUAWqnhK-jAVs8SESyh74+XTA@mail.gmail.com>



On 7/12/19 8:36 AM, Andrii Nakryiko wrote:
> On Thu, Jul 11, 2019 at 10:59 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 7/10/19 11:53 PM, Andrii Nakryiko wrote:
>>> BTF verifier has a size resolution bug which in some circumstances leads to
>>> invalid size resolution for, e.g., TYPEDEF modifier.  This happens if we have
>>> [1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
>>> context ARRAY size won't be resolved (because for pointer it doesn't matter, so
>>> it's a sink in pointer context), but it will be permanently remembered as zero
>>> for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
>>> be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
>>> This, subsequently, will lead to erroneous map creation failure, if that
>>> TYPEDEF is specified as either key or value, as key_size/value_size won't
>>> correspond to resolved size of TYPEDEF (kernel will believe it's zero).
>>>
>>> Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
>>> won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
>>> calculated and stored.
>>>
>>> This bug manifests itself in rejecting BTF-defined maps that use array
>>> typedef as a value type:
>>>
>>> typedef int array_t[16];
>>>
>>> struct {
>>>       __uint(type, BPF_MAP_TYPE_ARRAY);
>>>       __type(value, array_t); /* i.e., array_t *value; */
>>> } test_map SEC(".maps");
>>>
>>> The fix consists on not relying on modifier's resolved_size and instead using
>>> modifier's resolved_id (type ID for "concrete" type to which modifier
>>> eventually resolves) and doing size determination for that resolved type. This
>>> allow to preserve existing "early DFS termination" logic for PTR or
>>> STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
>>> types.
>>>
>>> Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
>>> Cc: Martin KaFai Lau <kafai@fb.com>
>>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
>>> ---
>>>    kernel/bpf/btf.c | 14 ++++++++++----
>>>    1 file changed, 10 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
>>> index cad09858a5f2..22fe8b155e51 100644
>>> --- a/kernel/bpf/btf.c
>>> +++ b/kernel/bpf/btf.c
>>> @@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
>>>                                 !btf_type_is_var(size_type)))
>>>                        return NULL;
>>>
>>> -             size = btf->resolved_sizes[size_type_id];
>>>                size_type_id = btf->resolved_ids[size_type_id];
>>>                size_type = btf_type_by_id(btf, size_type_id);
>>>                if (btf_type_nosize_or_null(size_type))
>>>                        return NULL;
>>> +             else if (btf_type_has_size(size_type))
>>> +                     size = size_type->size;
>>> +             else if (btf_type_is_array(size_type))
>>> +                     size = btf->resolved_sizes[size_type_id];
>>> +             else if (btf_type_is_ptr(size_type))
>>> +                     size = sizeof(void *);
>>> +             else
>>> +                     return NULL;
>>
>> Looks good to me. Not sure whether we need to do any adjustment for
>> var kind or not. Maybe we can do similar change in btf_var_resolve()
> 
> I don't think btf_var_resolve() needs any change. btf_var_resolve
> can't be referenced by modifier, so it doesn't have any problem. It's
> similar to array in that it's size will be determined correctly.

Correct. With your previous patch, the resolved_sizes[..] for var type 
is not used, so that is why I suggest to just set it to 0.

> 
> But I think btf_type_id_size() doesn't handle var case correctly, I'll do
> 
> +             else if (btf_type_is_array(size_type) ||
> btf_type_is_var(size_type))
> +                     size = btf->resolved_sizes[size_type_id];

This change should work too (to use btf->resolved_sizes[...]).

> 
> to fix that.
> 
>> to btf_modifier_resolve()? But I do not think it impacts correctness
>> similar to btf_modifier_resolve() below as you changed
>> btf_type_id_size() implementation in the above.
>>
>>>        }
>>>
>>>        *type_id = size_type_id;
>>> @@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
>>>        const struct btf_type *next_type;
>>>        u32 next_type_id = t->type;
>>>        struct btf *btf = env->btf;
>>> -     u32 next_type_size = 0;
>>>
>>>        next_type = btf_type_by_id(btf, next_type_id);
>>>        if (!next_type || btf_type_is_resolve_source_only(next_type)) {
>>> @@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
>>>         * save us a few type-following when we use it later (e.g. in
>>>         * pretty print).
>>>         */
>>> -     if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
>>> +     if (!btf_type_id_size(btf, &next_type_id, NULL)) {
>>>                if (env_type_is_resolved(env, next_type_id))
>>>                        next_type = btf_type_id_resolve(btf, &next_type_id);
>>>
>>> @@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
>>>                }
>>>        }
>>>
>>> -     env_stack_pop_resolved(env, next_type_id, next_type_size);
>>> +     env_stack_pop_resolved(env, next_type_id, 0);
>>>
>>>        return 0;
>>>    }
>>>

^ permalink raw reply

* Re: [PATCH bpf-next] selftests/bpf: remove logic duplication in test_verifier.c
From: Andrii Nakryiko @ 2019-07-12 15:45 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Krzesimir Nowak, Andrii Nakryiko, Kernel Team, Alexei Starovoitov,
	bpf, Networking
In-Reply-To: <d6ff6022-56f7-de63-d3e1-8949360296ca@iogearbox.net>

On Fri, Jul 12, 2019 at 6:57 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 07/12/2019 09:53 AM, Krzesimir Nowak wrote:
> > On Thu, Jul 11, 2019 at 4:43 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> >>
> >> On Thu, Jul 11, 2019 at 5:13 AM Krzesimir Nowak <krzesimir@kinvolk.io> wrote:
> >>>
> >>> On Thu, Jul 11, 2019 at 3:08 AM Andrii Nakryiko <andriin@fb.com> wrote:
> >>>>
> >>>> test_verifier tests can specify single- and multi-runs tests. Internally
> >>>> logic of handling them is duplicated. Get rid of it by making single run
> >>>> retval specification to be a first retvals spec.
> >>>>
> >>>> Cc: Krzesimir Nowak <krzesimir@kinvolk.io>
> >>>> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> >>>
> >>> Looks good, one nit below.
> >>>
> >>> Acked-by: Krzesimir Nowak <krzesimir@kinvolk.io>
> >>>
> >>>> ---
> >>>>  tools/testing/selftests/bpf/test_verifier.c | 37 ++++++++++-----------
> >>>>  1 file changed, 18 insertions(+), 19 deletions(-)
> >>>>
> >>>> diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
> >>>> index b0773291012a..120ecdf4a7db 100644
> >>>> --- a/tools/testing/selftests/bpf/test_verifier.c
> >>>> +++ b/tools/testing/selftests/bpf/test_verifier.c
> >>>> @@ -86,7 +86,7 @@ struct bpf_test {
> >>>>         int fixup_sk_storage_map[MAX_FIXUPS];
> >>>>         const char *errstr;
> >>>>         const char *errstr_unpriv;
> >>>> -       uint32_t retval, retval_unpriv, insn_processed;
> >>>> +       uint32_t insn_processed;
> >>>>         int prog_len;
> >>>>         enum {
> >>>>                 UNDEF,
> >>>> @@ -95,16 +95,24 @@ struct bpf_test {
> >>>>         } result, result_unpriv;
> >>>>         enum bpf_prog_type prog_type;
> >>>>         uint8_t flags;
> >>>> -       __u8 data[TEST_DATA_LEN];
> >>>>         void (*fill_helper)(struct bpf_test *self);
> >>>>         uint8_t runs;
> >>>> -       struct {
> >>>> -               uint32_t retval, retval_unpriv;
> >>>> -               union {
> >>>> -                       __u8 data[TEST_DATA_LEN];
> >>>> -                       __u64 data64[TEST_DATA_LEN / 8];
> >>>> +       union {
> >>>> +               struct {
> >>>
> >>> Maybe consider moving the struct definition outside to further the
> >>> removal of the duplication?
> >>
> >> Can't do that because then retval/retval_unpriv/data won't be
> >> accessible as a normal field of struct bpf_test. It has to be in
> >> anonymous structs/unions, unfortunately.
> >>
> >
> > Ah, right.
> >
> > Meh.
> >
> > I tried something like this:
> >
> > #define BPF_DATA_STRUCT \
> >     struct { \
> >         uint32_t retval, retval_unpriv; \
> >         union { \
> >             __u8 data[TEST_DATA_LEN]; \
> >             __u64 data64[TEST_DATA_LEN / 8]; \
> >         }; \
> >     }
> >
> > and then:
> >
> >     union {
> >         BPF_DATA_STRUCT;
> >         BPF_DATA_STRUCT retvals[MAX_TEST_RUNS];
> >     };
> >
> > And that seems to compile at least. But question is: is this
> > acceptably ugly or unacceptably ugly? :)
>
> Both a bit ugly, but I'd have a slight preference towards the above,
> perhaps a bit more readable like:

Heh, I had slight preference the other way :) I'll update diff with
macro, though.

>
> #define bpf_testdata_struct_t                                   \
>         struct {                                                \
>                 uint32_t retval, retval_unpriv;                 \
>                 union {                                         \
>                         __u8 data[TEST_DATA_LEN];               \
>                         __u64 data64[TEST_DATA_LEN / 8];        \
>                 };                                              \
>         }
>         union {
>                 bpf_testdata_struct_t;
>                 bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
>         };
>
> Thanks,
> Daniel

^ permalink raw reply

* Re: [PATCH bpf-next] net: Don't uninstall an XDP program when none is installed
From: Daniel Borkmann @ 2019-07-12 15:44 UTC (permalink / raw)
  To: Maxim Mikityanskiy, Alexei Starovoitov, David S. Miller
  Cc: Björn Töpel, bpf@vger.kernel.org,
	netdev@vger.kernel.org, Saeed Mahameed, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, Martin KaFai Lau,
	Song Liu, Yonghong Song
In-Reply-To: <3124b473-1322-e98e-d5ab-60e584e74200@mellanox.com>

On 07/10/2019 01:16 PM, Maxim Mikityanskiy wrote:
> On 2019-06-12 19:14, Maxim Mikityanskiy wrote:
>> dev_change_xdp_fd doesn't perform any checks in case it uninstalls an
>> XDP program. It means that the driver's ndo_bpf can be called with
>> XDP_SETUP_PROG asking to set it to NULL even if it's already NULL. This
>> case happens if the user runs `ip link set eth0 xdp off` when there is
>> no XDP program attached.
>>
>> The drivers typically perform some heavy operations on XDP_SETUP_PROG,
>> so they all have to handle this case internally to return early if it
>> happens. This patch puts this check into the kernel code, so that all
>> drivers will benefit from it.
>>
>> Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
>> ---
>> Björn, please take a look at this, Saeed told me you were doing
>> something related, but I couldn't find it. If this fix is already
>> covered by your work, please tell about that.
>>
>>   net/core/dev.c | 3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index 66f7508825bd..68b3e3320ceb 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -8089,6 +8089,9 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
>>   			bpf_prog_put(prog);
>>   			return -EINVAL;
>>   		}
>> +	} else {
>> +		if (!__dev_xdp_query(dev, bpf_op, query))
>> +			return 0;
>>   	}
>>   
>>   	err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
>>
> 
> Alexei, so what about this patch? It's marked as "Changed Requested" in 
> patchwork, but Jakub's point looks resolved - I don't see any changes 
> required from my side.

I believe part of Jakub's feedback was that if we make this generic that this
does not generally address the case where both prog pointers are equal (whether
NULL or non-NULL).

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH v2 bpf-next 0/3] fix BTF verification size resolution
From: Andrii Nakryiko @ 2019-07-12 15:42 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Yonghong Song, Andrii Nakryiko, bpf@vger.kernel.org,
	netdev@vger.kernel.org, Alexei Starovoitov, Kernel Team
In-Reply-To: <bf74b176-9321-c175-359d-4c5cf58a72b4@iogearbox.net>

On Fri, Jul 12, 2019 at 5:59 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 07/12/2019 08:03 AM, Yonghong Song wrote:
> > On 7/10/19 11:53 PM, Andrii Nakryiko wrote:
> >> BTF size resolution logic isn't always resolving type size correctly, leading
> >> to erroneous map creation failures due to value size mismatch.
> >>
> >> This patch set:
> >> 1. fixes the issue (patch #1);
> >> 2. adds tests for trickier cases (patch #2);
> >> 3. and converts few test cases utilizing BTF-defined maps, that previously
> >>     couldn't use typedef'ed arrays due to kernel bug (patch #3).
> >>
> >> Patch #1 can be applied against bpf tree, but selftest ones (#2 and #3) have
> >> to go against bpf-next for now.
> >
> > Why #2 and #3 have to go to bpf-next? bpf tree also accepts tests,
> > AFAIK. Maybe leave for Daniel and Alexei to decide in this particular case.
>
> Yes, corresponding test cases for fixes are also accepted for bpf tree, thanks.

Thanks for merging, Daniel! My thinking was that at the time I posted
patch set, BTF-defined map tests weren't yet merged into bpf, so I
assumed it has to go against bpf-next.

>
> >> Andrii Nakryiko (3):
> >>    bpf: fix BTF verifier size resolution logic
> >>    selftests/bpf: add trickier size resolution tests
> >>    selftests/bpf: use typedef'ed arrays as map values
> >
> > Looks good to me. Except minor comments in patch 1/3, Ack the series.
> > Acked-by: Yonghong Song <yhs@fb.com>
> >
> >>
> >>   kernel/bpf/btf.c                              | 14 ++-
> >>   .../bpf/progs/test_get_stack_rawtp.c          |  3 +-
> >>   .../bpf/progs/test_stacktrace_build_id.c      |  3 +-
> >>   .../selftests/bpf/progs/test_stacktrace_map.c |  2 +-
> >>   tools/testing/selftests/bpf/test_btf.c        | 88 +++++++++++++++++++
> >>   5 files changed, 102 insertions(+), 8 deletions(-)
> >>
>

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/3] bpf: fix BTF verifier size resolution logic
From: Andrii Nakryiko @ 2019-07-12 15:36 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Andrii Nakryiko, bpf@vger.kernel.org, netdev@vger.kernel.org,
	Alexei Starovoitov, daniel@iogearbox.net, Kernel Team, Martin Lau
In-Reply-To: <ad29872e-a127-f21e-5581-03df5a388a55@fb.com>

On Thu, Jul 11, 2019 at 10:59 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 7/10/19 11:53 PM, Andrii Nakryiko wrote:
> > BTF verifier has a size resolution bug which in some circumstances leads to
> > invalid size resolution for, e.g., TYPEDEF modifier.  This happens if we have
> > [1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
> > context ARRAY size won't be resolved (because for pointer it doesn't matter, so
> > it's a sink in pointer context), but it will be permanently remembered as zero
> > for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
> > be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
> > This, subsequently, will lead to erroneous map creation failure, if that
> > TYPEDEF is specified as either key or value, as key_size/value_size won't
> > correspond to resolved size of TYPEDEF (kernel will believe it's zero).
> >
> > Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
> > won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
> > calculated and stored.
> >
> > This bug manifests itself in rejecting BTF-defined maps that use array
> > typedef as a value type:
> >
> > typedef int array_t[16];
> >
> > struct {
> >      __uint(type, BPF_MAP_TYPE_ARRAY);
> >      __type(value, array_t); /* i.e., array_t *value; */
> > } test_map SEC(".maps");
> >
> > The fix consists on not relying on modifier's resolved_size and instead using
> > modifier's resolved_id (type ID for "concrete" type to which modifier
> > eventually resolves) and doing size determination for that resolved type. This
> > allow to preserve existing "early DFS termination" logic for PTR or
> > STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
> > types.
> >
> > Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
> > Cc: Martin KaFai Lau <kafai@fb.com>
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> > ---
> >   kernel/bpf/btf.c | 14 ++++++++++----
> >   1 file changed, 10 insertions(+), 4 deletions(-)
> >
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index cad09858a5f2..22fe8b155e51 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -1073,11 +1073,18 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
> >                                !btf_type_is_var(size_type)))
> >                       return NULL;
> >
> > -             size = btf->resolved_sizes[size_type_id];
> >               size_type_id = btf->resolved_ids[size_type_id];
> >               size_type = btf_type_by_id(btf, size_type_id);
> >               if (btf_type_nosize_or_null(size_type))
> >                       return NULL;
> > +             else if (btf_type_has_size(size_type))
> > +                     size = size_type->size;
> > +             else if (btf_type_is_array(size_type))
> > +                     size = btf->resolved_sizes[size_type_id];
> > +             else if (btf_type_is_ptr(size_type))
> > +                     size = sizeof(void *);
> > +             else
> > +                     return NULL;
>
> Looks good to me. Not sure whether we need to do any adjustment for
> var kind or not. Maybe we can do similar change in btf_var_resolve()

I don't think btf_var_resolve() needs any change. btf_var_resolve
can't be referenced by modifier, so it doesn't have any problem. It's
similar to array in that it's size will be determined correctly.

But I think btf_type_id_size() doesn't handle var case correctly, I'll do

+             else if (btf_type_is_array(size_type) ||
btf_type_is_var(size_type))
+                     size = btf->resolved_sizes[size_type_id];

to fix that.

> to btf_modifier_resolve()? But I do not think it impacts correctness
> similar to btf_modifier_resolve() below as you changed
> btf_type_id_size() implementation in the above.
>
> >       }
> >
> >       *type_id = size_type_id;
> > @@ -1602,7 +1609,6 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
> >       const struct btf_type *next_type;
> >       u32 next_type_id = t->type;
> >       struct btf *btf = env->btf;
> > -     u32 next_type_size = 0;
> >
> >       next_type = btf_type_by_id(btf, next_type_id);
> >       if (!next_type || btf_type_is_resolve_source_only(next_type)) {
> > @@ -1620,7 +1626,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
> >        * save us a few type-following when we use it later (e.g. in
> >        * pretty print).
> >        */
> > -     if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
> > +     if (!btf_type_id_size(btf, &next_type_id, NULL)) {
> >               if (env_type_is_resolved(env, next_type_id))
> >                       next_type = btf_type_id_resolve(btf, &next_type_id);
> >
> > @@ -1633,7 +1639,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
> >               }
> >       }
> >
> > -     env_stack_pop_resolved(env, next_type_id, next_type_size);
> > +     env_stack_pop_resolved(env, next_type_id, 0);
> >
> >       return 0;
> >   }
> >

^ permalink raw reply

* [PATCH] net: dsa: qca8k: replace legacy gpio include
From: Christian Lamparter @ 2019-07-12 15:33 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Florian Fainelli, Vivien Didelot, Andrew Lunn,
	kbuild test robot

This patch replaces the legacy bulk gpio.h include
with the proper gpio/consumer.h variant. This was
caught by the kbuild test robot that was running
into an error because of this.

For more information why linux/gpio.h is bad can be found in:
commit 56a46b6144e7 ("gpio: Clarify that <linux/gpio.h> is legacy")

Reported-by: kbuild test robot <lkp@intel.com>
Link: https://www.spinics.net/lists/netdev/msg584447.html
Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature")
Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
---
 drivers/net/dsa/qca8k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 27709f866c23..232e8cc96f6d 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -14,7 +14,7 @@
 #include <linux/of_platform.h>
 #include <linux/if_bridge.h>
 #include <linux/mdio.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
 
 #include "qca8k.h"
-- 
2.20.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox