netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path
@ 2013-09-04  9:43 Ding Tianhong
  2013-09-04 10:18 ` Veaceslav Falico
  0 siblings, 1 reply; 5+ messages in thread
From: Ding Tianhong @ 2013-09-04  9:43 UTC (permalink / raw)
  To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Veaceslav Falico, Netdev

The commit 278b20837511776dc9d5f6ee1c7fabd5479838bb
(bonding: initial RCU conversion) has converted the roundrobin, active-backup,
broadcast and xor xmit paths to RCU protection, which improves performance
for these modes, so this time, convert the xmit path for 3ad mode as well.

Suggested-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Cc: Nikolay Aleksandrov <nikolay@redhat.com>
Cc: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bond_3ad.c | 32 ++++++++++++++------------------
 drivers/net/bonding/bonding.h  | 22 ++++++++++++++++++++++
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 9010265..7a3860f 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -143,7 +143,7 @@ static inline struct bonding *__get_bond_by_port(struct port *port)
  */
 static inline struct port *__get_first_port(struct bonding *bond)
 {
-	struct slave *first_slave = bond_first_slave(bond);
+	struct slave *first_slave = bond_first_slave_rcu(bond);
 
 	return first_slave ? &(SLAVE_AD_INFO(first_slave).port) : NULL;
 }
@@ -163,7 +163,7 @@ static inline struct port *__get_next_port(struct port *port)
 	// If there's no bond for this port, or this is the last slave
 	if (bond == NULL)
 		return NULL;
-	slave_next = bond_next_slave(bond, slave);
+	slave_next = bond_next_slave_rcu(bond, slave);
 	if (!slave_next || bond_is_first_slave(bond, slave_next))
 		return NULL;
 
@@ -2417,16 +2417,14 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 
 int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 {
-	struct slave *slave, *start_at;
 	struct bonding *bond = netdev_priv(dev);
+	struct slave *slave;
 	int slave_agg_no;
 	int slaves_in_agg;
 	int agg_id;
-	int i;
 	struct ad_info ad_info;
 	int res = 1;
 
-	read_lock(&bond->lock);
 	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
 		pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
 			 dev->name);
@@ -2444,13 +2442,17 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 
 	slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
 
-	bond_for_each_slave(bond, slave) {
+	bond_for_each_slave_rcu(bond, slave) {
 		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
 
 		if (agg && (agg->aggregator_identifier == agg_id)) {
-			slave_agg_no--;
-			if (slave_agg_no < 0)
-				break;
+			if (--slave_agg_no < 0) {
+				if (SLAVE_IS_OK(slave)) {
+					res = bond_dev_queue_xmit(bond,
+						skb, slave->dev);
+					goto out;
+				}
+			}
 		}
 	}
 
@@ -2460,23 +2462,17 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
-	start_at = slave;
-
-	bond_for_each_slave_from(bond, slave, i, start_at) {
-		int slave_agg_id = 0;
+	bond_for_each_slave_rcu(bond, slave) {
 		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
 
-		if (agg)
-			slave_agg_id = agg->aggregator_identifier;
-
-		if (SLAVE_IS_OK(slave) && agg && (slave_agg_id == agg_id)) {
+		if (SLAVE_IS_OK(slave) && agg &&
+			agg->aggregator_identifier == agg_id) {
 			res = bond_dev_queue_xmit(bond, skb, slave->dev);
 			break;
 		}
 	}
 
 out:
-	read_unlock(&bond->lock);
 	if (res) {
 		/* no suitable interface, frame not sent */
 		kfree_skb(skb);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 4bf52d5..9898493 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -74,6 +74,9 @@
 /* slave list primitives */
 #define bond_to_slave(ptr) list_entry(ptr, struct slave, list)
 
+/* slave list primitives, Caller must hold rcu_read_lock */
+#define bond_to_slave_rcu(ptr) list_entry_rcu(ptr, struct slave, list)
+
 /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */
 #define bond_first_slave(bond) \
 	list_first_entry_or_null(&(bond)->slave_list, struct slave, list)
@@ -81,6 +84,16 @@
 	(list_empty(&(bond)->slave_list) ? NULL : \
 					   bond_to_slave((bond)->slave_list.prev))
 
+/**
+ * IMPORTANT: bond_first/last_slave_rcu can return NULL in case of an empty list
+ * Caller must hold rcu_read_lock
+ */
+#define bond_first_slave_rcu(bond) \
+	list_first_or_null_rcu(&(bond)->slave_list, struct slave, list)
+#define bond_last_slave_rcu(bond) \
+	(list_empty(&(bond)->slave_list) ? NULL : \
+						bond_to_slave_rcu((bond)->slave_list.prev))
+
 #define bond_is_first_slave(bond, pos) ((pos)->list.prev == &(bond)->slave_list)
 #define bond_is_last_slave(bond, pos) ((pos)->list.next == &(bond)->slave_list)
 
@@ -93,6 +106,15 @@
 	(bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \
 					  bond_to_slave((pos)->list.prev))
 
+/* Since bond_first/last_slave_rcu can return NULL, these can return NULL too */
+#define bond_next_slave_rcu(bond, pos) \
+	(bond_is_last_slave(bond, pos) ? bond_first_slave_rcu(bond) : \
+					 bond_to_slave_rcu((pos)->list.next))
+
+#define bond_prev_slave_rcu(bond, pos) \
+	(bond_is_first_slave(bond, pos) ? bond_last_slave_rcu(bond) : \
+					  bond_to_slave_rcu((pos)->list.prev))
+
 /**
  * bond_for_each_slave_from - iterate the slaves list from a starting point
  * @bond:	the bond holding this list.
-- 
1.8.2.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path
  2013-09-04  9:43 [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path Ding Tianhong
@ 2013-09-04 10:18 ` Veaceslav Falico
  2013-09-04 14:53   ` Ding Tianhong
  2013-09-04 16:25   ` David Miller
  0 siblings, 2 replies; 5+ messages in thread
From: Veaceslav Falico @ 2013-09-04 10:18 UTC (permalink / raw)
  To: Ding Tianhong
  Cc: Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Netdev

On Wed, Sep 04, 2013 at 05:43:45PM +0800, Ding Tianhong wrote:
...snip...
>+/**
>+ * IMPORTANT: bond_first/last_slave_rcu can return NULL in case of an empty list
>+ * Caller must hold rcu_read_lock
>+ */
>+#define bond_first_slave_rcu(bond) \
>+	list_first_or_null_rcu(&(bond)->slave_list, struct slave, list)
>+#define bond_last_slave_rcu(bond) \
>+	(list_empty(&(bond)->slave_list) ? NULL : \
>+						bond_to_slave_rcu((bond)->slave_list.prev))

Here, bond_last_slave_rcu() is racy. The list can be non-empty when
list_empty() is verified, however afterwards it might become empty, when
you call bond_to_slave_rcu(), and thus you'll get
bond_to_slave(bond->slave_list) in the result, which is not a slave.

Take a look at list_first_or_null_rcu() for a reference. The main idea is
that it first gets the ->next pointer, with RCU protection, and then
verifies if it's the list head or not, and if not - it gets the container
already. This way the ->next pointer won't get away.

These kind of bugs are really rare, but are *EXTREMELY* hard to debug.

>+
> #define bond_is_first_slave(bond, pos) ((pos)->list.prev == &(bond)->slave_list)
> #define bond_is_last_slave(bond, pos) ((pos)->list.next == &(bond)->slave_list)
>
>@@ -93,6 +106,15 @@
> 	(bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \
> 					  bond_to_slave((pos)->list.prev))
>
>+/* Since bond_first/last_slave_rcu can return NULL, these can return NULL too */
>+#define bond_next_slave_rcu(bond, pos) \
>+	(bond_is_last_slave(bond, pos) ? bond_first_slave_rcu(bond) : \
>+					 bond_to_slave_rcu((pos)->list.next))
>+
>+#define bond_prev_slave_rcu(bond, pos) \
>+	(bond_is_first_slave(bond, pos) ? bond_last_slave_rcu(bond) : \
>+					  bond_to_slave_rcu((pos)->list.prev))
>+

These two are also racy. bond_is_last/first_slave() is not rcu-ified, and
thus you can't rely on it without proper locking. Same ideas apply as per
bond_first_slave_rcu().

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path
  2013-09-04 10:18 ` Veaceslav Falico
@ 2013-09-04 14:53   ` Ding Tianhong
  2013-09-04 16:25   ` David Miller
  1 sibling, 0 replies; 5+ messages in thread
From: Ding Tianhong @ 2013-09-04 14:53 UTC (permalink / raw)
  To: Veaceslav Falico
  Cc: Ding Tianhong, Jay Vosburgh, Andy Gospodarek, David S. Miller,
	Nikolay Aleksandrov, Netdev

于 2013/9/4 18:18, Veaceslav Falico 写道:
> On Wed, Sep 04, 2013 at 05:43:45PM +0800, Ding Tianhong wrote:
> ...snip...
>> +/**
>> + * IMPORTANT: bond_first/last_slave_rcu can return NULL in case of 
>> an empty list
>> + * Caller must hold rcu_read_lock
>> + */
>> +#define bond_first_slave_rcu(bond) \
>> + list_first_or_null_rcu(&(bond)->slave_list, struct slave, list)
>> +#define bond_last_slave_rcu(bond) \
>> + (list_empty(&(bond)->slave_list) ? NULL : \
>> + bond_to_slave_rcu((bond)->slave_list.prev))
>
> Here, bond_last_slave_rcu() is racy. The list can be non-empty when
> list_empty() is verified, however afterwards it might become empty, when
> you call bond_to_slave_rcu(), and thus you'll get
> bond_to_slave(bond->slave_list) in the result, which is not a slave.
>
> Take a look at list_first_or_null_rcu() for a reference. The main idea is
> that it first gets the ->next pointer, with RCU protection, and then
> verifies if it's the list head or not, and if not - it gets the container
> already. This way the ->next pointer won't get away.
>
> These kind of bugs are really rare, but are *EXTREMELY* hard to debug.
Thanks for your response and opinions, but I think you are missing something:
the slave_list will not change inside the rcu_read_lock section, so the bug
will not happen.

>
>> +
>> #define bond_is_first_slave(bond, pos) ((pos)->list.prev == 
>> &(bond)->slave_list)
>> #define bond_is_last_slave(bond, pos) ((pos)->list.next == 
>> &(bond)->slave_list)
>>
>> @@ -93,6 +106,15 @@
>> (bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \
>> bond_to_slave((pos)->list.prev))
>>
>> +/* Since bond_first/last_slave_rcu can return NULL, these can return 
>> NULL too */
>> +#define bond_next_slave_rcu(bond, pos) \
>> + (bond_is_last_slave(bond, pos) ? bond_first_slave_rcu(bond) : \
>> + bond_to_slave_rcu((pos)->list.next))
>> +
>> +#define bond_prev_slave_rcu(bond, pos) \
>> + (bond_is_first_slave(bond, pos) ? bond_last_slave_rcu(bond) : \
>> + bond_to_slave_rcu((pos)->list.prev))
>> +
>
> These two are also racy. bond_is_last/first_slave() is not rcu-ified, and
> thus you can't rely on it without proper locking. Same ideas apply as per
> bond_first_slave_rcu().
> -- 
refer to the above answer.

> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path
  2013-09-04 10:18 ` Veaceslav Falico
  2013-09-04 14:53   ` Ding Tianhong
@ 2013-09-04 16:25   ` David Miller
  2013-09-05  2:06     ` Ding Tianhong
  1 sibling, 1 reply; 5+ messages in thread
From: David Miller @ 2013-09-04 16:25 UTC (permalink / raw)
  To: vfalico; +Cc: dingtianhong, fubar, andy, nikolay, netdev

From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 4 Sep 2013 12:18:24 +0200

> On Wed, Sep 04, 2013 at 05:43:45PM +0800, Ding Tianhong wrote:
> ...snip...
>>+/**
>>+ * IMPORTANT: bond_first/last_slave_rcu can return NULL in case of an
>>empty list
>>+ * Caller must hold rcu_read_lock
>>+ */
>>+#define bond_first_slave_rcu(bond) \
>>+	list_first_or_null_rcu(&(bond)->slave_list, struct slave, list)
>>+#define bond_last_slave_rcu(bond) \
>>+	(list_empty(&(bond)->slave_list) ? NULL : \
>>+ bond_to_slave_rcu((bond)->slave_list.prev))
> 
> Here, bond_last_slave_rcu() is racy. The list can be non-empty when
> list_empty() is verified, however afterwards it might become empty,
> when
> you call bond_to_slave_rcu(), and thus you'll get
> bond_to_slave(bond->slave_list) in the result, which is not a slave.
> 
> Take a look at list_first_or_null_rcu() for a reference. The main idea
> is
> that it first gets the ->next pointer, with RCU protection, and then
> verifies if it's the list head or not, and if not - it gets the
> container
> already. This way the ->next pointer won't get away.
> 
> These kind of bugs are really rare, but are *EXTREMELY* hard to debug.

I agree with this analysis.

Ding, "rcu_read_lock()" doesn't "lock" anything.  It's just a memory
barrier.

All the list can still change on you asynchronously to your accesses.

That's why list_first_or_null_rcu() is so carefully arranged.
Therefore, you must make similar accommodations.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path
  2013-09-04 16:25   ` David Miller
@ 2013-09-05  2:06     ` Ding Tianhong
  0 siblings, 0 replies; 5+ messages in thread
From: Ding Tianhong @ 2013-09-05  2:06 UTC (permalink / raw)
  To: David Miller; +Cc: vfalico, fubar, andy, nikolay, netdev

On 2013/9/5 0:25, David Miller wrote:
> From: Veaceslav Falico <vfalico@redhat.com>
> Date: Wed, 4 Sep 2013 12:18:24 +0200
> 
>> On Wed, Sep 04, 2013 at 05:43:45PM +0800, Ding Tianhong wrote:
>> ...snip...
>>> +/**
>>> + * IMPORTANT: bond_first/last_slave_rcu can return NULL in case of an
>>> empty list
>>> + * Caller must hold rcu_read_lock
>>> + */
>>> +#define bond_first_slave_rcu(bond) \
>>> +	list_first_or_null_rcu(&(bond)->slave_list, struct slave, list)
>>> +#define bond_last_slave_rcu(bond) \
>>> +	(list_empty(&(bond)->slave_list) ? NULL : \
>>> + bond_to_slave_rcu((bond)->slave_list.prev))
>>
>> Here, bond_last_slave_rcu() is racy. The list can be non-empty when
>> list_empty() is verified, however afterwards it might become empty,
>> when
>> you call bond_to_slave_rcu(), and thus you'll get
>> bond_to_slave(bond->slave_list) in the result, which is not a slave.
>>
>> Take a look at list_first_or_null_rcu() for a reference. The main idea
>> is
>> that it first gets the ->next pointer, with RCU protection, and then
>> verifies if it's the list head or not, and if not - it gets the
>> container
>> already. This way the ->next pointer won't get away.
>>
>> These kind of bugs are really rare, but are *EXTREMELY* hard to debug.
> 
> I agree with this analysis.
> 
> Ding, "rcu_read_lock()" doesn't "lock" anything.  It's just a memory
> barrier.
> 
> All the list can still change on you asynchronously to your accesses.
> 
> That's why list_first_or_null_rcu() is so carefully arranged.
> Therefore, you must make similar accommodations.
> 
> 
> 

Yes, after thinking about it for a long time, I found the problem and know what to do next; I will fix it and resend later.

> .
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2013-09-05  2:06 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-04  9:43 [PATCH net-next v2 1/6] bonding: simplify and use RCU protection for 3ad xmit path Ding Tianhong
2013-09-04 10:18 ` Veaceslav Falico
2013-09-04 14:53   ` Ding Tianhong
2013-09-04 16:25   ` David Miller
2013-09-05  2:06     ` Ding Tianhong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).