[PATCH net-next v2] rocker: move netevent neigh update to processes context

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH net-next v2] rocker: move netevent neigh update to processes context
@ 2015-06-03  3:43 sfeldma
  2015-06-04  6:38 ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: sfeldma @ 2015-06-03  3:43 UTC (permalink / raw)
  To: netdev; +Cc: jiri, simon.horman, makita.toshiaki

From: Scott Feldman <sfeldma@gmail.com>

v2:

Changes based on review:

- David Miller <davem@davemloft.net> raise problem with system_wq not
  preserving queue order to execution order.  To fix, use driver-private
  ordered workqueue to preserve ordering of queued work.

- Jiri Pirko <jiri@resnulli.us> small change on kfree of work queue item.

v1:

In review of Simon's patchset "rocker: transaction fixes". it was noted
that rocker->neigh_tbl_next_index was unprotected in the call path below
and could race with other contexts calling rocker_port_ipv4_neigh():

	arp_process()
	neigh_update()
	rocker_neigh_update()
	rocker_port_ipv4_neigh()

To fix, move the neigh_update() event processing to process contexts and
hold rtnl_lock to call rocker_port_ipv4_neigh().  This will protect
rocker->neigh_tbl_next_index accesses and is more consistent with the rest
of the driver code where non-I/O processing is done under process context
with rtnl_lock held.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/rocker/rocker.c |   71 +++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index 819289e..aaf5e38 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -240,6 +240,9 @@ struct rocker {
 	spinlock_t cmd_ring_lock;		/* for cmd ring accesses */
 	struct rocker_dma_ring_info cmd_ring;
 	struct rocker_dma_ring_info event_ring;
+	struct workqueue_struct *wq;		/* ordered workqueue for
+						 * async processing
+						 */
 	DECLARE_HASHTABLE(flow_tbl, 16);
 	spinlock_t flow_tbl_lock;		/* for flow tbl accesses */
 	u64 flow_tbl_next_cookie;
@@ -1464,7 +1467,7 @@ static void rocker_event_mac_vlan_seen_work(struct work_struct *work)
 			sw->addr, sw->vlan_id, sw->flags);
 	rtnl_unlock();
 
-	kfree(work);
+	kfree(sw);
 }
 
 static int rocker_event_mac_vlan_seen(const struct rocker *rocker,
@@ -1508,7 +1511,7 @@ static int rocker_event_mac_vlan_seen(const struct rocker *rocker,
 	ether_addr_copy(sw->addr, addr);
 	sw->vlan_id = vlan_id;
 
-	schedule_work(&sw->work);
+	queue_work(rocker_port->rocker->wq, &sw->work);
 
 	return 0;
 }
@@ -3523,6 +3526,7 @@ static int rocker_port_fdb_learn(struct rocker_port *rocker_port,
 				 enum switchdev_trans trans, int flags,
 				 const u8 *addr, __be16 vlan_id)
 {
+	struct rocker *rocker = rocker_port->rocker;
 	struct rocker_fdb_learn_work *lw;
 	enum rocker_of_dpa_table_id goto_tbl =
 		ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
@@ -3565,7 +3569,7 @@ static int rocker_port_fdb_learn(struct rocker_port *rocker_port,
 	if (trans == SWITCHDEV_TRANS_PREPARE)
 		rocker_port_kfree(trans, lw);
 	else
-		schedule_work(&lw->work);
+		queue_work(rocker->wq, &lw->work);
 
 	return 0;
 }
@@ -4962,6 +4966,12 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!rocker)
 		return -ENOMEM;
 
+	rocker->wq = alloc_ordered_workqueue("rocker", 0);
+	if (!rocker->wq) {
+		dev_err(&pdev->dev, "allocate workqueue failed\n");
+		goto err_wq;
+	}
+
 	err = pci_enable_device(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "pci_enable_device failed\n");
@@ -5081,6 +5091,8 @@ err_pci_set_dma_mask:
 err_pci_request_regions:
 	pci_disable_device(pdev);
 err_pci_enable_device:
+	destroy_workqueue(rocker->wq);
+err_wq:
 	kfree(rocker);
 	return err;
 }
@@ -5099,6 +5111,7 @@ static void rocker_remove(struct pci_dev *pdev)
 	iounmap(rocker->hw_addr);
 	pci_release_regions(rocker->pdev);
 	pci_disable_device(rocker->pdev);
+	destroy_workqueue(rocker->wq);
 	kfree(rocker);
 }
 
@@ -5224,14 +5237,54 @@ static struct notifier_block rocker_netdevice_nb __read_mostly = {
  * Net event notifier event handler
  ************************************/
 
-static int rocker_neigh_update(struct net_device *dev, struct neighbour *n)
+struct rocker_neigh_update_work {
+	struct work_struct work;
+	struct rocker_port *rocker_port;
+	int flags;
+	__be32 ip_addr;
+	unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
+};
+
+static void rocker_event_neigh_update_work(struct work_struct *work)
+{
+	struct rocker_neigh_update_work *nw =
+		container_of(work, struct rocker_neigh_update_work, work);
+	int err;
+
+	rtnl_lock();
+	err = rocker_port_ipv4_neigh(nw->rocker_port, SWITCHDEV_TRANS_NONE,
+				     nw->flags, nw->ip_addr, nw->ha);
+	rtnl_unlock();
+
+	if (err)
+		netdev_warn(nw->rocker_port->dev,
+			    "failed to handle neigh %pI4 update (err %d)\n",
+			    &nw->ip_addr, err);
+
+	kfree(nw);
+}
+
+static int rocker_event_neigh_update(struct net_device *dev,
+				     struct neighbour *n)
 {
 	struct rocker_port *rocker_port = netdev_priv(dev);
-	int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
-	__be32 ip_addr = *(__be32 *)n->primary_key;
+	struct rocker *rocker = rocker_port->rocker;
+	struct rocker_neigh_update_work *nw;
 
-	return rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE,
-				      flags, ip_addr, n->ha);
+	nw = kmalloc(sizeof(*nw), GFP_ATOMIC);
+	if (!nw)
+		return -ENOMEM;
+
+	INIT_WORK(&nw->work, rocker_event_neigh_update_work);
+
+	nw->rocker_port = rocker_port;
+	nw->flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
+	nw->ip_addr = *(__be32 *)n->primary_key;
+	memcpy(nw->ha, n->ha, sizeof(nw->ha));
+
+	queue_work(rocker->wq, &nw->work);
+
+	return 0;
 }
 
 static int rocker_netevent_event(struct notifier_block *unused,
@@ -5248,7 +5301,7 @@ static int rocker_netevent_event(struct notifier_block *unused,
 		dev = n->dev;
 		if (!rocker_port_dev_check(dev))
 			return NOTIFY_DONE;
-		err = rocker_neigh_update(dev, n);
+		err = rocker_event_neigh_update(dev, n);
 		if (err)
 			netdev_warn(dev,
 				    "failed to handle neigh update (err %d)\n",
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-03  3:43 [PATCH net-next v2] rocker: move netevent neigh update to processes context sfeldma
@ 2015-06-04  6:38 ` David Miller
  2015-06-04  8:20   ` Simon Horman
  0 siblings, 1 reply; 10+ messages in thread
From: David Miller @ 2015-06-04  6:38 UTC (permalink / raw)
  To: sfeldma; +Cc: netdev, jiri, simon.horman, makita.toshiaki

From: sfeldma@gmail.com
Date: Tue,  2 Jun 2015 20:43:28 -0700

> From: Scott Feldman <sfeldma@gmail.com>
> 
> v2:
> 
> Changes based on review:
> 
> - David Miller <davem@davemloft.net> raise problem with system_wq not
>   preserving queue order to execution order.  To fix, use driver-private
>   ordered workqueue to preserve ordering of queued work.
> 
> - Jiri Pirko <jiri@resnulli.us> small change on kfree of work queue item.
> 
> v1:
> 
> In review of Simon's patchset "rocker: transaction fixes". it was noted
> that rocker->neigh_tbl_next_index was unprotected in the call path below
> and could race with other contexts calling rocker_port_ipv4_neigh():

How it rocker->neigh_tbl_next_index not protected?

rocker->neigh_tbl_lock is _always_ held when it is accessed.

This patch, therefore, looks like completely unnecessary complexity
to me.  Furthermore, I would completely prefer if the operation stays
completely synchronous to the call path where the neigh operation
occurs rather than throwing it out to a workqueue.

Thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04  6:38 ` David Miller
@ 2015-06-04  8:20   ` Simon Horman
  2015-06-04  8:34     ` David Miller
  2015-06-04 16:12     ` Scott Feldman
  0 siblings, 2 replies; 10+ messages in thread
From: Simon Horman @ 2015-06-04  8:20 UTC (permalink / raw)
  To: David Miller; +Cc: sfeldma, netdev, jiri, makita.toshiaki

Hi Dave,

On Wed, Jun 03, 2015 at 11:38:29PM -0700, David Miller wrote:
> From: sfeldma@gmail.com
> Date: Tue,  2 Jun 2015 20:43:28 -0700
> 
> > From: Scott Feldman <sfeldma@gmail.com>
> > 
> > v2:
> > 
> > Changes based on review:
> > 
> > - David Miller <davem@davemloft.net> raise problem with system_wq not
> >   preserving queue order to execution order.  To fix, use driver-private
> >   ordered workqueue to preserve ordering of queued work.
> > 
> > - Jiri Pirko <jiri@resnulli.us> small change on kfree of work queue item.
> > 
> > v1:
> > 
> > In review of Simon's patchset "rocker: transaction fixes". it was noted
> > that rocker->neigh_tbl_next_index was unprotected in the call path below
> > and could race with other contexts calling rocker_port_ipv4_neigh():
> 
> How it rocker->neigh_tbl_next_index not protected?
> 
> rocker->neigh_tbl_lock is _always_ held when it is accessed.
> 
> This patch, therefore, looks like completely unnecessary complexity
> to me.  Furthermore, I would completely prefer if the operation stays
> completely synchronous to the call path where the neigh operation
> occurs rather than throwing it out to a workqueue.

What I was seeing is as follows:

1. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
   with trans = SWITCHDEV_TRANS_PREPARE

2. rocker_port_ipv4_neigh() is called by rocker_neigh_update()
   with trans = SWITCHDEV_TRANS_NONE.

   The call chain goes up to arp_process() via neigh_update().

3. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
   with trans = SWITCHDEV_TRANS_COMMIT

#1 and #2 are guarded by rtl across those calls but
#2 is not guarded by rtnl.

Inside both rocker_port_ipv4_nh() and rocker_port_ipv4_neigh()
neigh_tbl_lock lock is taken but it is not held across the
two calls to rocker_port_ipv4_nh within a single prepare->commit transaction.

I can double check that the above still occurs, but I'm not aware of any
recent changes that would cause it not to occur any more.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04  8:20   ` Simon Horman
@ 2015-06-04  8:34     ` David Miller
  2015-06-04  9:07       ` Simon Horman
  2015-06-04 16:12     ` Scott Feldman
  1 sibling, 1 reply; 10+ messages in thread
From: David Miller @ 2015-06-04  8:34 UTC (permalink / raw)
  To: simon.horman; +Cc: sfeldma, netdev, jiri, makita.toshiaki

From: Simon Horman <simon.horman@netronome.com>
Date: Thu, 4 Jun 2015 17:20:48 +0900

> What I was seeing is as follows:
> 
> 1. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
>    with trans = SWITCHDEV_TRANS_PREPARE
> 
> 2. rocker_port_ipv4_neigh() is called by rocker_neigh_update()
>    with trans = SWITCHDEV_TRANS_NONE.
> 
>    The call chain goes up to arp_process() via neigh_update().
> 
> 3. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
>    with trans = SWITCHDEV_TRANS_COMMIT
> 
> #1 and #2 are guarded by rtl across those calls but
> #2 is not guarded by rtnl.
> 
> Inside both rocker_port_ipv4_nh() and rocker_port_ipv4_neigh()
> neigh_tbl_lock lock is taken but it is not held across the
> two calls to rocker_port_ipv4_nh within a single prepare->commit transaction.
> 
> I can double check that the above still occurs, but I'm not aware of any
> recent changes that would cause it not to occur any more.

Ok, thanks for explaining.

I still don't want this to be moved asynchronosly from the ARP neigh
update event code path.

And therefore I'd like folks to look into another mechanism to fix
this.

If that means the prepare/commit engine is given a spinlock by the
driver to be held across the two calls, so be it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04  8:34     ` David Miller
@ 2015-06-04  9:07       ` Simon Horman
  2015-06-04 15:34         ` Toshiaki Makita
  0 siblings, 1 reply; 10+ messages in thread
From: Simon Horman @ 2015-06-04  9:07 UTC (permalink / raw)
  To: David Miller; +Cc: simon.horman, sfeldma, netdev, jiri, makita.toshiaki

Hi Dave,

On Thu, Jun 04, 2015 at 01:34:09AM -0700, David Miller wrote:
> From: Simon Horman <simon.horman@netronome.com>
> Date: Thu, 4 Jun 2015 17:20:48 +0900
> 
> > What I was seeing is as follows:
> > 
> > 1. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
> >    with trans = SWITCHDEV_TRANS_PREPARE
> > 
> > 2. rocker_port_ipv4_neigh() is called by rocker_neigh_update()
> >    with trans = SWITCHDEV_TRANS_NONE.
> > 
> >    The call chain goes up to arp_process() via neigh_update().
> > 
> > 3. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
> >    with trans = SWITCHDEV_TRANS_COMMIT
> > 
> > #1 and #2 are guarded by rtl across those calls but
> > #2 is not guarded by rtnl.
> > 
> > Inside both rocker_port_ipv4_nh() and rocker_port_ipv4_neigh()
> > neigh_tbl_lock lock is taken but it is not held across the
> > two calls to rocker_port_ipv4_nh within a single prepare->commit transaction.
> > 
> > I can double check that the above still occurs, but I'm not aware of any
> > recent changes that would cause it not to occur any more.
> 
> Ok, thanks for explaining.
> 
> I still don't want this to be moved asynchronosly from the ARP neigh
> update event code path.
> 
> And therefore I'd like folks to look into another mechanism to fix
> this.
> 
> If that means the prepare/commit engine is given a spinlock by the
> driver to be held across the two calls, so be it.

I'm not opposed to such a scheme. But I'd like to note that it seems to me
that for it to resolve the problem above the lock would be need be held
both for "none" and "prepare->commit" transactions.  I think the former may
be a little tricky as it may occur in IRQ context but perhaps I am missing
something obvious.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04  9:07       ` Simon Horman
@ 2015-06-04 15:34         ` Toshiaki Makita
  2015-06-04 18:48           ` David Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Toshiaki Makita @ 2015-06-04 15:34 UTC (permalink / raw)
  To: Simon Horman, David Miller
  Cc: simon.horman, sfeldma, netdev, jiri, makita.toshiaki

On 15/06/04 (木) 18:07, Simon Horman wrote:
> Hi Dave,
>
> On Thu, Jun 04, 2015 at 01:34:09AM -0700, David Miller wrote:
>> From: Simon Horman <simon.horman@netronome.com>
>> Date: Thu, 4 Jun 2015 17:20:48 +0900
>>
>>> What I was seeing is as follows:
>>>
>>> 1. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
>>>     with trans = SWITCHDEV_TRANS_PREPARE
>>>
>>> 2. rocker_port_ipv4_neigh() is called by rocker_neigh_update()
>>>     with trans = SWITCHDEV_TRANS_NONE.
>>>
>>>     The call chain goes up to arp_process() via neigh_update().
>>>
>>> 3. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
>>>     with trans = SWITCHDEV_TRANS_COMMIT
>>>
>>> #1 and #2 are guarded by rtl across those calls but
>>> #2 is not guarded by rtnl.
>>>
>>> Inside both rocker_port_ipv4_nh() and rocker_port_ipv4_neigh()
>>> neigh_tbl_lock lock is taken but it is not held across the
>>> two calls to rocker_port_ipv4_nh within a single prepare->commit transaction.
>>>
>>> I can double check that the above still occurs, but I'm not aware of any
>>> recent changes that would cause it not to occur any more.
>>
>> Ok, thanks for explaining.
>>
>> I still don't want this to be moved asynchronosly from the ARP neigh
>> update event code path.
>>
>> And therefore I'd like folks to look into another mechanism to fix
>> this.
>>
>> If that means the prepare/commit engine is given a spinlock by the
>> driver to be held across the two calls, so be it.
>
> I'm not opposed to such a scheme. But I'd like to note that it seems to me
> that for it to resolve the problem above the lock would be need be held
> both for "none" and "prepare->commit" transactions.  I think the former may
> be a little tricky as it may occur in IRQ context but perhaps I am missing
> something obvious.

I'm thinking IRQ context does not match the prepare-commit model, and 
Scott's fix is needed. There are more critical problems Scott's patch fixes.
(I shortly explained it before, although it is not clearly stated in the 
commitlog. http://marc.info/?l=linux-netdev&m=143219842420093&w=2)

1. Operations from IRQ context could change the state of rocker, like 
hash tables. This could cause inconsistent states between prepare-commit 
(for example, prepare phase cannot find an entry but commit phase can 
find it), and leads to memory corruption (unreserved memory could be 
used or reserved memory could not be used in commit phase).

2. rocker_wait_event_timeout() can sleep, but it can be called from IRQ 
context.

   rocker_event_neigh_update()
    rocker_port_ipv4_neigh()
     rocker_group_l3_unicast()
      rocker_group_tbl_do()
       rocker_group_tbl_add()
        rocker_cmd_exec()
         rocker_wait_event_timeout()
          wait_event_timeout()
           might_sleep()

3. Currently memory allocation is always done with GFP_KERNEL in 
__rocker_port_mem_alloc(), but it can be called from IRQ context.

   rocker_event_neigh_update()
    rocker_port_ipv4_neigh()
     rocker_port_kzalloc()
      __rocker_port_mem_alloc()

Toshiaki Makita

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04  8:20   ` Simon Horman
  2015-06-04  8:34     ` David Miller
@ 2015-06-04 16:12     ` Scott Feldman
  2015-06-04 22:54       ` Simon Horman
  1 sibling, 1 reply; 10+ messages in thread
From: Scott Feldman @ 2015-06-04 16:12 UTC (permalink / raw)
  To: Simon Horman
  Cc: David Miller, Netdev, Jiří Pírko, Toshiaki Makita

On Thu, Jun 4, 2015 at 1:20 AM, Simon Horman <simon.horman@netronome.com> wrote:
> Hi Dave,
>
> On Wed, Jun 03, 2015 at 11:38:29PM -0700, David Miller wrote:
>> From: sfeldma@gmail.com
>> Date: Tue,  2 Jun 2015 20:43:28 -0700
>>
>> > From: Scott Feldman <sfeldma@gmail.com>
>> >
>> > v2:
>> >
>> > Changes based on review:
>> >
>> > - David Miller <davem@davemloft.net> raise problem with system_wq not
>> >   preserving queue order to execution order.  To fix, use driver-private
>> >   ordered workqueue to preserve ordering of queued work.
>> >
>> > - Jiri Pirko <jiri@resnulli.us> small change on kfree of work queue item.
>> >
>> > v1:
>> >
>> > In review of Simon's patchset "rocker: transaction fixes". it was noted
>> > that rocker->neigh_tbl_next_index was unprotected in the call path below
>> > and could race with other contexts calling rocker_port_ipv4_neigh():
>>
>> How it rocker->neigh_tbl_next_index not protected?
>>
>> rocker->neigh_tbl_lock is _always_ held when it is accessed.
>>
>> This patch, therefore, looks like completely unnecessary complexity
>> to me.  Furthermore, I would completely prefer if the operation stays
>> completely synchronous to the call path where the neigh operation
>> occurs rather than throwing it out to a workqueue.
>
> What I was seeing is as follows:
>
> 1. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
>    with trans = SWITCHDEV_TRANS_PREPARE
>
> 2. rocker_port_ipv4_neigh() is called by rocker_neigh_update()
>    with trans = SWITCHDEV_TRANS_NONE.
>
>    The call chain goes up to arp_process() via neigh_update().
>
> 3. rocker_port_ipv4_nh() is called via switchdev_port_obj_add()
>    with trans = SWITCHDEV_TRANS_COMMIT
>
> #1 and #2 are guarded by rtl across those calls but
> #2 is not guarded by rtnl.
>
> Inside both rocker_port_ipv4_nh() and rocker_port_ipv4_neigh()
> neigh_tbl_lock lock is taken but it is not held across the
> two calls to rocker_port_ipv4_nh within a single prepare->commit transaction.
>
> I can double check that the above still occurs, but I'm not aware of any
> recent changes that would cause it not to occur any more.

Simon, just focusing on this rocker->neigh_tbl_next_index++ issue at
the moment, I think the change below would make _rocker_neigh_add()
safe to call without needing to put neigh_update into process context.
It works because entry is stored between PREPARE and COMMIT, so once
entry->index is assigned in PREPARE (under neigh_tbl_lock), it'll be
re-used in COMMIT, even if the NONE thread also claims it's
entry->index (also under neigh_tbl_lock).

I know there are other issues you and Toshiaki Makita have pointed
out, but let's see if we can peel the onion one layer at a time.

--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -2904,10 +2904,10 @@ static void _rocker_neigh_add(struct rocker *rocker,
                              enum switchdev_trans trans,
                              struct rocker_neigh_tbl_entry *entry)
 {
-       entry->index = rocker->neigh_tbl_next_index;
+       if (trans != SWITCHDEV_TRANS_COMMIT)
+               entry->index = rocker->neigh_tbl_next_index++;
        if (trans == SWITCHDEV_TRANS_PREPARE)
                return;
-       rocker->neigh_tbl_next_index++;
        entry->ref_count++;
        hash_add(rocker->neigh_tbl, &entry->entry,
                 be32_to_cpu(entry->ip_addr));

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04 15:34         ` Toshiaki Makita
@ 2015-06-04 18:48           ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2015-06-04 18:48 UTC (permalink / raw)
  To: toshiaki.makita1
  Cc: horms, simon.horman, sfeldma, netdev, jiri, makita.toshiaki

From: Toshiaki Makita <toshiaki.makita1@gmail.com>
Date: Fri, 05 Jun 2015 00:34:50 +0900

> I'm thinking IRQ context does not match the prepare-commit model, and
> Scott's fix is needed. There are more critical problems Scott's patch
> fixes.
> (I shortly explained it before, although it is not clearly stated in
> the commitlog. http://marc.info/?l=linux-netdev&m=143219842420093&w=2)
> 
> 1. Operations from IRQ context could change the state of rocker, like
> hash tables. This could cause inconsistent states between
> prepare-commit (for example, prepare phase cannot find an entry but
> commit phase can find it), and leads to memory corruption (unreserved
> memory could be used or reserved memory could not be used in commit
> phase).

If you hold the spinlock across the prepare and commit operation there
is no problem.

It is exactly what I am suggesting and fixes all the bugs.

You add ->transaction_begin() and ->transaction_end() and these take
the driver's spinlock or whatever synchronization object to protect
the transaction.

Then there is no conflict between software interrupt based operations
and RTNL mutex held ones.

I'm not going to explain my preference for how to fix this any further
and will ignore any further submissions of a patch that tries to do
this by pushing things to a workqueue, sorry.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04 16:12     ` Scott Feldman
@ 2015-06-04 22:54       ` Simon Horman
  2015-06-05  1:33         ` Scott Feldman
  0 siblings, 1 reply; 10+ messages in thread
From: Simon Horman @ 2015-06-04 22:54 UTC (permalink / raw)
  To: Scott Feldman
  Cc: Simon Horman, David Miller, Netdev, Jiří Pírko,
	Toshiaki Makita

Hi Scott,

> Simon, just focusing on this rocker->neigh_tbl_next_index++ issue at
> the moment, I think the change below would make _rocker_neigh_add()
> safe to call without needing to put neigh_update into process context.
> It works because entry is stored between PREPARE and COMMIT, so once
> entry->index is assigned in PREPARE (under neigh_tbl_lock), it'll be
> re-used in COMMIT, even if the NONE thread also claims it's
> entry->index (also under neigh_tbl_lock).
> 
> I know there are other issues you and Toshiaki Makita have pointed
> out, but let's see if we can peel the onion one layer at a time.
> 
> --- a/drivers/net/ethernet/rocker/rocker.c
> +++ b/drivers/net/ethernet/rocker/rocker.c
> @@ -2904,10 +2904,10 @@ static void _rocker_neigh_add(struct rocker *rocker,
>                               enum switchdev_trans trans,
>                               struct rocker_neigh_tbl_entry *entry)
>  {
> -       entry->index = rocker->neigh_tbl_next_index;
> +       if (trans != SWITCHDEV_TRANS_COMMIT)
> +               entry->index = rocker->neigh_tbl_next_index++;
>         if (trans == SWITCHDEV_TRANS_PREPARE)
>                 return;
> -       rocker->neigh_tbl_next_index++;
>         entry->ref_count++;
>         hash_add(rocker->neigh_tbl, &entry->entry,
>                  be32_to_cpu(entry->ip_addr));

I agree that should work.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next v2] rocker: move netevent neigh update to processes context
  2015-06-04 22:54       ` Simon Horman
@ 2015-06-05  1:33         ` Scott Feldman
  0 siblings, 0 replies; 10+ messages in thread
From: Scott Feldman @ 2015-06-05  1:33 UTC (permalink / raw)
  To: Simon Horman
  Cc: Simon Horman, David Miller, Netdev, Jiří Pírko,
	Toshiaki Makita

On Thu, Jun 4, 2015 at 3:54 PM, Simon Horman <horms@verge.net.au> wrote:
> Hi Scott,
>
>> Simon, just focusing on this rocker->neigh_tbl_next_index++ issue at
>> the moment, I think the change below would make _rocker_neigh_add()
>> safe to call without needing to put neigh_update into process context.
>> It works because entry is stored between PREPARE and COMMIT, so once
>> entry->index is assigned in PREPARE (under neigh_tbl_lock), it'll be
>> re-used in COMMIT, even if the NONE thread also claims it's
>> entry->index (also under neigh_tbl_lock).
>>
>> I know there are other issues you and Toshiaki Makita have pointed
>> out, but let's see if we can peel the onion one layer at a time.
>>
>> --- a/drivers/net/ethernet/rocker/rocker.c
>> +++ b/drivers/net/ethernet/rocker/rocker.c
>> @@ -2904,10 +2904,10 @@ static void _rocker_neigh_add(struct rocker *rocker,
>>                               enum switchdev_trans trans,
>>                               struct rocker_neigh_tbl_entry *entry)
>>  {
>> -       entry->index = rocker->neigh_tbl_next_index;
>> +       if (trans != SWITCHDEV_TRANS_COMMIT)
>> +               entry->index = rocker->neigh_tbl_next_index++;
>>         if (trans == SWITCHDEV_TRANS_PREPARE)
>>                 return;
>> -       rocker->neigh_tbl_next_index++;
>>         entry->ref_count++;
>>         hash_add(rocker->neigh_tbl, &entry->entry,
>>                  be32_to_cpu(entry->ip_addr));
>
> I agree that should work.

I've been running it today with no problems.  So I'm preparing a new
patch that changes direction here, to address David's concern about
switching neigh event context.  Details:

1) Above little patch addresses the race issue with neigh_tbl_next_index

2) Keep neigh update event in event context and bring back a couple of
items I removed in the Spring Cleanup series:

   a) use ROCKER_OP_FLAG_NOWAIT to mark contexts that can't sleep
   b) if nowait, mem allocations use GFP_ATOMIC
   c) if nowait, cmd to device is issued, but we will not sleep to
wait for response from device; just hit-and-run

I was probably too aggressive with Spring Cleanup thinking we could
swing all device accesses to process context.

Thanks to all for the testing/review/feedback/patience.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-06-05  1:33 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-06-03  3:43 [PATCH net-next v2] rocker: move netevent neigh update to processes context sfeldma
2015-06-04  6:38 ` David Miller
2015-06-04  8:20   ` Simon Horman
2015-06-04  8:34     ` David Miller
2015-06-04  9:07       ` Simon Horman
2015-06-04 15:34         ` Toshiaki Makita
2015-06-04 18:48           ` David Miller
2015-06-04 16:12     ` Scott Feldman
2015-06-04 22:54       ` Simon Horman
2015-06-05  1:33         ` Scott Feldman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).