public inbox for linux-scsi@vger.kernel.org
* [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
       [not found] <1392929071-16555-1-git-send-email-tj@kernel.org>
@ 2014-02-20 20:44 ` Tejun Heo
  2014-02-21  1:44   ` Peter Hurley
                     ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Tejun Heo @ 2014-02-20 20:44 UTC (permalink / raw)
  To: laijs; +Cc: linux-scsi, linux-kernel, Tejun Heo, linux1394-devel,
	target-devel

PREPARE_[DELAYED_]WORK() are being phased out.  They have few users
and a nasty surprise in terms of reentrancy guarantee as workqueue
considers work items to be different if they don't have the same work
function.
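
For example, a sketch of the surprise (illustrative names only):

	PREPARE_WORK(&obj->work, funcA);
	queue_work(wq, &obj->work);	/* funcA starts running */
	...
	PREPARE_WORK(&obj->work, funcB);
	queue_work(wq, &obj->work);	/* no longer recognized as the same
					 * work item; funcB may end up
					 * running concurrently with funcA */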

firewire core-device and sbp2 have been multiplexing work items
with multiple work functions.  Introduce fw_device_workfn() and
sbp2_lu_workfn() which invoke fw_device->workfn and
sbp2_logical_unit->workfn respectively and always use the two
functions as the work functions and update the users to set the
->workfn fields instead of overriding work functions using
PREPARE_DELAYED_WORK().

It would probably be best to route this with other related updates
through the workqueue tree.

Compile tested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: linux1394-devel@lists.sourceforge.net
Cc: Chris Boot <bootc@bootc.net>
Cc: linux-scsi@vger.kernel.org
Cc: target-devel@vger.kernel.org
---
 drivers/firewire/core-device.c | 22 +++++++++++++++-------
 drivers/firewire/sbp2.c        | 17 +++++++++++++----
 include/linux/firewire.h       |  1 +
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index de4aa40..2c6d5e1 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -916,7 +916,7 @@ static int lookup_existing_device(struct device *dev, void *data)
 		old->config_rom_retries = 0;
 		fw_notice(card, "rediscovered device %s\n", dev_name(dev));
 
-		PREPARE_DELAYED_WORK(&old->work, fw_device_update);
+		old->workfn = fw_device_update;
 		fw_schedule_device_work(old, 0);
 
 		if (current_node == card->root_node)
@@ -1075,7 +1075,7 @@ static void fw_device_init(struct work_struct *work)
 	if (atomic_cmpxchg(&device->state,
 			   FW_DEVICE_INITIALIZING,
 			   FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
-		PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
+		device->workfn = fw_device_shutdown;
 		fw_schedule_device_work(device, SHUTDOWN_DELAY);
 	} else {
 		fw_notice(card, "created device %s: GUID %08x%08x, S%d00\n",
@@ -1196,13 +1196,20 @@ static void fw_device_refresh(struct work_struct *work)
 		  dev_name(&device->device), fw_rcode_string(ret));
  gone:
 	atomic_set(&device->state, FW_DEVICE_GONE);
-	PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
+	device->workfn = fw_device_shutdown;
 	fw_schedule_device_work(device, SHUTDOWN_DELAY);
  out:
 	if (node_id == card->root_node->node_id)
 		fw_schedule_bm_work(card, 0);
 }
 
+static void fw_device_workfn(struct work_struct *work)
+{
+	struct fw_device *device = container_of(to_delayed_work(work),
+						struct fw_device, work);
+	device->workfn(work);
+}
+
 void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 {
 	struct fw_device *device;
@@ -1252,7 +1259,8 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		 * power-up after getting plugged in.  We schedule the
 		 * first config rom scan half a second after bus reset.
 		 */
-		INIT_DELAYED_WORK(&device->work, fw_device_init);
+		device->workfn = fw_device_init;
+		INIT_DELAYED_WORK(&device->work, fw_device_workfn);
 		fw_schedule_device_work(device, INITIAL_DELAY);
 		break;
 
@@ -1268,7 +1276,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		if (atomic_cmpxchg(&device->state,
 			    FW_DEVICE_RUNNING,
 			    FW_DEVICE_INITIALIZING) == FW_DEVICE_RUNNING) {
-			PREPARE_DELAYED_WORK(&device->work, fw_device_refresh);
+			device->workfn = fw_device_refresh;
 			fw_schedule_device_work(device,
 				device->is_local ? 0 : INITIAL_DELAY);
 		}
@@ -1283,7 +1291,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		smp_wmb();  /* update node_id before generation */
 		device->generation = card->generation;
 		if (atomic_read(&device->state) == FW_DEVICE_RUNNING) {
-			PREPARE_DELAYED_WORK(&device->work, fw_device_update);
+			device->workfn = fw_device_update;
 			fw_schedule_device_work(device, 0);
 		}
 		break;
@@ -1308,7 +1316,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		device = node->data;
 		if (atomic_xchg(&device->state,
 				FW_DEVICE_GONE) == FW_DEVICE_RUNNING) {
-			PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
+			device->workfn = fw_device_shutdown;
 			fw_schedule_device_work(device,
 				list_empty(&card->link) ? 0 : SHUTDOWN_DELAY);
 		}
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 281029d..7aef911 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -146,6 +146,7 @@ struct sbp2_logical_unit {
 	 */
 	int generation;
 	int retries;
+	work_func_t workfn;
 	struct delayed_work work;
 	bool has_sdev;
 	bool blocked;
@@ -864,7 +865,7 @@ static void sbp2_login(struct work_struct *work)
 	/* set appropriate retry limit(s) in BUSY_TIMEOUT register */
 	sbp2_set_busy_timeout(lu);
 
-	PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+	lu->workfn = sbp2_reconnect;
 	sbp2_agent_reset(lu);
 
 	/* This was a re-login. */
@@ -918,7 +919,7 @@ static void sbp2_login(struct work_struct *work)
 	 * If a bus reset happened, sbp2_update will have requeued
 	 * lu->work already.  Reset the work from reconnect to login.
 	 */
-	PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+	lu->workfn = sbp2_login;
 }
 
 static void sbp2_reconnect(struct work_struct *work)
@@ -952,7 +953,7 @@ static void sbp2_reconnect(struct work_struct *work)
 		    lu->retries++ >= 5) {
 			dev_err(tgt_dev(tgt), "failed to reconnect\n");
 			lu->retries = 0;
-			PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+			lu->workfn = sbp2_login;
 		}
 		sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
 
@@ -972,6 +973,13 @@ static void sbp2_reconnect(struct work_struct *work)
 	sbp2_conditionally_unblock(lu);
 }
 
+static void sbp2_lu_workfn(struct work_struct *work)
+{
+	struct sbp2_logical_unit *lu = container_of(to_delayed_work(work),
+						struct sbp2_logical_unit, work);
+	lu->workfn(work);
+}
+
 static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
 {
 	struct sbp2_logical_unit *lu;
@@ -998,7 +1006,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
 	lu->blocked  = false;
 	++tgt->dont_block;
 	INIT_LIST_HEAD(&lu->orb_list);
-	INIT_DELAYED_WORK(&lu->work, sbp2_login);
+	lu->workfn = sbp2_login;
+	INIT_DELAYED_WORK(&lu->work, sbp2_lu_workfn);
 
 	list_add_tail(&lu->link, &tgt->lu_list);
 	return 0;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 5d7782e..c3683bd 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -200,6 +200,7 @@ struct fw_device {
 	unsigned irmc:1;
 	unsigned bc_implemented:2;
 
+	work_func_t workfn;
 	struct delayed_work work;
 	struct fw_attribute_group attribute_group;
 };
-- 
1.8.5.3



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-20 20:44 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK Tejun Heo
@ 2014-02-21  1:44   ` Peter Hurley
  2014-02-21  1:59     ` Tejun Heo
  2014-02-21 20:45   ` Stefan Richter
  2014-03-07 15:26   ` [PATCH UPDATED " Tejun Heo
  2 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-21  1:44 UTC (permalink / raw)
  To: Tejun Heo, laijs
  Cc: linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On 02/20/2014 03:44 PM, Tejun Heo wrote:
> PREPARE_[DELAYED_]WORK() are being phased out.  They have few users
> and a nasty surprise in terms of reentrancy guarantee as workqueue
> considers work items to be different if they don't have the same work
> function.
>
> firewire core-device and sbp2 have been been multiplexing work items
> with multiple work functions.  Introduce fw_device_workfn() and
> sbp2_lu_workfn() which invoke fw_device->workfn and
> sbp2_logical_unit->workfn respectively and always use the two
> functions as the work functions and update the users to set the
> ->workfn fields instead of overriding work functions using
> PREPARE_DELAYED_WORK().
>
> It would probably be best to route this with other related updates
> through the workqueue tree.
>
> Compile tested.
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
> Cc: linux1394-devel@lists.sourceforge.net
> Cc: Chris Boot <bootc@bootc.net>
> Cc: linux-scsi@vger.kernel.org
> Cc: target-devel@vger.kernel.org
> ---
>   drivers/firewire/core-device.c | 22 +++++++++++++++-------
>   drivers/firewire/sbp2.c        | 17 +++++++++++++----
>   include/linux/firewire.h       |  1 +
>   3 files changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
> index de4aa40..2c6d5e1 100644
> --- a/drivers/firewire/core-device.c
> +++ b/drivers/firewire/core-device.c
> @@ -916,7 +916,7 @@ static int lookup_existing_device(struct device *dev, void *data)
>   		old->config_rom_retries = 0;
>   		fw_notice(card, "rediscovered device %s\n", dev_name(dev));
>
> -		PREPARE_DELAYED_WORK(&old->work, fw_device_update);
> +		old->workfn = fw_device_update;
>   		fw_schedule_device_work(old, 0);
>
>   		if (current_node == card->root_node)
> @@ -1075,7 +1075,7 @@ static void fw_device_init(struct work_struct *work)
>   	if (atomic_cmpxchg(&device->state,
>   			   FW_DEVICE_INITIALIZING,
>   			   FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
> -		PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
> +		device->workfn = fw_device_shutdown;
>   		fw_schedule_device_work(device, SHUTDOWN_DELAY);

Implied mb of test_and_set_bit() in queue_work_on() ensures that the
newly assigned work function is visible on all cpus before evaluating
whether or not the work can be queued.

Ok.
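
In other words, the pairing being relied on looks roughly like this
(sketch of the hunk above):

	device->workfn = fw_device_shutdown;		/* plain store */
	fw_schedule_device_work(device, SHUTDOWN_DELAY);
		/* -> queue_delayed_work() -> test_and_set_bit(PENDING),
		 *    an atomic RMW with full-barrier semantics, so the
		 *    ->workfn store above is visible on all cpus before
		 *    the pending bit is evaluated */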

>   	} else {
>   		fw_notice(card, "created device %s: GUID %08x%08x, S%d00\n",
> @@ -1196,13 +1196,20 @@ static void fw_device_refresh(struct work_struct *work)
>   		  dev_name(&device->device), fw_rcode_string(ret));
>    gone:
>   	atomic_set(&device->state, FW_DEVICE_GONE);
> -	PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
> +	device->workfn = fw_device_shutdown;
>   	fw_schedule_device_work(device, SHUTDOWN_DELAY);
>    out:
>   	if (node_id == card->root_node->node_id)
>   		fw_schedule_bm_work(card, 0);
>   }
>
> +static void fw_device_workfn(struct work_struct *work)
> +{
> +	struct fw_device *device = container_of(to_delayed_work(work),
> +						struct fw_device, work);

I think this needs an smp_rmb() here.

> +	device->workfn(work);
> +}
> +

Otherwise this cpu could speculatively load workfn before
set_work_pool_and_clear_pending(), which means that the old workfn
could have been loaded but PENDING was still set and caused queue_work_on()
to reject the work as already pending.

Result: the new work function never runs.

But this exposes a more general problem that I believe workqueue should
prevent; speculated loads and stores in the work item function should be
prevented from occurring before clearing PENDING in
set_work_pool_and_clear_pending().

IOW, the beginning of the work function should act like a barrier in
the same way that queue_work_on() (et. al.) already does.
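
Concretely, something like this (sketch only):

static void fw_device_workfn(struct work_struct *work)
{
	struct fw_device *device = container_of(to_delayed_work(work),
						struct fw_device, work);
	/* pair with the barrier implied by queue_work_on() so that the
	 * latest ->workfn assignment is observed before it is called */
	smp_rmb();
	device->workfn(work);
}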

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21  1:44   ` Peter Hurley
@ 2014-02-21  1:59     ` Tejun Heo
  2014-02-21  2:07       ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-21  1:59 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

Hello,

On Thu, Feb 20, 2014 at 08:44:46PM -0500, Peter Hurley wrote:
> >+static void fw_device_workfn(struct work_struct *work)
> >+{
> >+	struct fw_device *device = container_of(to_delayed_work(work),
> >+						struct fw_device, work);
> 
> I think this needs an smp_rmb() here.

The patch is equivalent transformation and the whole thing is
guaranteed to have gone through pool->lock.  No explicit rmb
necessary.

> IOW, the beginning of the work function should act like a barrier in
> the same way that queue_work_on() (et. al.) already does.

workqueue already has enough barriers; otherwise, the whole kernel
would have crumbled long time ago.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21  1:59     ` Tejun Heo
@ 2014-02-21  2:07       ` Peter Hurley
  2014-02-21  2:13         ` Tejun Heo
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-21  2:07 UTC (permalink / raw)
  To: Tejun Heo; +Cc: laijs, linux-scsi, linux-kernel, linux1394-devel, target-devel

On 02/20/2014 08:59 PM, Tejun Heo wrote:
> Hello,
>
> On Thu, Feb 20, 2014 at 08:44:46PM -0500, Peter Hurley wrote:
>>> +static void fw_device_workfn(struct work_struct *work)
>>> +{
>>> +	struct fw_device *device = container_of(to_delayed_work(work),
>>> +						struct fw_device, work);
>>
>> I think this needs an smp_rmb() here.
>
> The patch is equivalent transformation and the whole thing is
> guaranteed to have gone through pool->lock.  No explicit rmb
> necessary.

The spin_unlock_irq(&pool->lock) only guarantees completion of
memory operations _before_ the unlock; memory operations which occur
_after_ the unlock may be speculated before the unlock.

IOW, unlock is not a memory barrier for operations that occur after.

>> IOW, the beginning of the work function should act like a barrier in
>> the same way that queue_work_on() (et. al.) already does.
>
> workqueue already has enough barriers; otherwise, the whole kernel
> would have crumbled long time ago.

See above.

Regards,
Peter Hurley


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21  2:07       ` Peter Hurley
@ 2014-02-21  2:13         ` Tejun Heo
  2014-02-21  5:13           ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-21  2:13 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On Thu, Feb 20, 2014 at 09:07:27PM -0500, Peter Hurley wrote:
> On 02/20/2014 08:59 PM, Tejun Heo wrote:
> >Hello,
> >
> >On Thu, Feb 20, 2014 at 08:44:46PM -0500, Peter Hurley wrote:
> >>>+static void fw_device_workfn(struct work_struct *work)
> >>>+{
> >>>+	struct fw_device *device = container_of(to_delayed_work(work),
> >>>+						struct fw_device, work);
> >>
> >>I think this needs an smp_rmb() here.
> >
> >The patch is equivalent transformation and the whole thing is
> >guaranteed to have gone through pool->lock.  No explicit rmb
> >necessary.
> 
> The spin_unlock_irq(&pool->lock) only guarantees completion of
> memory operations _before_ the unlock; memory operations which occur
> _after_ the unlock may be speculated before the unlock.
> 
> IOW, unlock is not a memory barrier for operations that occur after.

It's not just unlock.  It's lock / unlock pair on the same lock from
both sides.  Nothing can slip through that.

-- 
tejun

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21  2:13         ` Tejun Heo
@ 2014-02-21  5:13           ` Peter Hurley
  2014-02-21 10:03             ` Tejun Heo
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-21  5:13 UTC (permalink / raw)
  To: Tejun Heo
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On 02/20/2014 09:13 PM, Tejun Heo wrote:
> On Thu, Feb 20, 2014 at 09:07:27PM -0500, Peter Hurley wrote:
>> On 02/20/2014 08:59 PM, Tejun Heo wrote:
>>> Hello,
>>>
>>> On Thu, Feb 20, 2014 at 08:44:46PM -0500, Peter Hurley wrote:
>>>>> +static void fw_device_workfn(struct work_struct *work)
>>>>> +{
>>>>> +	struct fw_device *device = container_of(to_delayed_work(work),
>>>>> +						struct fw_device, work);
>>>>
>>>> I think this needs an smp_rmb() here.
>>>
>>> The patch is equivalent transformation and the whole thing is
>>> guaranteed to have gone through pool->lock.  No explicit rmb
>>> necessary.
>>
>> The spin_unlock_irq(&pool->lock) only guarantees completion of
>> memory operations _before_ the unlock; memory operations which occur
>> _after_ the unlock may be speculated before the unlock.
>>
>> IOW, unlock is not a memory barrier for operations that occur after.
>
> It's not just unlock.  It's lock / unlock pair on the same lock from
> both sides.  Nothing can sip through that.

CPU 0                            | CPU 1
                                  |
  INIT_WORK(fw_device_workfn)     |
                                  |
  workfn = funcA                  |
  queue_work_on()                 |
  .                               | process_one_work()
  .                               |   ..
  .                               |   worker->current_func = work->func
  .                               |
  .                               |   speculative load of workfn = funcA
  .                               |   .
  workfn = funcB                  |   .
  queue_work_on()                 |   .
    local_irq_save()              |   .
    test_and_set_bit() == 1       |   .
                                  |   set_work_pool_and_clear_pending()
    work is not queued            |     smp_wmb
     funcB never runs             |     set_work_data()
                                  |       atomic_set()
                                  |   spin_unlock_irq()
                                  |
                                  |   worker->current_func(work)  @ fw_device_workfn
                                  |      workfn()  @ funcA


The speculative load of workfn on CPU 1 is valid because no rmb will occur
between the load and the execution of workfn() on CPU 1.

Thus funcB will never execute because, in this circumstance, a second
worker is not queued (because PENDING had not yet been cleared).

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21  5:13           ` Peter Hurley
@ 2014-02-21 10:03             ` Tejun Heo
  2014-02-21 12:51               ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-21 10:03 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On Fri, Feb 21, 2014 at 12:13:16AM -0500, Peter Hurley wrote:
> CPU 0                            | CPU 1
>                                  |
>  INIT_WORK(fw_device_workfn)     |
>                                  |
>  workfn = funcA                  |
>  queue_work_on()                 |
>  .                               | process_one_work()
>  .                               |   ..
>  .                               |   worker->current_func = work->func
>  .                               |
>  .                               |   speculative load of workfn = funcA
>  .                               |   .
>  workfn = funcB                  |   .
>  queue_work_on()                 |   .
>    local_irq_save()              |   .
>    test_and_set_bit() == 1       |   .
>                                  |   set_work_pool_and_clear_pending()
>    work is not queued            |     smp_wmb
>     funcB never runs             |     set_work_data()
>                                  |       atomic_set()
>                                  |   spin_unlock_irq()
>                                  |
>                                  |   worker->current_func(work)  @ fw_device_workfn
>                                  |      workfn()  @ funcA
> 
> 
> The speculative load of workfn on CPU 1 is valid because no rmb will occur
> between the load and the execution of workfn() on CPU 1.
> 
> Thus funcB will never execute because, in this circumstance, a second
> worker is not queued (because PENDING had not yet been cleared).

There's no right or wrong execution.  Executions of either funcA or
funcB are correct results.  The only memory ordering guarantee
workqueue gives is that anything written before the work item is
queued will be visible when that instance starts executing.  When a
work item is not queued, no ordering is guaranteed between the
queueing attempt and the execution of the existing instance.  We can
add such guarantee, not sure how much it'd matter but it's not like
it's gonna cost a lot either.

This doesn't have much to do with the current series tho.  In fact,
PREPARE_WORK can't ever be made to give such guarantee.  The function
pointer has to be fetched before clearing of PENDING.
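
That is, roughly (following the sequence in your diagram):

	/* process_one_work(), simplified: */
	worker->current_func = work->func;	/* work->func sampled here... */
	...
	set_work_pool_and_clear_pending();
	...
	worker->current_func(work);		/* ...and only invoked here */

With PREPARE_WORK() the function to run lives in work->func itself, so
it is necessarily sampled before PENDING is cleared; with a ->workfn
field the real pointer is only read inside the wrapper, after the
clearing.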

-- 
tejun

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 10:03             ` Tejun Heo
@ 2014-02-21 12:51               ` Peter Hurley
  2014-02-21 13:06                 ` Tejun Heo
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-21 12:51 UTC (permalink / raw)
  To: Tejun Heo; +Cc: laijs, linux-scsi, linux-kernel, linux1394-devel, target-devel

On 02/21/2014 05:03 AM, Tejun Heo wrote:
> On Fri, Feb 21, 2014 at 12:13:16AM -0500, Peter Hurley wrote:
>> CPU 0                            | CPU 1
>>                                   |
>>   INIT_WORK(fw_device_workfn)     |
>>                                   |
>>   workfn = funcA                  |
>>   queue_work_on()                 |
>>   .                               | process_one_work()
>>   .                               |   ..
>>   .                               |   worker->current_func = work->func
>>   .                               |
>>   .                               |   speculative load of workfn = funcA
>>   .                               |   .
>>   workfn = funcB                  |   .
>>   queue_work_on()                 |   .
>>     local_irq_save()              |   .
>>     test_and_set_bit() == 1       |   .
>>                                   |   set_work_pool_and_clear_pending()
>>     work is not queued            |     smp_wmb
>>      funcB never runs             |     set_work_data()
>>                                   |       atomic_set()
>>                                   |   spin_unlock_irq()
>>                                   |
>>                                   |   worker->current_func(work)  @ fw_device_workfn
>>                                   |      workfn()  @ funcA
>>
>>
>> The speculative load of workfn on CPU 1 is valid because no rmb will occur
>> between the load and the execution of workfn() on CPU 1.
>>
>> Thus funcB will never execute because, in this circumstance, a second
>> worker is not queued (because PENDING had not yet been cleared).
>
> There's no right or wrong execution.  Executions of either funcA or
> funcB are correct results.  The only memory ordering guarantee
> workqueue gives is that anything written before the work item is
> queued will be visible when that instance starts executing.  When a
> work item is not queued, no ordering is guaranteed between the
> queueing attempt and the execution of the existing instance.

I think the vast majority of kernel code which uses the workqueue
assumes there is a memory ordering guarantee.

Meaning that if a work item is not queue-able then the previously
queued instance _has not yet started_ and so, by deduction, must be
able to see the newly written values.

Consider:

    add something important to list to work on
    queue work

or

    update index in buffer indicating more data
    queue work

Both of these uses expect the workqueue to guarantee that this latest
data is acted upon.
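
A minimal sketch of the first pattern (hypothetical names, not from any
particular driver):

struct myreq {
	struct list_head link;
};

struct mydev {
	spinlock_t lock;
	struct list_head pending;	/* "list to work on" */
	struct work_struct work;
};

static void mydev_submit(struct mydev *dev, struct myreq *req)
{
	spin_lock(&dev->lock);
	list_add_tail(&req->link, &dev->pending);	/* the important new data */
	spin_unlock(&dev->lock);
	schedule_work(&dev->work);	/* may find PENDING already set */
}

The work function here is assumed to observe the entry added above even
when schedule_work() returned false because the item was already
pending -- that is the ordering guarantee in question.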

Another way to look at this problem is that process_one_work()
doesn't become the existing instance _until_ PENDING is cleared.

> We can
> add such guarantee, not sure how much it'd matter but it's not like
> it's gonna cost a lot either.
>
> This doesn't have much to do with the current series tho.  In fact,
> PREPARE_WORK can't ever be made to give such guarantee.

Yes, I agree that PREPARE_DELAYED_WORK was also broken usage with the
same problem. [And there are other bugs in that firewire device work
code which I'm working on.]

> The function pointer has to fetched before clearing of PENDING.

Why?

As long as the load takes place within the pool->lock, I don't think
it matters (especially now PREPARE_WORK is removed).

Regards,
Peter Hurley



^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 12:51               ` Peter Hurley
@ 2014-02-21 13:06                 ` Tejun Heo
  2014-02-21 16:53                   ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-21 13:06 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

Hello,

On Fri, Feb 21, 2014 at 07:51:48AM -0500, Peter Hurley wrote:
> I think the vast majority of kernel code which uses the workqueue
> assumes there is a memory ordering guarantee.

Not really.  Workqueues haven't even guaranteed non-reentrancy until
recently, forcing everybody to lock explicitly in the work function.
I don't think there'd be many, if any, use cases which depend on
ordering guarantee on duplicate queueing.

> Another way to look at this problem is that process_one_work()
> doesn't become the existing instance _until_ PENDING is cleared.

Sure, having that guarantee definitely is nicer and all we need seems
to be mb_after_unlock in the execution path.  Please feel free to
submit a patch.

> >add such guarantee, not sure how much it'd matter but it's not like
> >it's gonna cost a lot either.
> >
> >This doesn't have much to do with the current series tho.  In fact,
> >PREPARE_WORK can't ever be made to give such guarantee.
> 
> Yes, I agree that PREPARE_DELAYED_WORK was also broken usage with the
> same problem. [And there are other bugs in that firewire device work
> code which I'm working on.]
> 
> >The function pointer has to fetched before clearing of PENDING.
> 
> Why?
> 
> As long as the load takes place within the pool->lock, I don't think
> it matters (especially now PREPARE_WORK is removed).

Hmmm... I was talking about PREPARE_WORK().  Clearing PENDING means
that the work item is released from the worker context and may be
freed or reused at any time (hmm... this may not be true anymore as
non-syncing variants of cancel_work are gone), so clearing PENDING
should be the last access to the work item and thus we can't use that
as the barrier event for fetching its work function.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 13:06                 ` Tejun Heo
@ 2014-02-21 16:53                   ` Peter Hurley
  2014-02-21 16:57                     ` Tejun Heo
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-21 16:53 UTC (permalink / raw)
  To: Tejun Heo
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

Hi Tejun,

On 02/21/2014 08:06 AM, Tejun Heo wrote:
> Hello,
>
> On Fri, Feb 21, 2014 at 07:51:48AM -0500, Peter Hurley wrote:
>> I think the vast majority of kernel code which uses the workqueue
>> assumes there is a memory ordering guarantee.
>
> Not really.  Workqueues haven't even guaranteed non-reentrancy until
> recently, forcing everybody to lock explicitly in the work function.
> I don't think there'd be many, if any, use cases which depend on
> ordering guarantee on duplicate queueing.

I added some in 3.12 :)

>> Another way to look at this problem is that process_one_work()
>> doesn't become the existing instance _until_ PENDING is cleared.
>
> Sure, having that guarantee definitely is nicer and all we need seems
> to be mb_after_unlock in the execution path.  Please feel free to
> submit a patch.

Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
no mb__after unlock.

[ After thinking about it some, I don't think preventing speculative
   writes before clearing PENDING is useful or necessary, so that's
   why I'm suggesting only the rmb. ]

>>> add such guarantee, not sure how much it'd matter but it's not like
>>> it's gonna cost a lot either.
>>>
>>> This doesn't have much to do with the current series tho.  In fact,
>>> PREPARE_WORK can't ever be made to give such guarantee.
>>
>> Yes, I agree that PREPARE_DELAYED_WORK was also broken usage with the
>> same problem. [And there are other bugs in that firewire device work
>> code which I'm working on.]
>>
>>> The function pointer has to fetched before clearing of PENDING.
>>
>> Why?
>>
>> As long as the load takes place within the pool->lock, I don't think
>> it matters (especially now PREPARE_WORK is removed).
>
> Hmmm... I was talking about PREPARE_WORK().  Clearing PENDING means
> that the work item is released from the worker context and may be
> freed or reused at any time (hmm... this may not be true anymore as
> non-syncing variants of cancel_work are gone), so clearing PENDING
> should be the last access to the work item and thus we can't use that
> as the barrier event for fetching its work function.

Yeah, it seems like the work item lifetime is at least guaranteed
while either PENDING is set _or_ while the pool->lock is held
after PENDING is cleared.

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 16:53                   ` Peter Hurley
@ 2014-02-21 16:57                     ` Tejun Heo
  2014-02-21 23:01                       ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-21 16:57 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

Yo,

On Fri, Feb 21, 2014 at 11:53:46AM -0500, Peter Hurley wrote:
> Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
> no mb__after unlock.

We do have smp_mb__after_unlock_lock().

> [ After thinking about it some, I don't think preventing speculative
>   writes before clearing PENDING if useful or necessary, so that's
>   why I'm suggesting only the rmb. ]

But smp_mb__after_unlock_lock() would be cheaper on most popular
archs, I think.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-20 20:44 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK Tejun Heo
  2014-02-21  1:44   ` Peter Hurley
@ 2014-02-21 20:45   ` Stefan Richter
  2014-03-07 15:26   ` [PATCH UPDATED " Tejun Heo
  2 siblings, 0 replies; 32+ messages in thread
From: Stefan Richter @ 2014-02-21 20:45 UTC (permalink / raw)
  To: Tejun Heo; +Cc: laijs, linux-scsi, linux-kernel, target-devel, linux1394-devel

On Feb 20 Tejun Heo wrote:
> PREPARE_[DELAYED_]WORK() are being phased out.  They have few users
> and a nasty surprise in terms of reentrancy guarantee as workqueue
> considers work items to be different if they don't have the same work
> function.
> 
> firewire core-device and sbp2 have been been multiplexing work items
> with multiple work functions.  Introduce fw_device_workfn() and
> sbp2_lu_workfn() which invoke fw_device->workfn and
> sbp2_logical_unit->workfn respectively and always use the two
> functions as the work functions and update the users to set the
> ->workfn fields instead of overriding work functions using
> PREPARE_DELAYED_WORK().
> 
> It would probably be best to route this with other related updates
> through the workqueue tree.
> 
> Compile tested.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
> Cc: linux1394-devel@lists.sourceforge.net

Acked-by: Stefan Richter <stefanr@s5r6.in-berlin.de>

And lightly runtime-tested as well.

This doesn't actually touch sbp-target; the following Cc's could be
dropped:

> Cc: Chris Boot <bootc@bootc.net>
> Cc: linux-scsi@vger.kernel.org
> Cc: target-devel@vger.kernel.org
> ---
>  drivers/firewire/core-device.c | 22 +++++++++++++++-------
>  drivers/firewire/sbp2.c        | 17 +++++++++++++----
>  include/linux/firewire.h       |  1 +
>  3 files changed, 29 insertions(+), 11 deletions(-)

-- 
Stefan Richter
-=====-====- --=- =-=-=
http://arcgraph.de/sr/


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 16:57                     ` Tejun Heo
@ 2014-02-21 23:01                       ` Peter Hurley
  2014-02-21 23:18                         ` Tejun Heo
  2014-02-22 18:43                         ` James Bottomley
  0 siblings, 2 replies; 32+ messages in thread
From: Peter Hurley @ 2014-02-21 23:01 UTC (permalink / raw)
  To: Tejun Heo
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On 02/21/2014 11:57 AM, Tejun Heo wrote:
> Yo,
>
> On Fri, Feb 21, 2014 at 11:53:46AM -0500, Peter Hurley wrote:
>> Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
>> no mb__after unlock.
>
> We do have smp_mb__after_unlock_lock().
>
>> [ After thinking about it some, I don't think preventing speculative
>>    writes before clearing PENDING if useful or necessary, so that's
>>    why I'm suggesting only the rmb. ]
>
> But smp_mb__after_unlock_lock() would be cheaper on most popular
> archs, I think.

smp_mb__after_unlock_lock() is only for ordering memory operations
between two spin-locked sections on either the same lock or by
the same task/cpu. Like:

    i = 1
    spin_unlock(lock1)
    spin_lock(lock2)
    smp_mb__after_unlock_lock()
    j = 1

This guarantees that the store to j happens after the store to i.
Without it, a cpu can

    spin_lock(lock2)
    j = 1
    i = 1
    spin_unlock(lock1)

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 23:01                       ` Peter Hurley
@ 2014-02-21 23:18                         ` Tejun Heo
  2014-02-21 23:46                           ` Peter Hurley
  2014-02-22 18:43                         ` James Bottomley
  1 sibling, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-21 23:18 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On Fri, Feb 21, 2014 at 06:01:29PM -0500, Peter Hurley wrote:
> smp_mb__after_unlock_lock() is only for ordering memory operations
> between two spin-locked sections on either the same lock or by
> the same task/cpu. Like:
> 
>    i = 1
>    spin_unlock(lock1)
>    spin_lock(lock2)
>    smp_mb__after_unlock_lock()
>    j = 1
> 
> This guarantees that the store to j happens after the store to i.
> Without it, a cpu can
> 
>    spin_lock(lock2)
>    j = 1
>    i = 1
>    spin_unlock(lock1)

Hmmm?  I'm pretty sure that's a full barrier.  Local processor is
always in order (w.r.t. the compiler).

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 23:18                         ` Tejun Heo
@ 2014-02-21 23:46                           ` Peter Hurley
  2014-02-22 14:38                             ` Tejun Heo
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-21 23:46 UTC (permalink / raw)
  To: Tejun Heo
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On 02/21/2014 06:18 PM, Tejun Heo wrote:
> On Fri, Feb 21, 2014 at 06:01:29PM -0500, Peter Hurley wrote:
>> smp_mb__after_unlock_lock() is only for ordering memory operations
>> between two spin-locked sections on either the same lock or by
>> the same task/cpu. Like:
>>
>>     i = 1
>>     spin_unlock(lock1)
>>     spin_lock(lock2)
>>     smp_mb__after_unlock_lock()
>>     j = 1
>>
>> This guarantees that the store to j happens after the store to i.
>> Without it, a cpu can
>>
>>     spin_lock(lock2)
>>     j = 1
>>     i = 1
>>     spin_unlock(lock1)
>
> Hmmm?  I'm pretty sure that's a full barrier.  Local processor is
> always in order (w.r.t. the compiler).

It's a long story but the short version is that
Documentation/memory-barriers.txt recently was overhauled to reflect
what cpus actually do and what the different archs actually
deliver.

Turns out that unlock + lock is not guaranteed by all archs to be
a full barrier. Thus the smp_mb__after_unlock_lock().

This is now all spelled out in memory-barriers.txt under the
sub-heading "IMPLICIT KERNEL MEMORY BARRIERS".

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 23:46                           ` Peter Hurley
@ 2014-02-22 14:38                             ` Tejun Heo
  2014-02-22 14:48                               ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Tejun Heo @ 2014-02-22 14:38 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-scsi, linux-kernel, linux1394-devel, target-devel

Hey,

On Fri, Feb 21, 2014 at 06:46:24PM -0500, Peter Hurley wrote:
> It's a long story but the short version is that
> Documentation/memory-barriers.txt recently was overhauled to reflect
> what cpus actually do and what the different archs actually
> deliver.
> 
> Turns out that unlock + lock is not guaranteed by all archs to be
> a full barrier. Thus the smb_mb__after_unlock_lock().
> 
> This is now all spelled out in memory-barriers.txt under the
> sub-heading "IMPLICIT KERNEL MEMORY BARRIERS".

So, that one is for unlock/lock sequence, not smp_mb__after_unlock().
Urgh... kinda dislike adding smp_rmb() there as it's one of the
barriers which translate to something weighty on most, if not all,
archs.

Thanks.

-- 
tejun


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-22 14:38                             ` Tejun Heo
@ 2014-02-22 14:48                               ` Peter Hurley
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Hurley @ 2014-02-22 14:48 UTC (permalink / raw)
  To: Tejun Heo
  Cc: laijs, linux-kernel, Stefan Richter, linux1394-devel, Chris Boot,
	linux-scsi, target-devel

On 02/22/2014 09:38 AM, Tejun Heo wrote:
> Hey,
>
> On Fri, Feb 21, 2014 at 06:46:24PM -0500, Peter Hurley wrote:
>> It's a long story but the short version is that
>> Documentation/memory-barriers.txt recently was overhauled to reflect
>> what cpus actually do and what the different archs actually
>> deliver.
>>
>> Turns out that unlock + lock is not guaranteed by all archs to be
>> a full barrier. Thus the smb_mb__after_unlock_lock().
>>
>> This is now all spelled out in memory-barriers.txt under the
>> sub-heading "IMPLICIT KERNEL MEMORY BARRIERS".
>
> So, that one is for unlock/lock sequence, not smp_mb__after_unlock().
> Urgh... kinda dislike adding smp_rmb() there as it's one of the
> barriers which translate to something weighty on most, if not all,
> archs.

The ticket lock, which is used on x86 to implement spinlocks, has no
fence at all for the fast path (only the compiler barrier), so even on
x86 there has to be a real barrier.

IOW, there really isn't an arch that can get away without doing this
barrier because it follows an unlock.

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-21 23:01                       ` Peter Hurley
  2014-02-21 23:18                         ` Tejun Heo
@ 2014-02-22 18:43                         ` James Bottomley
  2014-02-22 18:48                           ` Peter Hurley
  1 sibling, 1 reply; 32+ messages in thread
From: James Bottomley @ 2014-02-22 18:43 UTC (permalink / raw)
  To: Peter Hurley
  Cc: Tejun Heo, laijs, linux-kernel, Stefan Richter, linux1394-devel,
	Chris Boot, linux-scsi, target-devel


On Fri, 2014-02-21 at 18:01 -0500, Peter Hurley wrote:
> On 02/21/2014 11:57 AM, Tejun Heo wrote:
> > Yo,
> >
> > On Fri, Feb 21, 2014 at 11:53:46AM -0500, Peter Hurley wrote:
> >> Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
> >> no mb__after unlock.
> >
> > We do have smp_mb__after_unlock_lock().
> >
> >> [ After thinking about it some, I don't think preventing speculative
> >>    writes before clearing PENDING if useful or necessary, so that's
> >>    why I'm suggesting only the rmb. ]
> >
> > But smp_mb__after_unlock_lock() would be cheaper on most popular
> > archs, I think.
> 
> smp_mb__after_unlock_lock() is only for ordering memory operations
> between two spin-locked sections on either the same lock or by
> the same task/cpu. Like:
> 
>     i = 1
>     spin_unlock(lock1)
>     spin_lock(lock2)
>     smp_mb__after_unlock_lock()
>     j = 1
> 
> This guarantees that the store to j happens after the store to i.
> Without it, a cpu can
> 
>     spin_lock(lock2)
>     j = 1
>     i = 1
>     spin_unlock(lock1)

No the CPU cannot.  If the CPU were allowed to reorder locking
sequences, we'd get speculation induced ABBA deadlocks.  The rules are
quite simple: loads and stores cannot speculate out of critical
sections.  All architectures have barriers in place to prevent this ...
I know from personal experience because the barriers on PARISC were
originally too weak and we did get some speculation out of the critical
sections, which was very nasty to debug.

Stuff may speculate into critical sections from non-critical but never
out of them and critical section boundaries may not reorder to cause an
overlap.
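
A sketch of the ABBA scenario being referred to:

	CPU 0				CPU 1
	spin_lock(&A);			spin_lock(&B);
	...				...
	spin_unlock(&A);		spin_unlock(&B);
	spin_lock(&B);			spin_lock(&A);

If a CPU could let the later spin_lock() get ahead of its own preceding
spin_unlock(), each side could be seen holding one lock while spinning
on the other -- the speculation-induced ABBA deadlock mentioned above.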

James

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-22 18:43                         ` James Bottomley
@ 2014-02-22 18:48                           ` Peter Hurley
  2014-02-22 18:52                             ` James Bottomley
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-22 18:48 UTC (permalink / raw)
  Cc: Tejun Heo, laijs, linux-kernel, Stefan Richter, linux1394-devel,
	Chris Boot, linux-scsi, target-devel

On 02/22/2014 01:43 PM, James Bottomley wrote:
>
> On Fri, 2014-02-21 at 18:01 -0500, Peter Hurley wrote:
>> On 02/21/2014 11:57 AM, Tejun Heo wrote:
>>> Yo,
>>>
>>> On Fri, Feb 21, 2014 at 11:53:46AM -0500, Peter Hurley wrote:
>>>> Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
>>>> no mb__after unlock.
>>>
>>> We do have smp_mb__after_unlock_lock().
>>>
>>>> [ After thinking about it some, I don't think preventing speculative
>>>>     writes before clearing PENDING if useful or necessary, so that's
>>>>     why I'm suggesting only the rmb. ]
>>>
>>> But smp_mb__after_unlock_lock() would be cheaper on most popular
>>> archs, I think.
>>
>> smp_mb__after_unlock_lock() is only for ordering memory operations
>> between two spin-locked sections on either the same lock or by
>> the same task/cpu. Like:
>>
>>      i = 1
>>      spin_unlock(lock1)
>>      spin_lock(lock2)
>>      smp_mb__after_unlock_lock()
>>      j = 1
>>
>> This guarantees that the store to j happens after the store to i.
>> Without it, a cpu can
>>
>>      spin_lock(lock2)
>>      j = 1
>>      i = 1
>>      spin_unlock(lock1)
>
> No the CPU cannot.  If the CPU were allowed to reorder locking
> sequences, we'd get speculation induced ABBA deadlocks.  The rules are
> quite simple: loads and stores cannot speculate out of critical
> sections.

If you look carefully, you'll notice that the stores have not been
moved from their respective critical sections; simply that the two
critical sections overlap because they use different locks.

Regards,
Peter Hurley

PS - Your reply address is unroutable.

> All architectures have barriers in place to prevent this ...
> I know from personal experience because the barriers on PARISC were
> originally too weak and we did get some speculation out of the critical
> sections, which was very nasty to debug.
>
> Stuff may speculate into critical sections from non-critical but never
> out of them and critical section boundaries may not reorder to cause an
> overlap.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-22 18:48                           ` Peter Hurley
@ 2014-02-22 18:52                             ` James Bottomley
  2014-02-22 19:03                               ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: James Bottomley @ 2014-02-22 18:52 UTC (permalink / raw)
  To: Peter Hurley
  Cc: Tejun Heo, laijs, linux-kernel, Stefan Richter, linux1394-devel,
	Chris Boot, linux-scsi, target-devel

On Sat, 2014-02-22 at 13:48 -0500, Peter Hurley wrote:
> On 02/22/2014 01:43 PM, James Bottomley wrote:
> >
> > On Fri, 2014-02-21 at 18:01 -0500, Peter Hurley wrote:
> >> On 02/21/2014 11:57 AM, Tejun Heo wrote:
> >>> Yo,
> >>>
> >>> On Fri, Feb 21, 2014 at 11:53:46AM -0500, Peter Hurley wrote:
> >>>> Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
> >>>> no mb__after unlock.
> >>>
> >>> We do have smp_mb__after_unlock_lock().
> >>>
> >>>> [ After thinking about it some, I don't think preventing speculative
> >>>>     writes before clearing PENDING if useful or necessary, so that's
> >>>>     why I'm suggesting only the rmb. ]
> >>>
> >>> But smp_mb__after_unlock_lock() would be cheaper on most popular
> >>> archs, I think.
> >>
> >> smp_mb__after_unlock_lock() is only for ordering memory operations
> >> between two spin-locked sections on either the same lock or by
> >> the same task/cpu. Like:
> >>
> >>      i = 1
> >>      spin_unlock(lock1)
> >>      spin_lock(lock2)
> >>      smp_mb__after_unlock_lock()
> >>      j = 1
> >>
> >> This guarantees that the store to j happens after the store to i.
> >> Without it, a cpu can
> >>
> >>      spin_lock(lock2)
> >>      j = 1
> >>      i = 1
> >>      spin_unlock(lock1)
> >
> > No the CPU cannot.  If the CPU were allowed to reorder locking
> > sequences, we'd get speculation induced ABBA deadlocks.  The rules are
> > quite simple: loads and stores cannot speculate out of critical
> > sections.
> 
> If you look carefully, you'll notice that the stores have not been
> moved from their respective critical sections; simply that the two
> critical sections overlap because they use different locks.

You didn't look carefully enough at what I wrote.  You may not reorder
critical sections so they overlap regardless of whether the locks are
independent or not.  This is because we'd get ABBA deadlocks due to
speculation (A represents lock1 and B lock 2 in your example).

James

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-22 18:52                             ` James Bottomley
@ 2014-02-22 19:03                               ` Peter Hurley
  2014-02-23  1:23                                 ` memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK) Stefan Richter
  2014-02-23 20:05                                 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK James Bottomley
  0 siblings, 2 replies; 32+ messages in thread
From: Peter Hurley @ 2014-02-22 19:03 UTC (permalink / raw)
  To: James Bottomley
  Cc: laijs, linux-scsi, linux-kernel, Tejun Heo, linux1394-devel,
	target-devel

On 02/22/2014 01:52 PM, James Bottomley wrote:
> On Sat, 2014-02-22 at 13:48 -0500, Peter Hurley wrote:
>> On 02/22/2014 01:43 PM, James Bottomley wrote:
>>>
>>> On Fri, 2014-02-21 at 18:01 -0500, Peter Hurley wrote:
>>>> On 02/21/2014 11:57 AM, Tejun Heo wrote:
>>>>> Yo,
>>>>>
>>>>> On Fri, Feb 21, 2014 at 11:53:46AM -0500, Peter Hurley wrote:
>>>>>> Ok, I can do that. But AFAIK it'll have to be an smp_rmb(); there is
>>>>>> no mb__after unlock.
>>>>>
>>>>> We do have smp_mb__after_unlock_lock().
>>>>>
>>>>>> [ After thinking about it some, I don't think preventing speculative
>>>>>>      writes before clearing PENDING if useful or necessary, so that's
>>>>>>      why I'm suggesting only the rmb. ]
>>>>>
>>>>> But smp_mb__after_unlock_lock() would be cheaper on most popular
>>>>> archs, I think.
>>>>
>>>> smp_mb__after_unlock_lock() is only for ordering memory operations
>>>> between two spin-locked sections on either the same lock or by
>>>> the same task/cpu. Like:
>>>>
>>>>       i = 1
>>>>       spin_unlock(lock1)
>>>>       spin_lock(lock2)
>>>>       smp_mb__after_unlock_lock()
>>>>       j = 1
>>>>
>>>> This guarantees that the store to j happens after the store to i.
>>>> Without it, a cpu can
>>>>
>>>>       spin_lock(lock2)
>>>>       j = 1
>>>>       i = 1
>>>>       spin_unlock(lock1)
>>>
>>> No the CPU cannot.  If the CPU were allowed to reorder locking
>>> sequences, we'd get speculation induced ABBA deadlocks.  The rules are
>>> quite simple: loads and stores cannot speculate out of critical
>>> sections.
>>
>> If you look carefully, you'll notice that the stores have not been
>> moved from their respective critical sections; simply that the two
>> critical sections overlap because they use different locks.
>
> You didn't look carefully enough at what I wrote.  You may not reorder
> critical sections so they overlap regardless of whether the locks are
> independent or not.  This is because we'd get ABBA deadlocks due to
> speculation (A represents lock1 and B lock 2 in your example).

On 11/26/2013 02:00 PM, Linus Torvalds wrote:
 > On Tue, Nov 26, 2013 at 1:59 AM, Peter Zijlstra <peterz@infradead.org> wrote:
 >>
 >> If you now want to weaken this definition, then that needs consideration
 >> because we actually rely on things like
 >>
 >> spin_unlock(l1);
 >> spin_lock(l2);
 >>
 >> being full barriers.
 >
 > Btw, maybe we should just stop that assumption. The complexity of this
 > discussion makes me go "maybe we should stop with subtle assumptions
 > that happen to be obviously true on x86 due to historical
 > implementations, but aren't obviously true even *there* any more with
 > the MCS lock".
 >
 > We already have a concept of
 >
 >          smp_mb__before_spinlock();
 >          spin_lock():
 >
 > for sequences where we *know* we need to make getting a spin-lock be a
 > full memory barrier. It's free on x86 (and remains so even with the
 > MCS lock, regardless of any subtle issues, if only because even the
 > MCS lock starts out with a locked atomic, never mind the contention
 > slow-case). Of course, that macro is only used inside the scheduler,
 > and is actually documented to not really be a full memory barrier, but
 > it handles the case we actually care about.
 >
 > IOW, where do we really care about the "unlock+lock" is a memory
 > barrier? And could we make those places explicit, and then do
 > something similar to the above to them?
 >
 >                         Linus
 >

http://www.spinics.net/lists/linux-mm/msg65653.html

and the resultant text from Documentation/memory-barriers.txt

An ACQUIRE followed by a RELEASE may not be assumed to be full memory barrier
because it is possible for an access preceding the ACQUIRE to happen after the
ACQUIRE, and an access following the RELEASE to happen before the RELEASE, and
the two accesses can themselves then cross:

	*A = a;
	ACQUIRE M
	RELEASE M
	*B = b;

may occur as:

	ACQUIRE M, STORE *B, STORE *A, RELEASE M

This same reordering can of course occur if the lock's ACQUIRE and RELEASE are
to the same lock variable, but only from the perspective of another CPU not
holding that lock.

In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
memory barrier because it is possible for a preceding RELEASE to pass a
later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint
of the compiler.  Note that deadlocks cannot be introduced by this
interchange because if such a deadlock threatened, the RELEASE would
simply complete.

If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This
will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
same variable.  The smp_mb__after_unlock_lock() primitive is free on many
architectures.  Without smp_mb__after_unlock_lock(), the critical sections
corresponding to the RELEASE and the ACQUIRE can cross:

	*A = a;
	RELEASE M
	ACQUIRE N
	*B = b;

could occur as:

	ACQUIRE N, STORE *B, STORE *A, RELEASE M

With smp_mb__after_unlock_lock(), they cannot, so that:

	*A = a;
	RELEASE M
	ACQUIRE N
	smp_mb__after_unlock_lock();
	*B = b;

will always occur as either of the following:

	STORE *A, RELEASE, ACQUIRE, STORE *B
	STORE *A, ACQUIRE, RELEASE, STORE *B

If the RELEASE and ACQUIRE were instead both operating on the same lock
variable, only the first of these two alternatives can occur.

Regards,
Peter Hurley




^ permalink raw reply	[flat|nested] 32+ messages in thread

* memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-22 19:03                               ` Peter Hurley
@ 2014-02-23  1:23                                 ` Stefan Richter
  2014-02-23 16:37                                   ` Paul E. McKenney
  2014-02-23 20:05                                 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK James Bottomley
  1 sibling, 1 reply; 32+ messages in thread
From: Stefan Richter @ 2014-02-23  1:23 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Peter Hurley, James Bottomley, Tejun Heo, laijs, linux-kernel,
	linux1394-devel, Chris Boot, linux-scsi, target-devel

Hi Paul,

in patch "Documentation/memory-barriers.txt: Downgrade UNLOCK+BLOCK" (sic),
you wrote:
+     Memory operations issued before the LOCK may be completed after the
+     LOCK operation has completed.  An smp_mb__before_spinlock(), combined
+     with a following LOCK, orders prior loads against subsequent stores
+     and stores and prior stores against subsequent stores.  Note that

Is there one "and stores" too many?  Or was one "stores" mistyped and meant
to be something else?  Or what else is meant?

@@ -1677,13 +1681,57 @@ LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
 two accesses can themselves then cross:
 
 	*A = a;
-	LOCK
-	UNLOCK
+	LOCK M
+	UNLOCK M
 	*B = b;
 
 may occur as:
 
-	LOCK, STORE *B, STORE *A, UNLOCK
+	LOCK M, STORE *B, STORE *A, UNLOCK M
+
+This same reordering can of course occur if the LOCK and UNLOCK are
+to the same lock variable, but only from the perspective of another
+CPU not holding that lock.

The example says "LOCK M" and "UNLOCK M" (since the patch).  I read
this as LOCK and UNLOCK to the same variable, M.  Why does the
following sentence then say that "this same reordering can... occur
if the LOCK and UNLOCK are to the same lock variable"?  This sentence
would make sense if the example had been about LOCK M, UNLOCK N.

+In short, an UNLOCK followed by a LOCK may -not- be assumed to be a full
+memory barrier because it is possible for a preceding UNLOCK to pass a
+later LOCK from the viewpoint of the CPU, but not from the viewpoint
+of the compiler.  Note that deadlocks cannot be introduced by this
+interchange because if such a deadlock threatened, the UNLOCK would
+simply complete.

So rather than deadlock, "the UNLOCK would simply complete".  But
/why/ does it complete?  It is left unclear (to me at least), why
it would do so.  IOW, what mechanism will make it always proceed
to the UNLOCK?  Without knowing that, it is left entirely unclear
(to me) why the deadlock wouldn't happen.
-- 
Stefan Richter
-=====-====- --=- =-===
http://arcgraph.de/sr/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-23  1:23                                 ` memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK) Stefan Richter
@ 2014-02-23 16:37                                   ` Paul E. McKenney
  2014-02-23 20:35                                     ` Peter Hurley
  0 siblings, 1 reply; 32+ messages in thread
From: Paul E. McKenney @ 2014-02-23 16:37 UTC (permalink / raw)
  To: Stefan Richter
  Cc: Peter Hurley, linux-scsi, linux-kernel, James Bottomley,
	target-devel, Tejun Heo, linux1394-devel, laijs

On Sun, Feb 23, 2014 at 02:23:03AM +0100, Stefan Richter wrote:
> Hi Paul,
> 
> in patch "Documentation/memory-barriers.txt: Downgrade UNLOCK+BLOCK" (sic),
> you wrote:
> +     Memory operations issued before the LOCK may be completed after the
> +     LOCK operation has completed.  An smp_mb__before_spinlock(), combined
> +     with a following LOCK, orders prior loads against subsequent stores
> +     and stores and prior stores against subsequent stores.  Note that
> 
> Is there a "and stores" too many?  Or was one "stores" mistyped and meant
> to be something else?  Or what else is meant?

Good catch!  (This should also answer Peter Hurley's concern on another
email thread.)

The last "stores" on the third line should be a "loads":

	Memory operations issued before the ACQUIRE may be
	completed after the ACQUIRE operation has completed.  An
	smp_mb__before_spinlock(), combined with a following ACQUIRE,
	orders prior loads against subsequent loads and stores and
	also orders prior stores against subsequent stores.  Note that
	this is weaker than smp_mb()!  The smp_mb__before_spinlock()
	primitive is free on many architectures.
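
For illustration only (a rough sketch; the variable and lock names are
made up and not part of the patch), the intended usage pattern is:

	a = 1;				/* prior store			*/
	r1 = b;				/* prior load			*/
	smp_mb__before_spinlock();	/* placed just before the ACQUIRE */
	spin_lock(&lock);		/* ACQUIRE			*/
	r2 = c;				/* later load, ordered after r1	*/
	d = 1;				/* later store, ordered after a and r1 */
	spin_unlock(&lock);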

> @@ -1677,13 +1681,57 @@ LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
>  two accesses can themselves then cross:
> 
>  	*A = a;
> -	LOCK
> -	UNLOCK
> +	LOCK M
> +	UNLOCK M
>  	*B = b;
> 
>  may occur as:
> 
> -	LOCK, STORE *B, STORE *A, UNLOCK
> +	LOCK M, STORE *B, STORE *A, UNLOCK M
> +
> +This same reordering can of course occur if the LOCK and UNLOCK are
> +to the same lock variable, but only from the perspective of another
> +CPU not holding that lock.
> 
> The example says "LOCK M" and "UNLOCK M" (since the patch).  I read
> this as LOCK and UNLOCK to the same variable, M.  Why does the
> following sentence then say that "this same reordering can... occur
> if the LOCK and UNLOCK are to the same lock variable"?  This sentence
> would make sense if the example had been about LOCK M, UNLOCK N.

Good point.  How about the following?

	When the ACQUIRE and RELEASE are a lock acquisition and release,
	respectively, this same reordering can of course occur if the
	lock's ACQUIRE and RELEASE are to the same lock variable, but
	only from the perspective of another CPU not holding that lock.

> +In short, an UNLOCK followed by a LOCK may -not- be assumed to be a full
> +memory barrier because it is possible for a preceding UNLOCK to pass a
> +later LOCK from the viewpoint of the CPU, but not from the viewpoint
> +of the compiler.  Note that deadlocks cannot be introduced by this
> +interchange because if such a deadlock threatened, the UNLOCK would
> +simply complete.
> 
> So rather than deadlock, "the UNLOCK would simply complete".  But
> /why/ does it complete?  It is left unclear (to me at least), why
> it would do so.  IOW, what mechanism will make it always proceed
> to the UNLOCK?  Without knowing that, it is left entirely unclear
> (to me) why the deadlock wouldn't happen.

One key point is that we are only talking about the CPU doing the
interchanging, not the compiler.  If the compiler (or, for that matter,
the developer) switched the operations, deadlock -could- occur.

But suppose the CPU interchanged the operations.  In this case, the
unlock precedes the lock in the assembly code.  The CPU simply elected
to try executing the lock operation first.  If there is a deadlock,
this lock operation will simply spin (or try to sleep, but more on
that later).  The CPU will eventually execute the unlock operation
(which again preceded the lock operation in the assembly code), which
will unravel the potential deadlock.

But what if the lock is a sleeplock?  In that case, the code will
try to enter the scheduler, where it will eventually encounter a
memory barrier, which will force the unlock operation to complete,
again unraveling the deadlock.
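
To make the scenario concrete, a made-up sketch of the kind of code
being discussed (not taken from any patch here) would be:

	/* CPU 0 */			/* CPU 1 */
	spin_lock(&m);			spin_lock(&n);
	/* ... */			/* ... */
	spin_unlock(&m);		spin_unlock(&n);
	spin_lock(&n);			spin_lock(&m);

Even if each CPU begins its final spin_lock() attempt before its own
preceding spin_unlock() has become visible, those attempts merely spin;
the releasing stores still complete, at which point the waiting
acquisitions succeed.  Only the compiler or the developer actually
moving the operations in the source could turn this into a real ABBA
deadlock.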

Please see below for a patch against the current version of
Documentation/memory-barriers.txt.  Does this update help?

						Thanx, Paul

------------------------------------------------------------------------

commit aba6b0e82c9de53eb032844f1932599f148ff68d
Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Date:   Sun Feb 23 08:34:24 2014 -0800

    Documentation/memory-barriers.txt: Clarify release/acquire ordering
    
    This commit fixes a couple of typos and clarifies what happens when
    the CPU chooses to execute a later lock acquisition before a prior
    lock release, in particular, why deadlock is avoided.
    
    Reported-by: Peter Hurley <peter@hurleysoftware.com>
    Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
    Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
    Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 9dde54c55b24..c8932e06edf1 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
      Memory operations issued after the ACQUIRE will be completed after the
      ACQUIRE operation has completed.
 
-     Memory operations issued before the ACQUIRE may be completed after the
-     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
-     with a following ACQUIRE, orders prior loads against subsequent stores and
-     stores and prior stores against subsequent stores.  Note that this is
-     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
-     many architectures.
+     Memory operations issued before the ACQUIRE may be completed after
+     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
+     combined with a following ACQUIRE, orders prior loads against
+     subsequent loads and stores and also orders prior stores against
+     subsequent stores.  Note that this is weaker than smp_mb()!  The
+     smp_mb__before_spinlock() primitive is free on many architectures.
 
  (2) RELEASE operation implication:
 
@@ -1717,23 +1717,47 @@ the two accesses can themselves then cross:
 
 	*A = a;
 	ACQUIRE M
-	RELEASE M
+	RELEASE N
 	*B = b;
 
 may occur as:
 
-	ACQUIRE M, STORE *B, STORE *A, RELEASE M
-
-This same reordering can of course occur if the lock's ACQUIRE and RELEASE are
-to the same lock variable, but only from the perspective of another CPU not
-holding that lock.
-
-In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
-memory barrier because it is possible for a preceding RELEASE to pass a
-later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint
-of the compiler.  Note that deadlocks cannot be introduced by this
-interchange because if such a deadlock threatened, the RELEASE would
-simply complete.
+	ACQUIRE M, STORE *B, STORE *A, RELEASE N
+
+When the ACQUIRE and RELEASE are a lock acquisition and release,
+respectively, this same reordering can of course occur if the lock's
+ACQUIRE and RELEASE are to the same lock variable, but only from the
+perspective of another CPU not holding that lock.
+
+In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be
+a full memory barrier because it is possible for a preceding RELEASE
+to pass a later ACQUIRE from the viewpoint of the CPU, but not from the
+viewpoint of the compiler.  Note that the CPU cannot introduce deadlocks
+with this interchange because if such a deadlock threatened, the RELEASE
+would simply complete.
+
+	Why does this work?
+
+	One key point is that we are only talking about the CPU doing
+	the interchanging, not the compiler.  If the compiler (or, for
+	that matter, the developer) switched the operations, deadlock
+	-could- occur.
+
+	But suppose the CPU interchanged the operations.  In this case,
+	the unlock precedes the lock in the assembly code.  The CPU simply
+	elected to try executing the later lock operation first.  If there
+	is a deadlock, this lock operation will simply spin (or try to
+	sleep, but more on that later).  The CPU will eventually execute
+	the unlock operation (which again preceded the lock operation
+	in the assembly code), which will unravel the potential deadlock,
+	allowing the lock operation to succeed.
+
+	But what if the lock is a sleeplock?  In that case, the code will
+	try to enter the scheduler, where it will eventually encounter
+	a memory barrier, which will force the earlier unlock operation
+	to complete, again unraveling the deadlock.  There might be
+	a sleep-unlock race, but the locking primitive needs to resolve
+	such races properly in any case.
 
 If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
 ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This



^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-22 19:03                               ` Peter Hurley
  2014-02-23  1:23                                 ` memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK) Stefan Richter
@ 2014-02-23 20:05                                 ` James Bottomley
  2014-02-23 22:32                                   ` Peter Hurley
  1 sibling, 1 reply; 32+ messages in thread
From: James Bottomley @ 2014-02-23 20:05 UTC (permalink / raw)
  To: Peter Hurley
  Cc: Tejun Heo, laijs, linux-kernel, Stefan Richter, linux1394-devel,
	Chris Boot, linux-scsi, target-devel

On Sat, 2014-02-22 at 14:03 -0500, Peter Hurley wrote:
> If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
> ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This
> will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
> executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
> same variable.  The smp_mb__after_unlock_lock() primitive is free on many
> architectures.  Without smp_mb__after_unlock_lock(), the critical sections
> corresponding to the RELEASE and the ACQUIRE can cross:
> 
> 	*A = a;
> 	RELEASE M
> 	ACQUIRE N
> 	*B = b;
> 
> could occur as:
> 
> 	ACQUIRE N, STORE *B, STORE *A, RELEASE M

Ah, OK, that's an error in the documentation.  The example should read

	*A = a;
 	RELEASE *N*
 	ACQUIRE *M*
 	*B = b;

The point being you can't have speculation that entangles critical
sections, as I've been saying, because that would speculate you into
ABBA deadlocks.  Paul McKenney will submit a patch fixing the bug in
the documentation.

James



^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-23 16:37                                   ` Paul E. McKenney
@ 2014-02-23 20:35                                     ` Peter Hurley
  2014-02-23 23:50                                       ` Paul E. McKenney
  0 siblings, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-23 20:35 UTC (permalink / raw)
  To: paulmck, Stefan Richter
  Cc: James Bottomley, Tejun Heo, laijs, linux-kernel, linux1394-devel,
	Chris Boot, linux-scsi, target-devel

Hi Paul,

On 02/23/2014 11:37 AM, Paul E. McKenney wrote:
> commit aba6b0e82c9de53eb032844f1932599f148ff68d
> Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Date:   Sun Feb 23 08:34:24 2014 -0800
>
>      Documentation/memory-barriers.txt: Clarify release/acquire ordering
>
>      This commit fixes a couple of typos and clarifies what happens when
>      the CPU chooses to execute a later lock acquisition before a prior
>      lock release, in particular, why deadlock is avoided.
>
>      Reported-by: Peter Hurley <peter@hurleysoftware.com>
>      Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
>      Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
>      Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>
> diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
> index 9dde54c55b24..c8932e06edf1 100644
> --- a/Documentation/memory-barriers.txt
> +++ b/Documentation/memory-barriers.txt
> @@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
>        Memory operations issued after the ACQUIRE will be completed after the
>        ACQUIRE operation has completed.
>
> -     Memory operations issued before the ACQUIRE may be completed after the
> -     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
> -     with a following ACQUIRE, orders prior loads against subsequent stores and
> -     stores and prior stores against subsequent stores.  Note that this is
> -     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
> -     many architectures.
> +     Memory operations issued before the ACQUIRE may be completed after
> +     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
> +     combined with a following ACQUIRE, orders prior loads against
> +     subsequent loads and stores and also orders prior stores against
> +     subsequent stores.  Note that this is weaker than smp_mb()!  The
> +     smp_mb__before_spinlock() primitive is free on many architectures.
>
>    (2) RELEASE operation implication:
>
> @@ -1717,23 +1717,47 @@ the two accesses can themselves then cross:
>
>   	*A = a;
>   	ACQUIRE M
> -	RELEASE M
> +	RELEASE N
>   	*B = b;
>
>   may occur as:
>
> -	ACQUIRE M, STORE *B, STORE *A, RELEASE M

This example should remain as is; it refers to the porosity of a critical
section to loads and stores occurring outside that critical section, and
importantly that LOCK + UNLOCK is not a full barrier. It documents that
memory operations from either side of the critical section may cross
(in the absence of other specific memory barriers). IOW, it is the example
to implication #1 above.

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-23 20:05                                 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK James Bottomley
@ 2014-02-23 22:32                                   ` Peter Hurley
  0 siblings, 0 replies; 32+ messages in thread
From: Peter Hurley @ 2014-02-23 22:32 UTC (permalink / raw)
  To: James Bottomley, Paul E. McKenney
  Cc: Tejun Heo, Lai Jiangshan, linux-kernel, Stefan Richter,
	linux1394-devel, Chris Boot, linux-scsi, target-devel

Hi James,

On 02/23/2014 03:05 PM, James Bottomley wrote:
> On Sat, 2014-02-22 at 14:03 -0500, Peter Hurley wrote:
>> If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
>> ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This
>> will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
>> executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
>> same variable.  The smp_mb__after_unlock_lock() primitive is free on many
>> architectures.  Without smp_mb__after_unlock_lock(), the critical sections
>> corresponding to the RELEASE and the ACQUIRE can cross:
>>
>> 	*A = a;
>> 	RELEASE M
>> 	ACQUIRE N
>> 	*B = b;
>>
>> could occur as:
>>
>> 	ACQUIRE N, STORE *B, STORE *A, RELEASE M
>
> Ah, OK, that's an error in the documentation.

AFAIK, Paul will not be changing the quoted text above.

> The example should read
>
> 	*A = a;
>   	RELEASE *N*
>   	ACQUIRE *M*
>   	*B = b;
>
> The point being you can't have speculation that entangles critical
> sections, as I've been saying, because that would speculate you into
> ABBA deadlocks.  Paul McKenny will submit a patch fixing the bug in
> documentation.

The reason why there is no deadlock here is because the RELEASE M is
not dependent on the ACQUIRE N to complete.

If the attempt to ACQUIRE N is speculated before the RELEASE M, two
possibilities exist:
1. N is not owned, so the ACQUIRE is immediately successful, or
2. N is owned, so the attempted ACQUIRE is not immediately successful.

However, in both cases the RELEASE M will still complete, having already
been started (since it occurred before in the instruction stream).
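
A rough sketch of that single-CPU picture (the lock names are invented
for illustration):

	spin_unlock(&m);	/* RELEASE M: already issued		*/
	spin_lock(&n);		/* ACQUIRE N: may be attempted early	*/
	/*
	 * Case 1: n is free -> the early acquisition succeeds at once.
	 * Case 2: n is held -> the acquisition spins, but the store
	 *         releasing m still completes, so no deadlock results.
	 */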

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-23 20:35                                     ` Peter Hurley
@ 2014-02-23 23:50                                       ` Paul E. McKenney
  2014-02-24  0:09                                         ` Peter Hurley
  2014-02-24  0:32                                         ` Stefan Richter
  0 siblings, 2 replies; 32+ messages in thread
From: Paul E. McKenney @ 2014-02-23 23:50 UTC (permalink / raw)
  To: Peter Hurley
  Cc: Stefan Richter, James Bottomley, Tejun Heo, laijs, linux-kernel,
	linux1394-devel, Chris Boot, linux-scsi, target-devel

On Sun, Feb 23, 2014 at 03:35:31PM -0500, Peter Hurley wrote:
> Hi Paul,
> 
> On 02/23/2014 11:37 AM, Paul E. McKenney wrote:
> >commit aba6b0e82c9de53eb032844f1932599f148ff68d
> >Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >Date:   Sun Feb 23 08:34:24 2014 -0800
> >
> >     Documentation/memory-barriers.txt: Clarify release/acquire ordering
> >
> >     This commit fixes a couple of typos and clarifies what happens when
> >     the CPU chooses to execute a later lock acquisition before a prior
> >     lock release, in particular, why deadlock is avoided.
> >
> >     Reported-by: Peter Hurley <peter@hurleysoftware.com>
> >     Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
> >     Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
> >     Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >
> >diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
> >index 9dde54c55b24..c8932e06edf1 100644
> >--- a/Documentation/memory-barriers.txt
> >+++ b/Documentation/memory-barriers.txt
> >@@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
> >       Memory operations issued after the ACQUIRE will be completed after the
> >       ACQUIRE operation has completed.
> >
> >-     Memory operations issued before the ACQUIRE may be completed after the
> >-     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
> >-     with a following ACQUIRE, orders prior loads against subsequent stores and
> >-     stores and prior stores against subsequent stores.  Note that this is
> >-     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
> >-     many architectures.
> >+     Memory operations issued before the ACQUIRE may be completed after
> >+     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
> >+     combined with a following ACQUIRE, orders prior loads against
> >+     subsequent loads and stores and also orders prior stores against
> >+     subsequent stores.  Note that this is weaker than smp_mb()!  The
> >+     smp_mb__before_spinlock() primitive is free on many architectures.
> >
> >   (2) RELEASE operation implication:
> >
> >@@ -1717,23 +1717,47 @@ the two accesses can themselves then cross:
> >
> >  	*A = a;
> >  	ACQUIRE M
> >-	RELEASE M
> >+	RELEASE N
> >  	*B = b;
> >
> >  may occur as:
> >
> >-	ACQUIRE M, STORE *B, STORE *A, RELEASE M
> 
> This example should remain as is; it refers to the porosity of a critical
> section to loads and stores occurring outside that critical section, and
> importantly that LOCK + UNLOCK is not a full barrier. It documents that
> memory operations from either side of the critical section may cross
> (in the absence of other specific memory barriers). IOW, it is the example
> to implication #1 above.

Good point, I needed to apply the changes further down.  How does the
following updated patch look?

							Thanx, Paul

------------------------------------------------------------------------

commit 528c2771288df7f98f9224a56b93bdb2db27ec70
Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Date:   Sun Feb 23 08:34:24 2014 -0800

    Documentation/memory-barriers.txt: Clarify release/acquire ordering
    
    This commit fixes a couple of typos and clarifies what happens when
    the CPU chooses to execute a later lock acquisition before a prior
    lock release, in particular, why deadlock is avoided.
    
    Reported-by: Peter Hurley <peter@hurleysoftware.com>
    Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
    Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
    Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 9dde54c55b24..9ea6de4eb252 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
      Memory operations issued after the ACQUIRE will be completed after the
      ACQUIRE operation has completed.
 
-     Memory operations issued before the ACQUIRE may be completed after the
-     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
-     with a following ACQUIRE, orders prior loads against subsequent stores and
-     stores and prior stores against subsequent stores.  Note that this is
-     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
-     many architectures.
+     Memory operations issued before the ACQUIRE may be completed after
+     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
+     combined with a following ACQUIRE, orders prior loads against
+     subsequent loads and stores and also orders prior stores against
+     subsequent stores.  Note that this is weaker than smp_mb()!  The
+     smp_mb__before_spinlock() primitive is free on many architectures.
 
  (2) RELEASE operation implication:
 
@@ -1724,24 +1724,20 @@ may occur as:
 
 	ACQUIRE M, STORE *B, STORE *A, RELEASE M
 
-This same reordering can of course occur if the lock's ACQUIRE and RELEASE are
-to the same lock variable, but only from the perspective of another CPU not
-holding that lock.
-
-In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
-memory barrier because it is possible for a preceding RELEASE to pass a
-later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint
-of the compiler.  Note that deadlocks cannot be introduced by this
-interchange because if such a deadlock threatened, the RELEASE would
-simply complete.
+When the ACQUIRE and RELEASE are a lock acquisition and release,
+respectively, this same reordering can of course occur if the lock's
+ACQUIRE and RELEASE are to the same lock variable, but only from the
+perspective of another CPU not holding that lock.  In short, a RELEASE
+followed by an ACQUIRE may -not- be assumed to be a full memory barrier.
 
 If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
 ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This
 will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
 executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
 same variable.  The smp_mb__after_unlock_lock() primitive is free on many
-architectures.  Without smp_mb__after_unlock_lock(), the critical sections
-corresponding to the RELEASE and the ACQUIRE can cross:
+architectures.  Without smp_mb__after_unlock_lock(), the CPU's execution of
+the critical sections corresponding to the RELEASE and the ACQUIRE can cross,
+so that:
 
 	*A = a;
 	RELEASE M
@@ -1752,7 +1748,36 @@ could occur as:
 
 	ACQUIRE N, STORE *B, STORE *A, RELEASE M
 
-With smp_mb__after_unlock_lock(), they cannot, so that:
+It might appear that this rearrangement could introduce a deadlock.
+However, this cannot happen because if such a deadlock threatened,
+the RELEASE would simply complete, thereby avoiding the deadlock.
+
+	Why does this work?
+
+	One key point is that we are only talking about the CPU doing
+	the interchanging, not the compiler.  If the compiler (or, for
+	that matter, the developer) switched the operations, deadlock
+	-could- occur.
+
+	But suppose the CPU interchanged the operations.  In this case,
+	the unlock precedes the lock in the assembly code.  The CPU simply
+	elected to try executing the later lock operation first.  If there
+	is a deadlock, this lock operation will simply spin (or try to
+	sleep, but more on that later).  The CPU will eventually execute
+	the unlock operation (which again preceded the lock operation
+	in the assembly code), which will unravel the potential deadlock,
+	allowing the lock operation to succeed.
+
+	But what if the lock is a sleeplock?  In that case, the code will
+	try to enter the scheduler, where it will eventually encounter
+	a memory barrier, which will force the earlier unlock operation
+	to complete, again unraveling the deadlock.  There might be
+	a sleep-unlock race, but the locking primitive needs to resolve
+	such races properly in any case.
+
+With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
+For example, with the following code, the store to *A will always be
+seen by other CPUs before the store to *B:
 
 	*A = a;
 	RELEASE M
@@ -1760,13 +1785,18 @@ With smp_mb__after_unlock_lock(), they cannot, so that:
 	smp_mb__after_unlock_lock();
 	*B = b;
 
-will always occur as either of the following:
+The operations will always occur in one of the following orders:
 
-	STORE *A, RELEASE, ACQUIRE, STORE *B
-	STORE *A, ACQUIRE, RELEASE, STORE *B
+	STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
+	STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
+	ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
 
-If the RELEASE and ACQUIRE were instead both operating on the same lock
-variable, only the first of these two alternatives can occur.
+If the RELEASE and ACQUIRE were instead both operating on the
+same lock variable, only the first of these two alternatives can
+occur.  In addition, the more strongly ordered systems may rule out
+some of the above orders.  But in any case, as noted earlier, the
+smp_mb__after_unlock_lock() ensures that the store to *A will always be
+seen as happening before the store to *B.
 
 Locks and semaphores may not provide any guarantee of ordering on UP compiled
 systems, and so cannot be counted on in such a situation to actually achieve


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-23 23:50                                       ` Paul E. McKenney
@ 2014-02-24  0:09                                         ` Peter Hurley
  2014-02-24 16:26                                           ` Paul E. McKenney
  2014-02-24  0:32                                         ` Stefan Richter
  1 sibling, 1 reply; 32+ messages in thread
From: Peter Hurley @ 2014-02-24  0:09 UTC (permalink / raw)
  To: paulmck
  Cc: Stefan Richter, James Bottomley, Tejun Heo, laijs, linux-kernel,
	linux1394-devel, Chris Boot, linux-scsi, target-devel

On 02/23/2014 06:50 PM, Paul E. McKenney wrote:
> On Sun, Feb 23, 2014 at 03:35:31PM -0500, Peter Hurley wrote:
>> Hi Paul,
>>
>> On 02/23/2014 11:37 AM, Paul E. McKenney wrote:
>>> commit aba6b0e82c9de53eb032844f1932599f148ff68d
>>> Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>>> Date:   Sun Feb 23 08:34:24 2014 -0800
>>>
>>>      Documentation/memory-barriers.txt: Clarify release/acquire ordering
>>>
>>>      This commit fixes a couple of typos and clarifies what happens when
>>>      the CPU chooses to execute a later lock acquisition before a prior
>>>      lock release, in particular, why deadlock is avoided.
>>>
>>>      Reported-by: Peter Hurley <peter@hurleysoftware.com>
>>>      Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
>>>      Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
>>>      Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>>>
>>> diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
>>> index 9dde54c55b24..c8932e06edf1 100644
>>> --- a/Documentation/memory-barriers.txt
>>> +++ b/Documentation/memory-barriers.txt
>>> @@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
>>>        Memory operations issued after the ACQUIRE will be completed after the
>>>        ACQUIRE operation has completed.
>>>
>>> -     Memory operations issued before the ACQUIRE may be completed after the
>>> -     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
>>> -     with a following ACQUIRE, orders prior loads against subsequent stores and
>>> -     stores and prior stores against subsequent stores.  Note that this is
>>> -     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
>>> -     many architectures.
>>> +     Memory operations issued before the ACQUIRE may be completed after
>>> +     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
>>> +     combined with a following ACQUIRE, orders prior loads against
>>> +     subsequent loads and stores and also orders prior stores against
>>> +     subsequent stores.  Note that this is weaker than smp_mb()!  The
>>> +     smp_mb__before_spinlock() primitive is free on many architectures.
>>>
>>>    (2) RELEASE operation implication:
>>>
>>> @@ -1717,23 +1717,47 @@ the two accesses can themselves then cross:
>>>
>>>   	*A = a;
>>>   	ACQUIRE M
>>> -	RELEASE M
>>> +	RELEASE N
>>>   	*B = b;
>>>
>>>   may occur as:
>>>
>>> -	ACQUIRE M, STORE *B, STORE *A, RELEASE M
>>
>> This example should remain as is; it refers to the porosity of a critical
>> section to loads and stores occurring outside that critical section, and
>> importantly that LOCK + UNLOCK is not a full barrier. It documents that
>> memory operations from either side of the critical section may cross
>> (in the absence of other specific memory barriers). IOW, it is the example
>> to implication #1 above.
>
> Good point, I needed to apply the changes further down.  How does the
> following updated patch look?
>
> 							Thanx, Paul
>
> ------------------------------------------------------------------------
>
> commit 528c2771288df7f98f9224a56b93bdb2db27ec70
> Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Date:   Sun Feb 23 08:34:24 2014 -0800
>
>      Documentation/memory-barriers.txt: Clarify release/acquire ordering
>
>      This commit fixes a couple of typos and clarifies what happens when
>      the CPU chooses to execute a later lock acquisition before a prior
>      lock release, in particular, why deadlock is avoided.
>
>      Reported-by: Peter Hurley <peter@hurleysoftware.com>
>      Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
>      Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
>      Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>
> diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
> index 9dde54c55b24..9ea6de4eb252 100644
> --- a/Documentation/memory-barriers.txt
> +++ b/Documentation/memory-barriers.txt
> @@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
>        Memory operations issued after the ACQUIRE will be completed after the
>        ACQUIRE operation has completed.
>
> -     Memory operations issued before the ACQUIRE may be completed after the
> -     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
> -     with a following ACQUIRE, orders prior loads against subsequent stores and
> -     stores and prior stores against subsequent stores.  Note that this is
> -     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
> -     many architectures.
> +     Memory operations issued before the ACQUIRE may be completed after
> +     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
> +     combined with a following ACQUIRE, orders prior loads against
> +     subsequent loads and stores and also orders prior stores against
> +     subsequent stores.  Note that this is weaker than smp_mb()!  The
> +     smp_mb__before_spinlock() primitive is free on many architectures.
>
>    (2) RELEASE operation implication:
>
> @@ -1724,24 +1724,20 @@ may occur as:
>
>   	ACQUIRE M, STORE *B, STORE *A, RELEASE M
>
> -This same reordering can of course occur if the lock's ACQUIRE and RELEASE are
> -to the same lock variable, but only from the perspective of another CPU not
> -holding that lock.
> -
> -In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
> -memory barrier because it is possible for a preceding RELEASE to pass a
> -later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint
> -of the compiler.  Note that deadlocks cannot be introduced by this
> -interchange because if such a deadlock threatened, the RELEASE would
> -simply complete.
> +When the ACQUIRE and RELEASE are a lock acquisition and release,
> +respectively, this same reordering can of course occur if the lock's
                                            ^^^^^^^
                                           [delete?]

> +ACQUIRE and RELEASE are to the same lock variable, but only from the
> +perspective of another CPU not holding that lock.

In the above, are you introducing UNLOCK + LOCK not being a full barrier
or are you further elaborating the non-barrier that is LOCK + UNLOCK.

If you mean the first, might I suggest something like,

"Similarly, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
memory barrier."

as the introductory sentence.

>  In short, a RELEASE
> +followed by an ACQUIRE may -not- be assumed to be a full memory barrier.


>   If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
>   ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This
>   will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
>   executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
>   same variable.  The smp_mb__after_unlock_lock() primitive is free on many
> -architectures.  Without smp_mb__after_unlock_lock(), the critical sections
> -corresponding to the RELEASE and the ACQUIRE can cross:
> +architectures.  Without smp_mb__after_unlock_lock(), the CPU's execution of
> +the critical sections corresponding to the RELEASE and the ACQUIRE can cross,
> +so that:
>
>   	*A = a;
>   	RELEASE M
> @@ -1752,7 +1748,36 @@ could occur as:
>
>   	ACQUIRE N, STORE *B, STORE *A, RELEASE M
>
> -With smp_mb__after_unlock_lock(), they cannot, so that:
> +It might appear that this rearrangement could introduce a deadlock.
> +However, this cannot happen because if such a deadlock threatened,
> +the RELEASE would simply complete, thereby avoiding the deadlock.
> +
> +	Why does this work?
> +
> +	One key point is that we are only talking about the CPU doing
> +	the interchanging, not the compiler.  If the compiler (or, for
                  ^
             reordering?
> +	that matter, the developer) switched the operations, deadlock
> +	-could- occur.
> +
> +	But suppose the CPU interchanged the operations.  In this case,
                                  ^
                               reordered?

> +	the unlock precedes the lock in the assembly code.  The CPU simply
> +	elected to try executing the later lock operation first.  If there
> +	is a deadlock, this lock operation will simply spin (or try to
> +	sleep, but more on that later).  The CPU will eventually execute
> +	the unlock operation (which again preceded the lock operation
                                       ^^
                                     [delete?]
> +	in the assembly code), which will unravel the potential deadlock,
> +	allowing the lock operation to succeed.
> +
> +	But what if the lock is a sleeplock?  In that case, the code will
> +	try to enter the scheduler, where it will eventually encounter
> +	a memory barrier, which will force the earlier unlock operation
> +	to complete, again unraveling the deadlock.  There might be
> +	a sleep-unlock race, but the locking primitive needs to resolve
> +	such races properly in any case.
> +
> +With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
> +For example, with the following code, the store to *A will always be
> +seen by other CPUs before the store to *B:
>
>   	*A = a;
>   	RELEASE M
> @@ -1760,13 +1785,18 @@ With smp_mb__after_unlock_lock(), they cannot, so that:
>   	smp_mb__after_unlock_lock();
>   	*B = b;
>
> -will always occur as either of the following:
> +The operations will always occur in one of the following orders:
>
> -	STORE *A, RELEASE, ACQUIRE, STORE *B
> -	STORE *A, ACQUIRE, RELEASE, STORE *B
> +	STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
> +	STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
> +	ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
>
> -If the RELEASE and ACQUIRE were instead both operating on the same lock
> -variable, only the first of these two alternatives can occur.
> +If the RELEASE and ACQUIRE were instead both operating on the
> +same lock variable, only the first of these two alternatives can
> +occur.  In addition, the more strongly ordered systems may rule out
> +some of the above orders.  But in any case, as noted earlier, the
> +smp_mb__after_unlock_lock() ensures that the store to *A will always be
> +seen as happening before the store to *B.
>
>   Locks and semaphores may not provide any guarantee of ordering on UP compiled
>   systems, and so cannot be counted on in such a situation to actually achieve

Thanks for your work on these docs (and rcu and locks and multi-arch barriers
in general :) )

Regards,
Peter Hurley

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-23 23:50                                       ` Paul E. McKenney
  2014-02-24  0:09                                         ` Peter Hurley
@ 2014-02-24  0:32                                         ` Stefan Richter
  2014-02-24 16:27                                           ` Paul E. McKenney
  1 sibling, 1 reply; 32+ messages in thread
From: Stefan Richter @ 2014-02-24  0:32 UTC (permalink / raw)
  To: paulmck
  Cc: Peter Hurley, James Bottomley, Tejun Heo, laijs, linux-kernel,
	linux1394-devel, Chris Boot, linux-scsi, target-devel

On Feb 23 Paul E. McKenney wrote:
>>> Please see below for a patch against the current version of
>>> Documentation/memory-barriers.txt.  Does this update help?

Thank you, this clarifies it.

[...]
A new nit:
> +The operations will always occur in one of the following orders:
>  
> -	STORE *A, RELEASE, ACQUIRE, STORE *B
> -	STORE *A, ACQUIRE, RELEASE, STORE *B
> +	STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
> +	STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
> +	ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
>  
> -If the RELEASE and ACQUIRE were instead both operating on the same lock
> -variable, only the first of these two alternatives can occur.
> +If the RELEASE and ACQUIRE were instead both operating on the
> +same lock variable, only the first of these two alternatives can
> +occur.
                                               ^^^
...these {,three} alternatives...
-- 
Stefan Richter
-=====-====- --=- ==---
http://arcgraph.de/sr/

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-24  0:09                                         ` Peter Hurley
@ 2014-02-24 16:26                                           ` Paul E. McKenney
  0 siblings, 0 replies; 32+ messages in thread
From: Paul E. McKenney @ 2014-02-24 16:26 UTC (permalink / raw)
  To: Peter Hurley
  Cc: laijs, linux-scsi, linux-kernel, James Bottomley, Tejun Heo,
	linux1394-devel, target-devel

On Sun, Feb 23, 2014 at 07:09:55PM -0500, Peter Hurley wrote:
> On 02/23/2014 06:50 PM, Paul E. McKenney wrote:
> >On Sun, Feb 23, 2014 at 03:35:31PM -0500, Peter Hurley wrote:
> >>Hi Paul,
> >>
> >>On 02/23/2014 11:37 AM, Paul E. McKenney wrote:
> >>>commit aba6b0e82c9de53eb032844f1932599f148ff68d
> >>>Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >>>Date:   Sun Feb 23 08:34:24 2014 -0800
> >>>
> >>>     Documentation/memory-barriers.txt: Clarify release/acquire ordering
> >>>
> >>>     This commit fixes a couple of typos and clarifies what happens when
> >>>     the CPU chooses to execute a later lock acquisition before a prior
> >>>     lock release, in particular, why deadlock is avoided.
> >>>
> >>>     Reported-by: Peter Hurley <peter@hurleysoftware.com>
> >>>     Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
> >>>     Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
> >>>     Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >>>
> >>>diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
> >>>index 9dde54c55b24..c8932e06edf1 100644
> >>>--- a/Documentation/memory-barriers.txt
> >>>+++ b/Documentation/memory-barriers.txt
> >>>@@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
> >>>       Memory operations issued after the ACQUIRE will be completed after the
> >>>       ACQUIRE operation has completed.
> >>>
> >>>-     Memory operations issued before the ACQUIRE may be completed after the
> >>>-     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
> >>>-     with a following ACQUIRE, orders prior loads against subsequent stores and
> >>>-     stores and prior stores against subsequent stores.  Note that this is
> >>>-     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
> >>>-     many architectures.
> >>>+     Memory operations issued before the ACQUIRE may be completed after
> >>>+     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
> >>>+     combined with a following ACQUIRE, orders prior loads against
> >>>+     subsequent loads and stores and also orders prior stores against
> >>>+     subsequent stores.  Note that this is weaker than smp_mb()!  The
> >>>+     smp_mb__before_spinlock() primitive is free on many architectures.
> >>>
> >>>   (2) RELEASE operation implication:
> >>>
> >>>@@ -1717,23 +1717,47 @@ the two accesses can themselves then cross:
> >>>
> >>>  	*A = a;
> >>>  	ACQUIRE M
> >>>-	RELEASE M
> >>>+	RELEASE N
> >>>  	*B = b;
> >>>
> >>>  may occur as:
> >>>
> >>>-	ACQUIRE M, STORE *B, STORE *A, RELEASE M
> >>
> >>This example should remain as is; it refers to the porosity of a critical
> >>section to loads and stores occurring outside that critical section, and
> >>importantly that LOCK + UNLOCK is not a full barrier. It documents that
> >>memory operations from either side of the critical section may cross
> >>(in the absence of other specific memory barriers). IOW, it is the example
> >>to implication #1 above.
> >
> >Good point, I needed to apply the changes further down.  How does the
> >following updated patch look?
> >
> >							Thanx, Paul
> >
> >------------------------------------------------------------------------
> >
> >commit 528c2771288df7f98f9224a56b93bdb2db27ec70
> >Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >Date:   Sun Feb 23 08:34:24 2014 -0800
> >
> >     Documentation/memory-barriers.txt: Clarify release/acquire ordering
> >
> >     This commit fixes a couple of typos and clarifies what happens when
> >     the CPU chooses to execute a later lock acquisition before a prior
> >     lock release, in particular, why deadlock is avoided.
> >
> >     Reported-by: Peter Hurley <peter@hurleysoftware.com>
> >     Reported-by: James Bottomley <James.Bottomley@HansenPartnership.com>
> >     Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
> >     Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >
> >diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
> >index 9dde54c55b24..9ea6de4eb252 100644
> >--- a/Documentation/memory-barriers.txt
> >+++ b/Documentation/memory-barriers.txt
> >@@ -1674,12 +1674,12 @@ for each construct.  These operations all imply certain barriers:
> >       Memory operations issued after the ACQUIRE will be completed after the
> >       ACQUIRE operation has completed.
> >
> >-     Memory operations issued before the ACQUIRE may be completed after the
> >-     ACQUIRE operation has completed.  An smp_mb__before_spinlock(), combined
> >-     with a following ACQUIRE, orders prior loads against subsequent stores and
> >-     stores and prior stores against subsequent stores.  Note that this is
> >-     weaker than smp_mb()!  The smp_mb__before_spinlock() primitive is free on
> >-     many architectures.
> >+     Memory operations issued before the ACQUIRE may be completed after
> >+     the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
> >+     combined with a following ACQUIRE, orders prior loads against
> >+     subsequent loads and stores and also orders prior stores against
> >+     subsequent stores.  Note that this is weaker than smp_mb()!  The
> >+     smp_mb__before_spinlock() primitive is free on many architectures.
> >
> >   (2) RELEASE operation implication:
> >
> >@@ -1724,24 +1724,20 @@ may occur as:
> >
> >  	ACQUIRE M, STORE *B, STORE *A, RELEASE M
> >
> >-This same reordering can of course occur if the lock's ACQUIRE and RELEASE are
> >-to the same lock variable, but only from the perspective of another CPU not
> >-holding that lock.
> >-
> >-In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
> >-memory barrier because it is possible for a preceding RELEASE to pass a
> >-later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint
> >-of the compiler.  Note that deadlocks cannot be introduced by this
> >-interchange because if such a deadlock threatened, the RELEASE would
> >-simply complete.
> >+When the ACQUIRE and RELEASE are a lock acquisition and release,
> >+respectively, this same reordering can of course occur if the lock's
>                                            ^^^^^^^
>                                           [delete?]

If you insist.  ;-)  Done.

> >+ACQUIRE and RELEASE are to the same lock variable, but only from the
> >+perspective of another CPU not holding that lock.
> 
> In the above, are you introducing UNLOCK + LOCK not being a full barrier
> or are you further elaborating the non-barrier that is LOCK + UNLOCK.
> 
> If you mean the first, might I suggest something like,
> 
> "Similarly, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
> memory barrier."
> 
> as the introductory sentence.

Nope, just got the "ACQUIRE" and "RELEASE" backwards.  The sentence
below now reads:

	In short, an ACQUIRE followed by a RELEASE may -not- be assumed
	to be a full memory barrier.

> > In short, a RELEASE
> >+followed by an ACQUIRE may -not- be assumed to be a full memory barrier.

But I do need a transition sentence before the following paragraph.

I added this to the beginning of the following paragraph:

	Similarly, the reverse case of a RELEASE followed by an ACQUIRE
	does not imply a full memory barrier.

> >  If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
> >  ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation.  This
> >  will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
> >  executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
> >  same variable.  The smp_mb__after_unlock_lock() primitive is free on many
> >-architectures.  Without smp_mb__after_unlock_lock(), the critical sections
> >-corresponding to the RELEASE and the ACQUIRE can cross:
> >+architectures.  Without smp_mb__after_unlock_lock(), the CPU's execution of
> >+the critical sections corresponding to the RELEASE and the ACQUIRE can cross,
> >+so that:
> >
> >  	*A = a;
> >  	RELEASE M
> >@@ -1752,7 +1748,36 @@ could occur as:
> >
> >  	ACQUIRE N, STORE *B, STORE *A, RELEASE M
> >
> >-With smp_mb__after_unlock_lock(), they cannot, so that:
> >+It might appear that this rearrangement could introduce a deadlock.
> >+However, this cannot happen because if such a deadlock threatened,
> >+the RELEASE would simply complete, thereby avoiding the deadlock.
> >+
> >+	Why does this work?
> >+
> >+	One key point is that we are only talking about the CPU doing
> >+	the interchanging, not the compiler.  If the compiler (or, for
>                  ^
>             reordering?
> >+	that matter, the developer) switched the operations, deadlock
> >+	-could- occur.
> >+
> >+	But suppose the CPU interchanged the operations.  In this case,
>                                  ^
>                               reordered?

Agreed, avoiding random use of synonyms makes it clearer.  These are
now changed as you suggest.

> >+	the unlock precedes the lock in the assembly code.  The CPU simply
> >+	elected to try executing the later lock operation first.  If there
> >+	is a deadlock, this lock operation will simply spin (or try to
> >+	sleep, but more on that later).  The CPU will eventually execute
> >+	the unlock operation (which again preceded the lock operation
>                                       ^^
>                                     [delete?]

I guess I didn't explicitly call this out before, so I agree on deleting
"again".  Done.

> >+	in the assembly code), which will unravel the potential deadlock,
> >+	allowing the lock operation to succeed.
> >+
> >+	But what if the lock is a sleeplock?  In that case, the code will
> >+	try to enter the scheduler, where it will eventually encounter
> >+	a memory barrier, which will force the earlier unlock operation
> >+	to complete, again unraveling the deadlock.  There might be
> >+	a sleep-unlock race, but the locking primitive needs to resolve
> >+	such races properly in any case.
> >+
> >+With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
> >+For example, with the following code, the store to *A will always be
> >+seen by other CPUs before the store to *B:
> >
> >  	*A = a;
> >  	RELEASE M
> >@@ -1760,13 +1785,18 @@ With smp_mb__after_unlock_lock(), they cannot, so that:
> >  	smp_mb__after_unlock_lock();
> >  	*B = b;
> >
> >-will always occur as either of the following:
> >+The operations will always occur in one of the following orders:
> >
> >-	STORE *A, RELEASE, ACQUIRE, STORE *B
> >-	STORE *A, ACQUIRE, RELEASE, STORE *B
> >+	STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
> >+	STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
> >+	ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
> >
> >-If the RELEASE and ACQUIRE were instead both operating on the same lock
> >-variable, only the first of these two alternatives can occur.
> >+If the RELEASE and ACQUIRE were instead both operating on the
> >+same lock variable, only the first of these two alternatives can
> >+occur.  In addition, the more strongly ordered systems may rule out
> >+some of the above orders.  But in any case, as noted earlier, the
> >+smp_mb__after_unlock_lock() ensures that the store to *A will always be
> >+seen as happening before the store to *B.
> >
> >  Locks and semaphores may not provide any guarantee of ordering on UP compiled
> >  systems, and so cannot be counted on in such a situation to actually achieve
> 
> Thanks for your work on these docs (and rcu and locks and multi-arch barriers
> in general :) )

Glad you like them, and thank you for your review and comments!

							Thanx, Paul



^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK)
  2014-02-24  0:32                                         ` Stefan Richter
@ 2014-02-24 16:27                                           ` Paul E. McKenney
  0 siblings, 0 replies; 32+ messages in thread
From: Paul E. McKenney @ 2014-02-24 16:27 UTC (permalink / raw)
  To: Stefan Richter
  Cc: Peter Hurley, James Bottomley, Tejun Heo, laijs, linux-kernel,
	linux1394-devel, Chris Boot, linux-scsi, target-devel

On Mon, Feb 24, 2014 at 01:32:54AM +0100, Stefan Richter wrote:
> On Feb 23 Paul E. McKenney wrote:
> >>> Please see below for a patch against the current version of
> >>> Documentation/memory-barriers.txt.  Does this update help?
> 
> Thank you, this clarifies it.
> 
> [...]
> A new nit:
> > +The operations will always occur in one of the following orders:
> >  
> > -	STORE *A, RELEASE, ACQUIRE, STORE *B
> > -	STORE *A, ACQUIRE, RELEASE, STORE *B
> > +	STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
> > +	STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
> > +	ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
> >  
> > -If the RELEASE and ACQUIRE were instead both operating on the same lock
> > -variable, only the first of these two alternatives can occur.
> > +If the RELEASE and ACQUIRE were instead both operating on the
> > +same lock variable, only the first of these two alternatives can
> > +occur.
>                                                ^^^
> ...these {,three} alternatives...

Good catch!  I chose the empty string.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH UPDATED 4/9] firewire: don't use PREPARE_DELAYED_WORK
  2014-02-20 20:44 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK Tejun Heo
  2014-02-21  1:44   ` Peter Hurley
  2014-02-21 20:45   ` Stefan Richter
@ 2014-03-07 15:26   ` Tejun Heo
  2 siblings, 0 replies; 32+ messages in thread
From: Tejun Heo @ 2014-03-07 15:26 UTC (permalink / raw)
  To: laijs; +Cc: linux-scsi, linux-kernel, linux1394-devel, target-devel

PREPARE_[DELAYED_]WORK() are being phased out.  They have few users
and carry a nasty surprise for the reentrancy guarantee: the workqueue
code considers two work items to be different if they don't have the
same work function.

firewire core-device and sbp2 have been multiplexing work items with
multiple work functions.  Introduce fw_device_workfn() and
sbp2_lu_workfn(), which invoke fw_device->workfn and
sbp2_logical_unit->workfn respectively, and always use these two
functions as the work functions.  Update the users to set the ->workfn
fields instead of overriding the work functions with
PREPARE_DELAYED_WORK().

This fixes a variety of possible regressions since commit a2c1c57be8d9
("workqueue: consider work function when searching for busy work
items"), due to which fw_workqueue lost its required non-reentrancy
property.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: linux1394-devel@lists.sourceforge.net
Cc: stable@vger.kernel.org # v3.9+
Cc: stable@vger.kernel.org # v3.8.2+
Cc: stable@vger.kernel.org # v3.4.60+
Cc: stable@vger.kernel.org # v3.2.40+
---
Applied to wq/for-3.14-fixes.

Thanks.

 drivers/firewire/core-device.c | 22 +++++++++++++++-------
 drivers/firewire/sbp2.c        | 17 +++++++++++++----
 include/linux/firewire.h       |  1 +
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index de4aa40..2c6d5e1 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -916,7 +916,7 @@ static int lookup_existing_device(struct device *dev, void *data)
 		old->config_rom_retries = 0;
 		fw_notice(card, "rediscovered device %s\n", dev_name(dev));
 
-		PREPARE_DELAYED_WORK(&old->work, fw_device_update);
+		old->workfn = fw_device_update;
 		fw_schedule_device_work(old, 0);
 
 		if (current_node == card->root_node)
@@ -1075,7 +1075,7 @@ static void fw_device_init(struct work_struct *work)
 	if (atomic_cmpxchg(&device->state,
 			   FW_DEVICE_INITIALIZING,
 			   FW_DEVICE_RUNNING) == FW_DEVICE_GONE) {
-		PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
+		device->workfn = fw_device_shutdown;
 		fw_schedule_device_work(device, SHUTDOWN_DELAY);
 	} else {
 		fw_notice(card, "created device %s: GUID %08x%08x, S%d00\n",
@@ -1196,13 +1196,20 @@ static void fw_device_refresh(struct work_struct *work)
 		  dev_name(&device->device), fw_rcode_string(ret));
  gone:
 	atomic_set(&device->state, FW_DEVICE_GONE);
-	PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
+	device->workfn = fw_device_shutdown;
 	fw_schedule_device_work(device, SHUTDOWN_DELAY);
  out:
 	if (node_id == card->root_node->node_id)
 		fw_schedule_bm_work(card, 0);
 }
 
+static void fw_device_workfn(struct work_struct *work)
+{
+	struct fw_device *device = container_of(to_delayed_work(work),
+						struct fw_device, work);
+	device->workfn(work);
+}
+
 void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 {
 	struct fw_device *device;
@@ -1252,7 +1259,8 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		 * power-up after getting plugged in.  We schedule the
 		 * first config rom scan half a second after bus reset.
 		 */
-		INIT_DELAYED_WORK(&device->work, fw_device_init);
+		device->workfn = fw_device_init;
+		INIT_DELAYED_WORK(&device->work, fw_device_workfn);
 		fw_schedule_device_work(device, INITIAL_DELAY);
 		break;
 
@@ -1268,7 +1276,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		if (atomic_cmpxchg(&device->state,
 			    FW_DEVICE_RUNNING,
 			    FW_DEVICE_INITIALIZING) == FW_DEVICE_RUNNING) {
-			PREPARE_DELAYED_WORK(&device->work, fw_device_refresh);
+			device->workfn = fw_device_refresh;
 			fw_schedule_device_work(device,
 				device->is_local ? 0 : INITIAL_DELAY);
 		}
@@ -1283,7 +1291,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		smp_wmb();  /* update node_id before generation */
 		device->generation = card->generation;
 		if (atomic_read(&device->state) == FW_DEVICE_RUNNING) {
-			PREPARE_DELAYED_WORK(&device->work, fw_device_update);
+			device->workfn = fw_device_update;
 			fw_schedule_device_work(device, 0);
 		}
 		break;
@@ -1308,7 +1316,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
 		device = node->data;
 		if (atomic_xchg(&device->state,
 				FW_DEVICE_GONE) == FW_DEVICE_RUNNING) {
-			PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown);
+			device->workfn = fw_device_shutdown;
 			fw_schedule_device_work(device,
 				list_empty(&card->link) ? 0 : SHUTDOWN_DELAY);
 		}
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 281029d..7aef911 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -146,6 +146,7 @@ struct sbp2_logical_unit {
 	 */
 	int generation;
 	int retries;
+	work_func_t workfn;
 	struct delayed_work work;
 	bool has_sdev;
 	bool blocked;
@@ -864,7 +865,7 @@ static void sbp2_login(struct work_struct *work)
 	/* set appropriate retry limit(s) in BUSY_TIMEOUT register */
 	sbp2_set_busy_timeout(lu);
 
-	PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+	lu->workfn = sbp2_reconnect;
 	sbp2_agent_reset(lu);
 
 	/* This was a re-login. */
@@ -918,7 +919,7 @@ static void sbp2_login(struct work_struct *work)
 	 * If a bus reset happened, sbp2_update will have requeued
 	 * lu->work already.  Reset the work from reconnect to login.
 	 */
-	PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+	lu->workfn = sbp2_login;
 }
 
 static void sbp2_reconnect(struct work_struct *work)
@@ -952,7 +953,7 @@ static void sbp2_reconnect(struct work_struct *work)
 		    lu->retries++ >= 5) {
 			dev_err(tgt_dev(tgt), "failed to reconnect\n");
 			lu->retries = 0;
-			PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+			lu->workfn = sbp2_login;
 		}
 		sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
 
@@ -972,6 +973,13 @@ static void sbp2_reconnect(struct work_struct *work)
 	sbp2_conditionally_unblock(lu);
 }
 
+static void sbp2_lu_workfn(struct work_struct *work)
+{
+	struct sbp2_logical_unit *lu = container_of(to_delayed_work(work),
+						struct sbp2_logical_unit, work);
+	lu->workfn(work);
+}
+
 static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
 {
 	struct sbp2_logical_unit *lu;
@@ -998,7 +1006,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
 	lu->blocked  = false;
 	++tgt->dont_block;
 	INIT_LIST_HEAD(&lu->orb_list);
-	INIT_DELAYED_WORK(&lu->work, sbp2_login);
+	lu->workfn = sbp2_login;
+	INIT_DELAYED_WORK(&lu->work, sbp2_lu_workfn);
 
 	list_add_tail(&lu->link, &tgt->lu_list);
 	return 0;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 5d7782e..c3683bd 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -200,6 +200,7 @@ struct fw_device {
 	unsigned irmc:1;
 	unsigned bc_implemented:2;
 
+	work_func_t workfn;
 	struct delayed_work work;
 	struct fw_attribute_group attribute_group;
 };
-- 
1.8.5.3
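
The dispatch pattern used in the patch above generalizes beyond
firewire: the workqueue only ever sees a single work function, and
behaviour is switched by assigning a function pointer stored next to
the work item before scheduling, so the workqueue's non-reentrancy
check keeps matching the item.  The sketch below is hypothetical; the
names (my_device, my_workfn, my_state_a, my_state_b) are invented for
illustration and are not part of any kernel subsystem.

#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct my_device {
	work_func_t workfn;		/* selected before each scheduling */
	struct delayed_work work;	/* always initialized with my_workfn */
};

/* The only work function the workqueue ever sees for this item. */
static void my_workfn(struct work_struct *work)
{
	struct my_device *dev = container_of(to_delayed_work(work),
					     struct my_device, work);
	dev->workfn(work);
}

static void my_state_a(struct work_struct *work) { /* e.g. login */ }
static void my_state_b(struct work_struct *work) { /* e.g. reconnect */ }

static void my_device_start(struct my_device *dev)
{
	dev->workfn = my_state_a;
	INIT_DELAYED_WORK(&dev->work, my_workfn);
	schedule_delayed_work(&dev->work, HZ / 2);
}

/* Later: switch behaviour without changing the work function. */
static void my_device_switch(struct my_device *dev)
{
	dev->workfn = my_state_b;
	mod_delayed_work(system_wq, &dev->work, 0);
}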


^ permalink raw reply related	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2014-03-07 15:26 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1392929071-16555-1-git-send-email-tj@kernel.org>
2014-02-20 20:44 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK Tejun Heo
2014-02-21  1:44   ` Peter Hurley
2014-02-21  1:59     ` Tejun Heo
2014-02-21  2:07       ` Peter Hurley
2014-02-21  2:13         ` Tejun Heo
2014-02-21  5:13           ` Peter Hurley
2014-02-21 10:03             ` Tejun Heo
2014-02-21 12:51               ` Peter Hurley
2014-02-21 13:06                 ` Tejun Heo
2014-02-21 16:53                   ` Peter Hurley
2014-02-21 16:57                     ` Tejun Heo
2014-02-21 23:01                       ` Peter Hurley
2014-02-21 23:18                         ` Tejun Heo
2014-02-21 23:46                           ` Peter Hurley
2014-02-22 14:38                             ` Tejun Heo
2014-02-22 14:48                               ` Peter Hurley
2014-02-22 18:43                         ` James Bottomley
2014-02-22 18:48                           ` Peter Hurley
2014-02-22 18:52                             ` James Bottomley
2014-02-22 19:03                               ` Peter Hurley
2014-02-23  1:23                                 ` memory-barriers.txt again (was Re: [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK) Stefan Richter
2014-02-23 16:37                                   ` Paul E. McKenney
2014-02-23 20:35                                     ` Peter Hurley
2014-02-23 23:50                                       ` Paul E. McKenney
2014-02-24  0:09                                         ` Peter Hurley
2014-02-24 16:26                                           ` Paul E. McKenney
2014-02-24  0:32                                         ` Stefan Richter
2014-02-24 16:27                                           ` Paul E. McKenney
2014-02-23 20:05                                 ` [PATCH 4/9] firewire: don't use PREPARE_DELAYED_WORK James Bottomley
2014-02-23 22:32                                   ` Peter Hurley
2014-02-21 20:45   ` Stefan Richter
2014-03-07 15:26   ` [PATCH UPDATED " Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox