* [PATCH] ?mb() -> smp_?mb() conversion
@ 2005-03-21 22:59 Anton Blanchard
2005-03-21 23:06 ` David S. Miller
0 siblings, 1 reply; 16+ messages in thread
From: Anton Blanchard @ 2005-03-21 22:59 UTC (permalink / raw)
To: linux-arch
Hi,
On a UP machine loads and stores should appear in program order so we
don't need most memory barriers. Convert a number of ?mb() etc. to smp_?mb()
so we only take a hit on SMP builds.
What do people think of this patch?
Thinking some more about this, perhaps we should remove the ?mb() barriers
and instead have only io_?mb() and smp_?mb(). This has the advantage of
making it clear what barriers should be used in drivers to order
cacheable and non cacheable memory (a problem on ppc/ppc64 at least).
diff -puN fs/buffer.c~barrier_rework_2 fs/buffer.c
--- foobar2/fs/buffer.c~barrier_rework_2 2005-03-21 12:01:06.773011661 +1100
+++ foobar2-anton/fs/buffer.c 2005-03-21 12:01:06.848005928 +1100
@@ -218,7 +218,7 @@ struct super_block *freeze_bdev(struct b
sb = get_super(bdev);
if (sb && !(sb->s_flags & MS_RDONLY)) {
sb->s_frozen = SB_FREEZE_WRITE;
- wmb();
+ smp_wmb();
sync_inodes_sb(sb, 0);
DQUOT_SYNC(sb);
@@ -235,7 +235,7 @@ struct super_block *freeze_bdev(struct b
sync_inodes_sb(sb, 1);
sb->s_frozen = SB_FREEZE_TRANS;
- wmb();
+ smp_wmb();
sync_blockdev(sb->s_bdev);
@@ -263,7 +263,7 @@ void thaw_bdev(struct block_device *bdev
if (sb->s_op->unlockfs)
sb->s_op->unlockfs(sb);
sb->s_frozen = SB_UNFROZEN;
- wmb();
+ smp_wmb();
wake_up(&sb->s_wait_unfrozen);
drop_super(sb);
}
diff -puN ipc/mqueue.c~barrier_rework_2 ipc/mqueue.c
--- foobar2/ipc/mqueue.c~barrier_rework_2 2005-03-21 12:01:06.779011202 +1100
+++ foobar2-anton/ipc/mqueue.c 2005-03-21 12:01:06.851005699 +1100
@@ -767,7 +767,7 @@ static inline void pipelined_send(struct
list_del(&receiver->list);
receiver->state = STATE_PENDING;
wake_up_process(receiver->task);
- wmb();
+ smp_wmb();
receiver->state = STATE_READY;
}
@@ -786,7 +786,7 @@ static inline void pipelined_receive(str
list_del(&sender->list);
sender->state = STATE_PENDING;
wake_up_process(sender->task);
- wmb();
+ smp_wmb();
sender->state = STATE_READY;
}
diff -puN kernel/kthread.c~barrier_rework_2 kernel/kthread.c
--- foobar2/kernel/kthread.c~barrier_rework_2 2005-03-21 12:01:06.784010820 +1100
+++ foobar2-anton/kernel/kthread.c 2005-03-21 12:01:06.853005546 +1100
@@ -174,7 +174,7 @@ int kthread_stop(struct task_struct *k)
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
- wmb();
+ smp_wmb();
/* Now set kthread_should_stop() to true, and wake it up. */
kthread_stop_info.k = k;
diff -puN kernel/profile.c~barrier_rework_2 kernel/profile.c
--- foobar2/kernel/profile.c~barrier_rework_2 2005-03-21 12:01:06.790010361 +1100
+++ foobar2-anton/kernel/profile.c 2005-03-21 12:01:06.855005393 +1100
@@ -528,7 +528,7 @@ static int __init create_hash_tables(voi
return 0;
out_cleanup:
prof_on = 0;
- mb();
+ smp_mb();
on_each_cpu(profile_nop, NULL, 0, 1);
for_each_online_cpu(cpu) {
struct page *page;
diff -puN kernel/ptrace.c~barrier_rework_2 kernel/ptrace.c
--- foobar2/kernel/ptrace.c~barrier_rework_2 2005-03-21 12:01:06.795009979 +1100
+++ foobar2-anton/kernel/ptrace.c 2005-03-21 12:01:06.856005316 +1100
@@ -135,7 +135,7 @@ int ptrace_attach(struct task_struct *ta
(current->gid != task->sgid) ||
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
goto bad;
- rmb();
+ smp_rmb();
if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
goto bad;
/* the same process cannot be attached many times */
diff -puN kernel/stop_machine.c~barrier_rework_2 kernel/stop_machine.c
--- foobar2/kernel/stop_machine.c~barrier_rework_2 2005-03-21 12:01:06.800009597 +1100
+++ foobar2-anton/kernel/stop_machine.c 2005-03-21 12:01:06.858005164 +1100
@@ -32,7 +32,7 @@ static int stopmachine(void *cpu)
set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
/* Ack: we are alive */
- mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
+ smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
atomic_inc(&stopmachine_thread_ack);
/* Simple state machine */
@@ -42,14 +42,14 @@ static int stopmachine(void *cpu)
local_irq_disable();
irqs_disabled = 1;
/* Ack: irqs disabled. */
- mb(); /* Must read state first. */
+ smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
} else if (stopmachine_state == STOPMACHINE_PREPARE
&& !prepared) {
/* Everyone is in place, hold CPU. */
preempt_disable();
prepared = 1;
- mb(); /* Must read state first. */
+ smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
}
/* Yield in first stage: migration threads need to
@@ -61,7 +61,7 @@ static int stopmachine(void *cpu)
}
/* Ack: we are exiting. */
- mb(); /* Must read state first. */
+ smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
if (irqs_disabled)
@@ -76,7 +76,7 @@ static int stopmachine(void *cpu)
static void stopmachine_set_state(enum stopmachine_state state)
{
atomic_set(&stopmachine_thread_ack, 0);
- wmb();
+ smp_wmb();
stopmachine_state = state;
while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
cpu_relax();
diff -puN kernel/sys.c~barrier_rework_2 kernel/sys.c
--- foobar2/kernel/sys.c~barrier_rework_2 2005-03-21 12:01:06.804009291 +1100
+++ foobar2-anton/kernel/sys.c 2005-03-21 12:01:06.862004858 +1100
@@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid,
if (new_egid != old_egid)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
if (rgid != (gid_t) -1 ||
(egid != (gid_t) -1 && egid != old_rgid))
@@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid)
if(old_egid != gid)
{
current->mm->dumpable=0;
- wmb();
+ smp_wmb();
}
current->gid = current->egid = current->sgid = current->fsgid = gid;
}
@@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid)
if(old_egid != gid)
{
current->mm->dumpable=0;
- wmb();
+ smp_wmb();
}
current->egid = current->fsgid = gid;
}
@@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int
if(dumpclear)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
current->uid = new_ruid;
return 0;
@@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid,
if (new_euid != old_euid)
{
current->mm->dumpable=0;
- wmb();
+ smp_wmb();
}
current->fsuid = current->euid = new_euid;
if (ruid != (uid_t) -1 ||
@@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid)
if (old_euid != uid)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
current->fsuid = current->euid = uid;
current->suid = new_suid;
@@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid
if (euid != current->euid)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
current->euid = euid;
}
@@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid
if (egid != current->egid)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
current->egid = egid;
}
@@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
if (uid != old_fsuid)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
current->fsuid = uid;
}
@@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
if (gid != old_fsgid)
{
current->mm->dumpable = 0;
- wmb();
+ smp_wmb();
}
current->fsgid = gid;
key_fsgid_changed(current);
diff -puN kernel/timer.c~barrier_rework_2 kernel/timer.c
--- foobar2/kernel/timer.c~barrier_rework_2 2005-03-21 12:01:06.809008909 +1100
+++ foobar2-anton/kernel/timer.c 2005-03-21 12:01:06.866004552 +1100
@@ -1007,7 +1007,7 @@ asmlinkage long sys_getppid(void)
* Make sure we read the pid before re-reading the
* parent pointer:
*/
- rmb();
+ smp_rmb();
parent = me->group_leader->real_parent;
if (old != parent)
continue;
diff -puN lib/rwsem-spinlock.c~barrier_rework_2 lib/rwsem-spinlock.c
--- foobar2/lib/rwsem-spinlock.c~barrier_rework_2 2005-03-21 12:01:06.814008527 +1100
+++ foobar2-anton/lib/rwsem-spinlock.c 2005-03-21 12:01:06.867004476 +1100
@@ -76,7 +76,7 @@ __rwsem_do_wake(struct rw_semaphore *sem
list_del(&waiter->list);
tsk = waiter->task;
/* Don't touch waiter after ->task has been NULLed */
- mb();
+ smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
@@ -91,7 +91,7 @@ __rwsem_do_wake(struct rw_semaphore *sem
list_del(&waiter->list);
tsk = waiter->task;
- mb();
+ smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
@@ -123,7 +123,7 @@ __rwsem_wake_one_writer(struct rw_semaph
list_del(&waiter->list);
tsk = waiter->task;
- mb();
+ smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
diff -puN lib/rwsem.c~barrier_rework_2 lib/rwsem.c
--- foobar2/lib/rwsem.c~barrier_rework_2 2005-03-21 12:01:06.819008145 +1100
+++ foobar2-anton/lib/rwsem.c 2005-03-21 12:01:06.869004323 +1100
@@ -74,7 +74,7 @@ __rwsem_do_wake(struct rw_semaphore *sem
*/
list_del(&waiter->list);
tsk = waiter->task;
- mb();
+ smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
@@ -117,7 +117,7 @@ __rwsem_do_wake(struct rw_semaphore *sem
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task;
- mb();
+ smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
diff -puN mm/mempool.c~barrier_rework_2 mm/mempool.c
--- foobar2/mm/mempool.c~barrier_rework_2 2005-03-21 12:01:06.824007762 +1100
+++ foobar2-anton/mm/mempool.c 2005-03-21 12:01:06.870004246 +1100
@@ -210,7 +210,7 @@ repeat_alloc:
* If the pool is less than 50% full and we can perform effective
* page reclaim then try harder to allocate an element.
*/
- mb();
+ smp_mb();
if ((gfp_mask & __GFP_FS) && (gfp_mask != gfp_nowait) &&
(pool->curr_nr <= pool->min_nr/2)) {
element = pool->alloc(gfp_mask, pool->pool_data);
@@ -236,7 +236,7 @@ repeat_alloc:
return NULL;
prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
- mb();
+ smp_mb();
if (!pool->curr_nr)
io_schedule();
finish_wait(&pool->wait, &wait);
@@ -257,7 +257,7 @@ void mempool_free(void *element, mempool
{
unsigned long flags;
- mb();
+ smp_mb();
if (pool->curr_nr < pool->min_nr) {
spin_lock_irqsave(&pool->lock, flags);
if (pool->curr_nr < pool->min_nr) {
_
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-21 22:59 [PATCH] ?mb() -> smp_?mb() conversion Anton Blanchard
@ 2005-03-21 23:06 ` David S. Miller
2005-03-22 10:43 ` David Howells
0 siblings, 1 reply; 16+ messages in thread
From: David S. Miller @ 2005-03-21 23:06 UTC (permalink / raw)
To: Anton Blanchard; +Cc: linux-arch
On Tue, 22 Mar 2005 09:59:04 +1100
Anton Blanchard <anton@samba.org> wrote:
> Thinking some more about this, perhaps we should remove the ?mb() barriers
> and instead have only io_?mb() and smp_?mb(). This has the advantage of
> making it clear what barriers should be used in drivers to order
> cacheable and non cacheable memory (a problem on ppc/ppc64 at least).
No objections to this idea.
If I remember correctly, the ?mb() routines _were_ the original
SMP memory barriers.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-21 23:06 ` David S. Miller
@ 2005-03-22 10:43 ` David Howells
2005-03-22 13:13 ` Matthew Wilcox
0 siblings, 1 reply; 16+ messages in thread
From: David Howells @ 2005-03-22 10:43 UTC (permalink / raw)
To: David S. Miller; +Cc: Anton Blanchard, linux-arch
David S. Miller <davem@davemloft.net> wrote:
> If I remember correctly, the ?mb() routines _were_ the original
> SMP memory barriers.
Would it be worth renaming the mb/rmb/wmb to io_mb/io_rmb/io_wmb? After all, I
believe they should only be used to flush I/O memory accesses. This would, I
think, make the distinction between memory barriers for I/O and memory
barriers for SMP more obvious.
David
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 10:43 ` David Howells
@ 2005-03-22 13:13 ` Matthew Wilcox
2005-03-22 14:27 ` David Howells
2005-03-22 16:03 ` Anton Blanchard
0 siblings, 2 replies; 16+ messages in thread
From: Matthew Wilcox @ 2005-03-22 13:13 UTC (permalink / raw)
To: David Howells; +Cc: David S. Miller, Anton Blanchard, linux-arch
On Tue, Mar 22, 2005 at 10:43:49AM +0000, David Howells wrote:
> David S. Miller <davem@davemloft.net> wrote:
>
> > If I remember correctly, the ?mb() routines _were_ the original
> > SMP memory barriers.
>
> Would it be worth renaming the mb/rmb/wmb to io_mb/io_rmb/io_wmb? After all, I
> believe they should only be used to flush I/O memory accesses. This would, I
> think, make the distinction between memory barriers for I/O and memory
> barriers for SMP more obvious.
Are you joking or genuinely confused?
--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 13:13 ` Matthew Wilcox
@ 2005-03-22 14:27 ` David Howells
2005-03-22 16:03 ` Anton Blanchard
1 sibling, 0 replies; 16+ messages in thread
From: David Howells @ 2005-03-22 14:27 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: David S. Miller, Anton Blanchard, linux-arch
Matthew Wilcox <matthew@wil.cx> wrote:
> > > If I remember correctly, the ?mb() routines _were_ the original
> > > SMP memory barriers.
> >
> > Would it be worth renaming the mb/rmb/wmb to io_mb/io_rmb/io_wmb? After
> > all, I believe they should only be used to flush I/O memory accesses. This
> > would, I think, make the distinction between memory barriers for I/O and
> > memory barriers for SMP more obvious.
>
> Are you joking or genuinely confused?
Must be the latter.
As far as I know memory barriers are only used for I/O and SMP. I don't see
where else they should be required.
Of course, the tiny bit of documentation in the kernel sources isn't much
help; it doesn't even mention the smp_*mb() functions.
David
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 13:13 ` Matthew Wilcox
2005-03-22 14:27 ` David Howells
@ 2005-03-22 16:03 ` Anton Blanchard
2005-03-22 16:34 ` Matthew Wilcox
` (2 more replies)
1 sibling, 3 replies; 16+ messages in thread
From: Anton Blanchard @ 2005-03-22 16:03 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: David Howells, David S. Miller, linux-arch
> > Would it be worth renaming the mb/rmb/wmb to io_mb/io_rmb/io_wmb?
> > After all, I believe they should only be used to flush I/O memory
> > accesses. This would, I think, make the distinction between memory
> > barriers for I/O and memory barriers for SMP more obvious.
>
> Are you joking or genuinely confused?
To be fair there are a lot of confused people out there. A few examples:
1. My original patch showed there are a number of places we use memory
barriers on UP when not required. Getting rid of mb/rmb/wmb would help
this, people are unlikely to sprinkle io_mb in the scheduler code :)
2. drivers/net/typhoon.c
INIT_COMMAND_NO_RESPONSE(cmd, TYPHOON_CMD_HELLO_RESP);
smp_wmb();
writel(ring->lastWrite, tp->ioaddr + TYPHOON_REG_CMD_READY);
it looks a lot like smp_wmb is being used to order IO.
3. On ppc64 we recently had to upgrade our barriers to make sure
mb/wmb/rmb ordered IO. This is because drivers do this (example taken
from e1000):
tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64). */
wmb();
tx_ring->next_to_use = i;
E1000_WRITE_REG(&adapter->hw, TDT, i);
Renaming mb/wmb/rmb to io_mb/io_wmb/io_rmb would fit in well here.
4. It's not clear other architectures are ensuring wmb/rmb/mb are
ordering IO. Checking ia64:
* Note: "mb()" and its variants cannot be used as a fence to order
* accesses to memory mapped I/O registers. For that, mf.a needs to
* be used. However, we don't want to always use mf.a because (a)
* it's (presumably) much slower than mf and (b) mf.a is supported for
* sequential memory pages only.
Anton
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 16:03 ` Anton Blanchard
@ 2005-03-22 16:34 ` Matthew Wilcox
2005-03-22 16:48 ` David Howells
2005-03-22 18:15 ` Jesse Barnes
2005-03-23 6:23 ` Paul Mackerras
2 siblings, 1 reply; 16+ messages in thread
From: Matthew Wilcox @ 2005-03-22 16:34 UTC (permalink / raw)
To: Anton Blanchard
Cc: Matthew Wilcox, David Howells, David S. Miller, linux-arch
On Wed, Mar 23, 2005 at 03:03:24AM +1100, Anton Blanchard wrote:
>
> > > Would it be worth renaming the mb/rmb/wmb to io_mb/io_rmb/io_wmb?
> > > After all, I believe they should only be used to flush I/O memory
> > > accesses. This would, I think, make the distinction between memory
> > > barriers for I/O and memory barriers for SMP more obvious.
> >
> > Are you joking or genuinely confused?
>
> To be fair there are a lot of confused people out there. A few examples:
Yep, and I'm one of them. Never having worked on the alpha or sparc
ports, i'm pretty barrier-uneducated. Let me recap my understanding
(and thanks to dhowells for our irc chat earlier) ...
- barrier() is a *compiler* barrier. It does not affect how the CPU
reorders instructions.
- wmb() ensure that other CPUs and devices doing DMA observe writes
to memory before the wmb() before the writes after the wmb()
- rmb() ensures preceding reads from memory complete before reads that
come after the rmb()
- mb() is the same as wmb(); rmb();
- io_*mb() are equivalent, for io memory.
- smp_*mb() no barrier on UP and specified barrier on SMP
> 1. My original patch showed there are a number of places we use memory
> barriers on UP when not required. Getting rid of mb/rmb/wmb would help
> this, people are unlikely to sprinkle io_mb in the scheduler code :)
>
> 2. drivers/net/typhoon.c
>
> INIT_COMMAND_NO_RESPONSE(cmd, TYPHOON_CMD_HELLO_RESP);
> smp_wmb();
> writel(ring->lastWrite, tp->ioaddr + TYPHOON_REG_CMD_READY);
>
> it looks a lot like smp_wmb is being used to order IO.
This is a tricky one because I think we're writing to memory, then
writing the address of memory to IO. So we need to ensure that memory
writes complete before the next IO write, right?
--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 16:34 ` Matthew Wilcox
@ 2005-03-22 16:48 ` David Howells
2005-03-22 17:13 ` David S. Miller
0 siblings, 1 reply; 16+ messages in thread
From: David Howells @ 2005-03-22 16:48 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Anton Blanchard, David S. Miller, linux-arch
Matthew Wilcox <matthew@wil.cx> wrote:
> - io_*mb() are equivalent, for io memory.
I don't think these actually exist at the moment. My suggestion was that we
rename {r,w,}mb() to io_{r,w,}mb().
David
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 16:48 ` David Howells
@ 2005-03-22 17:13 ` David S. Miller
2005-03-22 17:44 ` James Bottomley
` (2 more replies)
0 siblings, 3 replies; 16+ messages in thread
From: David S. Miller @ 2005-03-22 17:13 UTC (permalink / raw)
To: David Howells; +Cc: matthew, anton, linux-arch
On Tue, 22 Mar 2005 16:48:14 +0000
David Howells <dhowells@redhat.com> wrote:
> Matthew Wilcox <matthew@wil.cx> wrote:
>
> > - io_*mb() are equivalent, for io memory.
>
> I don't think these actually exist at the moment. My suggestion was that we
> rename {r,w,}mb() to io_{r,w,}mb().
This is the first time I've seen that ?mb() should order I/O
accesses. My sparc64 versions certainly don't handle that
correctly. :-) That being said, I think we're all being
educated to some extent in this thread.
On sparc64, we have a "membar" instruction. It takes a bitmask,
and each bit says what ordering constraint is desired. So, for
example, there is a bit that says "complete all previous loads
before executing any future stores", it looks like:
membar #LoadStore
if you wanted to order previous stores with future stores as well,
you'd do something like:
membar #LoadStore | #StoreStore
Anyways, there is a special bit called "#Sync" which usually sits
by itself and it causes all previous memory operations (even deferred
ones) to complete before that membar finishes execution. This is
the only way to get deferred errors and traps (f.e. deferred store
causes memory bus error) to get signalled synchronously.
Finally, there is the #Lookaside bit which is the only way to order
cacheable vs. non-cacheable accesses. I would need to add this bit
to the ?mb() macros if I/O ordering is really required.
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 17:13 ` David S. Miller
@ 2005-03-22 17:44 ` James Bottomley
2005-03-22 18:09 ` Jesse Barnes
2005-03-22 18:00 ` David Howells
2005-03-22 21:59 ` Paul Mackerras
2 siblings, 1 reply; 16+ messages in thread
From: James Bottomley @ 2005-03-22 17:44 UTC (permalink / raw)
To: David S. Miller; +Cc: David Howells, matthew, anton, linux-arch
On Tue, 2005-03-22 at 09:13 -0800, David S. Miller wrote:
> On Tue, 22 Mar 2005 16:48:14 +0000
> David Howells <dhowells@redhat.com> wrote:
>
> > Matthew Wilcox <matthew@wil.cx> wrote:
> >
> > > - io_*mb() are equivalent, for io memory.
> >
> > I don't think these actually exist at the moment. My suggestion was that we
> > rename {r,w,}mb() to io_{r,w,}mb().
>
> This is the first time I've seen that ?mb() should order I/O
> accesses. My sparc64 versions certainly don't handle that
> correctly. :-) That being said, I think we're all being
> educated to some extent in this thread.
Actually, by and large, they shouldn't; the I/O domain is usually
completely separate from the processor and memory one.
There is one exception for the altix and that's the mmiowb()
instruction. You can read about it in the linux-arch archives (if you
keep them) under the subject:
[PATCH] I/O space write barrier
which is, I think where this is coming from.
As a summary: the altix has a method of imposing write ordering on the
PCI domain, which is what mmiowb does. They prefer it in certain
circumstances to a read that does a flush of posted writes because it's
a lot cheaper on their hardware. The only drivers (at least in SCSI)
that I've seen modified to use it are the ones that get plugged into the
altix.
As an aside, the main ordering problem it prevents is the SMP one where
two CPUs do writes into the PCI domain that they order with spinlocks.
Even though, temporally, the writes are sequenced leaving the CPUs, the
altix PCI domain can still re-order them if the mmiowb() isn't present
(so it sounds like it's really a smp_mmiowb()...)
James
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 17:44 ` James Bottomley
@ 2005-03-22 18:09 ` Jesse Barnes
0 siblings, 0 replies; 16+ messages in thread
From: Jesse Barnes @ 2005-03-22 18:09 UTC (permalink / raw)
To: James Bottomley
Cc: David S. Miller, David Howells, matthew, anton, linux-arch
On Tuesday, March 22, 2005 9:44 am, James Bottomley wrote:
> As an aside, the main ordering problem it prevents is the SMP one where
> two CPUs do writes into the PCI domain that they order with spinlocks.
> Even though, temporally, the writes are sequenced leaving the CPUs, the
> altix PCI domain can still re-order them if the mmiowb() isn't present
> (so it sounds like it's really a smp_mmiowb()...)
Correct. And it's necessary because spin_unlock* don't do any I/O ordering by
default (in fact they're only one way memory barriers on alpha and ia64
iirc).
Jesse
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 17:13 ` David S. Miller
2005-03-22 17:44 ` James Bottomley
@ 2005-03-22 18:00 ` David Howells
2005-03-22 21:59 ` Paul Mackerras
2 siblings, 0 replies; 16+ messages in thread
From: David Howells @ 2005-03-22 18:00 UTC (permalink / raw)
To: David S. Miller; +Cc: matthew, anton, linux-arch
David S. Miller <davem@davemloft.net> wrote:
> > I don't think these actually exist at the moment. My suggestion was that we
> > rename {r,w,}mb() to io_{r,w,}mb().
>
> This is the first time I've seen that ?mb() should order I/O
> accesses. My sparc64 versions certainly don't handle that
> correctly. :-) That being said, I think we're all being
> educated to some extent in this thread.
The FRV CPUs have a MEMBAR instruction; it takes no parameters and acts as a
mb().
You need this when accessing memory-mapped I/O (which is all I/O). Otherwise
reads and writes to peripherals may cross under some circumstances.
We've implemented the logic of when to insert MEMBAR into the FRV compiler,
using __builtin_read8/16/32() and __builtin_write8/16/32().
David
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 17:13 ` David S. Miller
2005-03-22 17:44 ` James Bottomley
2005-03-22 18:00 ` David Howells
@ 2005-03-22 21:59 ` Paul Mackerras
2 siblings, 0 replies; 16+ messages in thread
From: Paul Mackerras @ 2005-03-22 21:59 UTC (permalink / raw)
To: David S. Miller; +Cc: David Howells, matthew, anton, linux-arch
David S. Miller writes:
> This is the first time I've seen that ?mb() should order I/O
> accesses. My sparc64 versions certainly don't handle that
> correctly. :-) That being said, I think we're all being
> educated to some extent in this thread.
Certainly driver writers expect that the combination of a wmb()
followed by a writel() will enforce ordering between any previous
writes to memory and the I/O write in the writel (as in the typhoon.c
example that Anton posted). On ppc64 this means that we have to use a
sync instruction rather than any lighter-weight memory barrier for
wmb.
Paul.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 16:03 ` Anton Blanchard
2005-03-22 16:34 ` Matthew Wilcox
@ 2005-03-22 18:15 ` Jesse Barnes
2005-03-22 18:24 ` Jesse Barnes
2005-03-23 6:23 ` Paul Mackerras
2 siblings, 1 reply; 16+ messages in thread
From: Jesse Barnes @ 2005-03-22 18:15 UTC (permalink / raw)
To: Anton Blanchard
Cc: Matthew Wilcox, David Howells, David S. Miller, linux-arch
On Tuesday, March 22, 2005 8:03 am, Anton Blanchard wrote:
> > > Would it be worth renaming the mb/rmb/wmb to io_mb/io_rmb/io_wmb?
> > > After all, I believe they should only be used to flush I/O memory
> > > accesses. This would, I think, make the distinction between memory
> > > barriers for I/O and memory barriers for SMP more obvious.
> >
> > Are you joking or genuinely confused?
>
> To be fair there are a lot of confused people out there. A few examples:
>
> 1. My original patch showed there are a number of places we use memory
> barriers on UP when not required. Getting rid of mb/rmb/wmb would help
> this, people are unlikely to sprinkle io_mb in the scheduler code :)
>
> 2. drivers/net/typhoon.c
>
> INIT_COMMAND_NO_RESPONSE(cmd, TYPHOON_CMD_HELLO_RESP);
> smp_wmb();
> writel(ring->lastWrite, tp->ioaddr +
> TYPHOON_REG_CMD_READY);
I think this is the same as (3) below, since the first line is writing memory.
So I'd agree that we need an I/O vs. memory barrier of some sort for
platforms like ppc64 where they can be reordered independently.
>
> it looks a lot like smp_wmb is being used to order IO.
>
> 3. On ppc64 we recently had to upgrade our barriers to make sure
> mb/wmb/rmb ordered IO. This is because drivers do this (example taken
> from e1000):
>
> tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
>
> /* Force memory writes to complete before letting h/w
> * know there are new descriptors to fetch. (Only
> * applicable for weak-ordered memory model archs,
> * such as IA-64). */
> wmb();
>
> tx_ring->next_to_use = i;
> E1000_WRITE_REG(&adapter->hw, TDT, i);
>
> Renaming mb/wmb/rmb to io_mb/io_wmb/io_rmb would fit in well here.
Yep.
> 4. It's not clear other architectures are ensuring wmb/rmb/mb are
> ordering IO. Checking ia64:
>
> * Note: "mb()" and its variants cannot be used as a fence to order
> * accesses to memory mapped I/O registers. For that, mf.a needs to
> * be used. However, we don't want to always use mf.a because (a)
> * it's (presumably) much slower than mf and (b) mf.a is supported for
> * sequential memory pages only.
Right. And then there's pure I/O ordering, which as James pointed out can be
implemented with mmiowb. So let's see, we have
memory vs. memory writes: smp_wmb()
I/O vs. I/O writes: mmiowb() (and/or io_wmb()?)
memory vs. I/O writes: io_wmb()
right? And for reads:
memory vs. memory reads: smp_rmb
I/O vs. I/O reads: io_rmb()?
memory vs. I/O reads: io_rmb()
Is that an accurate summary?
Jesse
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 18:15 ` Jesse Barnes
@ 2005-03-22 18:24 ` Jesse Barnes
0 siblings, 0 replies; 16+ messages in thread
From: Jesse Barnes @ 2005-03-22 18:24 UTC (permalink / raw)
To: Anton Blanchard
Cc: Matthew Wilcox, David Howells, David S. Miller, linux-arch
On Tuesday, March 22, 2005 10:15 am, Jesse Barnes wrote:
> > 2. drivers/net/typhoon.c
> >
> > INIT_COMMAND_NO_RESPONSE(cmd, TYPHOON_CMD_HELLO_RESP);
> > smp_wmb();
> > writel(ring->lastWrite, tp->ioaddr +
> > TYPHOON_REG_CMD_READY);
>
> I think this is the same as (3) below, since the first line is writing
> memory. So I'd agree that we need an I/O vs. memory barrier of some sort
> for platforms like ppc64 where they can be reordered independently.
I should clarify this: if both of the stores above are to I/O space and the
CPU reorders them even coming from the same CPU (since I assume the above is
in a critical section), you've got a broken CPU, don't you? I mean, stores
to I/O space are usually done with 'volatile' semantics, which on ia64 at
least means that they're ordered wrt one another, so the memory barrier above
wouldn't be necessary (again, assuming they're both writing to I/O space).
If your platform doesn't do this, I'd expect lots more problems since it seems
that the majority of driver code assumes strong memory ordering to I/O space.
Jesse
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH] ?mb() -> smp_?mb() conversion
2005-03-22 16:03 ` Anton Blanchard
2005-03-22 16:34 ` Matthew Wilcox
2005-03-22 18:15 ` Jesse Barnes
@ 2005-03-23 6:23 ` Paul Mackerras
2 siblings, 0 replies; 16+ messages in thread
From: Paul Mackerras @ 2005-03-23 6:23 UTC (permalink / raw)
To: Anton Blanchard
Cc: Matthew Wilcox, David Howells, David S. Miller, linux-arch
Anton Blanchard writes:
> 2. drivers/net/typhoon.c
>
> INIT_COMMAND_NO_RESPONSE(cmd, TYPHOON_CMD_HELLO_RESP);
> smp_wmb();
> writel(ring->lastWrite, tp->ioaddr + TYPHOON_REG_CMD_READY);
>
> it looks a lot like smp_wmb is being used to order IO.
There's a word for that, it has 3 letters, starts with B and ends with
G. 8-)
Paul.
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2005-03-23 6:23 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-03-21 22:59 [PATCH] ?mb() -> smp_?mb() conversion Anton Blanchard
2005-03-21 23:06 ` David S. Miller
2005-03-22 10:43 ` David Howells
2005-03-22 13:13 ` Matthew Wilcox
2005-03-22 14:27 ` David Howells
2005-03-22 16:03 ` Anton Blanchard
2005-03-22 16:34 ` Matthew Wilcox
2005-03-22 16:48 ` David Howells
2005-03-22 17:13 ` David S. Miller
2005-03-22 17:44 ` James Bottomley
2005-03-22 18:09 ` Jesse Barnes
2005-03-22 18:00 ` David Howells
2005-03-22 21:59 ` Paul Mackerras
2005-03-22 18:15 ` Jesse Barnes
2005-03-22 18:24 ` Jesse Barnes
2005-03-23 6:23 ` Paul Mackerras
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox