Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* bug in arm_kprobe_decode_insn
From: Marcin Slusarz @ 2011-02-20 18:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi

In arch/arm/kernel/kprobes-decode.c there's a function arm_kprobe_decode_insn
which does:

} else if ((insn & 0x0e000000) == 0x0c400000) {
...

This is always false, so code below is dead.

I'm not sure about what was intended here, but I think this change might be correct:

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 2c1f005..8f6ed43 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1437,7 +1437,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		return space_cccc_1100_010x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x0c400000) {
+	} else if ((insn & 0x0e000000) == 0x0c000000) {
 
 		return space_cccc_110x(insn, asi);
 
--


I found this bug by coccinelle (http://coccinelle.lip6.fr/).
(The script is ugly, so I'm not attaching it here)


Marcin Slusarz

^ permalink raw reply related

* MMC quirks relating to performance/lifetime.
From: Arnd Bergmann @ 2011-02-20 15:23 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <AANLkTincLPnW90xvE304dCjkTbKUQGf6H8yWDVdz5Kh4@mail.gmail.com>

On Sunday 20 February 2011 06:56:39 Andrei Warkentin wrote:
> On Sat, Feb 19, 2011 at 5:20 AM, Arnd Bergmann <arnd@arndb.de> wrote:

> > The numbers you see here are taken over multiple runs. Do you see a lot
> > of fluctuation when doing this with --count=1?
> >
> 
> Yep. Quite a bit.
> 
> # ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
> write align 8388608	pre 4.52ms	on 7.58ms	post 3.93ms	diff 3.36ms
> write align 4194304	pre 5.97ms	on 8.69ms	post 4.36ms	diff 3.53ms
> write align 2097152	pre 3.57ms	on 7.96ms	post 4.6ms	diff 3.88ms
> write align 1048576	pre 5.33ms	on 27.4ms	post 4.88ms	diff 22.3ms
> write align 524288	pre 49.3ms	on 31.4ms	post 14.9ms	diff -679265
> write align 262144	pre 39.7ms	on 38.3ms	post 5.27ms	diff 15.8ms
> write align 131072	pre 33.8ms	on 45.4ms	post 5.26ms	diff 25.9ms
> write align 65536	pre 34.4ms	on 40.9ms	post 3.3ms	diff 22.1ms
> write align 32768	pre 30.2ms	on 44.8ms	post 5.13ms	diff 27.1ms
> write align 16384	pre 44.5ms	on 5.05ms	post 33.3ms	diff -338542
> write align 8192	pre 25.5ms	on 70.6ms	post 25.3ms	diff 45.2ms
> write align 4096	pre 4.89ms	on 4.47ms	post 5.29ms	diff -623390
> write align 2048	pre 4.88ms	on 4.89ms	post 5.2ms	diff -155781
> # ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
> write align 8388608	pre 4.68ms	on 9.06ms	post 5.14ms	diff 4.15ms
> write align 4194304	pre 4.37ms	on 7.49ms	post 4.59ms	diff 3.01ms
> write align 2097152	pre 23.7ms	on 1.9ms	post 14.8ms	diff -173218
> write align 1048576	pre 14.8ms	on 19.9ms	post 4.75ms	diff 10.2ms
> write align 524288	pre 20.2ms	on 24.9ms	post 10.7ms	diff 9.46ms
> write align 262144	pre 20.2ms	on 3.01ms	post 20.1ms	diff -171062
> write align 131072	pre 25.9ms	on 24.9ms	post 9.85ms	diff 7.06ms
> write align 65536	pre 15.5ms	on 30.3ms	post 2.95ms	diff 21.1ms
> write align 32768	pre 27.3ms	on 19.1ms	post 5.86ms	diff 2.5ms
> write align 16384	pre 25.4ms	on 55.9ms	post 12.7ms	diff 36.9ms
> write align 8192	pre 4.8ms	on 102ms	post 9.47ms	diff 94.8ms
> write align 4096	pre 4.92ms	on 5.16ms	post 4.98ms	diff 207µs
> write align 2048	pre 4.64ms	on 4.92ms	post 5.45ms	diff -121860
> # ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
> write align 8388608	pre 15.8ms	on 9.39ms	post 4.68ms	diff -854295
> write align 4194304	pre 4.76ms	on 7.54ms	post 3.82ms	diff 3.24ms
> write align 2097152	pre 19.9ms	on 9.73ms	post 4.44ms	diff -244517
> write align 1048576	pre 14.5ms	on 19.1ms	post 5.21ms	diff 9.23ms
> write align 524288	pre 24.9ms	on 29ms	post 5.89ms	diff 13.6ms
> write align 262144	pre 24.9ms	on 2.41ms	post 20.8ms	diff -204328
> write align 131072	pre 25.6ms	on 30ms	post 4.84ms	diff 14.8ms
> write align 65536	pre 26.4ms	on 24.4ms	post 6.16ms	diff 8.12ms
> write align 32768	pre 15ms	on 30.6ms	post 15.4ms	diff 15.4ms
> write align 16384	pre 16.1ms	on 45.4ms	post 16.5ms	diff 29.1ms
> write align 8192	pre 5.88ms	on 107ms	post 5.45ms	diff 101ms
> write align 4096	pre 5.17ms	on 5.78ms	post 4.83ms	diff 778µs
> write align 2048	pre 3.99ms	on 5.27ms	post 3.97ms	diff 1.29ms
> # ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
> write align 8388608	pre 16.1ms	on 8.37ms	post 5.44ms	diff -241222
> write align 4194304	pre 4.07ms	on 7.27ms	post 3.89ms	diff 3.29ms
> write align 2097152	pre 24.2ms	on 18.5ms	post 5.63ms	diff 3.59ms
> write align 1048576	pre 4.08ms	on 18.9ms	post 5.46ms	diff 14.1ms
> write align 524288	pre 25.1ms	on 28ms	post 14.6ms	diff 8.13ms
> write align 262144	pre 15.8ms	on 30ms	post 5.4ms	diff 19.4ms
> write align 131072	pre 24.7ms	on 30.8ms	post 4.43ms	diff 16.2ms
> write align 65536	pre 5ms	on 40.5ms	post 5.95ms	diff 35.1ms
> write align 32768	pre 24.7ms	on 30.6ms	post 4.92ms	diff 15.8ms
> write align 16384	pre 25.2ms	on 132ms	post 10.2ms	diff 114ms
> write align 8192	pre 7.64ms	on 111ms	post 9.18ms	diff 102ms
> write align 4096	pre 5.11ms	on 3.92ms	post 5.4ms	diff -134159
> write align 2048	pre 3.92ms	on 4.41ms	post 4.51ms	diff 196µs

Every value is the average of eight measurements, so there are probably
some that include the 100ms garbage collection, and others that don't.
I'm more confused about this now than I was before.

> > Also, does the same happen with other blocksizes, e.g. 4096 or 8192, passed
> > to flashbench?
>
> # echo 0 > /sys/block/mmcblk0/device/page_size
> # ./flashbench -A -b 1024 /dev/block/mmcblk0p9
> write align 65536	pre 3.33ms	on 6.57ms	post 3.65ms	diff 3.08ms
> write align 32768	pre 3.68ms	on 6.6ms	post 3.7ms	diff 2.91ms
> write align 16384	pre 3.64ms	on 97.6ms	post 3.26ms	diff 94.2ms
> write align 8192	pre 3.49ms	on 115ms	post 3.62ms	diff 112ms
> write align 4096	pre 3.91ms	on 3.91ms	post 3.9ms	diff 360ns
> write align 2048	pre 3.92ms	on 3.92ms	post 3.92ms	diff -1374ns
> # ./flashbench -A -b 2048 /dev/block/mmcblk0p9
> write align 65536	pre 4.02ms	on 7.22ms	post 4.14ms	diff 3.14ms
> write align 32768	pre 4ms	on 7.07ms	post 3.95ms	diff 3.1ms
> write align 16384	pre 3.66ms	on 106ms	post 3.4ms	diff 102ms
> write align 8192	pre 3.56ms	on 106ms	post 3.36ms	diff 103ms
> write align 4096	pre 3.61ms	on 4.1ms	post 4.35ms	diff 117µs
> # ./flashbench -A -b 4096 /dev/block/mmcblk0p9
> write align 65536	pre 3.89ms	on 6.97ms	post 3.96ms	diff 3.04ms
> write align 32768	pre 3.89ms	on 6.97ms	post 3.96ms	diff 3.04ms
> write align 16384	pre 3.74ms	on 114ms	post 4.05ms	diff 110ms
> write align 8192	pre 4.25ms	on 115ms	post 4.8ms	diff 110ms
> # ./flashbench -A -b 8192 /dev/block/mmcblk0p9
> write align 65536	pre 4.11ms	on 7.46ms	post 4.24ms	diff 3.29ms
> write align 32768	pre 4.15ms	on 7.45ms	post 4.25ms	diff 3.25ms
> write align 16384	pre 4.24ms	on 96.1ms	post 3.83ms	diff 92.1ms

Ok, that is very consistent then at least.

> The following I thought this was interesting. I did it to see the big
> time go away, since it would end up being a 16K write straddling an 8K
> boundary, but the pre and post results I don't understand at all.
> 
> # ./flashbench -A -b 16384  /dev/block/mmcblk0p9
> write align 8388608	pre 121ms	on 7.76ms	post 116ms	diff -110845
> write align 4194304	pre 129ms	on 7.57ms	post 115ms	diff -114863
> write align 2097152	pre 121ms	on 7.78ms	post 123ms	diff -114318
> write align 1048576	pre 131ms	on 7.74ms	post 106ms	diff -110856
> write align 524288	pre 131ms	on 7.58ms	post 116ms	diff -115926
> write align 262144	pre 131ms	on 7.55ms	post 115ms	diff -115591
> write align 131072	pre 131ms	on 7.54ms	post 116ms	diff -115617
> write align 65536	pre 131ms	on 7.54ms	post 115ms	diff -115579
> write align 32768	pre 125ms	on 6.89ms	post 116ms	diff -113408

The description of the test case is probably suboptimal. What this does
is 32 KB accesses, with 32 KB alignment in the pre and post case, but 16 KB
alignment in the "on" case. The idea here is that it should never do
any access with less than "--blocksize" alignment.

This is what I think happens:
Since the partition is over 64 MB size and it can have 7 4 MB allocation units open,
writing to 8 locations on the drive separated by 8 MB causes it to do garbage collection
all the time for 32KB accesses and larger. However, the "on" measurement is only
16 KB aligned, so it goes into T's buffer A for small writes, and does not hit
the garbage collection all the time, so it ends up being a lot faster.

	Arnd

^ permalink raw reply

* MMC quirks relating to performance/lifetime.
From: Arnd Bergmann @ 2011-02-20 15:03 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <AANLkTi=GO8N15pCnZWuuJ6FfyQMytFt23LPS5oYMNuMu@mail.gmail.com>

On Sunday 20 February 2011 05:39:06 Andrei Warkentin wrote:
> Actually it would be a good idea to also bail/warn if you do the au
> test with more open au's than the size of the passed device allows,
> since it'll just wrap around and skew the results.

Yes, that's a bug. I never noticed because all the devices I tested
have much more space than the test can possibly exercise. I'll
fix it tomorrow.

> > Right, you should try larger values for --open-au-nr here. It's at
> > least a good sign that the drive can do random access inside a segment
> > and that it can have at least 4 segments open. This is much better
> > than I expected from your descriptions at first.
> 
> Actually the Toshiba one seems to have 7 AUs if I interpret this correctly.
> ^C
> # ./flashbench -O -0 6  -b 512 /dev/block/mmcblk0p9
> 4MiB    5.91M/s
> 2MiB    8.84M/s
> 1MiB    10.8M/s
> 512KiB  13M/s
> 256KiB  13.6M/s
> 
> ^C
> # ./flashbench -O -0 7  -b 512 /dev/block/mmcblk0p9
> 4MiB    6.32M/s
> 2MiB    8.63M/s
> 1MiB    10.5M/s
> 512KiB  13.2M/s
> 256KiB  13M/s
> ^[[A^[[D^[[A128KiB  12.3M/s
> ^C
> # ./flashbench -O -0 8  -b 512 /dev/block/mmcblk0p9
> 4MiB    6.65M/s
> 2MiB    7.02M/s
> 1MiB    6.36M/s
> 512KiB  3.17M/s
> 256KiB  1.53M/s

Yes, very good. I've never seen 7, but I've seen all other numbers
between 1 and 8 ;-).

> The Sandisk one has 20 AUs.
> 
> # ./flashbench -O -0 20  -b 512 /dev/block/mmcblk0p9
> 4MiB    11.3M/s
> 2MiB    12.8M/s
> 1MiB    9.87M/s
> 512KiB  9.97M/s
> 256KiB  9.13M/s
> 128KiB  8.05M/s
> ^C
> # ./flashbench -O -0 50  -b 512 /dev/block/mmcblk0p9
> 4MiB    7.19M/s
> ^C
> # ./flashbench -O -0 2  -b 512 /dev/block/mmcblk0p9
> ^C
> # ./flashbench -O -0 22  -b 512 /dev/block/mmcblk0p9
> 4MiB    11.6M/s
> 2MiB    12.3M/s
> 1MiB    5.13M/s
> 512KiB  2.57M/s
> 256KiB  1.59M/s
> 128KiB  1.16M/s
> 64KiB   776K/s
> ^C
> # ./flashbench -O -0 21  -b 512 /dev/block/mmcblk0p9
> 4MiB    11.2M/s
> 2MiB    12.4M/s
> 1MiB    4.65M/s
> 512KiB  1.95M/s
> 256KiB  955K/s

20 is a lot, more than any other device I've tested, but that's
good. Sandisk keeps impressing me ;-)

Are you sure you have the allocation unit size correctly for
this device and you don't get into the wrap-around bug
you mention above?

If it indeed uses 4 MB allocation units, flashbench will show
only 10 open segments when run with --erasesize=$[8*1024*1024],
but 20 open segments when run with --erasesize=$[2*1024*1024].

From your flashbench -a run, I would guess that it uses
8 MB allocation units, although the data is not 100% conclusive
there.

> > However, the drop from 32 KB to 16 KB in performance is horrifying
> > for the Toshiba drive, it's clear that this one does not like
> > to be accessed smaller than 32 KB at a time, an obvious optimization
> > for FAT32 with 32 KB clusters. How does this change with your
> > kernel patches?
> 
> Since the only performance-increasing patch here would be just the one
> that splits unaligned accesses, I wouldn't expect any improvements for
> page-aligned accesses < 32KB. As you can see here...

Ok.

> > For the sandisk drive, it's funny how it is consistently faster
> > doing random access than linear access. I don't think I've seen that
> > before. It does seem to have some cache for linear access using
> > smaller than 16 KB, and can probably combine them when it's only
> > writing to a single segment.
> 
> Yes, that is pretty interesting. Smaller than 16K? Not smaller than
> 32K? I wonder what it is doing...

My interpretation is that it uses 16 KB pages, but can do two page-sized
writes in a single access (multi-plane write). Anything smaller than
a page goes to a temporary buffer first (like the Toshiba chip), but
gets flushed when the next one is not contiguous. If you manage to fill
the entire 16 KB page using small contiguous writes, it can do a single
efficient write access instead.

To confirm that 16 KB is the page size, you can try 

flashbench -s --scatter-span=1 --scatter-order=10 -o plot.data \
	/dev/mmcblk1 -c 32 --blocksize=16384
gnuplot -p -e 'plot "plot.data" '

On most MLC flashes, this will show a pattern alternating between slow
and fast pages like the one from https://lwn.net/Articles/428836/

	Arnd

^ permalink raw reply

* [PATCH v2 3/3] ARM: PXA: PXAFB: Fix typo in ypos assignment
From: Vasily Khoruzhick @ 2011-02-20 15:02 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1298214147-11578-1-git-send-email-anarsoul@gmail.com>

Sascha Hauer <s.hauer@pengutronix.de> pointed out that
ypos takes the value of xpos due to a typo.

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
---
 drivers/video/pxafb.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 54a9ab8..80ea7e6 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -766,7 +766,7 @@ static int overlayfb_check_var(struct fb_var_screeninfo *var,
 	int xpos, ypos, pfor, bpp;
 
 	xpos = NONSTD_TO_XPOS(var->nonstd);
-	ypos = NONSTD_TO_XPOS(var->nonstd);
+	ypos = NONSTD_TO_YPOS(var->nonstd);
 	pfor = NONSTD_TO_PFOR(var->nonstd);
 
 	bpp = pxafb_var_to_bpp(var);
@@ -864,7 +864,7 @@ static int overlayfb_set_par(struct fb_info *info)
 
 	bpp  = pxafb_var_to_bpp(var);
 	xpos = NONSTD_TO_XPOS(var->nonstd);
-	ypos = NONSTD_TO_XPOS(var->nonstd);
+	ypos = NONSTD_TO_YPOS(var->nonstd);
 	pfor = NONSTD_TO_PFOR(var->nonstd);
 
 	ofb->control[0] = OVLxC1_PPL(var->xres) | OVLxC1_LPO(var->yres) |
-- 
1.7.4

^ permalink raw reply related

* [PATCH v2 2/3] ARM: PXA: PXAFB: Fix plane Z-ordering problem
From: Vasily Khoruzhick @ 2011-02-20 15:02 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1298214147-11578-1-git-send-email-anarsoul@gmail.com>

pxafb_overlay_init is not the right place to change Z-ordering;
move it to the main plane initialization.

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
---
 drivers/video/pxafb.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 41a499f..54a9ab8 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -940,8 +940,6 @@ static int __devinit pxafb_overlay_init(struct pxafb_info *fbi)
 	/* mask all IU/BS/EOF/SOF interrupts */
 	lcd_writel(fbi, LCCR5, ~0);
 
-	/* place overlay(s) on top of base */
-	fbi->lccr0 |= LCCR0_OUC;
 	pr_info("PXA Overlay driver loaded successfully!\n");
 	return 0;
 }
@@ -1825,6 +1823,12 @@ static struct pxafb_info * __devinit pxafb_init_fbinfo(struct device *dev)
 
 	pxafb_decode_mach_info(fbi, inf);
 
+#ifdef CONFIG_FB_PXA_OVERLAY
+	/* place overlay(s) on top of base */
+	if (pxafb_overlay_supported())
+		fbi->lccr0 |= LCCR0_OUC;
+#endif
+
 	init_waitqueue_head(&fbi->ctrlr_wait);
 	INIT_WORK(&fbi->task, pxafb_task);
 	mutex_init(&fbi->ctrlr_lock);
-- 
1.7.4

^ permalink raw reply related

* [PATCH v2 1/3] ARM: PXA: PXAFB: Fix double-free issue
From: Vasily Khoruzhick @ 2011-02-20 15:02 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20110217110321.GF24627@n2100.arm.linux.org.uk>

From: Russell King - ARM Linux <linux@arm.linux.org.uk>

Release callback tries to free memory even if it was not allocated in
map_video_memory, fix it.

Added by Vasily Khoruzhick:

- Clear x_res/y_res fields of fb.var on release, to make sure
our callback will be called on next FBIOPUT_VSCREENINFO ioctl.
- Disable overlay only if it was enabled.
- Don't touch FDADR1 if it's not necessary

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
---
v2: remove unnecessary newlines, add comment about FDADR1

 drivers/video/pxafb.c |   49 ++++++++++++++++++++++++++++++++++---------------
 drivers/video/pxafb.h |    2 +-
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index 825b665..41a499f 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -629,6 +629,9 @@ static void overlay1fb_disable(struct pxafb_layer *ofb)
 {
 	uint32_t lccr5 = lcd_readl(ofb->fbi, LCCR5);
 
+	if (!(lcd_readl(ofb->fbi, OVL1C1) & OVLxC1_OEN))
+		return;
+
 	lcd_writel(ofb->fbi, OVL1C1, ofb->control[0] & ~OVLxC1_OEN);
 
 	lcd_writel(ofb->fbi, LCSR1, LCSR1_BS(1));
@@ -687,6 +690,9 @@ static void overlay2fb_disable(struct pxafb_layer *ofb)
 {
 	uint32_t lccr5 = lcd_readl(ofb->fbi, LCCR5);
 
+	if (!(lcd_readl(ofb->fbi, OVL2C1) & OVLxC1_OEN))
+		return;
+
 	lcd_writel(ofb->fbi, OVL2C1, ofb->control[0] & ~OVLxC1_OEN);
 
 	lcd_writel(ofb->fbi, LCSR1, LCSR1_BS(2));
@@ -720,12 +726,10 @@ static int overlayfb_open(struct fb_info *info, int user)
 	if (user == 0)
 		return -ENODEV;
 
-	/* allow only one user at a time */
-	if (atomic_inc_and_test(&ofb->usage))
-		return -EBUSY;
+	if (ofb->usage++ == 0)
+		/* unblank the base framebuffer */
+		fb_blank(&ofb->fbi->fb, FB_BLANK_UNBLANK);
 
-	/* unblank the base framebuffer */
-	fb_blank(&ofb->fbi->fb, FB_BLANK_UNBLANK);
 	return 0;
 }
 
@@ -733,12 +737,24 @@ static int overlayfb_release(struct fb_info *info, int user)
 {
 	struct pxafb_layer *ofb = (struct pxafb_layer*) info;
 
-	atomic_dec(&ofb->usage);
-	ofb->ops->disable(ofb);
-
-	free_pages_exact(ofb->video_mem, ofb->video_mem_size);
-	ofb->video_mem = NULL;
-	ofb->video_mem_size = 0;
+	if (--ofb->usage == 0) {
+		ofb->ops->disable(ofb);
+		ofb->fb.var.height	= -1;
+		ofb->fb.var.width	= -1;
+		ofb->fb.var.xres = ofb->fb.var.xres_virtual = 0;
+		ofb->fb.var.yres = ofb->fb.var.yres_virtual = 0;
+
+		mutex_lock(&ofb->fb.mm_lock);
+		ofb->fb.fix.smem_start	= 0;
+		ofb->fb.fix.smem_len	= 0;
+		mutex_unlock(&ofb->fb.mm_lock);
+
+		if (ofb->video_mem) {
+			free_pages_exact(ofb->video_mem, ofb->video_mem_size);
+			ofb->video_mem = NULL;
+			ofb->video_mem_size = 0;
+		}
+	}
 	return 0;
 }
 
@@ -817,7 +833,8 @@ static int overlayfb_map_video_memory(struct pxafb_layer *ofb)
 		if (ofb->video_mem_size >= size)
 			return 0;
 
-		free_pages_exact(ofb->video_mem, ofb->video_mem_size);
+		/* don't re-allocate: userspace may have the buffer mapped */
+		return -EINVAL;
 	}
 
 	ofb->video_mem = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
@@ -891,7 +908,7 @@ static void __devinit init_pxafb_overlay(struct pxafb_info *fbi,
 
 	ofb->id = id;
 	ofb->ops = &ofb_ops[id];
-	atomic_set(&ofb->usage, 0);
+	ofb->usage = 0;
 	ofb->fbi = fbi;
 	init_completion(&ofb->branch_done);
 }
@@ -1368,7 +1385,8 @@ static int pxafb_activate_var(struct fb_var_screeninfo *var,
 	    (lcd_readl(fbi, LCCR3) != fbi->reg_lccr3) ||
 	    (lcd_readl(fbi, LCCR4) != fbi->reg_lccr4) ||
 	    (lcd_readl(fbi, FDADR0) != fbi->fdadr[0]) ||
-	    (lcd_readl(fbi, FDADR1) != fbi->fdadr[1]))
+	    ((fbi->lccr0 & LCCR0_SDS) &&
+	    (lcd_readl(fbi, FDADR1) != fbi->fdadr[1])))
 		pxafb_schedule_work(fbi, C_REENABLE);
 
 	return 0;
@@ -1420,7 +1438,8 @@ static void pxafb_enable_controller(struct pxafb_info *fbi)
 	lcd_writel(fbi, LCCR0, fbi->reg_lccr0 & ~LCCR0_ENB);
 
 	lcd_writel(fbi, FDADR0, fbi->fdadr[0]);
-	lcd_writel(fbi, FDADR1, fbi->fdadr[1]);
+	if (fbi->lccr0 & LCCR0_SDS)
+		lcd_writel(fbi, FDADR1, fbi->fdadr[1]);
 	lcd_writel(fbi, LCCR0, fbi->reg_lccr0 | LCCR0_ENB);
 }
 
diff --git a/drivers/video/pxafb.h b/drivers/video/pxafb.h
index 2353521..84e3ae1 100644
--- a/drivers/video/pxafb.h
+++ b/drivers/video/pxafb.h
@@ -92,7 +92,7 @@ struct pxafb_layer_ops {
 struct pxafb_layer {
 	struct fb_info		fb;
 	int			id;
-	atomic_t		usage;
+	uint32_t		usage;
 	uint32_t		control[2];
 
 	struct pxafb_layer_ops	*ops;
-- 
1.7.4

^ permalink raw reply related

* [PATCH v2 1/2] ARM: IMX5: cpuidle driver
From: Yong Shen @ 2011-02-20 14:58 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20110217105415.GB24426@pengutronix.de>

Hi Sascha,

I had sent out the v3 patch before your last comments arrived.
I noticed how davinci does it, but some SoCs, such as OMAP, do it
another way, as my code does.
However, if you prefer the davinci approach, I will redo it. Please
confirm.

thanks
Yong

On Thu, Feb 17, 2011 at 11:54 AM, Sascha Hauer <s.hauer@pengutronix.de>wrote:

> On Thu, Feb 17, 2011 at 09:18:11AM +0100, Yong Shen wrote:
> > >
> > >
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +late_initcall(imx_cpuidle_init);
> > >
> > > We have a late_initcall here which needs to be protected from other
> > > cpus. On the other hand we depend on board code calling
> > > imx_cpuidle_board_params() before this initcall. I think the board code
> > > should call a imx_cpuidle_init(struct imx_cpuidle_params
> > > *cpuidle_params) instead which makes the flow of execution more clear.
> > >
> > > imx_cpuidle_init can not be called directly in board code, since it is
> too
> > early to register cpuidle driver and device which depend on some other
> > system resource.
>
> I see. Maybe we should make this a platform driver then like for example
> davinci does.
>
> Sascha
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20110220/59d8ac2c/attachment-0001.html>

^ permalink raw reply

* MMC quirks relating to performance/lifetime.
From: Arnd Bergmann @ 2011-02-20 14:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <AANLkTimGrT_p_5xdXJv5UURPMC0cwJCPkZ=xcX5Nk=o=@mail.gmail.com>

[adding linux-fsdevel to Cc, see http://lwn.net/Articles/428941/ and
http://comments.gmane.org/gmane.linux.ports.arm.kernel/105607 for more
on this discussion.]

On Sunday 20 February 2011 12:27:39 Andrei Warkentin wrote:
> On Thu, Feb 17, 2011 at 9:47 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > I think I'd try to reduce the number of sysfs files needed for this.
> > What are the values you would typically set here?
> >
> > My feeling is that separating unaligned page writes from full pages
> > or multiples of pages could always be beneficial for all cards, or at
> > least harmless, but that will require more measurements.
> > Whether to do the reliable write or not could be a simple flag
> > if the numbers are the same.
> 
> I thought about this some more, and I realized it would be ugly if
> everybody added enable_workaround_sec_start/enable_workaround_sec_end
> for every novel idea of working around some issue with
> performance/reliability on mmc/sd cards.
> 
> What about letting the user/embedder create policies for how certain
> accesses are done? That way you give runtime-accessible
> blocks for tuning mmc block layer while having one interface to
> manipulate (and combine) multiple workarounds, all the while catching
> conflicts and
> without forcing specific policy in code.
> 
> Essentially under /sys/block/mmcblk0/device you have an attribute
> called "policies". Example:
> 
> # echo mypol0 > /sys/block/mmcblk0/device/policies
> # ls /sys/block/mmcblk0/device/mypol0
> debug
> delete
> start_block
> end_block
> access_size_low
> access_size_high
> write_policy
> erase_policy
> read_policy
> # cat /sys/block/mmcblk0/device/mypol0/write_policy
> Current: none
> 0x00000001: Split unaligned writes across page_size
> 0x00000002: Split writes into page_size chunks and write using reliable writes
> 0x00000004: Use reliable writes for WRITE_META blocks.
> # cat /sys/block/mmcblk0/device/mypol0/erase_policy
> Current: none
> 0x00000001: Use secure erase.
> # echo 1 > delete
> # Policy is deleted.
> 
> The policies are all stored in a rb-tree. First order of business
> inside mmc_blk_issue_rw_rq/mmc_blk_issue_* is to fetch an existing
> policy given the access type and block start/end (which both tells
> where the access is going and the size of the access). Later, it's
> that policy information which controls how the request is translated
> into MMC commands. I'm almost done with a prototype.

I think it's good to discuss all the options, but my feeling is that
we should not add so much complexity at the interface level, because
we will never be able to change all that again. In general, sysfs
files should contain simple values that are self-descriptive (a simple
number or one word), and should have no side-effects (unlike the delete
or the policies attributes you describe).

The behavior of the Toshiba chip is peculiar enough to justify having
some workarounds for it, including run-time selected ones, but I'm
looking for something much simpler. I'd certainly be interested in
the patch you come up with and any performance results, but I don't
think it can be merged like that.

In the end, Chris will have to make the decision on mmc patches of
course -- I'm just trying to contribute experience from other subsystems.

What I see as a more promising approach is to add the tunables
to attributes of the CFQ I/O scheduler once we know what we want.
This will allow doing the same optimizations to non-MMC devices such
as USB sticks or CF/IDE cards without reimplementing it in other
subsystems, and give more control over the individual requests than
the MMC layer has.

E.g. the I/O scheduler can also make sure that we always submit all
blocks from the start of one erase unit (e.g. 4 MB) to the end, but
not try to merge requests across erase unit boundaries. It can
also try to group the requests in aligned power-of-two sized chunks
rather than merging as many sectors as possible up to the maximum
request size, ignoring the alignment.

	Arnd

^ permalink raw reply

* barriers in mutexes?
From: Russell King - ARM Linux @ 2011-02-20 13:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <4D4ADE2F.50105@codeaurora.org>

On Thu, Feb 03, 2011 at 08:56:15AM -0800, Steve Muckle wrote:
> On 02/03/11 03:08, Russell King - ARM Linux wrote:
> > On Wed, Feb 02, 2011 at 06:05:46PM -0800, Steve Muckle wrote:
> >> Looking at kernel/mutex.c and arch/arm/include/asm/mutex.h, it appears
> >> there aren't any memory barriers in the fast path of mutexes. I see this
> >> was discussed here a long time back:
> >>
> >> http://patchwork.kernel.org/patch/25856/
> >>
> >> How was this resolved?
> > 
> > bac4e96 ([ARM] barriers: improve xchg, bitops and atomic SMP barriers)
> 
> The fast path of mutexes on ARM >= v6 doesn't seem to use these
> operations - it's just ldrex/strex. Looking at the v7 ARM ARM it seems
> barriers are still required with ldrex/strex.

Patch welcome.

^ permalink raw reply

* [RFC,PATCH 1/3] Add a common struct clk
From: Russell King - ARM Linux @ 2011-02-20 13:13 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <4D5A100F.9000809@codeaurora.org>

On Mon, Feb 14, 2011 at 09:33:03PM -0800, Saravana Kannan wrote:
> Assuming Russell and/or the community agrees on the semantics of  
> "parent", without the generic implementation grabbing the prepare_lock  
> while setting the parent, there is no way for the specific clock driver  
> implementations to cleanly ensure correctness. The only option for them  
> would be to peek into the generic clock struct and grab the prepare lock  
> -- to me that would be an ugly hack and/or layering violation that would  
> cause problems later on.
>
> Russell/All,
>
> What's the meaning of a parent clock? Do you agree with my definition --  
> "the parent clock is the clock that generates the clock signal from  
> which the child clock derives (divide, etc) its clock signal."? Or  
> is it open to interpretation by each implementation?

Your definition seems sane - I'm not sure what use a parent clock which
had nothing to do with a child would be.

As for the locking issue, I've no idea on that at the moment.  I don't
think implementations should grab the prepare lock, I think that's
something the generic code should take care of for clk_set_rate(),
clk_set_parent() etc.

^ permalink raw reply

* [RFC,PATCH 1/3] Add a common struct clk
From: Russell King - ARM Linux @ 2011-02-20 13:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <201102151733.30332.jeremy.kerr@canonical.com>

On Tue, Feb 15, 2011 at 05:33:29PM +0800, Jeremy Kerr wrote:
> Hi Russell,
> 
> > > Why is that? Consider two devices using one clock; one does some
> > > initialisation based on the return value of clk_get_rate(), the other
> > > calls clk_set_rate() some time later. Now the first device is
> > > incorrectly initialised.
> > 
> > What about a clock sourced from a PLL which provides the dotclock for a
> > framebuffer device?  On every mode set, should the clk have to be disabled,
> > unprepared, rate set, re-prepared and re-enabled?
> 
> Sounds heavy-handed, but I honestly have no idea if that's reasonable or not.
> 
> Other options are:
> 
>  * Require that the driver has called clk_prepare, and that prepare_count
>    is 1 during the set_rate call (indicating that this is the only user); or
> 
>  * Leave the set_rate and set_parent semantics as-is and assume that anything
>    calling either knows what it's doing (and that it won't affect other
>    devices)
> 
> Are you OK if we address this separately to the API unification though?

Absolutely.  I think there's enough issues already without adding new
changes on top.  The unification step should do just that - unify.  It
should not introduce new restrictions that are not absolutely necessary
for the unification step.

^ permalink raw reply

* Machine ID question
From: Russell King - ARM Linux @ 2011-02-20 13:04 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <201102141612.17340.alexander.stein@systec-electronic.com>

On Mon, Feb 14, 2011 at 04:12:16PM +0100, Alexander Stein wrote:
> On Thursday 10 February 2011, 09:29:15 Uwe Kleine-König wrote:
> > On Wed, Feb 09, 2011 at 02:23:07PM -0700, John Linn wrote:
> > > I used the form to create a new machine ID for the new Xilinx platform
> > > in anticipation of
> > > the code making it into the mainline sometime in the future.
> > > 
> > > The proposed patches are using ARCH_XILINX in the Kconfig.
> > 
> > You should use MACH_XILINX in the patches.  There are some ARCH_... in
> > the machine db, but I think nowadays you cannot (and should not) create
> > them there.
> 
> Uhm, the last entry in the db was created today (14th Feb). Is there a reason 
> why there no new entries should be created?

huh?

The machine registry hands out MACH_foo identifiers.  It used to hand
them out as ARCH_foo identifiers, but we decided that was misleading -
it's dealing with machines, not architectures.  So a MACH_ prefix is
more correct than an ARCH_ prefix.

^ permalink raw reply

* [PATCH v2] ARM: vfp: Always save VFP state in vfp_pm_suspend
From: Russell King - ARM Linux @ 2011-02-20 12:57 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <AANLkTim4ANuErvT4c5mBnu+Yg5m9sz6N8xugryXRJeN7@mail.gmail.com>

On Wed, Feb 16, 2011 at 11:36:45AM -0800, Colin Cross wrote:
> On Tue, Feb 15, 2011 at 9:03 AM, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Mon, Feb 14, 2011 at 02:55:47PM -0800, Colin Cross wrote:
> >> diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
> >> index 66bf8d1..7231d18 100644
> >> --- a/arch/arm/vfp/vfpmodule.c
> >> +++ b/arch/arm/vfp/vfpmodule.c
> >> @@ -415,13 +415,13 @@ static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
> >>  	struct thread_info *ti = current_thread_info();
> >>  	u32 fpexc = fmrx(FPEXC);
> >>
> >> -	/* if vfp is on, then save state for resumption */
> >> -	if (fpexc & FPEXC_EN) {
> >> +	/* save state for resume */
> >> +	if (last_VFP_context[ti->cpu]) {
> >
> > I'm not entirely happy with this.
> >
> > It is true that last_VFP_context[] when non-NULL indicates who owns the
> > hardware VFP state, so saving it would seem logical.  However, this new
> > code now saves the state with the saved fpexc indicating that it's disabled.
> >
> > This will cause a VFP exception to misbehave by reloading the state, and
> > then disabling the VFP unit.  That will cause another VFP exception which
> > will find the VFP unit disabled, and re-enable it.  All in all, this is
> > rather wasteful.
> >
> > So...
> > 	/* If lazy disable, re-enable the VFP ready for it to be saved */
> > 	if (last_VFP_context[ti->cpu] != &ti->vfpstate) {
> > 		fpexc |= FPEXC_EN;
> > 		fmxr(FPEXC, fpexc);
> > 	}
> > 	/* If VFP is on, then save state for resumption */
> > 	if (fpexc & FPEXC_EN) {
> > 		...
> 
> I think v2 of the patch handles this case correctly:
> 	/* save state for resume */
> 	if (last_VFP_context[ti->cpu]) {
> 		printk(KERN_DEBUG "%s: saving vfp state\n", __func__);
> 		fmxr(FPEXC, fpexc | FPEXC_EN);
> 		vfp_save_state(last_VFP_context[ti->cpu], fpexc);

This saves fpexc with the enable flag possibly clear.

> 		last_VFP_context[ti->cpu] = NULL;
> 		fmxr(FPEXC, fpexc & ~FPEXC_EN);
> 	}
> 
> This version enables the VFP if it was not enabled, but saves the
> original fpexc value.

Which is wrong as I said above.

^ permalink raw reply

* [PATCH 2/2] ARM: tegra: add TrimSlice board
From: Mike Rapoport @ 2011-02-20 12:43 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <4D58C91F.9030303@compulab.co.il>

On 02/14/11 08:18, Mike Rapoport wrote:
> On 02/13/11 21:06, Colin Cross wrote:
>>>>
>>>> Ack, the machine type is merged so I'll put this in for-next.  Do you
>>>> want it enabled in tegra_defconfig?
>>>
>>> If 'select TEGRA_PCI' does not trouble you, I'd be glad to have the TrimSlice in
>>> tegra_defconfig :)
>>
>> My board without PCI doesn't boot if CONFIG_TEGRA_PCI is selected, so
>> I guess it should stay off for now.  Any idea why it wouldn't boot?
> 
> The PCI subsystem should be re-gated, otherwise any access to PCI hangs the
> system. Besides, on Harmony you need to enable two additional voltage supplies
> to provide PEX_CLK voltage.
> I'll send the patches that handle it, hopefully in the next few days.

I was a little bit too enthusiastic about it :( To enable PCI on Harmony and
probably on Ventana one needs to have I2C and TPS up and running before setting
up the PCI clocks...
I have working implementation on top of your  linux-tegra-2.6.37 branch (1), but
it's not quite ready to go upstream.

So, let's keep the TrimSlice out of common defconfig for now

[1]
http://git.kernel.org/?p=linux/kernel/git/rppt/linux-tegra.git;a=shortlog;h=refs/heads/tegra/devel

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* [RFC PATCH 2/2] ARMv7: Invalidate the TLB before freeing page tables
From: Russell King - ARM Linux @ 2011-02-20 12:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1297780926.14691.164.camel@e102109-lin.cambridge.arm.com>

On Tue, Feb 15, 2011 at 02:42:06PM +0000, Catalin Marinas wrote:
> On Tue, 2011-02-15 at 12:14 +0000, Russell King - ARM Linux wrote:
> > On Tue, Feb 15, 2011 at 11:32:42AM +0000, Russell King - ARM Linux wrote:
> > > The point of TLB shootdown is that we unmap the entries from the page
> > > tables, then issue the TLB flushes, and then free the pages and page
> > > tables after that.  All that Peter's patch tries to do is to get ARM to
> > > use the generic stuff.
> > 
> > As Peter's patch preserves the current behaviour, that's not sufficient.
> > So, let's do this our own way and delay pages and page table frees on
> > ARMv6 and v7.  Untested.
> 
> ARMv7 should be enough, I'm not aware of any pre-v7 with this behaviour.

ARM11MPCore.  Any SMP system can access a page which was free'd by the
tlb code but hasn't been flushed from the hardware TLBs.  So maybe we
want it to be "defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7)" ?

> > diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
> > index f41a6f5..1ca3e16 100644
> > --- a/arch/arm/include/asm/tlb.h
> > +++ b/arch/arm/include/asm/tlb.h
> > @@ -30,6 +30,16 @@
> >  #include <asm/pgalloc.h>
> > 
> >  /*
> > + * As v6 and v7 speculatively prefetch, which can drag new entries into the
> > + * TLB, we need to delay freeing pages and page tables.
> > + */
> > +#if defined(CONFIG_CPU_32v6) || defined(CONFIG_CPU_32v7)
> > +#define tlb_fast_mode(tlb)     0
> > +#else
> > +#define tlb_fast_mode(tlb)     1
> > +#endif
> 
> We could make this v7 only. If you want it to be more dynamic, we can
> check the MMFR0[3:0] bits (Cortex-A15 sets them to 4). But
> architecturally we should assume that intermediate page table levels may
> be cached.

I don't think that a runtime check justifies the optimization.  We're
talking about the difference between storing a set of pages in an array
and freeing them later vs freeing them one at a time.  Doing a test per
page is probably more expensive than just storing them in an array.

> > -#define tlb_remove_page(tlb,page)      free_page_and_swap_cache(page)
> > -#define pte_free_tlb(tlb, ptep, addr)  pte_free((tlb)->mm, ptep)
> > +#define pte_free_tlb(tlb, ptep, addr)  __pte_free_tlb(tlb, ptep, addr)
> >  #define pmd_free_tlb(tlb, pmdp, addr)  pmd_free((tlb)->mm, pmdp)
> 
> With LPAE, we'll need a __pmd_free_tlb() but I can add this as part of
> my patches.

Yes.

> Acked-by: Catalin Marinas <catalin.marinas@arm.com>

Thanks.

^ permalink raw reply

* [PATCH 2/6] ARM: pm: add generic CPU suspend/resume support
From: Russell King - ARM Linux @ 2011-02-20 12:00 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20110215110453.GB11199@n2100.arm.linux.org.uk>

Kukjin, could you test this update as well please?

Thanks.

On Tue, Feb 15, 2011 at 11:04:53AM +0000, Russell King - ARM Linux wrote:
> On Fri, Feb 11, 2011 at 06:50:57PM -0800, Colin Cross wrote:
> > > +ENDPROC(cpu_resume_turn_mmu_on)
> > > +cpu_resume_after_mmu:
> > > +	str	r5, [r2, r4, lsl #2]	@ restore old mapping
> > > +#ifdef MULTI_CACHE
> > > +	ldr	r10, =cpu_cache
> > > +	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
> > > +#else
> > > +	b	__cpuc_flush_kern_all
> > > +#endif
> 
> I think we can eliminate this cache flush by delaying the cache enable
> as below.  Could you see whether Tegra 2 survives this please?
> Thanks.
> 
> diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
> index bed1876..193be5f 100644
> --- a/arch/arm/kernel/sleep.S
> +++ b/arch/arm/kernel/sleep.S
> @@ -4,6 +4,7 @@
>  #include <asm/assembler.h>
>  #include <asm/glue-cache.h>
>  #include <asm/glue-proc.h>
> +#include <asm/system.h>
>  	.text
>  
>  /*
> @@ -81,25 +82,22 @@ ENTRY(cpu_resume_mmu)
>  	str	r3, [r2, r4, lsl #2]	@ setup 1:1 mapping for mmu code
>  	sub	r2, r2, r1
>  	ldr	r3, =cpu_resume_after_mmu
> +	bic	r1, r0, #CR_C		@ ensure D-cache is disabled
>  	b	cpu_resume_turn_mmu_on
>  ENDPROC(cpu_resume_mmu)
>  	.ltorg
>  	.align	5
>  cpu_resume_turn_mmu_on:
> -	mcr	p15, 0, r0, c1, c0, 0	@ turn on MMU, caches, etc
> -	mrc	p15, 0, r0, c0, c0, 0	@ read id reg
> -	mov	r0, r0
> -	mov	r0, r0
> +	mcr	p15, 0, r1, c1, c0, 0	@ turn on MMU, I-cache, etc
> +	mrc	p15, 0, r1, c0, c0, 0	@ read id reg
> +	mov	r1, r1
> +	mov	r1, r1
>  	mov	pc, r3			@ jump to virtual address
>  ENDPROC(cpu_resume_turn_mmu_on)
>  cpu_resume_after_mmu:
>  	str	r5, [r2, r4, lsl #2]	@ restore old mapping
> -#ifdef MULTI_CACHE
> -	ldr	r10, =cpu_cache
> -	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
> -#else
> -	b	__cpuc_flush_kern_all
> -#endif
> +	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
> +	mov	pc, lr
>  
>  /*
>   * Note: Yes, part of the following code is located into the .data section.
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* MMC quirks relating to performance/lifetime.
From: Andrei Warkentin @ 2011-02-20 11:27 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <201102171647.46190.arnd@arndb.de>

On Thu, Feb 17, 2011 at 9:47 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> I think I'd try to reduce the number of sysfs files needed for this.
> What are the values you would typically set here?
>
> My feeling is that separating unaligned page writes from full pages
> or multiples of pages could always be beneficial for all cards, or at
> least harmless, but that will require more measurements.
> Whether to do the reliable write or not could be a simple flag
> if the numbers are the same.

I thought about this some more, and I realized it would be ugly if
everybody added enable_workaround_sec_start/enable_workaround_sec_end
for every novel idea of working around some issue with
performance/reliability on mmc/sd cards.

What about letting the user/embedder create policies for how certain
accesses are done? That way you give runtime-accessible
blocks for tuning mmc block layer while having one interface to
manipulate (and combine) multiple workarounds, all the while catching
conflicts and
without forcing specific policy in code.

Essentially under /sys/block/mmcblk0/device you have an attribute
called "policies". Example:

# echo mypol0 > /sys/block/mmcblk0/device/policies
# ls /sys/block/mmcblk0/device/mypol0
debug
delete
start_block
end_block
access_size_low
access_size_high
write_policy
erase_policy
read_policy
# cat /sys/block/mmcblk0/device/mypol0/write_policy
Current: none
0x00000001: Split unaligned writes across page_size
0x00000002: Split writes into page_size chunks and write using reliable writes
0x00000004: Use reliable writes for WRITE_META blocks.
# cat /sys/block/mmcblk0/device/mypol0/erase_policy
Current: none
0x00000001: Use secure erase.
# echo 1 > delete
# Policy is deleted.

The policies are all stored in a rb-tree. First order of business
inside mmc_blk_issue_rw_rq/mmc_blk_issue_* is to fetch an existing
policy given the access type and block start/end (which both tells
where the access is going and the size of the access). Later, it's
that policy information which controls how the request is translated
into MMC commands. I'm almost done with a prototype.

I noticed that all sysfs attributes are managed by code under
core/mmc.c and core/sd.c, duplicating where necessary. I think some of
the new block-related settings like page_size (or policies) are
generic enough that they should live in the card/block code. How about
putting all future sysfs block related things into block-sysfs.c?

Thanks,
A

^ permalink raw reply

* [PATCH 1/5] ARM: smp: Select local timers vs dummy timersupport runtime
From: Santosh Shilimkar @ 2011-02-20 11:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20110220110341.GA15494@n2100.arm.linux.org.uk>

> -----Original Message-----
> From: Russell King - ARM Linux [mailto:linux at arm.linux.org.uk]
> Sent: Sunday, February 20, 2011 4:34 PM
> To: Santosh Shilimkar
> Cc: linux-omap at vger.kernel.org; khilman at ti.com; linux-arm-
> kernel at lists.infradead.org; tony at atomide.com; David Brown; Daniel
> Walker; Bryan Huntsman; Kukjin Kim; Paul Mundt; Magnus Damm; Colin
> Cross; Erik Gilling; Srinidhi Kasagar; Linus Walleij
> Subject: Re: [PATCH 1/5] ARM: smp: Select local timers vs dummy
> timersupport runtime
>
> On Sat, Feb 12, 2011 at 04:59:43PM +0530, Santosh Shilimkar wrote:
> > -#ifndef CONFIG_LOCAL_TIMERS
> >  static void broadcast_timer_set_mode(enum clock_event_mode mode,
> >  	struct clock_event_device *evt)
> >  {
> >  }
> >
> > -static void local_timer_setup(struct clock_event_device *evt)
> > +static void dummy_timer_setup(struct clock_event_device *evt)
>
> Please call this broadcast_timer_setup().

Right. Will fix this.

Regards
Santosh

^ permalink raw reply

* [PATCH 1/5] ARM: smp: Select local timers vs dummy timer support runtime
From: Russell King - ARM Linux @ 2011-02-20 11:03 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1297510187-31547-2-git-send-email-santosh.shilimkar@ti.com>

On Sat, Feb 12, 2011 at 04:59:43PM +0530, Santosh Shilimkar wrote:
> -#ifndef CONFIG_LOCAL_TIMERS
>  static void broadcast_timer_set_mode(enum clock_event_mode mode,
>  	struct clock_event_device *evt)
>  {
>  }
>  
> -static void local_timer_setup(struct clock_event_device *evt)
> +static void dummy_timer_setup(struct clock_event_device *evt)

Please call this broadcast_timer_setup().

^ permalink raw reply

* [PATCH] msm: headsmp.S: Fix section mismatch
From: Russell King - ARM Linux @ 2011-02-20  9:10 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1298168817.17118.2.camel@m0nster>

On Sat, Feb 19, 2011 at 06:26:57PM -0800, Daniel Walker wrote:
> On Fri, 2011-02-18 at 19:14 -0800, Stephen Boyd wrote:
> > WARNING: vmlinux.o(.cpuinit.text+0xc80): Section mismatch in
> > reference from the function boot_secondary() to the variable
> > .init.text:msm_secondary_startup
> > The function __cpuinit boot_secondary() references a variable
> > __init msm_secondary_startup.  If msm_secondary_startup is only
> > used by boot_secondary then annotate msm_secondary_startup with
> > a matching annotation.
> 
> Description is pretty gross.. Can you just explain how the section
> mismatch is happening.

That's the error message, which is self-explanatory.

msm_secondary_startup is in the __init section, which is always discarded.
boot_secondary is in the __cpuinit section, which may or may not be
discarded, and if CPU hotplug is enabled, may be called after the __init
section has been discarded.

^ permalink raw reply

* [PATCH] arm: mach-at91: remove double-semicolons
From: Jean-Christophe PLAGNIOL-VILLARD @ 2011-02-20  8:38 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1298151940-14411-1-git-send-email-sylvain.calador@gmail.com>

On 22:45 Sat 19 Feb     , Sylvain Calador wrote:
> Remove double-semicolons.
> 
> Signed-off-by: Sylvain Calador <sylvain.calador@gmail.com>
Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>

Best Regards,
J.

^ permalink raw reply

* MMC quirks relating to performance/lifetime.
From: Andrei Warkentin @ 2011-02-20  5:56 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <201102191220.54815.arnd@arndb.de>

On Sat, Feb 19, 2011 at 5:20 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Saturday 19 February 2011 00:17:51 Andrei Warkentin wrote:
>> # echo 0 > /sys/block/mmcblk0/device/page_size
>> # ./flashbench -A -b 1024 /dev/block/mmcblk0p9
>> write align 8388608	pre 3.59ms	on 6.54ms	post 3.65ms	diff 2.92ms
>> write align 4194304	pre 4.13ms	on 7.37ms	post 4.27ms	diff 3.17ms
>> write align 2097152	pre 3.62ms	on 6.81ms	post 3.94ms	diff 3.03ms
>> write align 1048576	pre 3.62ms	on 6.53ms	post 3.55ms	diff 2.95ms
>> write align 524288	pre 3.62ms	on 6.51ms	post 3.63ms	diff 2.88ms
>> write align 262144	pre 3.62ms	on 6.51ms	post 3.63ms	diff 2.89ms
>> write align 131072	pre 3.62ms	on 6.5ms	post 3.63ms	diff 2.88ms
>> write align 65536	pre 3.61ms	on 6.49ms	post 3.62ms	diff 2.88ms
>> write align 32768	pre 3.61ms	on 6.49ms	post 3.61ms	diff 2.88ms
>> write align 16384	pre 3.68ms	on 107ms	post 3.51ms	diff 103ms
>> write align 8192	pre 3.74ms	on 121ms	post 3.91ms	diff 117ms
>> write align 4096	pre 3.88ms	on 3.87ms	post 3.87ms	diff -2937ns
>> write align 2048	pre 3.89ms	on 3.88ms	post 3.88ms	diff -8734ns
>> # fjnh84 at fjnh84-desktop:~/src/n/src/flash$ adb -s 17006185428011d7 shell
>> # echo 8192 > /sys/block/mmcblk0/device/page_size
>> # cd data
>> # ./flashbench -A -b 1024 /dev/block/mmcblk0p9
>> write align 8388608	pre 3.33ms	on 6.8ms	post 3.65ms	diff 3.31ms
>> write align 4194304	pre 4.34ms	on 8.14ms	post 4.53ms	diff 3.71ms
>> write align 2097152	pre 3.64ms	on 7.31ms	post 4.09ms	diff 3.44ms
>> write align 1048576	pre 3.65ms	on 7.52ms	post 3.65ms	diff 3.87ms
>> write align 524288	pre 3.62ms	on 6.8ms	post 3.63ms	diff 3.17ms
>> write align 262144	pre 3.62ms	on 6.84ms	post 3.63ms	diff 3.22ms
>> write align 131072	pre 3.62ms	on 6.85ms	post 3.44ms	diff 3.32ms
>> write align 65536	pre 3.39ms	on 6.8ms	post 3.66ms	diff 3.28ms
>> write align 32768	pre 3.64ms	on 6.86ms	post 3.66ms	diff 3.21ms
>> write align 16384	pre 3.67ms	on 6.86ms	post 3.65ms	diff 3.2ms
>> write align 8192	pre 3.66ms	on 6.84ms	post 3.64ms	diff 3.19ms
>> write align 4096	pre 3.71ms	on 3.71ms	post 3.64ms	diff 38.6µs
>> write align 2048	pre 3.71ms	on 3.71ms	post 3.72ms	diff -656ns
>>
>> This was with the split unaligned accesses patch... Which I am
>> attaching for comments.
>
> I agree, this is very fascinating behavior. A 100ms latency for a
> single 2KB access is definitely something we should try to avoid, and I
> wonder why the drive decides to do that. It must get into a state where
> it requires an extra garbage collection (you mentioned that earlier).
>
> The numbers you see here are taken over multiple runs. Do you see a lot
> of fluctuation when doing this with --count=1?
>

Yep. Quite a bit.

# ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
write align 8388608	pre 4.52ms	on 7.58ms	post 3.93ms	diff 3.36ms
write align 4194304	pre 5.97ms	on 8.69ms	post 4.36ms	diff 3.53ms
write align 2097152	pre 3.57ms	on 7.96ms	post 4.6ms	diff 3.88ms
write align 1048576	pre 5.33ms	on 27.4ms	post 4.88ms	diff 22.3ms
write align 524288	pre 49.3ms	on 31.4ms	post 14.9ms	diff -679265
write align 262144	pre 39.7ms	on 38.3ms	post 5.27ms	diff 15.8ms
write align 131072	pre 33.8ms	on 45.4ms	post 5.26ms	diff 25.9ms
write align 65536	pre 34.4ms	on 40.9ms	post 3.3ms	diff 22.1ms
write align 32768	pre 30.2ms	on 44.8ms	post 5.13ms	diff 27.1ms
write align 16384	pre 44.5ms	on 5.05ms	post 33.3ms	diff -338542
write align 8192	pre 25.5ms	on 70.6ms	post 25.3ms	diff 45.2ms
write align 4096	pre 4.89ms	on 4.47ms	post 5.29ms	diff -623390
write align 2048	pre 4.88ms	on 4.89ms	post 5.2ms	diff -155781
# ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
write align 8388608	pre 4.68ms	on 9.06ms	post 5.14ms	diff 4.15ms
write align 4194304	pre 4.37ms	on 7.49ms	post 4.59ms	diff 3.01ms
write align 2097152	pre 23.7ms	on 1.9ms	post 14.8ms	diff -173218
write align 1048576	pre 14.8ms	on 19.9ms	post 4.75ms	diff 10.2ms
write align 524288	pre 20.2ms	on 24.9ms	post 10.7ms	diff 9.46ms
write align 262144	pre 20.2ms	on 3.01ms	post 20.1ms	diff -171062
write align 131072	pre 25.9ms	on 24.9ms	post 9.85ms	diff 7.06ms
write align 65536	pre 15.5ms	on 30.3ms	post 2.95ms	diff 21.1ms
write align 32768	pre 27.3ms	on 19.1ms	post 5.86ms	diff 2.5ms
write align 16384	pre 25.4ms	on 55.9ms	post 12.7ms	diff 36.9ms
write align 8192	pre 4.8ms	on 102ms	post 9.47ms	diff 94.8ms
write align 4096	pre 4.92ms	on 5.16ms	post 4.98ms	diff 207µs
write align 2048	pre 4.64ms	on 4.92ms	post 5.45ms	diff -121860
# ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
write align 8388608	pre 15.8ms	on 9.39ms	post 4.68ms	diff -854295
write align 4194304	pre 4.76ms	on 7.54ms	post 3.82ms	diff 3.24ms
write align 2097152	pre 19.9ms	on 9.73ms	post 4.44ms	diff -244517
write align 1048576	pre 14.5ms	on 19.1ms	post 5.21ms	diff 9.23ms
write align 524288	pre 24.9ms	on 29ms	post 5.89ms	diff 13.6ms
write align 262144	pre 24.9ms	on 2.41ms	post 20.8ms	diff -204328
write align 131072	pre 25.6ms	on 30ms	post 4.84ms	diff 14.8ms
write align 65536	pre 26.4ms	on 24.4ms	post 6.16ms	diff 8.12ms
write align 32768	pre 15ms	on 30.6ms	post 15.4ms	diff 15.4ms
write align 16384	pre 16.1ms	on 45.4ms	post 16.5ms	diff 29.1ms
write align 8192	pre 5.88ms	on 107ms	post 5.45ms	diff 101ms
write align 4096	pre 5.17ms	on 5.78ms	post 4.83ms	diff 778µs
write align 2048	pre 3.99ms	on 5.27ms	post 3.97ms	diff 1.29ms
# ./flashbench -c 1 -A -b 1024 /dev/block/mmcblk0p9
write align 8388608	pre 16.1ms	on 8.37ms	post 5.44ms	diff -241222
write align 4194304	pre 4.07ms	on 7.27ms	post 3.89ms	diff 3.29ms
write align 2097152	pre 24.2ms	on 18.5ms	post 5.63ms	diff 3.59ms
write align 1048576	pre 4.08ms	on 18.9ms	post 5.46ms	diff 14.1ms
write align 524288	pre 25.1ms	on 28ms	post 14.6ms	diff 8.13ms
write align 262144	pre 15.8ms	on 30ms	post 5.4ms	diff 19.4ms
write align 131072	pre 24.7ms	on 30.8ms	post 4.43ms	diff 16.2ms
write align 65536	pre 5ms	on 40.5ms	post 5.95ms	diff 35.1ms
write align 32768	pre 24.7ms	on 30.6ms	post 4.92ms	diff 15.8ms
write align 16384	pre 25.2ms	on 132ms	post 10.2ms	diff 114ms
write align 8192	pre 7.64ms	on 111ms	post 9.18ms	diff 102ms
write align 4096	pre 5.11ms	on 3.92ms	post 5.4ms	diff -134159
write align 2048	pre 3.92ms	on 4.41ms	post 4.51ms	diff 196µs

> Also, does the same happen with other blocksizes, e.g. 4096 or 8192, passed
> to flashbench?
>

# echo 0 > /sys/block/mmcblk0/device/page_size
# ./flashbench -A -b 1024 /dev/block/mmcblk0p9
write align 8388608	pre 3.63ms	on 6.51ms	post 3.66ms	diff 2.86ms
write align 4194304	pre 3.61ms	on 6.51ms	post 3.62ms	diff 2.89ms
write align 2097152	pre 3.61ms	on 6.49ms	post 3.62ms	diff 2.87ms
write align 1048576	pre 3.64ms	on 6.55ms	post 3.62ms	diff 2.92ms
write align 524288	pre 3.64ms	on 6.57ms	post 3.66ms	diff 2.92ms
write align 262144	pre 3.44ms	on 6.45ms	post 3.66ms	diff 2.9ms
write align 131072	pre 3.64ms	on 6.56ms	post 3.67ms	diff 2.91ms
write align 65536	pre 3.33ms	on 6.57ms	post 3.65ms	diff 3.08ms
write align 32768	pre 3.68ms	on 6.6ms	post 3.7ms	diff 2.91ms
write align 16384	pre 3.64ms	on 97.6ms	post 3.26ms	diff 94.2ms
write align 8192	pre 3.49ms	on 115ms	post 3.62ms	diff 112ms
write align 4096	pre 3.91ms	on 3.91ms	post 3.9ms	diff 360ns
write align 2048	pre 3.92ms	on 3.92ms	post 3.92ms	diff -1374ns
# ./flashbench -A -b 2048 /dev/block/mmcblk0p9
write align 8388608	pre 3.76ms	on 7.23ms	post 4.18ms	diff 3.27ms
write align 4194304	pre 3.65ms	on 6.56ms	post 3.66ms	diff 2.9ms
write align 2097152	pre 3.9ms	on 6.99ms	post 3.67ms	diff 3.2ms
write align 1048576	pre 4.03ms	on 7.09ms	post 4.07ms	diff 3.04ms
write align 524288	pre 4.04ms	on 7.26ms	post 4.16ms	diff 3.16ms
write align 262144	pre 3.8ms	on 7.26ms	post 4.06ms	diff 3.33ms
write align 131072	pre 4.05ms	on 7.25ms	post 4.18ms	diff 3.14ms
write align 65536	pre 4.02ms	on 7.22ms	post 4.14ms	diff 3.14ms
write align 32768	pre 4ms	on 7.07ms	post 3.95ms	diff 3.1ms
write align 16384	pre 3.66ms	on 106ms	post 3.4ms	diff 102ms
write align 8192	pre 3.56ms	on 106ms	post 3.36ms	diff 103ms
write align 4096	pre 3.61ms	on 4.1ms	post 4.35ms	diff 117µs
# ./flashbench -A -b 4096 /dev/block/mmcblk0p9
write align 8388608	pre 3.64ms	on 6.95ms	post 3.96ms	diff 3.15ms
write align 4194304	pre 3.65ms	on 6.56ms	post 3.66ms	diff 2.9ms
write align 2097152	pre 3.89ms	on 6.79ms	post 3.66ms	diff 3.01ms
write align 1048576	pre 3.88ms	on 6.88ms	post 3.95ms	diff 2.97ms
write align 524288	pre 3.72ms	on 6.97ms	post 3.93ms	diff 3.15ms
write align 262144	pre 3.89ms	on 6.93ms	post 3.95ms	diff 3.01ms
write align 131072	pre 3.9ms	on 6.98ms	post 3.96ms	diff 3.05ms
write align 65536	pre 3.89ms	on 6.97ms	post 3.96ms	diff 3.04ms
write align 32768	pre 3.89ms	on 6.97ms	post 3.96ms	diff 3.04ms
write align 16384	pre 3.74ms	on 114ms	post 4.05ms	diff 110ms
write align 8192	pre 4.25ms	on 115ms	post 4.8ms	diff 110ms
# ./flashbench -A -b 8192 /dev/block/mmcblk0p9
write align 8388608	pre 3.84ms	on 7.53ms	post 4.29ms	diff 3.47ms
write align 4194304	pre 3.58ms	on 6.54ms	post 3.6ms	diff 2.95ms
write align 2097152	pre 4.12ms	on 7.27ms	post 3.87ms	diff 3.28ms
write align 1048576	pre 4.14ms	on 7.49ms	post 4.24ms	diff 3.3ms
write align 524288	pre 4.12ms	on 7.46ms	post 4.23ms	diff 3.29ms
write align 262144	pre 4.14ms	on 7.45ms	post 3.97ms	diff 3.4ms
write align 131072	pre 3.89ms	on 7.43ms	post 4.24ms	diff 3.37ms
write align 65536	pre 4.11ms	on 7.46ms	post 4.24ms	diff 3.29ms
write align 32768	pre 4.15ms	on 7.45ms	post 4.25ms	diff 3.25ms
write align 16384	pre 4.24ms	on 96.1ms	post 3.83ms	diff 92.1ms

I thought the following was interesting. I did it to see the big
time go away, since it would end up being a 16K write straddling an 8K
boundary, but I don't understand the pre and post results at all.

# ./flashbench -A -b 16384  /dev/block/mmcblk0p9
write align 8388608	pre 121ms	on 7.76ms	post 116ms	diff -110845
write align 4194304	pre 129ms	on 7.57ms	post 115ms	diff -114863
write align 2097152	pre 121ms	on 7.78ms	post 123ms	diff -114318
write align 1048576	pre 131ms	on 7.74ms	post 106ms	diff -110856
write align 524288	pre 131ms	on 7.58ms	post 116ms	diff -115926
write align 262144	pre 131ms	on 7.55ms	post 115ms	diff -115591
write align 131072	pre 131ms	on 7.54ms	post 116ms	diff -115617
write align 65536	pre 131ms	on 7.54ms	post 115ms	diff -115579
write align 32768	pre 125ms	on 6.89ms	post 116ms	diff -113408

^ permalink raw reply

* MMC quirks relating to performance/lifetime.
From: Andrei Warkentin @ 2011-02-20  4:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <201102191054.48706.arnd@arndb.de>

On Sat, Feb 19, 2011 at 3:54 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Friday 18 February 2011 23:40:16 Andrei Warkentin wrote:
>> On Fri, Feb 18, 2011 at 1:47 PM, Andrei Warkentin <andreiw@motorola.com> wrote:
>>
>> Flashbench timings for both Sandisk and Toshiba cards. Attaching due to size.
>
> Very nice, thanks for the measurement!
>
> I don't think having the results inline in the mail is a problem,
> it would even make it easier to quote.
>
>> Some interesting things that I don't understand. For the align test, I
>> extended it to do a write align test (-A). I tried two partitions that
>> I could write over, and both read and writes behaved differently for
>> the two partitions on same device. Odd. They are both 4MB aligned.
>
> I never did a write align test because the results will be highly
> unreliable as soon as you get into thrashing. Your results seem
> to be meaningful still, so maybe we should have it after all, but
> I'll put a big warning on it.
>

Actually it would be a good idea to also bail/warn if you do the au
test with more open au's than the size of the passed device allows,
since it'll just wrap around and skew the results.

>> On the sandisk it was the write align that made the page size stand
>> out.  The read align had pretty constant results.
>
> I've noticed on other Sandisk media that the read align test is
> sometimes useless. It may help to do a full erase of the partition,
> or to fill it with data before running the test.
>
>> On the toshiba the results varied wildly for the two partitions. For
>> partition 6, there was a clear pattern in the diff values for read
>> align. For 9, it was all over the place. For 9 with the write align,
>> 8K and 16K the crossing writes took ~115ms!! Look in attached files
>> for all the data.
>
> Partition 6 is a lot smaller, so you have the accesses less than a
> segment apart, so it shows other effects.
>
>> The AU tests were interesting too, especially how with several open
>> AUs the throughput is higher for certain smaller sizes on sandisk, but
>> if I interpret it correctly both cards have at least 4 AUs, as I
>> didn't see yet a significant drop for small sizes. The larger ones I
>> am running now on mmcblk0p9 which is sufficiently larger for these
>> tests... (mmcblk0p6 is only 40mb, p9 is 314 mb)
>
> Right, you should try larger values for --open-au-nr here. It's at
> least a good sign that the drive can do random access inside a segment
> and that it can have at least 4 segments open. This is much better
> than I expected from your descriptions at first.

Actually the Toshiba one seems to have 7 AUs if I interpret this correctly.
^C
# ./flashbench -O -0 6  -b 512 /dev/block/mmcblk0p9
4MiB    5.91M/s
2MiB    8.84M/s
1MiB    10.8M/s
512KiB  13M/s
256KiB  13.6M/s

^C
# ./flashbench -O -0 7  -b 512 /dev/block/mmcblk0p9
4MiB    6.32M/s
2MiB    8.63M/s
1MiB    10.5M/s
512KiB  13.2M/s
256KiB  13M/s
128KiB  12.3M/s
^C
# ./flashbench -O -0 8  -b 512 /dev/block/mmcblk0p9
4MiB    6.65M/s
2MiB    7.02M/s
1MiB    6.36M/s
512KiB  3.17M/s
256KiB  1.53M/s

The Sandisk one has 20 AUs.

# ./flashbench -O -0 20  -b 512 /dev/block/mmcblk0p9
4MiB    11.3M/s
2MiB    12.8M/s
1MiB    9.87M/s
512KiB  9.97M/s
256KiB  9.13M/s
128KiB  8.05M/s
^C
# ./flashbench -O -0 50  -b 512 /dev/block/mmcblk0p9
4MiB    7.19M/s
^C
# ./flashbench -O -0 2  -b 512 /dev/block/mmcblk0p9
^C
# ./flashbench -O -0 22  -b 512 /dev/block/mmcblk0p9
4MiB    11.6M/s
2MiB    12.3M/s
1MiB    5.13M/s
512KiB  2.57M/s
256KiB  1.59M/s
128KiB  1.16M/s
64KiB   776K/s
^C
# ./flashbench -O -0 21  -b 512 /dev/block/mmcblk0p9
4MiB    11.2M/s
2MiB    12.4M/s
1MiB    4.65M/s
512KiB  1.95M/s
256KiB  955K/s

>
> However, the drop from 32 KB to 16 KB in performance is horrifying
> for the Toshiba drive, it's clear that this one does not like
> to be accessed smaller than 32 KB at a time, an obvious optimization
> for FAT32 with 32 KB clusters. How does this change with your
> kernel patches?

Since the only performance-increasing patch here would be just the one
that splits unaligned accesses, I wouldn't expect any improvements for
page-aligned accesses < 32KB. As you can see here...

# cat /sys/block/mmcblk0/device/page_size
8192
# ./flashbench -O -0 1  -b 512 /dev/block/mmcblk0p9
4MiB    6.81M/s
2MiB    7.73M/s
1MiB    9.21M/s
512KiB  9.98M/s
256KiB  10.3M/s
128KiB  10.2M/s
64KiB   9.76M/s
32KiB   8.52M/s
16KiB   3.68M/s
8KiB    1.72M/s
4KiB    837K/s
^C
# echo 0 >  /sys/block/mmcblk0/device/page_size
# ./flashbench -O -0 1  -b 512 /dev/block/mmcblk0p9
4MiB    6.42M/s
2MiB    7.79M/s
1MiB    9.22M/s
512KiB  10M/s
256KiB  9.94M/s
128KiB  10.1M/s
64KiB   9.68M/s
32KiB   8.5M/s
16KiB   3.65M/s
8KiB    1.73M/s
4KiB    838K/s
2KiB    417K/s
^C
#


>
> For the sandisk drive, it's funny how it is consistently faster
> doing random access than linear access. I don't think I've seen that
> before. It does seem to have some cache for linear access using
> smaller than 16 KB, and can probably combine them when it's only
> writing to a single segment.

Yes, that is pretty interesting. Smaller than 16K? Not smaller than
32K? I wonder what it is doing...

^ permalink raw reply

* [PATCH 4/4] ARM: tegra: Move pinmux init call
From: Stephen Warren @ 2011-02-20  3:38 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1298173137-9242-1-git-send-email-swarren@nvidia.com>

In order for the clock initialization to pick up the results of the
pinmux initialization (which will initialize various parameters of
clocks cdev1, cdev2), the pinmux initialization must happen first.
Move the pinmux init to achieve this.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/board-harmony.c   |    4 ++--
 arch/arm/mach-tegra/board-trimslice.c |    4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-tegra/board-harmony.c b/arch/arm/mach-tegra/board-harmony.c
index b9dbdb1..eea5ad6 100644
--- a/arch/arm/mach-tegra/board-harmony.c
+++ b/arch/arm/mach-tegra/board-harmony.c
@@ -104,12 +104,12 @@ static __initdata struct tegra_clk_init_table harmony_clk_init_table[] = {
 
 static void __init tegra_harmony_init(void)
 {
+	harmony_pinmux_init();
+
 	tegra_common_init();
 
 	tegra_clk_init_from_table(harmony_clk_init_table);
 
-	harmony_pinmux_init();
-
 	platform_add_devices(harmony_devices, ARRAY_SIZE(harmony_devices));
 }
 
diff --git a/arch/arm/mach-tegra/board-trimslice.c b/arch/arm/mach-tegra/board-trimslice.c
index ef233b2..aef6abb 100644
--- a/arch/arm/mach-tegra/board-trimslice.c
+++ b/arch/arm/mach-tegra/board-trimslice.c
@@ -85,12 +85,12 @@ subsys_initcall(tegra_trimslice_pci_init);
 
 static void __init tegra_trimslice_init(void)
 {
+	trimslice_pinmux_init();
+
 	tegra_common_init();
 
 	tegra_clk_init_from_table(trimslice_clk_init_table);
 
-	trimslice_pinmux_init();
-
 	platform_add_devices(trimslice_devices, ARRAY_SIZE(trimslice_devices));
 }
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH 3/4] ARM: tegra: pinmux: Expose set_func, add get_*
From: Stephen Warren @ 2011-02-20  3:38 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1298173137-9242-1-git-send-email-swarren@nvidia.com>

Expose set_func for consistency with other set_* functions, plus other
drivers can use this basic API more easily than
tegra_pinmux_config_*_table.

Implement get_* to allow other drivers (e.g. clock) to initialize their
state (e.g. cdev[12] parenting/enable) based on pinmux state.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/include/mach/pinmux.h |    9 ++++
 arch/arm/mach-tegra/pinmux.c              |   64 ++++++++++++++++++++++++++---
 2 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-tegra/include/mach/pinmux.h b/arch/arm/mach-tegra/include/mach/pinmux.h
index defd877..368dc3e 100644
--- a/arch/arm/mach-tegra/include/mach/pinmux.h
+++ b/arch/arm/mach-tegra/include/mach/pinmux.h
@@ -218,6 +218,15 @@ struct tegra_pingroup_desc {
 extern const struct tegra_pingroup_desc tegra_soc_pingroups[];
 extern const struct tegra_drive_pingroup_desc tegra_soc_drive_pingroups[];
 
+int tegra_pinmux_get_func(enum tegra_pingroup pg,
+	enum tegra_mux_func *func);
+int tegra_pinmux_get_tristate(enum tegra_pingroup pg,
+	enum tegra_tristate *tristate);
+int tegra_pinmux_get_pullupdown(enum tegra_pingroup pg,
+	enum tegra_pullupdown *pupd);
+
+int tegra_pinmux_set_func(enum tegra_pingroup pg,
+	enum tegra_mux_func func);
 int tegra_pinmux_set_tristate(enum tegra_pingroup pg,
 	enum tegra_tristate tristate);
 int tegra_pinmux_set_pullupdown(enum tegra_pingroup pg,
diff --git a/arch/arm/mach-tegra/pinmux.c b/arch/arm/mach-tegra/pinmux.c
index f80d507..1c69d4d 100644
--- a/arch/arm/mach-tegra/pinmux.c
+++ b/arch/arm/mach-tegra/pinmux.c
@@ -180,14 +180,66 @@ static inline void pg_writel(unsigned long value, unsigned long offset)
 	writel(value, IO_TO_VIRT(TEGRA_APB_MISC_BASE + offset));
 }
 
-static int tegra_pinmux_set_func(const struct tegra_pingroup_config *config)
+int tegra_pinmux_get_func(enum tegra_pingroup pg,
+	enum tegra_mux_func *func)
+{
+	int mux;
+	unsigned long reg;
+
+	if (pg < 0 || pg >=  TEGRA_MAX_PINGROUP)
+		return -ERANGE;
+
+	if (pingroups[pg].mux_reg < 0)
+		return -EINVAL;
+
+	reg = pg_readl(pingroups[pg].mux_reg);
+	mux = (reg >> pingroups[pg].mux_bit) & 3;
+	*func = pingroups[pg].funcs[mux];
+
+	return 0;
+}
+
+int tegra_pinmux_get_tristate(enum tegra_pingroup pg,
+	enum tegra_tristate *tristate)
+{
+	unsigned long reg;
+
+	if (pg < 0 || pg >=  TEGRA_MAX_PINGROUP)
+		return -ERANGE;
+
+	if (pingroups[pg].tri_reg < 0)
+		return -EINVAL;
+
+	reg = pg_readl(pingroups[pg].tri_reg);
+	*tristate = reg & (1 << pingroups[pg].tri_bit);
+
+	return 0;
+}
+
+int tegra_pinmux_get_pullupdown(enum tegra_pingroup pg,
+	enum tegra_pullupdown *pupd)
+{
+	unsigned long reg;
+
+	if (pg < 0 || pg >=  TEGRA_MAX_PINGROUP)
+		return -ERANGE;
+
+	if (pingroups[pg].pupd_reg < 0)
+		return -EINVAL;
+
+	reg = pg_readl(pingroups[pg].pupd_reg);
+	*pupd = (reg >> pingroups[pg].pupd_bit) & 3;
+
+	return 0;
+}
+
+int tegra_pinmux_set_func(enum tegra_pingroup pg,
+	enum tegra_mux_func func)
 {
 	int mux = -1;
 	int i;
 	unsigned long reg;
 	unsigned long flags;
-	enum tegra_pingroup pg = config->pingroup;
-	enum tegra_mux_func func = config->func;
 
 	if (pg < 0 || pg >=  TEGRA_MAX_PINGROUP)
 		return -ERANGE;
@@ -291,7 +343,7 @@ static void tegra_pinmux_config_pingroup(const struct tegra_pingroup_config *con
 	int err;
 
 	if (pingroups[pingroup].mux_reg >= 0) {
-		err = tegra_pinmux_set_func(config);
+		err = tegra_pinmux_set_func(pingroup, func);
 		if (err < 0)
 			pr_err("pinmux: can't set pingroup %s func to %s: %d\n",
 			       pingroup_name(pingroup), func_name(func), err);
@@ -598,7 +650,7 @@ void tegra_pinmux_set_safe_pinmux_table(const struct tegra_pingroup_config *conf
 			continue;
 		}
 		c.func = pingroups[c.pingroup].func_safe;
-		err = tegra_pinmux_set_func(&c);
+		err = tegra_pinmux_set_func(c.pingroup, c.func);
 		if (err < 0)
 			pr_err("%s: tegra_pinmux_set_func returned %d setting "
 			       "%s to %s\n", __func__, err,
@@ -618,7 +670,7 @@ void tegra_pinmux_config_pinmux_table(const struct tegra_pingroup_config *config
 			WARN_ON(1);
 			continue;
 		}
-		err = tegra_pinmux_set_func(&config[i]);
+		err = tegra_pinmux_set_func(config[i].pingroup, config[i].func);
 		if (err < 0)
 			pr_err("%s: tegra_pinmux_set_func returned %d setting "
 			       "%s to %s\n", __func__, err,
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox