* [patch 6/12] hold atomic kmaps across generic_file_read
@ 2002-08-10 0:57 Andrew Morton
2002-08-10 1:33 ` Linus Torvalds
0 siblings, 1 reply; 52+ messages in thread
From: Andrew Morton @ 2002-08-10 0:57 UTC (permalink / raw)
To: Linus Torvalds; +Cc: lkml
This patch allows the kernel to hold atomic kmaps across copy_*_user.
From an idea by Linus and/or Martin Bligh and/or Andrea.
The basic idea is: when the kernel takes an atomic kmap via the new
kmap_copy_user() function it records state about that kmap in
current->copy_user_state. If a pagefault is taken then the page fault
handler will fix up the copy_*_user state prior to returning to
copy_*_user.
An optimisation to this (Andrea) is to use a sequence number to detect
whether the copy_*_user's fixmap slot was reused during the processing
of the pagefault. If not, and we're on the same CPU then no fixup is
needed.
The fixup code in the pagefault path will rewrite the CPU's ESI or EDI
register to point at the fixed up kmap. This means that the caller of
kmap_copy_user() MUST be using a copy function which uses ESI or EDI in
the normal manner.
The interfaces are designed so that non-x86 architectures which are
using highmem can implement the same trick.
If a different copy_*_user implementation is written then new fixup
code will be needed.
The only new copy_*_user implementation of which I am aware is the
"efficient copy_*_user routines" from Mala Anand and colleagues. They
use ESI/EDI as well - this code has been successfully tested against
those patches.
This patch uses kmap_copy_user() in file_read_actor().
This patch breaks the ramdisk driver when it is used as a module,
unless you've applied Rusty's patch which exports __per_cpu_data.
arch/i386/kernel/i386_ksyms.c | 5 ++
arch/i386/lib/usercopy.c | 10 +++++
arch/i386/mm/fault.c | 71 +++++++++++++++++++++++++++++++++++
include/asm-i386/highmem.h | 5 ++
include/asm-i386/kmap_types.h | 3 +
include/asm-i386/processor.h | 2 +
include/asm-ppc/kmap_types.h | 1
include/asm-sparc/kmap_types.h | 1
include/asm-x86_64/kmap_types.h | 1
include/linux/highmem.h | 80 ++++++++++++++++++++++++++++++++++++++++
include/linux/sched.h | 5 ++
mm/filemap.c | 11 +++--
12 files changed, 189 insertions, 6 deletions
--- 2.5.30/arch/i386/kernel/i386_ksyms.c~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/arch/i386/kernel/i386_ksyms.c Fri Aug 9 17:36:42 2002
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/tty.h>
+#include <linux/highmem.h>
#include <asm/semaphore.h>
#include <asm/processor.h>
@@ -74,6 +75,10 @@ EXPORT_SYMBOL(pm_idle);
EXPORT_SYMBOL(pm_power_off);
EXPORT_SYMBOL(get_cmos_time);
EXPORT_SYMBOL(apm_info);
+
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL(kmap_atomic_seq);
+#endif
#ifdef CONFIG_DEBUG_IOVIRT
EXPORT_SYMBOL(__io_virt_debug);
--- 2.5.30/arch/i386/lib/usercopy.c~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/arch/i386/lib/usercopy.c Fri Aug 9 17:36:42 2002
@@ -11,6 +11,16 @@
#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+/*
+ * We cannot use the mmx functions here with the kmap_atomic fixup
+ * code.
+ *
+ * But CONFIG_X86_USE_3DNOW_AND_WORKS never gets defined anywhere.
+ * Maybe kill this code?
+ */
+
+#error this will not work
+
unsigned long
__generic_copy_to_user(void *to, const void *from, unsigned long n)
{
--- 2.5.30/arch/i386/mm/fault.c~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/arch/i386/mm/fault.c Fri Aug 9 17:36:42 2002
@@ -13,6 +13,7 @@
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
@@ -129,6 +130,70 @@ void bust_spinlocks(int yes)
console_loglevel = loglevel_save;
}
+#ifdef CONFIG_HIGHMEM
+
+/*
+ * per-cpu, per-atomic-kmap sequence numbers. Incremented in kmap_atomic.
+ * If these change, we know that an atomic kmap slot has been reused.
+ */
+int kmap_atomic_seq[KM_TYPE_NR] __per_cpu_data = {0};
+
+/*
+ * Note the CPU ID and the currently-held atomic kmap's sequence number
+ */
+static inline void note_atomic_kmap(struct pt_regs *regs)
+{
+ struct copy_user_state *cus = current->copy_user_state;
+
+ if (cus) {
+ cus->cpu = smp_processor_id();
+ cus->seq = this_cpu(kmap_atomic_seq[cus->type]);
+ }
+}
+
+/*
+ * After processing the fault, look to see whether we have switched CPUs
+ * or whether the fault handler has used the same kmap slot (it must have
+ * scheduled to another task). If so, drop the kmap and get a new one.
+ * And then fix up the machine register which copy_*_user() is using so
+ * that it gets the correct address relative to the new kmap.
+ */
+static void
+__check_atomic_kmap(struct copy_user_state *cus, struct pt_regs *regs)
+{
+ const int cpu = smp_processor_id();
+
+ if (cus->seq != per_cpu(kmap_atomic_seq[cus->type], cpu) ||
+ cus->cpu != cpu) {
+ long *reg;
+ unsigned offset;
+
+ kunmap_atomic(cus->kaddr, cus->type);
+ cus->kaddr = kmap_atomic(cus->page, cus->type);
+ if (cus->src)
+ reg = &regs->esi;
+ else
+ reg = &regs->edi;
+ offset = *reg & (PAGE_SIZE - 1);
+ *reg = ((long)cus->kaddr) | offset;
+ }
+}
+
+static inline void check_atomic_kmap(struct pt_regs *regs)
+{
+ struct copy_user_state *cus = current->copy_user_state;
+
+ if (cus)
+ __check_atomic_kmap(cus, regs);
+}
+
+#else
+static inline void note_atomic_kmap(struct pt_regs *regs)
+{}
+static inline void check_atomic_kmap(struct pt_regs *regs)
+{}
+#endif
+
asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
/*
@@ -187,6 +252,8 @@ asmlinkage void do_page_fault(struct pt_
if (in_interrupt() || !mm)
goto no_context;
+ note_atomic_kmap(regs);
+
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
@@ -248,8 +315,10 @@ good_area:
tsk->maj_flt++;
break;
case VM_FAULT_SIGBUS:
+ check_atomic_kmap(regs);
goto do_sigbus;
case VM_FAULT_OOM:
+ check_atomic_kmap(regs);
goto out_of_memory;
default:
BUG();
@@ -264,6 +333,7 @@ good_area:
tsk->thread.screen_bitmap |= 1 << bit;
}
up_read(&mm->mmap_sem);
+ check_atomic_kmap(regs);
return;
/*
@@ -272,6 +342,7 @@ good_area:
*/
bad_area:
up_read(&mm->mmap_sem);
+ check_atomic_kmap(regs);
/* User mode accesses just cause a SIGSEGV */
if (error_code & 4) {
--- 2.5.30/include/asm-i386/highmem.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/asm-i386/highmem.h Fri Aug 9 17:36:42 2002
@@ -22,6 +22,7 @@
#include <linux/config.h>
#include <linux/interrupt.h>
+#include <linux/percpu.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
@@ -76,6 +77,8 @@ static inline void kunmap(struct page *p
* be used in IRQ contexts, so in some (very limited) cases we need
* it.
*/
+extern int kmap_atomic_seq[KM_TYPE_NR] __per_cpu_data;
+
static inline void *kmap_atomic(struct page *page, enum km_type type)
{
enum fixed_addresses idx;
@@ -93,7 +96,7 @@ static inline void *kmap_atomic(struct p
#endif
set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
__flush_tlb_one(vaddr);
-
+ this_cpu(kmap_atomic_seq[type])++;
return (void*) vaddr;
}
--- 2.5.30/include/asm-i386/kmap_types.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/asm-i386/kmap_types.h Fri Aug 9 17:36:42 2002
@@ -19,7 +19,8 @@ D(5) KM_BIO_SRC_IRQ,
D(6) KM_BIO_DST_IRQ,
D(7) KM_PTE0,
D(8) KM_PTE1,
-D(9) KM_TYPE_NR
+D(9) KM_FILEMAP,
+D(10) KM_TYPE_NR
};
#undef D
--- 2.5.30/include/asm-i386/processor.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/asm-i386/processor.h Fri Aug 9 17:36:42 2002
@@ -488,4 +488,6 @@ extern inline void prefetchw(const void
#endif
+#define ARCH_HAS_KMAP_FIXUP
+
#endif /* __ASM_I386_PROCESSOR_H */
--- 2.5.30/include/asm-ppc/kmap_types.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/asm-ppc/kmap_types.h Fri Aug 9 17:36:42 2002
@@ -15,6 +15,7 @@ enum km_type {
KM_BIO_DST_IRQ,
KM_PTE0,
KM_PTE1,
+ KM_FILEMAP,
KM_TYPE_NR
};
--- 2.5.30/include/asm-sparc/kmap_types.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/asm-sparc/kmap_types.h Fri Aug 9 17:36:42 2002
@@ -9,6 +9,7 @@ enum km_type {
KM_USER1,
KM_BIO_SRC_IRQ,
KM_BIO_DST_IRQ,
+ KM_FILEMAP,
KM_TYPE_NR
};
--- 2.5.30/include/asm-x86_64/kmap_types.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/asm-x86_64/kmap_types.h Fri Aug 9 17:36:42 2002
@@ -9,6 +9,7 @@ enum km_type {
KM_USER1,
KM_BIO_SRC_IRQ,
KM_BIO_DST_IRQ,
+ KM_FILEMAP,
KM_TYPE_NR
};
--- 2.5.30/include/linux/highmem.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/linux/highmem.h Fri Aug 9 17:36:42 2002
@@ -3,6 +3,7 @@
#include <linux/config.h>
#include <linux/fs.h>
+#include <asm/processor.h>
#include <asm/cacheflush.h>
#ifdef CONFIG_HIGHMEM
@@ -10,6 +11,7 @@
extern struct page *highmem_start_page;
#include <asm/highmem.h>
+#include <asm/kmap_types.h>
/* declarations for linux/mm/highmem.c */
unsigned int nr_free_highpages(void);
@@ -71,5 +73,83 @@ static inline void copy_user_highpage(st
kunmap_atomic(vfrom, KM_USER0);
kunmap_atomic(vto, KM_USER1);
}
+
+#if defined(CONFIG_HIGHMEM) && defined(ARCH_HAS_KMAP_FIXUP)
+/*
+ * Used when performing a copy_*_user while holding an atomic kmap
+ */
+struct copy_user_state {
+ struct page *page; /* The page which is kmap_atomiced */
+ void *kaddr; /* Its mapping */
+ enum km_type type; /* Its offset */
+ int src; /* 1: fixup ESI. 0: Fixup EDI */
+ int cpu; /* CPU which the kmap was taken on */
+ int seq; /* The kmap's sequence number */
+};
+
+/*
+ * `src' is true if the kmap_atomic virtual address is the source of the copy.
+ */
+static inline void *
+kmap_copy_user(struct copy_user_state *cus, struct page *page,
+ enum km_type type, int src)
+{
+ cus->page = page;
+ cus->kaddr = kmap_atomic(page, type);
+ if (PageHighMem(page)) {
+ cus->type = type;
+ cus->src = src;
+ BUG_ON(current->copy_user_state != NULL);
+ current->copy_user_state = cus;
+ }
+ return cus->kaddr;
+}
+
+static inline void kunmap_copy_user(struct copy_user_state *cus)
+{
+ if (PageHighMem(cus->page)) {
+ BUG_ON(current->copy_user_state != cus);
+ kunmap_atomic(cus->kaddr, cus->type);
+ current->copy_user_state = NULL;
+ cus->page = NULL; /* debug */
+ }
+}
+
+/*
+ * After a copy_*_user, the kernel virtual address may be different. So
+ * use kmap_copy_user_addr() to get the new value.
+ */
+static inline void *kmap_copy_user_addr(struct copy_user_state *cus)
+{
+ return cus->kaddr;
+}
+
+#else
+
+struct copy_user_state {
+ struct page *page;
+};
+
+/*
+ * This must be a macro because `type' may be undefined
+ */
+
+#define kmap_copy_user(cus, page, type, src) \
+ ({ \
+ (cus)->page = (page); \
+ kmap(page); \
+ })
+
+static inline void kunmap_copy_user(struct copy_user_state *cus)
+{
+ kunmap(cus->page);
+}
+
+static inline void *kmap_copy_user_addr(struct copy_user_state *cus)
+{
+ return page_address(cus->page);
+}
+
+#endif
#endif /* _LINUX_HIGHMEM_H */
--- 2.5.30/include/linux/sched.h~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/include/linux/sched.h Fri Aug 9 17:36:42 2002
@@ -245,6 +245,8 @@ extern struct user_struct root_user;
typedef struct prio_array prio_array_t;
+struct copy_user_state;
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
struct thread_info *thread_info;
@@ -366,6 +368,9 @@ struct task_struct {
/* journalling filesystem info */
void *journal_info;
struct dentry *proc_dentry;
+#ifdef CONFIG_HIGHMEM
+ struct copy_user_state *copy_user_state;
+#endif
};
extern void __put_task_struct(struct task_struct *tsk);
--- 2.5.30/mm/filemap.c~kmap_atomic_reads Fri Aug 9 17:36:42 2002
+++ 2.5.30-akpm/mm/filemap.c Fri Aug 9 17:37:02 2002
@@ -16,6 +16,7 @@
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/iobuf.h>
@@ -1020,18 +1021,20 @@ no_cached_page:
UPDATE_ATIME(inode);
}
-int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+int file_read_actor(read_descriptor_t *desc, struct page *page,
+ unsigned long offset, unsigned long size)
{
char *kaddr;
+ struct copy_user_state copy_user_state;
unsigned long left, count = desc->count;
if (size > count)
size = count;
- kaddr = kmap(page);
+ kaddr = kmap_copy_user(&copy_user_state, page, KM_FILEMAP, 1);
left = __copy_to_user(desc->buf, kaddr + offset, size);
- kunmap(page);
-
+ kunmap_copy_user(&copy_user_state);
+
if (left) {
size -= left;
desc->error = -EFAULT;
.
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 0:57 [patch 6/12] hold atomic kmaps across generic_file_read Andrew Morton @ 2002-08-10 1:33 ` Linus Torvalds 2002-08-10 3:53 ` Andrew Morton ` (2 more replies) 0 siblings, 3 replies; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 1:33 UTC (permalink / raw) To: Andrew Morton; +Cc: lkml On Fri, 9 Aug 2002, Andrew Morton wrote: > > This patch allows the kernel to hold atomic kmaps across copy_*_user. > >From an idea by Linus and/or Martin Bligh and/or Andrea. Argh. I've come to hate this approach, I should have told you. That magic %esi/%edi thing disturbs me, even if I was one of the people responsible for polluting your virgin brain with the idea. It just makes me squirm, not just because there may be memcopies that would prefer to use other registers, but because I just think it's too damn fragile to play with register contents from exceptions. So I would suggest instead: - do_page_fault() already does an if (in_interrupt() || !mm) goto no_context; and the fact is, the "in_interrupt()" should really be an "preempt_count()", since it's illegal to take a page fault not just in interrupts, but while non-preemptible in general. - now, if we do the copy_to/from_user() from a preempt-safe area, the _existing_ code (with the above one-liner fix) already returns a partial error (ie no new code-paths - copy_to/from_user() already has to handle the EFAULT case) - which means that we can do the kmap_copy_to_user() with _zero_ new code, by just wrapping it something like this: repeat: kmap_atomic(..); // this increments preempt count nr = copy_from_user(..); kunmap_atomic(..); /* bytes uncopied? */ if (nr) { if (!get_user(dummy, start_addr) && !get_user(dummy, end_addr)) goto repeat; .. handle EFAULT .. 
} Yes, the above requires some care about getting the details right, but notice how it requires absolutely no magic new code, and how it actually uses existing well-documented (and has-to-work-anyway) features. And notice how it works as a _much_ more generic fix - the above actually allows the true anti-deadlock thing where you can basically "test" whether the page is already mapped with zero cost, and if it isn't mapped (and you worry about deadlocking because you've already locked the page that we're writing into), you can make the slow path do a careful "look up the page tables by hand" thing. In other words, you can use the above trick to get rid of that horrible "__get_user(dummy..)" thing that is one huge big hack right now in generic_file_write(). (And yes, it requires incrementing the preempt count in kmap/kunmap even if preemption is otherwise disabled, big deal). Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 1:33 ` Linus Torvalds @ 2002-08-10 3:53 ` Andrew Morton 2002-08-10 3:53 ` Linus Torvalds 2002-08-11 0:34 ` Andrew Morton 2002-08-12 7:45 ` Rusty Russell 2 siblings, 1 reply; 52+ messages in thread From: Andrew Morton @ 2002-08-10 3:53 UTC (permalink / raw) To: Linus Torvalds; +Cc: lkml Linus Torvalds wrote: > > ... > repeat: > kmap_atomic(..); // this increments preempt count > nr = copy_from_user(..); > kunmap_atomic(..); > > /* bytes uncopied? */ > if (nr) { > if (!get_user(dummy, start_addr) && > !get_user(dummy, end_addr)) > goto repeat; > .. handle EFAULT .. > } > > Yes, the above requires some care about getting the details right, but > notice how it requires absolutely no magic new code, and how it actually > uses existing well-documented (and has-to-work-anyway) features. > OK. The kunmap_atomic() could happen on a different CPU, which will die with CONFIG_DEBUG_HIGHMEM but apart from that, looks much saner. We'll need need to manually fault in the user page on the generic_file_read() path before taking the kmap, because reading into an unmapped page is a common case: malloc/read. Actually, p = malloc(lots); write(fd, p, lots); isn't totally uncommon either, so the prefault on the write path would help highmem machines (in which case it'd be best to leave it there for all machines). > And notice how it works as a _much_ more generic fix - the above actually > allows the true anti-deadlock thing where you can basically "test" whether > the page is already mapped with zero cost, and if it isn't mapped (and you > worry about deadlocking because you've already locked the page that we're > writing into), you can make the slow path do a careful "look up the page > tables by hand" thing. I don't understand what the pagetable walk is here for? The kernel will sometimes need to read the page from disk to service the fault, but it's locked... 
We could drop the page lock before the __get_user, but that may break the expectations of some filesystem's prepare/commit pair. So I'm not clear on how we can lose the (racy, especially with preemption) "one huge big hack". The implicit use of preempt_count to mean "in kmap_copy_user" may turn ugly. But if so another tsk->flags bit can be created. We'll see... ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 3:53 ` Andrew Morton @ 2002-08-10 3:53 ` Linus Torvalds 2002-08-10 6:12 ` Andrew Morton 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 3:53 UTC (permalink / raw) To: Andrew Morton; +Cc: lkml On Fri, 9 Aug 2002, Andrew Morton wrote: > Linus Torvalds wrote: > > > > ... > > repeat: > > kmap_atomic(..); // this increments preempt count > > nr = copy_from_user(..); > > kunmap_atomic(..); > > > > /* bytes uncopied? */ > > if (nr) { > > if (!get_user(dummy, start_addr) && > > !get_user(dummy, end_addr)) > > goto repeat; > > .. handle EFAULT .. > > } > > > > Yes, the above requires some care about getting the details right, but > > notice how it requires absolutely no magic new code, and how it actually > > uses existing well-documented (and has-to-work-anyway) features. > > > > OK. The kunmap_atomic() could happen on a different CPU, which will > die with CONFIG_DEBUG_HIGHMEM but apart from that, looks much saner. No no no. It cannot happen on another CPU, since even if we take a page fault, we will all be inside a preempt-safe region (the first thing kmap_atomic() does is to increment the preempt count, the last thing the kunmap does is to decrement it). There's nothing that sleeps anywhere, there's nothing that can cause a schedule. Exactly because the page fault handler will _see_ that we're in a critical region, and will do the "fixup()" thing for us. > We'll need need to manually fault in the user page on the > generic_file_read() path before taking the kmap, because reading > into an unmapped page is a common case: malloc/read. I actually suspect that most reads are fairly small, and the page already exists. But who knows.. pre-loading is certainly easy (a single instruction). 
> > And notice how it works as a _much_ more generic fix - the above actually > > allows the true anti-deadlock thing where you can basically "test" whether > > the page is already mapped with zero cost, and if it isn't mapped (and you > > worry about deadlocking because you've already locked the page that we're > > writing into), you can make the slow path do a careful "look up the page > > tables by hand" thing. > > I don't understand what the pagetable walk is here for? > > The kernel will sometimes need to read the page from disk to service > the fault, but it's locked... > > We could drop the page lock before the __get_user, but that may > break the expectations of some filesystem's prepare/commit pair. The thing is, we can _notice_ when the bad case happens (same page), and we can for that special case do special logic. We couldn't do that before, simply because we can't afford to do the page table walk all the time. But we _can_ afford to do it for the rare cases that would trap (the deadlock being one of them). Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 3:53 ` Linus Torvalds @ 2002-08-10 6:12 ` Andrew Morton 2002-08-10 7:25 ` Linus Torvalds 0 siblings, 1 reply; 52+ messages in thread From: Andrew Morton @ 2002-08-10 6:12 UTC (permalink / raw) To: Linus Torvalds; +Cc: lkml Linus Torvalds wrote: > > ... > > We'll need need to manually fault in the user page on the > > generic_file_read() path before taking the kmap, because reading > > into an unmapped page is a common case: malloc/read. > > I actually suspect that most reads are fairly small, and the page already > exists. But who knows.. pre-loading is certainly easy (a single > instruction). These things can be measured ;) Across a `make -j6 bzImage' the kernel reads 166,000 pages via file_read_actor(). And 31,000 of those generated a fault in the copy_*_user. (It wrote 14,400 pages and, of course, none of those faulted). And if gcc is getting a fault 20% of the time, some other apps will get many more. Which implies that we must prefault the page in file_read_actor to get full benefit. And if we do that, I'll bet you Victor's dollar that the fixup path is never executed. I'd have to disable the prefault even to be able to test it. What would be nice is a way of formalising the prefault, to pin the mm's pages across the copy_*_user() in some manner, perhaps? ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 6:12 ` Andrew Morton @ 2002-08-10 7:25 ` Linus Torvalds 2002-08-10 9:08 ` Andrew Morton ` (3 more replies) 0 siblings, 4 replies; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 7:25 UTC (permalink / raw) To: Andrew Morton; +Cc: lkml On Fri, 9 Aug 2002, Andrew Morton wrote: > > What would be nice is a way of formalising the prefault, to pin > the mm's pages across the copy_*_user() in some manner, perhaps? Too easy to create a DoS-type attack with any trivial implementation. However, I don't think pinning is worthwhile, since even if the page goes away, the prefaulting was just a performance optimization. The code should work fine without it. In fact, it would probably be good to _not_ prefault for a development kernel, and verify that the code works without it. That way we can sleep safe in the knowledge that there isn't some race through code that requires the prefaulting.. I agree that if you could guarantee pinning the out-of-line code would be a bit simpler, but since we have to handle the EFAULT case anyway, I doubt that it is _that_ much simpler. Also, there are actually advantages to doing it the "hard" way. If we ever want to, we can actually play clever tricks that avoid doing the copy at all with the slow path. Example tricks: we can, if we want to, do a read() with no copy for a common case by adding a COW-bit to the page cache, and if you do aligned reads into a page that will fault on write, you can just map in the page cache page directly, mark it COW in the page cache (assuming the page count tells us we're the only user, of course), and mark it COW in the mapping. The nice thing is, this actually works correctly even if the user re-uses the area for reading multiple times (because the read() will trap not because the page isn't mapped, but because it is mapped COW on something that will write to user space). 
The unmapped case is better, though, since we don't need to do TLB invalidates for that case (which makes this potentially worthwhile even on SMP). I don't know if this is common, but it _would_ make read() have definite advantages over mmap() on files that are seldom written to or mmap'ed in a process (which is most of them, gut feel). In particular, once you fault for _one_ page, you can just map in as many pages as the read() tried to read in one go - so you can avoid any future work as well. Imagine doing a fstat(fd..) buf = aligned_malloc(st->st_size) read(fd, buf, st->st_size); and having it magically populate the VM directly with the whole file mapping, with _one_ failed page fault. And the above is actually a fairly common thing. See how many people have tried to optimize using mmap vs read, and what they _all_ really wanted was this "populate the pages in one go" thing. Is it a good idea? I don't know. But it would seem to fall very cleanly out of the atomic kmap path - without affecting the fast path at _all_. It would be a very specific and localized optimization, with no impact on the rest of the system, since it's using the same fixup() logic that we have to have anyway. (Yeah, the COW bit on the page cache is special, and it would need page mapping and obviously file writing to do something like .. if (page->flags & PAGE_COW) { page->flags &= ~PAGE_COW; if (page->count > 1) { remove-and-reinsert-new-page(); } } .. by hand before mapping it writable or writing to it. And the read() optimization would _only_ work if nobody is using mmap() on the file at the same time for those pages). This would definitely be 2.7.x material, I'm just explaining why I like the flexibility of the approach (as opposed to a very static "memcpy-only- special-case" thing). Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 7:25 ` Linus Torvalds @ 2002-08-10 9:08 ` Andrew Morton 2002-08-10 12:44 ` Daniel Phillips ` (2 subsequent siblings) 3 siblings, 0 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-10 9:08 UTC (permalink / raw) To: Linus Torvalds; +Cc: lkml Linus Torvalds wrote: > > On Fri, 9 Aug 2002, Andrew Morton wrote: > > > > What would be nice is a way of formalising the prefault, to pin > > the mm's pages across the copy_*_user() in some manner, perhaps? > > Too easy to create a DoS-type attack with any trivial implementation. hmm, yes. The pin has to be held across ->prepare_write. That tears it. > However, I don't think pinning is worthwhile, since even if the page goes > away, the prefaulting was just a performance optimization. The code should > work fine without it. In fact, it would probably be good to _not_ prefault > for a development kernel, and verify that the code works without it. That > way we can sleep safe in the knowledge that there isn't some race through > code that requires the prefaulting.. OK. That covers reads. But we need to do something short-term to get these large performance benefits, and I don't know how to properly fix the write deadlock. The choices here are: - live with the current __get_user thing - make filemap_nopage aware of the problem, via a new `struct page *' in task_struct (this would be very messy on the reader side). - or? (Of course, the write deadlock is a different and longstanding problem, and I don't _have_ to fix it here, weasel, weasel) > I agree that if you could guarantee pinning the out-of-line code would be > a bit simpler, but since we have to handle the EFAULT case anyway, I doubt > that it is _that_ much simpler. > > Also, there are actually advantages to doing it the "hard" way. If we ever > want to, we can actually play clever tricks that avoid doing the copy at > all with the slow path. 
> > Example tricks: we can, if we want to, do a read() with no copy for a > common case by adding a COW-bit to the page cache, and if you do aligned > reads into a page that will fault on write, you can just map in the page > cache page directly, mark it COW in the page cache (assuming the page > count tells us we're the only user, of course), and mark it COW in the > mapping. glibc malloc currently returns well-aligned-address + 8. If it were taught to return well-aligned-address+0 then presumably a lot of applications would automatically benefit from these zero-copy reads. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 7:25 ` Linus Torvalds 2002-08-10 9:08 ` Andrew Morton @ 2002-08-10 12:44 ` Daniel Phillips 2002-08-10 17:01 ` Linus Torvalds 2002-08-10 14:16 ` Rik van Riel 2002-08-10 17:36 ` Jamie Lokier 3 siblings, 1 reply; 52+ messages in thread From: Daniel Phillips @ 2002-08-10 12:44 UTC (permalink / raw) To: Linus Torvalds, Andrew Morton; +Cc: lkml On Saturday 10 August 2002 09:25, Linus Torvalds wrote: > Example tricks: we can, if we want to, do a read() with no copy for a > common case by adding a COW-bit to the page cache, and if you do aligned > reads into a page that will fault on write, you can just map in the page > cache page directly, mark it COW in the page cache (assuming the page > count tells us we're the only user, of course), and mark it COW in the > mapping. > > The nice thing is, this actually works correctly even if the user re-uses > the area for reading multiple times (because the read() will trap not > because the page isn't mapped, but because it is mapped COW on something > that will write to user space). The unmapped case is better, though, since > we don't need to do TLB invalidates for that case (which makes this > potentially worthwhile even on SMP). > > I don't know if this is common, but it _would_ make read() have definite > advantages over mmap() on files that are seldom written to or mmap'ed in a > process (which is most of them, gut feel). In particular, once you fault > for _one_ page, you can just map in as many pages as the read() tried to > read in one go - so you can avoid any future work as well. > > Imagine doing a > > fstat(fd..) > buf = aligned_malloc(st->st_size) > read(fd, buf, st->st_size); > > and having it magically populate the VM directly with the whole file > mapping, with _one_ failed page fault. And the above is actually a fairly > common thing. 
See how many people have tried to optimize using mmap vs > read, and what they _all_ really wanted was this "populate the pages in > one go" thing. > > Is it a good idea? I don't know. But it would seem to fall very cleanly > out of the atomic kmap path - without affecting the fast path at _all_. Sorry, this connection is too subtle for me. I see why we want to do this, and in fact I've been researching how to do it for the last few weeks, but I don't see how it's related to the atomic kmap path. Could you please explain, in words of one syllable? While I'm feeling disoriented, what exactly is the deadlock path for a write from a mmaped, not uptodate page, to the same page? And why does __get_user need to touch the page in *two* places to instantiate it? Also, how do we know the page won't get evicted before grab_cache_page gets to it? -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 12:44 ` Daniel Phillips @ 2002-08-10 17:01 ` Linus Torvalds 2002-08-10 18:16 ` Daniel Phillips 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 17:01 UTC (permalink / raw) To: Daniel Phillips; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Daniel Phillips wrote: > Sorry, this connection is too subtle for me. I see why we want to do > this, and in fact I've been researching how to do it for the last few > weeks, but I don't see how it's related to the atomic kmap path. Could > you please explain, in words of one syllable? We cannot do that optimization generally. I'll give you two reasons, both of which are sufficient on their own: - doing the page table walk is simply slower than doing the memcpy if the page is just there. So you have to have a good heuristic on when it might be worthwhile to do page table tricks. That heuristic should include "is the page directly accessible". Which is exactly what you get if you have a "atomic copy_to_user() that returns failure if it cannot be done atomically". - Even if walking the page tables were to be fast (ie ignoring #1), replacing a page in virtual memory is absolutely not. Especially not on SMP, where replacing a page in memory implies doing CPU crosscalls in order to invalidate the TLB on other CPU's for the old page. So before you do the "clever VM stuff", you had better have a heuristic that says "this page isn't mapped, so it doesn't need the expensive cross-calls". Again: guess what gives you pretty much exactly that heuristic? See? The fact is, "memcpy()" is damned fast for a lot of cases, because it natively uses the TLB and existing caches. It's slow for other cases, but you want to have a good _heuristic_ for when you might want to try to avoid the slow case without avoiding the fast case. Without that heuristic you can't do the optimization sanely. And obviously the heuristic should be a really fast one. 
The atomic copy_to_user() is the _perfect_ heuristic, because if it just does the memcpy there is absolutely zero overhead (it just does it). The overhead comes in only in the case where we're going to be slowed down by the fault anyway, _and_ where we want to do the clever tricks. > While I'm feeling disoriented, what exactly is the deadlock path for a > write from a mmaped, not uptodate page, to the same page? And why does > __get_user need to touch the page in *two* places to instantiate it? It doesn't touch it twice. It touches _both_ of the potential pages that will be involved in the memcpy - since the copy may well not be page-aligned in user space. The deadlock is when you do a write of a page into a mapping of the very same page that isn't yet mapped. What happens is: - the write has gotten the page lock. Since the write knows that the whole page is going to be overwritten, it is _not_ marked uptodate, and the old contents (garbage from the allocation) are left alone. - the copy_from_user() pagefaults and tries to bring in the _same_ page into user land. - that involves reading in the page and making sure it is up-to-date - but since the write has already locked the page, you now have a deadlock. The write cannot continue, since it needs the old contents, and the old contents cannot be read in since the write holds the page lock. The "copy_from_user() atomically" solves the problem quite nicely. 
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 17:01 ` Linus Torvalds @ 2002-08-10 18:16 ` Daniel Phillips 2002-08-10 18:32 ` Linus Torvalds 0 siblings, 1 reply; 52+ messages in thread From: Daniel Phillips @ 2002-08-10 18:16 UTC (permalink / raw) To: Linus Torvalds; +Cc: Andrew Morton, lkml On Saturday 10 August 2002 19:01, Linus Torvalds wrote: > On Sat, 10 Aug 2002, Daniel Phillips wrote: > > Sorry, this connection is too subtle for me. I see why we want to do > > this, and in fact I've been researching how to do it for the last few > > weeks, but I don't see how it's related to the atomic kmap path. Could > > you please explain, in words of one syllable? > > We cannot do that optimization generally. I'll give you two reasons, both > of which are sufficient on their own: > > - doing the page table walk is simply slower than doing the memcpy if the > page is just there. So you have to have a good heuristic on when it > might be worthwhile to do page table tricks. That heuristic should > include "is the page directly accessible". Which is exactly what you > get if you have a "atomic copy_to_user() that returns failure if it > cannot be done atomically". > > - Even if walking the page tables were to be fast (ie ignoring #1), > replacing a page in virtual memory is absolutely not. Especially not on > SMP, where replacing a page in memory implies doing CPU crosscalls in > order to invalidate the TLB on other CPU's for the old page. So before > you do the "clever VM stuff", you had better have a heuristic that says > "this page isn't mapped, so it doesn't need the expensive cross-calls". > > Again: guess what gives you pretty much exactly that heuristic? > > See? Yes, I see. Easy, when you put it that way. > The fact is, "memcpy()" is damned fast for a lot of cases, because it > natively uses the TLB and existing caches. 
It's slow for other cases, but > you want to have a good _heuristic_ for when you might want to try to > avoid the slow case without avoiding the fast case. Without that heuristic > you can't do the optimization sanely. > > And obviously the heuristic should be a really fast one. The atomic > copy_to_user() is the _perfect_ heuristic, because if it just does the > memcpy there is absolutely zero overhead (it just does it). The overhead > comes in only in the case where we're going to be slowed down by the fault > anyway, _and_ where we want to do the clever tricks. So the overhead consists of inc/deccing preempt_count around the copy_*_user, which fakes do_page_fault into forcing an early return. > > While I'm feeling disoriented, what exactly is the deadlock path for a > > write from a mmaped, not uptodate page, to the same page? And why does > > __get_user need to touch the page in *two* places to instantiate it? > > It doesn't touch it twice. It touches _both_ of the potential pages that > will be involved in the memcpy - since the copy may well not be > page-aligned in user space. Oh duh. I stared at that for the longest time, without realizing there's no alignment requirement. > The deadlock is when you do a write of a page into a mapping of the very > same page that isn't yet mapped. What happens is: > > - the write has gotten the page lock. Since the wrie knows that the whole > page is going to be overwritten, it is _not_ marked uptodate, and the > old contents (garbage from the allocation) are left alone. > > - the copy_from_user() pagefaults and tries to bring in the _same_ page > into user land. > > - that involves reading in the page and making sure it is up-to-date > > - but since the write has already locked the page, you now have a > deadlock. The write cannot continue, since it needs the old contents, > and the old contents cannot be read in since the write holds the page > lock. > > The "copy_from_user() atomically" solves the problem quite nicely. 
If the > atomic copy fails, we can afford to do the things that we cannot afford to > do normally (because the thing never triggers under real load, and real > load absolutely _needs_ to not try to get the page up-to-date before the > write). > > So with the atomic copy-from-user, we can trap the problem only when it is > a problem, and go full speed normally. That's all crystal clear now. (Though the way do_page_fault finesses copy_from_user into returning early is a little - how should I put it - opaque. Yes, I see it, but...) I'm sure you're aware there's a lot more you can do with these tricks than just zero-copy read - there's zero-copy write as well, and there are both of the above, except a full pte page at a time. There could even be a file to file copy if there were an interface for it. I don't see what prevents the read optimization even with a mmapped page, the page just becomes CoW in all of the mapped region, the read destination and the page cache. -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 18:16 ` Daniel Phillips @ 2002-08-10 18:32 ` Linus Torvalds 2002-08-10 18:46 ` Daniel Phillips 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 18:32 UTC (permalink / raw) To: Daniel Phillips; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Daniel Phillips wrote: > > > > And obviously the heuristic should be a really fast one. The atomic > > copy_to_user() is the _perfect_ heuristic, because if it just does the > > memcpy there is absolutely zero overhead (it just does it). The overhead > > comes in only in the case where we're going to be slowed down by the fault > > anyway, _and_ where we want to do the clever tricks. > > So the overhead consists of inc/deccing preempt_count around the > copy_*_user, which fakes do_page_fault into forcing an early return. Well, I'm actually expecting that preempt will at some day be the normal thing to do, so the inc/dec is not so much an overhead of the heuristic, but a direct result of using "kmap_atomic()" in the first place. But yes, for the non-preempters, there would be the overhead of doing the preempt count thing. That is a nice per-cpu non-atomic thing, and in a cacheline that has been brought in as part of the system call logic anyway. It will dirty it, though - and I don't know if that is the "normal" state of that line otherwise. [ Side note - one of the reasons I'd potentially like to move the thread_info thing to the _top_ of the stack (instead of the bottom) is that that way it could share the cacheline with the kernel stack that gets dirtied on every kernel entry anyway. Dunno if it matters. ] > > It doesn't touch it twice. It touches _both_ of the potential pages that > > will be involved in the memcpy - since the copy may well not be > > page-aligned in user space. > > Oh duh. I stared at that for the longest time, without realizing there's no > alignment requirement. 
Well, I will not claim that that code is very pretty or obvious. Also, as-is, nobody has ever been able to prove that the pre-fetching as it stands now really fixes the race, although it certainly makes it practically speaking impossible to trigger. > > So with the atomic copy-from-user, we can trap the problem only when it is > > a problem, and go full speed normally. > > That's all crystal clear now. (Though the way do_page_fault finesses > copy_from_user into returning early is a little - how should I put it - > opaque. Yes, I see it, but...) Well, yes. The whole "fixup" thing is certainly not the most obvious thing ever written (and you can thank Richard Henderson for the approach), but it has turned out to be a very useful thing to have. It removed all the races we had between checking whether an area was properly mapped and actually accessing that area (ie the old "verify_area()" approach), and it's extremely efficient for the fast path (the fault path is a bit less so, but ..) > I'm sure you're aware there's a lot more you can do with these tricks > than just zero-copy read - there's zero-copy write as well, and there > are both of the above, except a full pte page at a time. There could > even be a file to file copy if there were an interface for it. The file-to-file copy is really nasty to do, for the simple reason that one page really wants to have just one "owner". So while doing a file-to-file copy is certainly possible, it tends to imply removing the cached page from the source and inserting it into the destination. Which is the right thing to do for streaming copies, but the _wrong_ thing to do if the source is then used again. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 18:32 ` Linus Torvalds @ 2002-08-10 18:46 ` Daniel Phillips 0 siblings, 0 replies; 52+ messages in thread From: Daniel Phillips @ 2002-08-10 18:46 UTC (permalink / raw) To: Linus Torvalds; +Cc: Andrew Morton, lkml On Saturday 10 August 2002 20:32, Linus Torvalds wrote: > On Sat, 10 Aug 2002, Daniel Phillips wrote: > > I'm sure you're aware there's a lot more you can do with these tricks > > than just zero-copy read - there's zero-copy write as well, and there > > are both of the above, except a full pte page at a time. There could > > even be a file to file copy if there were an interface for it. > > The file-to-file copy is really nasty to do, for the simple reason that > one page really wants to have just one "owner". So while doing a > file-to-file copy is certainly possible, it tends to imply removing the > cached page from the source and inserting it into the destination. > > Which is the right thing to do for streaming copies, but the _wrong_ thing > to do if the source is then used again. If the source is only used for reading it's fine, and you'd know that in advance if the file is opened r/o. I will admit that this one is pretty far out there, there is just a ton of meat and potatoes cleanup work to do before these deathray-type features get to the top of the stack. But when they do, it's going to be fun. -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 7:25 ` Linus Torvalds 2002-08-10 9:08 ` Andrew Morton 2002-08-10 12:44 ` Daniel Phillips @ 2002-08-10 14:16 ` Rik van Riel 2002-08-10 17:03 ` Linus Torvalds 2002-08-10 17:36 ` Jamie Lokier 3 siblings, 1 reply; 52+ messages in thread From: Rik van Riel @ 2002-08-10 14:16 UTC (permalink / raw) To: Linus Torvalds; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Linus Torvalds wrote: > and having it magically populate the VM directly with the whole file > mapping, with _one_ failed page fault. And the above is actually a fairly > common thing. See how many people have tried to optimize using mmap vs > read, and what they _all_ really wanted was this "populate the pages in > one go" thing. If this is worth it, chances are prefaulting at mmap() time could also be worth trying ... hmmm ;) Rik -- Bravely reimplemented by the knights who say "NIH". http://www.surriel.com/ http://distro.conectiva.com/ ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 14:16 ` Rik van Riel @ 2002-08-10 17:03 ` Linus Torvalds 0 siblings, 0 replies; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 17:03 UTC (permalink / raw) To: Rik van Riel; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Rik van Riel wrote: > On Sat, 10 Aug 2002, Linus Torvalds wrote: > > > and having it magically populate the VM directly with the whole file > > mapping, with _one_ failed page fault. And the above is actually a fairly > > common thing. See how many people have tried to optimize using mmap vs > > read, and what they _all_ really wanted was this "populate the pages in > > one go" thing. > > If this is worth it, chances are prefaulting at mmap() time > could also be worth trying ... hmmm ;) Maybe, maybe not. The advantage of read() is that it contains an implicit "madvise()", since the read _tells_ us that it wants X pages. A page fault does not tell us, and prefaulting can hurt us. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 7:25 ` Linus Torvalds ` (2 preceding siblings ...) 2002-08-10 14:16 ` Rik van Riel @ 2002-08-10 17:36 ` Jamie Lokier 2002-08-10 17:46 ` Linus Torvalds 3 siblings, 1 reply; 52+ messages in thread From: Jamie Lokier @ 2002-08-10 17:36 UTC (permalink / raw) To: Linus Torvalds; +Cc: Andrew Morton, lkml Linus Torvalds wrote: > Imagine doing a > > fstat(fd..) > buf = aligned_malloc(st->st_size) > read(fd, buf, st->st_size); > > and having it magically populate the VM directly with the whole file > mapping, with _one_ failed page fault. And the above is actually a fairly > common thing. See how many people have tried to optimize using mmap vs > read, and what they _all_ really wanted was this "populate the pages in > one go" thing. This will only provide the performance benefit when `aligned_malloc' returns "fresh" memory, i.e. memory that has never been written to. Assuming most programs use plain old `malloc', which could be taught to align nicely, then the optimisation might occur when a program starts up, but later on it's more likely to return memory which has been written to and previously freed. So the performance becomes unpredictable. But it's a nice way to optimise if you are _deliberately_ optimising a user space program. First call mmap() to get some fresh pages, then call read() to fill them. Slower on kernels without the optimisation, fast on kernels with it. :-) -- Jamie ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 17:36 ` Jamie Lokier @ 2002-08-10 17:46 ` Linus Torvalds 2002-08-10 17:55 ` Jamie Lokier 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 17:46 UTC (permalink / raw) To: Jamie Lokier; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Jamie Lokier wrote: > > This will only provide the performance benefic when `aligned_malloc' > return "fresh" memory, i.e. memory that has never been written to. Absolutely. Think of the optimization as a way to give application writers a new way of being efficient. In particular, I remember when the gcc people were worried about the most efficient way to read in a file for preprocessing (Neil Booth, mainly). Neil did all these timings on where the cut-off point was for using mmap vs just using read(). For people like that, wouldn't it be nice to just be able to tell them: if you do X, we guarantee that you'll get optimal zero-copy performance for reading a file. > But it's a nice way to optimise if you are _deliberately_ optimising a > user space program. Exactly. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 17:46 ` Linus Torvalds @ 2002-08-10 17:55 ` Jamie Lokier 2002-08-10 18:42 ` Linus Torvalds 0 siblings, 1 reply; 52+ messages in thread From: Jamie Lokier @ 2002-08-10 17:55 UTC (permalink / raw) To: Linus Torvalds; +Cc: Andrew Morton, lkml Linus Torvalds wrote: > For people like that, wouldn't it be nice to just be able to tell them: if > you do X, we guarantee that you'll get optimal zero-copy performance for > reading a file. Don't forget to include the need for mmap(... MAP_ANON ...) prior to the read. Given the user will need to establish a new mapping anyway, why pussy foot around with subtleties? Just add a MAP_PREFAULT flag to mmap(), which reads the whole file and maps it before returning. -- Jamie ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 17:55 ` Jamie Lokier @ 2002-08-10 18:42 ` Linus Torvalds 2002-08-10 18:52 ` Jeff Garzik 2002-08-10 19:10 ` Jamie Lokier 0 siblings, 2 replies; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 18:42 UTC (permalink / raw) To: Jamie Lokier; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Jamie Lokier wrote: > > Don't forget to include the need for mmap(... MAP_ANON ...) prior to the > read. Ahhah! But I _don't_. Yes, with read() you have to do a brk() or mmap(MAP_ANON) (and brk() is the _much_ faster of the two). But with mmap() you need to do a fstat() and a munmap() (while with read you just re-use the area, and we'd do the right thing thanks to the COW-ness of the pages). So I don't think the MAP_ANON thing is a loss for the read. And read() is often the much nicer interface, simply because you don't need to worry about the size of the file up-front etc. Also, because of the delayed nature of mmap()/fault, it has some strange behaviour if somebody is editing your file in the middle of the compile - with read() you might get strange syntax errors if somebody changes the file half-way, but with mmap() your preprocessor may get a SIGSEGV in the middle just because the file was truncated.. In general, I think read() tends to be the right (and simpler) interface to use if you don't explicitly want to take advantage of the things mmap offers (on-demand mappings, no-write-back pageouts, VM coherency etc). Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 18:42 ` Linus Torvalds @ 2002-08-10 18:52 ` Jeff Garzik 2002-08-10 19:01 ` Christoph Hellwig 2002-08-12 0:18 ` Albert D. Cahalan 2002-08-10 19:10 ` Jamie Lokier 1 sibling, 2 replies; 52+ messages in thread From: Jeff Garzik @ 2002-08-10 18:52 UTC (permalink / raw) To: Linus Torvalds; +Cc: Jamie Lokier, Andrew Morton, lkml Linus Torvalds wrote: > And read() is often the much nicer interface, simply because you don't > need to worry about the size of the file up-front etc. > > Also, because of the delayed nature of mmap()/fault, it has some strange > behaviour if somebody is editing your file in the middle of the compile - > with read() you might get strange syntax errors if somebody changes the > file half-way, but with mmap() your preprocessor may get a SIGSEGV in the > middle just because the file was truncated.. > > In general, I think read() tends to be the right (and simpler) interface > to use if you don't explicitly want to take advantage of the things mmap > offers (on-demand mappings, no-write-back pageouts, VM coherency etc). While working on a race-free rewrite of cp/mv/rm (suggested by Al), I did overall-time benchmarks on read+write versus sendfile/stat versus mmap/stat, and found that pretty much the fastest way under Linux 2.2, 2.4, and solaris was read+write of PAGE_SIZE, or PAGE_SIZE*2 chunks. [obviously, 2.2 and solaris didn't do sendfile test] The overhead of the extra stat and mmap/munmap syscalls seemed to be the thing that slowed things down. sendfile was pretty fast, but still an extra syscall, with an annoyingly large error handling case [only certain files can be sendfile'd] I sure would like an O_STREAMING flag, though... let a user app hint to the system that the pages it is reading or writing are perhaps less likely to be reused, or access randomly.... A copy-file syscall would be nice, too, but that's just laziness talking.... 
Jeff ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 18:52 ` Jeff Garzik @ 2002-08-10 19:01 ` Christoph Hellwig 2002-08-10 19:04 ` Jeff Garzik 2002-08-12 15:20 ` Ingo Oeser 2002-08-12 0:18 ` Albert D. Cahalan 1 sibling, 2 replies; 52+ messages in thread From: Christoph Hellwig @ 2002-08-10 19:01 UTC (permalink / raw) To: Jeff Garzik; +Cc: Linus Torvalds, Jamie Lokier, Andrew Morton, lkml On Sat, Aug 10, 2002 at 02:52:49PM -0400, Jeff Garzik wrote: > While working on a race-free rewrite of cp/mv/rm (suggested by Al), I > did overall-time benchmarks on read+write versus sendfile/stat versus > mmap/stat, and found that pretty much the fastest way under Linux 2.2, > 2.4, and solaris was read+write of PAGE_SIZE, or PAGE_SIZE*2 chunks. > [obviously, 2.2 and solaris didn't do sendfile test] Solaris 9 (and Solaris 8 with a certain patch) support Linux-style sendfile(). Linux 2.5 on the other hand doesn't support sendfile to files anymore.. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 19:01 ` Christoph Hellwig @ 2002-08-10 19:04 ` Jeff Garzik 2002-08-12 15:20 ` Ingo Oeser 1 sibling, 0 replies; 52+ messages in thread From: Jeff Garzik @ 2002-08-10 19:04 UTC (permalink / raw) To: Christoph Hellwig; +Cc: Linus Torvalds, Jamie Lokier, Andrew Morton, lkml Christoph Hellwig wrote: > On Sat, Aug 10, 2002 at 02:52:49PM -0400, Jeff Garzik wrote: > >>While working on a race-free rewrite of cp/mv/rm (suggested by Al), I >>did overall-time benchmarks on read+write versus sendfile/stat versus >>mmap/stat, and found that pretty much the fastest way under Linux 2.2, >>2.4, and solaris was read+write of PAGE_SIZE, or PAGE_SIZE*2 chunks. >>[obviously, 2.2 and solaris didn't do sendfile test] > > > Solaris 9 (and Solaris 8 with a certain patch) support Linux-style > sendfile(). Linux 2.5 on the other hand doesn't support sendfile to > files anymore.. Really? Bummer :) That was a useful hack for some cases... ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 19:01 ` Christoph Hellwig 2002-08-10 19:04 ` Jeff Garzik @ 2002-08-12 15:20 ` Ingo Oeser 1 sibling, 0 replies; 52+ messages in thread From: Ingo Oeser @ 2002-08-12 15:20 UTC (permalink / raw) To: Christoph Hellwig; +Cc: linux-kernel, linux-fsdevel On Sat, Aug 10, 2002 at 08:01:17PM +0100, Christoph Hellwig wrote: > Solaris 9 (and Solaris 8 with a certain patch) support Linux-style > sendfile(). Linux 2.5 on the other hand doesn't support sendfile to > files anymore.. Why got it broken? It was useful for copying and showing the progress at the same time without touching the own VM. Regards Ingo Oeser -- Science is what we can tell a computer. Art is everything else. --- D.E.Knuth ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 18:52 ` Jeff Garzik 2002-08-10 19:01 ` Christoph Hellwig @ 2002-08-12 0:18 ` Albert D. Cahalan 2002-08-12 14:11 ` Jeff Garzik 1 sibling, 1 reply; 52+ messages in thread From: Albert D. Cahalan @ 2002-08-12 0:18 UTC (permalink / raw) To: Jeff Garzik; +Cc: Linus Torvalds, Jamie Lokier, Andrew Morton, lkml Jeff Garzik writes: > Linus Torvalds wrote: > The overhead of the extra stat and mmap/munmap syscalls seemed to be the > thing that slowed things down. sendfile was pretty fast, but still an > extra syscall, with an annoyingly large error handling case [only > certain files can be sendfile'd] That error handling case sure does discourage sendfile use. > I sure would like an O_STREAMING flag, though... let a user app hint to > the system that the pages it is reading or writing are perhaps less > likely to be reused, or access randomly.... A copy-file syscall would > be nice, too, but that's just laziness talking.... You have a laptop computer with a USB-connected Ethernet. You mount a NetApp or similar box via the SMB/CIFS protocol. You see a multi-gigabyte file. You make a copy... ouch!!! For each gigabyte, you hog the network for an hour. Now let's say this file is for a MacOS app. You have to preserve the creator, file type, resource fork, etc. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 0:18 ` Albert D. Cahalan @ 2002-08-12 14:11 ` Jeff Garzik 2002-08-12 14:46 ` David Woodhouse 0 siblings, 1 reply; 52+ messages in thread From: Jeff Garzik @ 2002-08-12 14:11 UTC (permalink / raw) To: Albert D. Cahalan; +Cc: Linus Torvalds, Jamie Lokier, Andrew Morton, lkml Albert D. Cahalan wrote: > Jeff Garzik writes: >>I sure would like an O_STREAMING flag, though... let a user app hint to >>the system that the pages it is reading or writing are perhaps less >>likely to be reused, or access randomly.... A copy-file syscall would >>be nice, too, but that's just laziness talking.... > > > You have a laptop computer with a USB-connected Ethernet. > You mount a NetApp or similar box via the SMB/CIFS protocol. > You see a multi-gigabyte file. You make a copy... ouch!!! > For each gigabyte, you hog the network for an hour. > Now let's say this file is for a MacOS app. You have to > preserve the creator, file type, resource fork, etc. /bin/cp has these problems regardless of whether or not it uses a copy-file syscall. Jeff ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 14:11 ` Jeff Garzik @ 2002-08-12 14:46 ` David Woodhouse 0 siblings, 0 replies; 52+ messages in thread From: David Woodhouse @ 2002-08-12 14:46 UTC (permalink / raw) To: Jeff Garzik Cc: Albert D. Cahalan, Linus Torvalds, Jamie Lokier, Andrew Morton, lkml jgarzik@mandrakesoft.com said: > > > A copy-file syscall would be nice, too, but that's just laziness > > > talking.... > > You have a laptop computer with a USB-connected Ethernet. > > You mount a NetApp or similar box via the SMB/CIFS protocol. > > You see a multi-gigabyte file. You make a copy... ouch!!! > > For each gigabyte, you hog the network for an hour. > /bin/cp has these problems regardless of whether or not it uses a > copy-file syscall. Nope. There was a reason he specified SMB/CIFS. -- dwmw2 ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 18:42 ` Linus Torvalds 2002-08-10 18:52 ` Jeff Garzik @ 2002-08-10 19:10 ` Jamie Lokier 2002-08-10 22:42 ` Linus Torvalds 1 sibling, 1 reply; 52+ messages in thread From: Jamie Lokier @ 2002-08-10 19:10 UTC (permalink / raw) To: Linus Torvalds; +Cc: Andrew Morton, lkml Linus Torvalds wrote: > Yes, with read() you have to do a brk() or mmap(MAP_ANON) (and brk() is > the _much_ faster of the two). Ouch, that means a typical user-space program/library that wants to use this technique has to have an intimate relationship with its malloc() implementation: it's not in general safe to call brk() unless you are the memory allocator. (Yes, I know you can call brk() with Glibc's malloc(), but... dependencies upon dependencies!) And even when it is safe to allocate with brk(), there's no safe way to free that memory. So this would be fine for the stdio built in to Glibc, perhaps. > But with mmap() you need to do a fstat() and a munmap() (while with read > you just re-use the area, and we'd do the right thing thanks to the > COW-ness of the pages). Granted, you might re-use the area if you're doing block reads like stdio, compiler, XML parser etc. But not a few programs want to: 1. Allocate enough memory to hold whole file. 2. Load file into memory. > Also, because of the delayed nature of mmap()/fault, it has some strange > behaviour if somebody is editing your file in the middle of the compile - > with read() you might get strange syntax errors if somebody changes the > file half-way, but with mmap() your preprocessor may get a SIGSEGV in the > middle just because the file was truncated.. Isn't that SIGBUS :-) (Not that the architectures are at all consistent on this..) > In general, I think read() tends to be the right (and simpler) interface > to use if you don't explicitly want to take advantage of the things mmap > offers (on-demand mappings, no-write-back pageouts, VM coherency etc). 
I agree, although I think this particular optimisation requires some quite unusual preparation by user space - I still think GCC would need to call open/fstat/mmap/read/munmap/close. You've rightly pointed out that memcpy() is faster for a page, rather than VM tweaking. But this isn't true of large reads, is it? Then the TLB invalidation cost could, in principle, be amortised over the whole large read. -- Jamie ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 19:10 ` Jamie Lokier @ 2002-08-10 22:42 ` Linus Torvalds 2002-08-11 3:17 ` Simon Kirby 2002-08-11 8:00 ` Daniel Phillips 0 siblings, 2 replies; 52+ messages in thread From: Linus Torvalds @ 2002-08-10 22:42 UTC (permalink / raw) To: Jamie Lokier; +Cc: Andrew Morton, lkml On Sat, 10 Aug 2002, Jamie Lokier wrote: > > You've rightly pointed out that memcpy() is faster for a page, rather > than VM tweaking. But this isn't true of large reads, is it? > Then the TLB invalidation cost could, in principle, be amortised over > the whole large read. Yes. We could make the special case be just for large reads, and amortise the cost of VM handling etc. That's especially true since a single page table lookup can look up a lot of pages, so you amortise more than just the TLB invalidation cost. I have no idea where the cut-off point would be, and it will probably depend quite a lot on whether the reader will write to the pages it read from (causing COW faults) or not. If the read()'er will write to them, VM tricks probably never pay off (since you will just be delaying the copy and adding more page faults), so the question is what the common behaviour is. I _suspect_ that the common behaviour is to read just a few kB at a time and that it basically doesn't ever really pay to play VM games. (The "repeated read of a few kB" case is also likely to be the best-performing behaviour, simply because it's usually _better_ to do many small reads that re-use the cache than it is to do one large read that blows your cache and TLB. Of course, that all depends on what your patterns are after the read - do you want to have the whole file accessible or not). Anyway, this really is more food for thought than anything else, since this is definitely not anything for 2.6.x. The page cache impact of doing VM games is going to be noticeable too (because of the COW-by-hand issues), and the VM behaviour in general changes. 
For example, what do you do when somebody has a COW-page mapped into it's VM space and you want to start paging stuff out? There are "interesting" cases that just may mean that doing the COW thing is a really stupid thing to do, even if it is intriguing to _think_ about it. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 22:42 ` Linus Torvalds @ 2002-08-11 3:17 ` Simon Kirby 2002-08-11 6:07 ` Andrew Morton 2002-08-11 18:52 ` Linus Torvalds 2002-08-11 8:00 ` Daniel Phillips 1 sibling, 2 replies; 52+ messages in thread From: Simon Kirby @ 2002-08-11 3:17 UTC (permalink / raw) To: Linus Torvalds, Andrew Morton, linux-kernel On Sat, Aug 10, 2002 at 03:42:29PM -0700, Linus Torvalds wrote: > I _suspect_ that the common behaviour is to read just a few kB at a time > and that is basically doesn't ever really pay to play VM games. > > (The "repeated read of a few kB" case is also likely to be the > best-performing behaviour, simply because it's usually _better_ to do many > small reads that re-use the cache than it is to do one large read that > blows your cache and TLB. Of course, that all depends on what your > patterns are after the read - do you want to have the whole file > accessible or not). This is only somewhat related, but I'm wondering if the cache effects also apply to readahead block sizes. Sequential page-sized read()s from a file causes readahead to kick in and grow in size. Over time, it ends up using very large blocks. Would it be beneficial to keep the readahead size smaller so that it still stays in cache? Also, this use of large blocks shouldn't really matter, but I'm seeing a problem where the process ends up sleeping for most of the time, switching between CPU and I/O rather than simply having the I/O for the next read() occur in advance of the current read(). The problem appears to be that readahead isn't awakening the process to present partial results. The blocks get so large that the process switches between running and being blocked in I/O, which decreases overall performance (think of a "grep" process that at 100% CPU can just saturate the disk I/O). 
Working correctly, readahead would not get in the way, it would just have blocks ready for "grep" to use, and grep would use all of the CPU not being used for I/O. Currently, grep sleeps 50% of the time waiting on I/O. This problem is showing up with NFS over a slow link, causing streaming audio to be unusable. On the other end of the speed scale, it probably also affects "grep" and other applications reading from hard disks, etc. To demonstrate the problem reliably, I've used "strace -r cat" on a floppy, which is a sufficiently slow medium. :) This is on a 2.4.19 kernel, but 2.5 behaves similarly. Note how the readahead starts small and gets very large. Also, note how the start of the first larger readahead occurs shortly after a previous read, and that it blocks early even though the data should already be there (4.9 seconds). It also appears to stumble a bit later on. read() times show up as the relative time for the following write() (which is going /dev/null): 0.000294 open("a/bigzero", O_RDONLY|O_LARGEFILE) = 3 0.000258 fstat64(3, {st_mode=S_IFREG|0775, st_size=914432, ...}) = 0 0.000275 brk(0x804e000) = 0x804e000 0.000223 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.593615 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000807 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000730 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000878 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000209 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000642 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000304 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000482 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.647682 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000537 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000687 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000469 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000433 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000183 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000186 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.649228 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000541 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000500 read(3, "\0\0\0\0\0"..., 4096) = 4096 4.897722 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000535 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000190 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000505 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000199 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000486 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000485 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000193 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000434 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000858 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000221 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001148 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000243 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000877 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000247 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000649 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000220 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000497 read(3, "\0\0\0\0\0"..., 4096) = 4096 6.615653 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.002430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000283 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000857 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000217 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000449 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000537 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000198 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000436 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000455 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000530 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000283 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000475 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000434 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.001341 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000470 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.001626 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.001282 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000278 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000481 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000186 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000467 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000581 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000203 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000662 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000199 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000492 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000201 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000484 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000189 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000433 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000440 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000188 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001342 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000328 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000839 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000449 read(3, "\0\0\0\0\0"..., 4096) = 4096 1.031732 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000531 read(3, "\0\0\0\0\0"..., 4096) = 4096 6.154301 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000544 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000198 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000740 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000250 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000723 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000186 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000444 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000435 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001227 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000196 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000454 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000597 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000207 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000497 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000196 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000199 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000452 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000727 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000221 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000486 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000187 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000650 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001078 read(3, "\0\0\0\0\0"..., 4096) = 4096 7.004463 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000538 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000440 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000506 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001446 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000283 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000469 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000494 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000175 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000487 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000193 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000484 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000684 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000220 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000500 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000201 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000484 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000189 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000543 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000190 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 7.407175 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000530 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000435 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000447 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000446 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000183 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000954 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000410 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000478 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000239 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000435 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000627 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000205 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001126 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000468 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000537 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000221 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000489 read(3, "\0\0\0\0\0"..., 4096) = 4096 3.391947 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000529 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000573 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000193 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000486 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000436 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000531 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000588 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000444 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000447 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000425 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.001018 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000811 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000536 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000258 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000531 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000207 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000487 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000525 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000231 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000439 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000175 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000425 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000485 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000547 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000479 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000633 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000241 write(1, "\0\0\0\0\0"..., 4096) = 4096 
0.000434 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 1.144692 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000533 read(3, "\0\0\0\0\0"..., 4096) = 4096 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 0.000434 read(3, "\0\0\0\0\0"..., 4096) = 1024 0.000318 write(1, "\0\0\0\0\0"..., 1024) = 1024 0.000276 read(3, "", 4096) = 0 0.000184 close(3) = 0 0.000259 _exit(0) = ? We probably want huge readahead to occur in the case where programs are competing for I/O from the same device, but the latency here from slow devices is horrible. Simon- [ Stormix Technologies Inc. ][ NetNation Communications Inc. ] [ sim@stormix.com ][ sim@netnation.com ] [ Opinions expressed are not necessarily those of my employers. ] ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 3:17 ` Simon Kirby @ 2002-08-11 6:07 ` Andrew Morton 2002-08-11 8:46 ` Simon Kirby 2002-08-11 18:52 ` Linus Torvalds 1 sibling, 1 reply; 52+ messages in thread From: Andrew Morton @ 2002-08-11 6:07 UTC (permalink / raw) To: Simon Kirby; +Cc: linux-kernel Simon Kirby wrote: > > On Sat, Aug 10, 2002 at 03:42:29PM -0700, Linus Torvalds wrote: > > > I _suspect_ that the common behaviour is to read just a few kB at a time > > and that is basically doesn't ever really pay to play VM games. > > > > (The "repeated read of a few kB" case is also likely to be the > > best-performing behaviour, simply because it's usually _better_ to do many > > small reads that re-use the cache than it is to do one large read that > > blows your cache and TLB. Of course, that all depends on what your > > patterns are after the read - do you want to have the whole file > > accessible or not). > > This is only somewhat related, but I'm wondering if the cache effects > also apply to readahead block sizes. Sequential page-sized read()s from > a file causes readahead to kick in and grow in size. Over time, it ends > up using very large blocks. Would it be beneficial to keep the readahead > size smaller so that it still stays in cache? > > Also, this use of large blocks shouldn't really matter, but I'm seeing a > problem where the process ends up sleeping for most of the time, > switching between CPU and I/O rather than simply having the I/O for the > next read() occur in advance of the current read(). > > The problem appears to be that readahead isn't awakening the process to > present partial results. The blocks get so large that the process > switches between running and being blocked in I/O, which decreases > overall performance (think of a "grep" process that at 100% CPU can just > saturate the disk I/O). 
Working correctly, readahead would not get in > the way, it would just have blocks ready for "grep" to use, and grep > would use all of the CPU not being used for I/O. Currently, grep sleeps > 50% of the time waiting on I/O. This is interesting. The 2.5 readahead sort-of does the wrong thing for you. Note how fs/mpage.c:mpage_end_io_read() walks the BIO's pages backwards when unlocking the pages. And also note that the BIOs are 64kbytes, and the readahead window is up to 128k, etc. See, a boring old commodity disk drive will read 10,000 pages per second. The BIO code there is designed to *not* result in 10,000 context-switches per second in the common case. If the reader is capable of processing the data faster than the disk then hold them off and present them with large chunks of data. And that's usually the right thing to do, because most bulk readers read fast - if your grep is really spending 50% of its time not asleep then you either have a very slow grep or a very fast IO system. It's applications such as gzip, which perform a significant amount of work crunching on the data which are interesting to study, and which benefit from readahead. But that's all disks. You're not talking about disks. > This problem is showing up with NFS over a slow link, causing streaming > audio to be unusable. On the other end of the speed scale, it probably > also affects "grep" and other applications reading from hard disks, etc. Well, the question is "is the link saturated"? If so then it's not solvable. If it is not then that's a bug. > To demonstrate the problem reliably, I've used "strace -r cat" on a > floppy, which is a sufficiently slow medium. :) This is on a 2.4.19 > kernel, but 2.5 behaves similarly. Note how the readahead starts small > and gets very large. Also, note how the start of the first larger > readahead occurs shortly after a previous read, and that it blocks early > even though the data should already be there (4.9 seconds). 
It also > appears to stumble a bit later on. read() times show up as the relative > time for the following write() (which is going /dev/null): OK, it's doing 128k of readahead there, which is a bit gross for a floppy. You can tune that down with `blockdev --setra N /dev/floppy'. The defaults are not good, and I do intend to go through the various block drivers and teach them to set their initial readahead size to something appropriate. But in this example, where the test is `cat', there is nothing to be gained, I expect. The disk is achieving its peak bandwidth. However if the application was encrypting the data, or playing it through loudspeakers then this may not be appropriate behaviour. The design goal for readahead is that if an application is capable of processing 10 megabytes/second and the disk sustains 11 megabytes/sec then the application should never sleep. (I was about to test this, but `mke2fs /dev/fd0' oopses in 2.5.30. ho hum) Tuning the readahead per-fd is easy to do in 2.5. It would be in units of pages, even though for many requirements, milliseconds is a more appropriate unit for readahead. The basic unit of wakeup granularity is 64kbytes - the max size of a BIO. Reducing that to 4k for floppies would fix it up for you. We need some more BIO infrastructure for that, and that will happen. Then we can go and wind back the max bio size for floppies. With some additional radix tree work we can implement the posix_fadvise system call nicely, and its POSIX_FADV_WILLNEED could be beneficial. The infrastructure is in place for network filesystems to be able to tune their own readahead and expose that to user space, although none of that has been done. I don't think fiddling with readahead either in the application, the system setup or the kernel is a satisfactory way of fixing all this. It needs asynchronous IO. Then the time-sensitive application can explicitly manage its own readahead to its own requirements. 
(Could do this with POSIX_FADV_WILLNEED as well). So hmm. Good point, thanks. I'll go play some MP3's off floppies. (Holy crap. 2.5.31! I'm outta here) ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 6:07 ` Andrew Morton @ 2002-08-11 8:46 ` Simon Kirby 2002-08-11 9:36 ` Andrew Morton 2002-08-11 10:28 ` Andrew Morton 0 siblings, 2 replies; 52+ messages in thread From: Simon Kirby @ 2002-08-11 8:46 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel On Sat, Aug 10, 2002 at 11:07:44PM -0700, Andrew Morton wrote: > This is interesting. > > The 2.5 readahead sort-of does the wrong thing for you. Note how > fs/mpage.c:mpage_end_io_read() walks the BIO's pages backwards when > unlocking the pages. And also note that the BIOs are 64kbytes, and > the readahead window is up to 128k, etc. > > See, a boring old commodity disk drive will read 10,000 pages per > second. The BIO code there is designed to *not* result in 10,000 > context-switches per second in the common case. If the reader is > capable of processing the data faster than the disk then hold > them off and present them with large chunks of data. Hmm. I understand, but now that I think about it a bit more, I think I failed to notice the real problem: The size of the readahead wouldn't matter if it actually prefetched the data in advance. It's not doing that right now. What's happening with my MP3 streaming is: 1. read(4k) gets data after a delay. xmms starts playing. 2. read(4k) gets some more data, right away, because readahead worked. xmms continues. ... 3. read(4k) blocks for a long time while readahead starts up again and reads a huge block of data. read() then returns the 4k. meanwhile, xmms has underrun. xmms starts again. 4. goto 2. It's really easy to see this behavior with the xmms-crossfade plugin and a large buffer with "buffer debugging" display on. With tcpdump in another window, I can see that the readahead doesn't start prefetching until it's right near the end of the data it fetched last, rather than doing it in advance. 
This is not obvious except in the case where read() speed is limited by something like audio playback rates or heavy processing times. > But that's all disks. You're not talking about disks. Well, my example with grep was assuming a CPU the speed of what I have right now, not something modern. :) "bzip2 -9" would likely apply these days. > > This problem is showing up with NFS over a slow link, causing streaming > audio to be unusable. On the other end of the speed scale, it probably > also affects "grep" and other applications reading from hard disks, etc. > > Well, the question is "is the link saturated"? If so then it's not > solvable. If it is not then that's a bug. The link is not saturated, but it is used in huge bursts mixed with periods of silence (where readahead is finished but has not yet started the next block). > OK, it's doing 128k of readahead there, which is a bit gross for a floppy. > You can tune that down with `blockdev --setra N /dev/floppy'. The Ooh, is there something like this for NFS? > but `mke2fs /dev/fd0' oopses in 2.5.30. ho hum) Yes, floppy in 2.5 has been broken for a while... > So hmm. Good point, thanks. I'll go play some MP3's off floppies. :) Simon- [ Simon Kirby ][ Network Operations ] [ sim@netnation.com ][ NetNation Communications ] [ Opinions expressed are not necessarily those of my employer. ] ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 8:46 ` Simon Kirby @ 2002-08-11 9:36 ` Andrew Morton 2002-08-11 9:49 ` Andrew Morton 2002-08-11 10:28 ` Andrew Morton 1 sibling, 1 reply; 52+ messages in thread From: Andrew Morton @ 2002-08-11 9:36 UTC (permalink / raw) To: Simon Kirby; +Cc: linux-kernel Simon Kirby wrote: > > With tcpdump in another window, I can see that the readahead doesn't > start prefetching until it's right near the end of the data it > fetched last, rather than doing it in advance. That's a big fat bug. And it wouldn't be astonishing if my shiny new readahead does the same thing - I haven't analysed/tested this scenario. Shall though. Knowing zero about NFS, this: if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) { error = nfs_readpage_async(file, inode, page); goto out; } error = nfs_readpage_sync(file, inode, page); would seem to indicate that it's important to have 4k or 8k rsize and wsize. > ... > > > OK, it's doing 128k of readahead there, which is a bit gross for a floppy. > > You can tune that down with `blockdev --setra N /dev/floppy'. The > > Ooh, is there something like this for NFS? In 2.4, /proc/sys/vm/[min|max]_readahead should affect NFS, I think. In 2.5, no knobs yet. NFS is using the default_backing_dev_info's readahead setting, which isn't tunable. It needs to create its own backing_dev_info (probably per mount?), make each inode's inode.i_data.backing_dev_info point at that backing_dev_info structure and export it to userspace in some manner. Guess I should have told Trond that ;) > > but `mke2fs /dev/fd0' oopses in 2.5.30. ho hum) > > Yes, floppy in 2.5 has been broken for a while... 
> Well it's oopsing in the code which tries to work out the device geometry: generic_unplug_device (data=0x0) at /usr/src/25/include/asm/spinlock.h:117 117 { (gdb) bt #0 generic_unplug_device (data=0x0) at /usr/src/25/include/asm/spinlock.h:117 #1 0xc020b57c in __floppy_read_block_0 (bdev=0xf62c4e00) at floppy.c:3896 #2 0xc020b5f6 in floppy_read_block_0 (dev={value = 512}) at floppy.c:3915 #3 0xc020b745 in floppy_revalidate (dev={value = 512}) at floppy.c:3954 #4 0xc01448b7 in check_disk_change (bdev=0xf62c4e00) at block_dev.c:522 #5 0xc020b377 in floppy_open (inode=0xf54e5ec0, filp=0xf4baa1a0) at floppy.c:3808 #6 0xc0144bc6 in do_open (bdev=0xf62c4e00, inode=0xf54e5ec0, file=0xf4baa1a0) at block_dev.c:623 #7 0xc0144f63 in blkdev_open (inode=0xf54e5ec0, filp=0xf4baa1a0) at block_dev.c:740 #8 0xc013d83e in dentry_open (dentry=0xf62dc5e0, mnt=0xc3ff5ee0, flags=32768) at open.c:655 #9 0xc013d770 in filp_open (filename=0xf6362000 "/dev/fd0", flags=32768, mode=0) at open.c:624 #10 0xc013db4f in sys_open (filename=0xbffffb9c "/dev/fd0", flags=32768, mode=0) at open.c:800 #11 0xc0107123 in syscall_call () at stats.c:204 So if you use something with known geometry, like /dev/fd0h1440, it works! ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 9:36 ` Andrew Morton @ 2002-08-11 9:49 ` Andrew Morton 0 siblings, 0 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-11 9:49 UTC (permalink / raw) To: Simon Kirby, linux-kernel Andrew Morton wrote: > > ... > So if you use something with known geometry, like /dev/fd0h1440, it works! No it doesn't. You can run mke2fs, but the result is a wreck. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 8:46 ` Simon Kirby 2002-08-11 9:36 ` Andrew Morton @ 2002-08-11 10:28 ` Andrew Morton 1 sibling, 0 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-11 10:28 UTC (permalink / raw) To: Simon Kirby; +Cc: linux-kernel Simon Kirby wrote: > > ... > What's happening with my MP3 streaming is: > > 1. read(4k) gets data after a delay. xmms starts playing. > 2. read(4k) gets some more data, right way, because readahead worked. > xmms continues. > ... > 3. read(4k) blocks for a long time while readahead starts up again and > reads a huge block of data. read() then returns the 4k. meanwhile, > xmms has underrun. xmms starts again. > 4. goto 2. > > It's really easy to see this behavior with the xmms-crossfade plugin and > a large buffer with "buffer debugging" display on. I happen to have a little test app for this stuff: http://www.zip.com.au/~akpm/linux/stream.tar.gz You can use it to slowly read or write a file. ./stream -i /dev/fd0h1440 23 1000 will read 1000k from floppy at 23k per second. It's a bit useless at those rates on 2.4 because of the coarse timer resolution. But in 1000Hz 2.5 it works a treat. 
./stream -i /dev/fd0h1440 20 1000 0.00s user 0.01s system 0% cpu 51.896 total ./stream -i /dev/fd0h1440 21 1000 0.00s user 0.02s system 0% cpu 49.825 total ./stream -i /dev/fd0h1440 22 1000 0.00s user 0.02s system 0% cpu 47.843 total ./stream -i /dev/fd0h1440 23 1000 0.00s user 0.01s system 0% cpu 45.853 total ./stream -i /dev/fd0h1440 24 1000 0.01s user 0.02s system 0% cpu 44.077 total ./stream -i /dev/fd0h1440 25 1000 0.00s user 0.02s system 0% cpu 42.307 total ./stream -i /dev/fd0h1440 26 1000 0.00s user 0.01s system 0% cpu 41.305 total ./stream -i /dev/fd0h1440 27 1000 0.00s user 0.02s system 0% cpu 40.493 total ./stream -i /dev/fd0h1440 28 1000 0.01s user 0.02s system 0% cpu 39.122 total ./stream -i /dev/fd0h1440 29 1000 0.00s user 0.01s system 0% cpu 39.118 total What we see here is perfect readahead behaviour. The kernel is keeping the read streaming ahead of the application's read cursor all the way out to the point where the device is saturated. (The numbers are all off by three seconds because of the initial spinup delay). If you strace it, the reads are smooth on 2.4 and 2.5. So it may be an NFS peculiarity. That's a bit hard for me to test over 100bT. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 3:17 ` Simon Kirby 2002-08-11 6:07 ` Andrew Morton @ 2002-08-11 18:52 ` Linus Torvalds 2002-08-12 3:28 ` Andrew Morton 1 sibling, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-11 18:52 UTC (permalink / raw) To: Simon Kirby; +Cc: Andrew Morton, linux-kernel, Jens Axboe, Trond Myklebust On Sat, 10 Aug 2002, Simon Kirby wrote: > > This is only somewhat related, but I'm wondering if the cache effects > also apply to readahead block sizes. Sequential page-sized read()s from > a file causes readahead to kick in and grow in size. Over time, it ends > up using very large blocks. Would it be beneficial to keep the readahead > size smaller so that it still stays in cache? Any sane IO subsystem will do the actual IO using DMA, and not pollute the cache for read[aheads] until the point where it is actually _used_. > Also, this use of large blocks shouldn't really matter, but I'm seeing a > problem where the process ends up sleeping for most of the time, > switching between CPU and I/O rather than simply having the I/O for the > next read() occur in advance of the current read(). > > The problem appears to be that readahead isn't awakening the process to > present partial results. You're not the only one complaining about this. I _think_ that the problem is not the read-ahead code, but some of the block layer stuff. It appears that the read-ahead code is so successful that we generate one large request for all of it, and we won't be waking things up as they come in, but only after the whole request is done. This is due to Andrews bio work. It decreases CPU load, but it sure as hell does seem to decrease parallelism too, which is bad. Basically, it _used_ to be that each page got woken up one at a time as they became ready after IO. With the new scheme, they all get woken up together in "mpage_end_io_read()" (or write, but since people usually don't wait for writes..). 
At least that is how I read the code. Andrew? On the other hand, for most high-end controllers, you aren't even likely to get notified in the middle anyway, since the controller will just do the whole dang IO request in one go, and only notify us when it is totally done. > This problem is showing up with NFS over a slow link, causing streaming > audio to be unusable. On the other end of the speed scale, it probably > also affects "grep" and other applications reading from hard disks, etc. Hmm.. NFS should be a totally different kettle of fish. Although the read-ahead code is shared, I think the NFS client should be returning successes one page at a time. Jens, Trond and Andrew Cc'd for comments and your "strace" showing a 6/7-second latency is appended. Linus --- > To demonstrate the problem reliably, I've used "strace -r cat" on a > floppy, which is a sufficiently slow medium. :) This is on a 2.4.19 > kernel, but 2.5 behaves similarly. Note how the readahead starts small > and gets very large. Also, note how the start of the first larger > readahead occurs shortly after a previous read, and that it blocks early > even though the data should already be there (4.9 seconds). It also > appears to stumble a bit later on. 
read() times show up as the relative > time for the following write() (which is going /dev/null): > > 0.000294 open("a/bigzero", O_RDONLY|O_LARGEFILE) = 3 > 0.000258 fstat64(3, {st_mode=S_IFREG|0775, st_size=914432, ...}) = 0 > 0.000275 brk(0x804e000) = 0x804e000 > 0.000223 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.593615 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000807 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000730 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000878 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000209 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000642 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000304 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000482 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.647682 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000537 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000687 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000469 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000433 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000183 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000186 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.649228 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000541 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000500 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 4.897722 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000535 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000190 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000505 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000199 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000486 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000485 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 
0.000193 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000434 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000858 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000221 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001148 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000243 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000877 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000247 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000649 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000220 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000497 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 6.615653 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.002430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000283 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000857 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000217 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000449 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000537 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000198 write(1, 
"\0\0\0\0\0"..., 4096) = 4096 > 0.000436 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000455 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000530 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000283 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000475 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000434 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.001341 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000470 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.001626 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001282 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000278 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000481 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000186 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000467 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 
4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000581 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000203 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000662 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000199 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000492 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000201 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000484 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000189 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000433 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000440 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000188 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 
0.001342 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000328 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000839 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000449 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 1.031732 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000531 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 6.154301 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000544 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000198 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000740 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000250 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000723 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000186 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000444 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000435 read(3, 
"\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001227 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000196 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000454 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000597 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000207 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000497 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000196 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000199 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000452 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000727 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000221 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000486 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000187 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000650 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001078 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 7.004463 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000538 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000440 read(3, "\0\0\0\0\0"..., 
4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000506 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001446 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000283 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000469 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000494 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000175 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000487 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000193 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000484 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000684 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000220 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000500 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000201 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000484 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000189 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 
0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000543 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000190 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 7.407175 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000530 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000435 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000447 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000446 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000183 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000954 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000410 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000478 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, 
"\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000239 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000435 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000429 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000627 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000205 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001126 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000468 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000537 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000221 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000489 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 3.391947 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000529 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 
4096) = 4096 > 0.000573 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000193 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000486 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000191 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000436 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000531 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000588 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000444 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000447 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000425 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.001018 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000811 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000536 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000258 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 
0.000531 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000207 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000487 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000525 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000231 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000439 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000430 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000175 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000425 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000426 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000431 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000485 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000547 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000194 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000448 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000479 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000177 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000176 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000179 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000428 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000178 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000181 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000633 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000241 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000434 read(3, 
"\0\0\0\0\0"..., 4096) = 4096 > 0.000182 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000432 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000180 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000427 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 1.144692 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000533 read(3, "\0\0\0\0\0"..., 4096) = 4096 > 0.000185 write(1, "\0\0\0\0\0"..., 4096) = 4096 > 0.000434 read(3, "\0\0\0\0\0"..., 4096) = 1024 > 0.000318 write(1, "\0\0\0\0\0"..., 1024) = 1024 > 0.000276 read(3, "", 4096) = 0 > 0.000184 close(3) = 0 > 0.000259 _exit(0) = ? > > We probably want huge readahead to occur in the case where programs are > competing for I/O from the same device, but the latency here from slow > devices is horrible. > > Simon- > > [ Stormix Technologies Inc. ][ NetNation Communications Inc. ] > [ sim@stormix.com ][ sim@netnation.com ] > [ Opinions expressed are not necessarily those of my employers. ] > > > ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 18:52 ` Linus Torvalds @ 2002-08-12 3:28 ` Andrew Morton 2002-08-12 3:27 ` Linus Torvalds 2002-08-12 6:20 ` Simon Kirby 0 siblings, 2 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-12 3:28 UTC (permalink / raw) To: Linus Torvalds; +Cc: Simon Kirby, linux-kernel, Jens Axboe, Trond Myklebust Linus Torvalds wrote: > > ... > Basically, it _used_ to be that each page got woken up one at a time as > they became ready after IO. With the new scheme, they all get woken up > together in "mpage_end_io_read()" (or write, but since people usually > don't wait for writes..). > > At least that is how I read the code. Andrew? Yes. The basic unit of IO in there is a 64k BIO. So once readahead is cruising, pages come unlocked in 16-page batches. In 2.4 they'll come unlocked one at a time against a device such as a floppy drive. But with default settings the readahead code lays one to two of these BIOs out ahead of the read point, so the application never stumbles across a locked page unless it's outpacing the device. At least that's the theory, and the testing I did yesterday was successful. So I'd appreciate it if Simon could investigate a little further with the test app I posted. Something is up, and it may not be just an NFS thing. But note that nfs_readpage will go synchronous if rsize is less than PAGE_CACHE_SIZE, so it has to be set up right. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 3:28 ` Andrew Morton @ 2002-08-12 3:27 ` Linus Torvalds 2002-08-12 4:08 ` Andrew Morton 2002-08-12 6:20 ` Simon Kirby 1 sibling, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-12 3:27 UTC (permalink / raw) To: Andrew Morton; +Cc: Simon Kirby, linux-kernel, Jens Axboe, Trond Myklebust On Sun, 11 Aug 2002, Andrew Morton wrote: > > At least that's the theory, and the testing I did yesterday > was succesful. Did you try Simon's test-case which seemed to be just a "cat" on a floppy "To demonstrate the problem reliably, I've used "strace -r cat" on a floppy, which is a sufficiently slow medium. :) This is on a 2.4.19 kernel, but 2.5 behaves similarly.") although that may be different from the NFS issue, it is kind of interesting: the perfect behaviour would be a steady stream of data, not too many hiccups. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 3:27 ` Linus Torvalds @ 2002-08-12 4:08 ` Andrew Morton 0 siblings, 0 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-12 4:08 UTC (permalink / raw) To: Linus Torvalds; +Cc: Simon Kirby, linux-kernel, Jens Axboe, Trond Myklebust Linus Torvalds wrote: > > On Sun, 11 Aug 2002, Andrew Morton wrote: > > > > At least that's the theory, and the testing I did yesterday > > was succesful. > > Did you try Simons test-case which seemed to be just a "cat" on a floppy > > "To demonstrate the problem reliably, I've used "strace -r cat" on a > floppy, which is a sufficiently slow medium. :) This is on a 2.4.19 > kernel, but 2.5 behaves similarly.") > > although that may be different from the NFS issue, it is kind of > interesting: the perfect behaviour would be a steady stream of data, not > too many hickups. I did, but I cut you from the Cc... > I happen to have a little test app for this stuff: > http://www.zip.com.au/~akpm/linux/stream.tar.gz > > You can use it to slowly read or write a file. > > ./stream -i /dev/fd0h1440 23 1000 > > will read 1000k from floppy at 23k per second. It's a bit > useless at those rates on 2.4 because of the coarse timer > resolution. But in 1000Hz 2.5 it works a treat. 
> > ./stream -i /dev/fd0h1440 20 1000 0.00s user 0.01s system 0% cpu 51.896 total > ./stream -i /dev/fd0h1440 21 1000 0.00s user 0.02s system 0% cpu 49.825 total > ./stream -i /dev/fd0h1440 22 1000 0.00s user 0.02s system 0% cpu 47.843 total > ./stream -i /dev/fd0h1440 23 1000 0.00s user 0.01s system 0% cpu 45.853 total > ./stream -i /dev/fd0h1440 24 1000 0.01s user 0.02s system 0% cpu 44.077 total > ./stream -i /dev/fd0h1440 25 1000 0.00s user 0.02s system 0% cpu 42.307 total > ./stream -i /dev/fd0h1440 26 1000 0.00s user 0.01s system 0% cpu 41.305 total > ./stream -i /dev/fd0h1440 27 1000 0.00s user 0.02s system 0% cpu 40.493 total > ./stream -i /dev/fd0h1440 28 1000 0.01s user 0.02s system 0% cpu 39.122 total > ./stream -i /dev/fd0h1440 29 1000 0.00s user 0.01s system 0% cpu 39.118 total > > What we see here is perfect readahead behaviour. The kernel is keeping the > read streaming ahead of the application's read cursor all the way out to the > point where the device is saturated. (The numbers are all off by three > seconds because of the initial spinup delay). > > If you strace it, the reads are smooth on 2.4 and 2.5. > > So it may be an NFS peculiarity. That's a bit hard for me to test over > 100bT. The strace of that app is smooth, all the way out to the peak disk bandwidth. So something is different either in the test or in Simon's setup. It needs further investigation. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 3:28 ` Andrew Morton 2002-08-12 3:27 ` Linus Torvalds @ 2002-08-12 6:20 ` Simon Kirby 2002-08-12 6:44 ` Andrew Morton 1 sibling, 1 reply; 52+ messages in thread From: Simon Kirby @ 2002-08-12 6:20 UTC (permalink / raw) To: Andrew Morton; +Cc: Linus Torvalds, linux-kernel, Jens Axboe, Trond Myklebust On Sun, Aug 11, 2002 at 08:28:12PM -0700, Andrew Morton wrote: > So I'd appreciate it if Simon could invetigate a little further > with the test app I posted. Something is up, and it may not > be just an NFS thing. But note that nfs_readpage will go > synchronous if rsize is less than PAGE_CACHE_SIZE, so it has > to be set up right. You're right -- my NFS page size is set to 2048. I can't remember if I did this because I was trying to work around huge read-ahead or because I was trying to work around the bursts of high latency from my Terayon cable modem (which idles at a slow line speed and "falls forward" to higher speeds once it detects traffic, but with a delay, causing awful latency at the expense of "better noise immunity"). Anyway, I will test this tomorrow. I recall that 1024 byte-sized blocks were too small because the latency of the cable modem would cause it to not have high enough throughput, so I settled with 2048. I haven't been able to test your application over NFS yet, but I did get a chance to test it with a floppy. I was able to (on 2.4.19) reproduce a case where even with just 5 KB/second reads, the read() would block every so often (long strace attached). I don't really trust my floppy device to read every sector successfully on the first try, but at one point during this strace, I saw a point where read() blocked and the floppy LED lit immediately, as if it had done no preparation at all (it was not as if it was close and the motor didn't spin up in time). 
Some strace snippets: [sroot@oof:/]# umount /a ; mount -t ext2 -o noatime,nodiratime /dev/fd0 /a && strace -o /tmp/strace.txt -r a/stream -i a/bigfile 5 1024 0.209706 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000242 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209861 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000318 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209665 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.538707 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.201309 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000298 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209675 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000226 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209711 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000290 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209757 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000308 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209863 read(3, "\0\0\0\0\0"..., 1024) = 1024 2.680966 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209039 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000359 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209648 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000308 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209711 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.209645 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000307 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209734 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000301 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209672 read(3, "\0\0\0\0\0"..., 1024) = 1024 2.964750 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.205464 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000316 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209628 read(3, "\0\0\0\0\0"..., 1024) = 1024 0.000302 nanosleep({0, 200000000}, {3221223736, 1073818412}) = 0 0.209792 read(3, "\0\0\0\0\0"..., 1024) = 1024 So, something does appear to be wrong. 
If I can actually mount a filesystem on a floppy in 2.5, I'll see if the same thing happens. Simon- [ Simon Kirby ][ Network Operations ] [ sim@netnation.com ][ NetNation Communications ] [ Opinions expressed are not necessarily those of my employer. ] ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 6:20 ` Simon Kirby @ 2002-08-12 6:44 ` Andrew Morton 2002-08-12 19:43 ` Trond Myklebust 0 siblings, 1 reply; 52+ messages in thread From: Andrew Morton @ 2002-08-12 6:44 UTC (permalink / raw) To: Simon Kirby; +Cc: Linus Torvalds, linux-kernel, Jens Axboe, Trond Myklebust Simon Kirby wrote: > > On Sun, Aug 11, 2002 at 08:28:12PM -0700, Andrew Morton wrote: > > > So I'd appreciate it if Simon could invetigate a little further > > with the test app I posted. Something is up, and it may not > > be just an NFS thing. But note that nfs_readpage will go > > synchronous if rsize is less than PAGE_CACHE_SIZE, so it has > > to be set up right. > > You're right -- my NFS page size is set to 2048. I can't remember if I > did this because I was trying to work around huge read-ahead or because I > was trying to work around the bursts of high latency from my Terayon > cable modem (which idles at a slow line speed and "falls forward" to > higher speeds once it detects traffic, but with a delay, causing awful > latency at the expense of "better noise immunity"). Anyway, I will test > this tomorrow. I recall that 1024 byte-sized blocks were too small > because the latency of the cable modem would cause it to not have high > enough throughput, so I settled with 2048. OK, thanks. > I haven't been able to test your application over NFS yet, but I did get > a chance to test it with a floppy. I was able to (on 2.4.19) reproduce a > case where even with just 5 KB/second reads, the read() would block every > so often (long strace attached). Well with a 64k readahead chunk the kernel will only talk to the floppy drive once per 13 seconds. Surely it's spinning down? Try setting the readahead to 16 kbytes (three seconds) with blockdev --setra 32 /dev/floppy > > So, something does appear to be wrong. If I can actually mount a > filesystem on a floppy in 2.5, I'll see if the same thing happens. Nope, floppy is bust. 
But you can read directly from /dev/fd0h1440 OK. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 6:44 ` Andrew Morton @ 2002-08-12 19:43 ` Trond Myklebust 2002-08-12 20:43 ` Andrew Morton 0 siblings, 1 reply; 52+ messages in thread From: Trond Myklebust @ 2002-08-12 19:43 UTC (permalink / raw) To: Andrew Morton; +Cc: Simon Kirby, Linus Torvalds, linux-kernel, Jens Axboe >>>>> " " == Andrew Morton <akpm@zip.com.au> writes: > Simon Kirby wrote: >> >> On Sun, Aug 11, 2002 at 08:28:12PM -0700, Andrew Morton wrote: >> >> > So I'd appreciate it if Simon could invetigate a little >> > further with the test app I posted. Something is up, and it >> > may not be just an NFS thing. But note that nfs_readpage >> > will go synchronous if rsize is less than PAGE_CACHE_SIZE, so >> > it has to be set up right. >> >> You're right -- my NFS page size is set to 2048. I can't >> remember if I did this because I was trying to work around huge >> read-ahead or because I was trying to work around the bursts of >> high latency from my Terayon cable modem (which idles at a slow >> line speed and "falls forward" to higher speeds once it detects >> traffic, but with a delay, causing awful latency at the expense >> of "better noise immunity"). Anyway, I will test this >> tomorrow. I recall that 1024 byte-sized blocks were too small >> because the latency of the cable modem would cause it to not >> have high enough throughput, so I settled with 2048. > OK, thanks. Sorry if somebody already covered this (I'm still a bit jetlagged so I may have missed part of the argument) but if the read is synchronous, why should we care about doing readahead at all? Wasn't the 2.4.x code designed so that you first scheduled the read for the page you are interested in, and only if the page was not immediately made available would you then schedule some readahead? Cheers, Trond ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 19:43 ` Trond Myklebust @ 2002-08-12 20:43 ` Andrew Morton 0 siblings, 0 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-12 20:43 UTC (permalink / raw) To: trond.myklebust; +Cc: Simon Kirby, Linus Torvalds, linux-kernel, Jens Axboe Trond Myklebust wrote: > > >>>>> " " == Andrew Morton <akpm@zip.com.au> writes: > > > Simon Kirby wrote: > >> > >> On Sun, Aug 11, 2002 at 08:28:12PM -0700, Andrew Morton wrote: > >> > >> > So I'd appreciate it if Simon could invetigate a little > >> > further with the test app I posted. Something is up, and it > >> > may not be just an NFS thing. But note that nfs_readpage > >> > will go synchronous if rsize is less than PAGE_CACHE_SIZE, so > >> > it has to be set up right. > >> > >> You're right -- my NFS page size is set to 2048. I can't > >> remember if I did this because I was trying to work around huge > >> read-ahead or because I was trying to work around the bursts of > >> high latency from my Terayon cable modem (which idles at a slow > >> line speed and "falls forward" to higher speeds once it detects > >> traffic, but with a delay, causing awful latency at the expense > >> of "better noise immunity"). Anyway, I will test this > >> tomorrow. I recall that 1024 byte-sized blocks were too small > >> because the latency of the cable modem would cause it to not > >> have high enough throughput, so I settled with 2048. > > > OK, thanks. > > Sorry if somebody already covered this (I'm still a bit jetlagged so I > may have missed part of the argument) but if the read is synchronous, > why should we care about doing readahead at all? Well, all reads are synchronous, in a way.... In this case, where the application's data-processing bandwidth is vastly higher than the media bandwidth, readahead isn't doing anything useful, apart from allowing the submission of nice big chunks to the IO layers. Batching. 
If the application is processing data more slowly then readahead will allow the IO to be overlapped with that processing. But with rsize < PAGE_CACHE_SIZE, all NFS reads are synchronous and everything has gone bad. It may be sensible for NFS to disable readahead in this case. > Wasn't the 2.4.x code designed so that you first scheduled the read > for the page you are interested in, and only if the page was not > immediately made available would you then schedule some readahead? 2.4 will schedule readahead whether or not the requested page is uptodate. Same in 2.5. 2.4 readahead has an explicit "don't do more readahead if the current page is still under IO", whereas 2.5 has "don't readahead pages in a previously-submitted window". They'll have the same effect. ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 22:42 ` Linus Torvalds 2002-08-11 3:17 ` Simon Kirby @ 2002-08-11 8:00 ` Daniel Phillips 2002-08-11 19:00 ` Linus Torvalds 1 sibling, 1 reply; 52+ messages in thread From: Daniel Phillips @ 2002-08-11 8:00 UTC (permalink / raw) To: Linus Torvalds, Jamie Lokier; +Cc: Andrew Morton, lkml On Sunday 11 August 2002 00:42, Linus Torvalds wrote: > For example, what do you do when somebody has a COW-page mapped into it's > VM space and you want to start paging stuff out? Clearly it requires a CoW break and swapping out that page won't free any memory directly, but it will in turn allow the cache page to be dropped. I suppose your point is that these ideas touch the system in a lot of places, and right now the code is a little too irregular to withstand lathering on a new layer of cruft. That's true, but <plug>the reverse mapping work enables some fundamental VM simplifications that make a lot of things more local, and so a better base for these new, sophisticated features is on its way.</plug> > There are "interesting" > cases that just may mean that doing the COW thing is a really stupid thing > to do, even if it is intriguing to _think_ about it. It is good sport, but the real benefits are compelling and will only get more so. For high end scientific uses (read supercomputing clusters) it's a cinch developers will prefer high speed file operations that turn in nearly the same raw performance on large transfers as O_DIRECT while not bypassing the file cache. -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 8:00 ` Daniel Phillips @ 2002-08-11 19:00 ` Linus Torvalds 2002-08-11 19:43 ` Daniel Phillips 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-11 19:00 UTC (permalink / raw) To: Daniel Phillips; +Cc: Jamie Lokier, Andrew Morton, lkml On Sun, 11 Aug 2002, Daniel Phillips wrote: > On Sunday 11 August 2002 00:42, Linus Torvalds wrote: > > For example, what do you do when somebody has a COW-page mapped into it's > > VM space and you want to start paging stuff out? > > Clearly it requires a CoW break and swapping out that page won't free any > memory directly, but it will in turn allow the cache page to be dropped. Well, that's the point. Is it really "clearly"? One alternative is to just instead remove it from the page cache, and add it to the swap cache directly (and unmapping it). In fact, I _think_ that is the right thing to do (yes, it only works if the page count is 2 (one for page cache, one for the VM mapping), but that's very different from breaking the COW and generating two separate pages. The "move directly to swap cache" is nice in that it doesn't add any new pages. But it's nasty in that it steals pages from the file cache, so that it basically turns a potentially sharable cache into a private cache that nobody else will see. See? You actually _do_ have choices on what to do. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 19:00 ` Linus Torvalds @ 2002-08-11 19:43 ` Daniel Phillips 0 siblings, 0 replies; 52+ messages in thread From: Daniel Phillips @ 2002-08-11 19:43 UTC (permalink / raw) To: Linus Torvalds; +Cc: Jamie Lokier, Andrew Morton, lkml On Sunday 11 August 2002 21:00, Linus Torvalds wrote: > On Sun, 11 Aug 2002, Daniel Phillips wrote: > > > On Sunday 11 August 2002 00:42, Linus Torvalds wrote: > > > For example, what do you do when somebody has a COW-page mapped into it's > > > VM space and you want to start paging stuff out? > > > > Clearly it requires a CoW break and swapping out that page won't free any > > memory directly, but it will in turn allow the cache page to be dropped. > > Well, that's the point. Is it really "clearly"? > > One alternative is to just instead remove it from the page cache, and add > it to the swap cache directly (and unmapping it). In fact, I _think_ that > is the right thing to do (yes, it only works if the page count is 2 (one > for page cache, one for the VM mapping), but that's very different from > breaking the COW and generating two separate pages. Far clearer ;-) With reverse mapping it works for any page count. > The "move directly to swap cache" is nice in that it doesn't add any new > pages. But it's nasty in that it steals pages from the file cache, so that > it basically turns a potentially sharable cache into a private cache that > nobody else will see. But you got it right the first time: we're evicting the page because it's inactive and we want the memory for something else. We don't need to give that page more second chances, it already had its share of chances before it got this far in the eviction process. If the file page gets reloaded before the swap-out completes it just means we chose the victim poorly in the first place, or we're unlucky. The latter is supposed to be the exception, not the rule. > See? You actually _do_ have choices on what to do. 
Yes, in this case, the correct thing and the dumb thing. -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 1:33 ` Linus Torvalds 2002-08-10 3:53 ` Andrew Morton @ 2002-08-11 0:34 ` Andrew Morton 2002-08-11 0:56 ` Linus Torvalds 2002-08-12 7:45 ` Rusty Russell 2 siblings, 1 reply; 52+ messages in thread From: Andrew Morton @ 2002-08-11 0:34 UTC (permalink / raw) To: Linus Torvalds; +Cc: lkml Linus Torvalds wrote: > > ... > - do_page_fault() already does an > > if (in_interrupt() || !mm) > goto no_context; > > and the fact is, the "in_interrupt()" should really be an > "preempt_count()", since it's illegal to take a page fault not just in > interrupts, but while non-preemptible in general. > gargh. preempt_disable (and, hence, kmap_atomic) do not bump the preempt counter with CONFIG_PREEMPT=n. Is there a plan to change this? If not, I don't think it's worth making this change just for the highmem read/write thing (calculating `current' at each spin_lock site...) I just open coded it. This works. I still need to do the other architectures' fault handlers, do writes and test it for more than seven seconds. arch/i386/mm/fault.c | 6 +++--- include/linux/preempt.h | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) --- 2.5.30/arch/i386/mm/fault.c~atomic-copy_user Sat Aug 10 14:44:03 2002 +++ 2.5.30-akpm/arch/i386/mm/fault.c Sat Aug 10 14:44:52 2002 @@ -189,10 +189,10 @@ asmlinkage void do_page_fault(struct pt_ info.si_code = SEGV_MAPERR; /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault.. 
*/ - if (in_interrupt() || !mm) + if (preempt_count() || !mm) goto no_context; #ifdef CONFIG_X86_REMOTE_DEBUG --- 2.5.30/include/linux/preempt.h~atomic-copy_user Sat Aug 10 16:18:50 2002 +++ 2.5.30-akpm/include/linux/preempt.h Sat Aug 10 16:20:16 2002 @@ -5,19 +5,29 @@ #define preempt_count() (current_thread_info()->preempt_count) +#define inc_preempt_count() \ +do { \ + preempt_count()++; \ +} while (0) + +#define dec_preempt_count() \ +do { \ + preempt_count()--; \ +} while (0) + #ifdef CONFIG_PREEMPT extern void preempt_schedule(void); #define preempt_disable() \ do { \ - preempt_count()++; \ + inc_preempt_count(); \ barrier(); \ } while (0) #define preempt_enable_no_resched() \ do { \ - preempt_count()--; \ + dec_preempt_count(); \ barrier(); \ } while (0) filemap.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 49 insertions(+), 2 deletions(-) --- 2.5.30/mm/filemap.c~kmap_atomic_reads Sat Aug 10 17:09:47 2002 +++ 2.5.30-akpm/mm/filemap.c Sat Aug 10 17:27:35 2002 @@ -1020,7 +1020,37 @@ no_cached_page: UPDATE_ATIME(inode); } -int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) +/* + * Fault a userspace page into pagetables. Return non-zero on EFAULT. + * FIXME: this assumes that two userspace pages are always sufficient. That's + * not true if PAGE_CACHE_SIZE > PAGE_SIZE. + */ +static inline int fault_in_page_writeable(char *uaddr, int size) +{ + int ret; + + /* + * Writing zeroes into userspace here is OK, because we know that if + * the zero gets there, we'll be overwriting it. + */ + ret = __put_user(0, uaddr); + if (ret == 0) { + char *end = uaddr + size - 1; + + /* + * If the page was already mapped, this will get a cache miss + * for sure, so try to avoid doing it. This is only useful if + * userspace is doing page-aligned IO, which is rare. Lose it? 
+ */ + if (((unsigned long)uaddr & PAGE_MASK) != + ((unsigned long)end & PAGE_MASK)) + ret = __put_user(0, end); + } + return ret; +} + +int file_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) { char *kaddr; unsigned long left, count = desc->count; @@ -1028,14 +1058,31 @@ int file_read_actor(read_descriptor_t * if (size > count) size = count; + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. + */ + if (!fault_in_page_writeable(desc->buf, size)) { + kaddr = kmap_atomic(page, KM_USER0); + inc_preempt_count(); /* An atomic copy_to_user */ + left = __copy_to_user(desc->buf, kaddr + offset, size); + dec_preempt_count(); + kunmap_atomic(kaddr, KM_USER0); + if (left == 0) + goto success; + printk("%s: Unexpected page fault\n", __FUNCTION__); + } + + /* Do it the slow way */ kaddr = kmap(page); left = __copy_to_user(desc->buf, kaddr + offset, size); kunmap(page); - + if (left) { size -= left; desc->error = -EFAULT; } +success: desc->count = count - size; desc->written += size; desc->buf += size; . ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 0:34 ` Andrew Morton @ 2002-08-11 0:56 ` Linus Torvalds 2002-08-11 1:27 ` Andrew Morton 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-11 0:56 UTC (permalink / raw) To: Andrew Morton; +Cc: lkml On Sat, 10 Aug 2002, Andrew Morton wrote: > > If not, I don't think it's worth making this change just for > the highmem read/write thing (calculating `current' at each > spin_lock site...) I just open coded it. Well, this way it will now do the preempt count twice (once in kmap_atomic, once in the open-coded one) if preempt is enabled. I'd suggest just making k[un]map_atomic() always do the inc/dec_preempt_count. Other ideas? Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-11 0:56 ` Linus Torvalds @ 2002-08-11 1:27 ` Andrew Morton 0 siblings, 0 replies; 52+ messages in thread From: Andrew Morton @ 2002-08-11 1:27 UTC (permalink / raw) To: Linus Torvalds; +Cc: lkml Linus Torvalds wrote: > > On Sat, 10 Aug 2002, Andrew Morton wrote: > > > > If not, I don't think it's worth making this change just for > > the highmem read/write thing (calculating `current' at each > > spin_lock site...) I just open coded it. > > Well, this way it will now do the preempt count twice (once in > kmap_atomic, once in th eopen-coded one) if preempt is enabled. > > I'd suggest just making k[un]map_atomic() always do the > inc/dec_preempt_count. Other ideas? > Well the optimum solution there would be to create and use `inc_preempt_count_non_preempt()'. I don't see any way of embedding this in kmap_atomic() or copy_to_user_atomic() without loss of flexibility or incurring a double-inc somewhere. Please let my post-virginal brain know if you're not otherwise OK with the approach ;) arch/i386/mm/fault.c | 6 +++--- include/linux/preempt.h | 24 ++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) --- 2.5.30/arch/i386/mm/fault.c~atomic-copy_user Sat Aug 10 14:44:03 2002 +++ 2.5.30-akpm/arch/i386/mm/fault.c Sat Aug 10 14:44:52 2002 @@ -189,10 +189,10 @@ asmlinkage void do_page_fault(struct pt_ info.si_code = SEGV_MAPERR; /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault.. 
*/ - if (in_interrupt() || !mm) + if (preempt_count() || !mm) goto no_context; #ifdef CONFIG_X86_REMOTE_DEBUG --- 2.5.30/include/linux/preempt.h~atomic-copy_user Sat Aug 10 16:18:50 2002 +++ 2.5.30-akpm/include/linux/preempt.h Sat Aug 10 18:23:40 2002 @@ -5,19 +5,29 @@ #define preempt_count() (current_thread_info()->preempt_count) +#define inc_preempt_count() \ +do { \ + preempt_count()++; \ +} while (0) + +#define dec_preempt_count() \ +do { \ + preempt_count()--; \ +} while (0) + #ifdef CONFIG_PREEMPT extern void preempt_schedule(void); #define preempt_disable() \ do { \ - preempt_count()++; \ + inc_preempt_count(); \ barrier(); \ } while (0) #define preempt_enable_no_resched() \ do { \ - preempt_count()--; \ + dec_preempt_count(); \ barrier(); \ } while (0) @@ -34,6 +44,9 @@ do { \ preempt_schedule(); \ } while (0) +#define inc_preempt_count_non_preempt() do { } while (0) +#define dec_preempt_count_non_preempt() do { } while (0) + #else #define preempt_disable() do { } while (0) @@ -41,6 +54,13 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) +/* + * Sometimes we want to increment the preempt count, but we know that it's + * already incremented if the kernel is compiled for preemptibility. + */ +#define inc_preempt_count_non_preempt() inc_preempt_count() +#define dec_preempt_count_non_preempt() dec_preempt_count() + #endif #endif /* __LINUX_PREEMPT_H */ . ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-10 1:33 ` Linus Torvalds 2002-08-10 3:53 ` Andrew Morton 2002-08-11 0:34 ` Andrew Morton @ 2002-08-12 7:45 ` Rusty Russell 2002-08-12 9:45 ` Daniel Phillips 2002-08-12 17:30 ` Linus Torvalds 2 siblings, 2 replies; 52+ messages in thread From: Rusty Russell @ 2002-08-12 7:45 UTC (permalink / raw) To: Linus Torvalds; +Cc: akpm, linux-kernel On Fri, 9 Aug 2002 18:33:09 -0700 (PDT) Linus Torvalds <torvalds@transmeta.com> wrote: > repeat: > kmap_atomic(..); // this increments preempt count > nr = copy_from_user(..); Please please please use a different name for "I know I'm not preemptible but I can handle it" or a flag or something. That leaves us with the possibility of a BUG() in the "normal" copy_to/from_user for all those "I'm holding a spinlock while copying to userspace wheeee!" bugs. Very common mistake for new kernel authors. With the preempt count we have an easy way of detecting this at runtime: I'd like to keep that. Rusty. -- there are those who do and those who hang on and you don't see too many doers quoting their contemporaries. -- Larry McVoy ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 7:45 ` Rusty Russell @ 2002-08-12 9:45 ` Daniel Phillips 2002-08-12 20:29 ` Linus Torvalds 2002-08-12 17:30 ` Linus Torvalds 1 sibling, 1 reply; 52+ messages in thread From: Daniel Phillips @ 2002-08-12 9:45 UTC (permalink / raw) To: Rusty Russell, Linus Torvalds; +Cc: akpm, linux-kernel On Monday 12 August 2002 09:45, Rusty Russell wrote: > On Fri, 9 Aug 2002 18:33:09 -0700 (PDT) > Linus Torvalds <torvalds@transmeta.com> wrote: > > repeat: > > kmap_atomic(..); // this increments preempt count > > nr = copy_from_user(..); > > Please please please use a different name for "I know I'm not preemptible but > I can handle it" or a flag or something. > > That leaves us with the possibility of a BUG() in the "normal" copy_to/from_user > for all those "I'm holding a spinlock while copying to userspace wheeee!" bugs. > Very common mistake for new kernel authors. That's the whole point of this: it's not a bug anymore. (It's a feature.) But agreed, a different name than preempt count would be nice, because it's evolving away from its original function. Is this a 'monitor'? (I don't think so.) Perhaps 'atomic_count' is more accurate. -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 9:45 ` Daniel Phillips @ 2002-08-12 20:29 ` Linus Torvalds 2002-08-12 21:21 ` Daniel Phillips 0 siblings, 1 reply; 52+ messages in thread From: Linus Torvalds @ 2002-08-12 20:29 UTC (permalink / raw) To: Daniel Phillips; +Cc: Rusty Russell, akpm, linux-kernel On Mon, 12 Aug 2002, Daniel Phillips wrote: > > That's the whole point of this: it's not a bug anymore. (It's a feature.) Well, it's a feature only if _intentional_, so I think Rusty's argument was that we should call it something else than "copy_to/from_user()" if we're ready to accept the fact that it fails for random reasons.. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 20:29 ` Linus Torvalds @ 2002-08-12 21:21 ` Daniel Phillips 0 siblings, 0 replies; 52+ messages in thread From: Daniel Phillips @ 2002-08-12 21:21 UTC (permalink / raw) To: Linus Torvalds; +Cc: Rusty Russell, akpm, linux-kernel On Monday 12 August 2002 22:29, Linus Torvalds wrote: > On Mon, 12 Aug 2002, Daniel Phillips wrote: > > > > That's the whole point of this: it's not a bug anymore. (It's a feature.) > > Well, it's a feature only if _intentional_, so I think Rusty's argument > was that we should call it something else than "copy_to/from_user()" if > we're ready to accept the fact that it fails for random reasons.. Right, I meant to follow up and correct that - the caller has the responsibility of detecting the short transfer and taking corrective action, but on the other hand, maybe the caller always had that responsibility. But for the cases where the caller 'knows' it holds no locks, it's better to oops if that's untrue as Rusty said, plus the inc/dec is saved in that case. -- Daniel ^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [patch 6/12] hold atomic kmaps across generic_file_read 2002-08-12 7:45 ` Rusty Russell 2002-08-12 9:45 ` Daniel Phillips @ 2002-08-12 17:30 ` Linus Torvalds 1 sibling, 0 replies; 52+ messages in thread From: Linus Torvalds @ 2002-08-12 17:30 UTC (permalink / raw) To: Rusty Russell; +Cc: akpm, linux-kernel On Mon, 12 Aug 2002, Rusty Russell wrote: > On Fri, 9 Aug 2002 18:33:09 -0700 (PDT) > Linus Torvalds <torvalds@transmeta.com> wrote: > > > repeat: > > kmap_atomic(..); // this increments preempt count > > nr = copy_from_user(..); > > Please please please use a different name for "I know I'm not preemptible but > I can handle it" or a flag or something. > > That leaves us with the possibility of a BUG() in the "normal" copy_to/from_user > for all those "I'm holding a spinlock while copying to userspace wheeee!" bugs. > Very common mistake for new kernel authors. Agreed. Maybe the right thing to do is to just have a atomic_copy_from_user() which can then be used to explicitly not check if we have a kernel debugging option. Linus ^ permalink raw reply [flat|nested] 52+ messages in thread
end of thread, other threads:[~2002-08-13 10:33 UTC | newest] Thread overview: 52+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2002-08-10 0:57 [patch 6/12] hold atomic kmaps across generic_file_read Andrew Morton 2002-08-10 1:33 ` Linus Torvalds 2002-08-10 3:53 ` Andrew Morton 2002-08-10 3:53 ` Linus Torvalds 2002-08-10 6:12 ` Andrew Morton 2002-08-10 7:25 ` Linus Torvalds 2002-08-10 9:08 ` Andrew Morton 2002-08-10 12:44 ` Daniel Phillips 2002-08-10 17:01 ` Linus Torvalds 2002-08-10 18:16 ` Daniel Phillips 2002-08-10 18:32 ` Linus Torvalds 2002-08-10 18:46 ` Daniel Phillips 2002-08-10 14:16 ` Rik van Riel 2002-08-10 17:03 ` Linus Torvalds 2002-08-10 17:36 ` Jamie Lokier 2002-08-10 17:46 ` Linus Torvalds 2002-08-10 17:55 ` Jamie Lokier 2002-08-10 18:42 ` Linus Torvalds 2002-08-10 18:52 ` Jeff Garzik 2002-08-10 19:01 ` Christoph Hellwig 2002-08-10 19:04 ` Jeff Garzik 2002-08-12 15:20 ` Ingo Oeser 2002-08-12 0:18 ` Albert D. Cahalan 2002-08-12 14:11 ` Jeff Garzik 2002-08-12 14:46 ` David Woodhouse 2002-08-10 19:10 ` Jamie Lokier 2002-08-10 22:42 ` Linus Torvalds 2002-08-11 3:17 ` Simon Kirby 2002-08-11 6:07 ` Andrew Morton 2002-08-11 8:46 ` Simon Kirby 2002-08-11 9:36 ` Andrew Morton 2002-08-11 9:49 ` Andrew Morton 2002-08-11 10:28 ` Andrew Morton 2002-08-11 18:52 ` Linus Torvalds 2002-08-12 3:28 ` Andrew Morton 2002-08-12 3:27 ` Linus Torvalds 2002-08-12 4:08 ` Andrew Morton 2002-08-12 6:20 ` Simon Kirby 2002-08-12 6:44 ` Andrew Morton 2002-08-12 19:43 ` Trond Myklebust 2002-08-12 20:43 ` Andrew Morton 2002-08-11 8:00 ` Daniel Phillips 2002-08-11 19:00 ` Linus Torvalds 2002-08-11 19:43 ` Daniel Phillips 2002-08-11 0:34 ` Andrew Morton 2002-08-11 0:56 ` Linus Torvalds 2002-08-11 1:27 ` Andrew Morton 2002-08-12 7:45 ` Rusty Russell 2002-08-12 9:45 ` Daniel Phillips 2002-08-12 20:29 ` Linus Torvalds 2002-08-12 21:21 ` Daniel Phillips 2002-08-12 17:30 ` Linus Torvalds
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox