* [PATCH] new version of singlecopy pipe
@ 2001-05-11 19:00 Manfred Spraul
2001-05-12 0:07 ` J . A . Magallon
0 siblings, 1 reply; 9+ messages in thread
From: Manfred Spraul @ 2001-05-11 19:00 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 385 bytes --]
I've attached a new version of the patch.
The patch is now split into 2 parts:
* add copy_user_to_user() into kernel/ptrace.c. Most code is shared
between access_process_vm() and copy_user_to_user().
* use the new function for singlecopy pipe.
Please test it.
The kernel space part should be ok, but I know that the
patch can cause deadlocks with buggy user space apps.
--
Manfred
[-- Attachment #2: patch-kiopipe --]
[-- Type: text/plain, Size: 14868 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 4
// SUBLEVEL = 4
// EXTRAVERSION =
--- 2.4/fs/pipe.c Mon May 7 20:43:38 2001
+++ build-2.4/fs/pipe.c Fri May 11 19:17:45 2001
@@ -2,6 +2,9 @@
* linux/fs/pipe.c
*
* Copyright (C) 1991, 1992, 1999 Linus Torvalds
+ *
+ * Major pipe_read() and pipe_write() cleanup: Single copy,
+ * fewer schedules. Copyright (C) 2001 Manfred Spraul
*/
#include <linux/mm.h>
@@ -10,6 +13,8 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/iobuf.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -36,214 +41,303 @@
down(PIPE_SEM(*inode));
}
+#define ADDR_USER 1
+#define ADDR_KERNEL 2
+struct pipe_pio {
+ struct list_head list;
+ int state; /* >0: still pending. 0: success. < 0:error code */
+ struct task_struct *tsk;
+ unsigned long addr;
+ size_t len;
+};
+
+static ssize_t
+pio_copy_to_user(struct inode *inode, char *ubuf, ssize_t chars)
+{
+ struct pipe_pio *pio;
+ struct mm_struct *mm;
+ ssize_t ret;
+ pio = list_entry(PIPE_PIO(*inode).next, struct pipe_pio, list);
+ mm = pio->tsk->mm;
+
+ if (chars > pio->len)
+ chars = pio->len;
+ if (pio->state == ADDR_KERNEL) {
+ /* kernel thread writes into a pipe. */
+ if(copy_to_user(ubuf, (void*)pio->addr, chars))
+ return -EFAULT;
+ } else {
+ ret = 0;
+ /* pio->tsk is within pipe_write(), we don't have to protect
+ * against sudden death of that thread.
+ */
+ switch(copy_user_to_user(ubuf, mm, pio->addr, chars, 0))
+ {
+ case 0:
+ break;
+ case OTHER_EFAULT:
+ pio->state = -EFAULT;
+ goto unlink;
+ case OTHER_ENOMEM:
+ pio->state = -ENOMEM;
+ goto unlink;
+ case CUR_EFAULT:
+ return -EFAULT;
+ }
+ }
+ pio->addr += chars;
+ pio->len -= chars;
+ ret = chars;
+
+ if (!pio->len) {
+ pio->state = 0;
+unlink:
+ list_del(&pio->list);
+ wake_up_process(pio->tsk);
+ if (!ret && list_empty(&PIPE_PIO(*inode))) {
+ /*
+ * Lock-up:
+ * A block with a bad pointer was queued
+ * by pipe_write() and got deleted.
+ * The pipe was filled and is now empty
+ * after reading 0 bytes ;-)
+ * Wakeup to recover.
+ */
+ wake_up_interruptible(PIPE_WAIT(*inode));
+ }
+ }
+ return ret;
+}
+
static ssize_t
pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
struct inode *inode = filp->f_dentry->d_inode;
- ssize_t size, read, ret;
+ ssize_t read;
- /* Seeks are not allowed on pipes. */
- ret = -ESPIPE;
- read = 0;
+ /* pread is not allowed on pipes. */
if (ppos != &filp->f_pos)
- goto out_nolock;
+ return -ESPIPE;
/* Always return 0 on null read. */
- ret = 0;
if (count == 0)
- goto out_nolock;
+ return 0;
- /* Get the pipe semaphore */
- ret = -ERESTARTSYS;
- if (down_interruptible(PIPE_SEM(*inode)))
- goto out_nolock;
-
- if (PIPE_EMPTY(*inode)) {
-do_more_read:
- ret = 0;
- if (!PIPE_WRITERS(*inode))
- goto out;
-
- ret = -EAGAIN;
- if (filp->f_flags & O_NONBLOCK)
- goto out;
+ down(PIPE_SEM(*inode));
- for (;;) {
- PIPE_WAITING_READERS(*inode)++;
- pipe_wait(inode);
- PIPE_WAITING_READERS(*inode)--;
- ret = -ERESTARTSYS;
- if (signal_pending(current))
+ read = 0;
+ for (;;) {
+ /* read what data is available */
+ int chars = PIPE_LEN(*inode);
+ if (chars) {
+ char *pipebuf = PIPE_BASE(*inode);
+ int offset = PIPE_START(*inode);
+ if (chars > count)
+ chars = count;
+ if (chars > PIPE_SIZE-offset)
+ chars = PIPE_SIZE-offset;
+ if (copy_to_user(buf, pipebuf+offset, chars)) {
+ if (!read)
+ read = -EFAULT;
goto out;
- ret = 0;
- if (!PIPE_EMPTY(*inode))
- break;
- if (!PIPE_WRITERS(*inode))
+ }
+ PIPE_LEN(*inode) -= chars;
+ if (!PIPE_LEN(*inode)) {
+ /* Cache behaviour optimization */
+ PIPE_START(*inode) = 0;
+ } else {
+ PIPE_START(*inode) += chars;
+ PIPE_START(*inode) &= (PIPE_SIZE - 1);
+ }
+ } else if (!list_empty(&PIPE_PIO(*inode))) {
+ chars = pio_copy_to_user(inode, buf, count);
+ if(chars < 0) {
+ if (!read)
+ read = chars;
goto out;
+ }
}
- }
-
- /* Read what data is available. */
- ret = -EFAULT;
- while (count > 0 && (size = PIPE_LEN(*inode))) {
- char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
- ssize_t chars = PIPE_MAX_RCHUNK(*inode);
-
- if (chars > count)
- chars = count;
- if (chars > size)
- chars = size;
-
- if (copy_to_user(buf, pipebuf, chars))
- goto out;
-
read += chars;
- PIPE_START(*inode) += chars;
- PIPE_START(*inode) &= (PIPE_SIZE - 1);
- PIPE_LEN(*inode) -= chars;
count -= chars;
buf += chars;
- }
- /* Cache behaviour optimization */
- if (!PIPE_LEN(*inode))
- PIPE_START(*inode) = 0;
+ if (!count)
+ goto out; /* common case: done */
+
+ /* special case:
+ * additional data available.
+ */
+ if (PIPE_LEN(*inode) || !list_empty(&PIPE_PIO(*inode)))
+ continue;
- if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
- /*
- * We know that we are going to sleep: signal
- * writers synchronously that there is more
- * room.
+ /* tests before sleeping:
+ * - don't sleep if data was read.
*/
- wake_up_interruptible_sync(PIPE_WAIT(*inode));
- if (!PIPE_EMPTY(*inode))
- BUG();
- goto do_more_read;
- }
- /* Signal writers asynchronously that there is more room. */
- wake_up_interruptible(PIPE_WAIT(*inode));
+ if (read)
+ goto out;
+
+ /* - don't sleep if no process has the pipe open
+ * for writing
+ */
+ if (!PIPE_WRITERS(*inode))
+ goto out;
- ret = read;
+ /* - don't sleep if O_NONBLOCK is set */
+ if (filp->f_flags & O_NONBLOCK) {
+ read = -EAGAIN;
+ goto out;
+ }
+
+ pipe_wait(inode);
+ if (signal_pending(current)) {
+ read = -ERESTARTSYS;
+ goto out;
+ }
+ }
out:
up(PIPE_SEM(*inode));
-out_nolock:
- if (read)
- ret = read;
- return ret;
+ /* Signal asynchronously that the pipe became empty. */
+ if (read > 0 && !PIPE_LEN(*inode) && list_empty(&PIPE_PIO(*inode)))
+ wake_up_interruptible(PIPE_WAIT(*inode));
+
+ return read;
}
static ssize_t
pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
{
struct inode *inode = filp->f_dentry->d_inode;
- ssize_t free, written, ret;
+ size_t min;
+ size_t free;
+ ssize_t ret;
- /* Seeks are not allowed on pipes. */
- ret = -ESPIPE;
- written = 0;
+ /* pwrite is not allowed on pipes. */
if (ppos != &filp->f_pos)
- goto out_nolock;
+ return -ESPIPE;
/* Null write succeeds. */
- ret = 0;
if (count == 0)
- goto out_nolock;
+ return 0;
- ret = -ERESTARTSYS;
- if (down_interruptible(PIPE_SEM(*inode)))
- goto out_nolock;
+ down(PIPE_SEM(*inode));
/* No readers yields SIGPIPE. */
+ ret = -EPIPE;
if (!PIPE_READERS(*inode))
- goto sigpipe;
-
- /* If count <= PIPE_BUF, we have to make it atomic. */
- free = (count <= PIPE_BUF ? count : 1);
+ goto out;
- /* Wait, or check for, available space. */
- if (filp->f_flags & O_NONBLOCK) {
- ret = -EAGAIN;
- if (PIPE_FREE(*inode) < free)
+ free = 0;
+ if (list_empty(&PIPE_PIO(*inode)))
+ free = PIPE_FREE(*inode);
+ min = count;
+ if (min > PIPE_BUF && (filp->f_flags & O_NONBLOCK))
+ min = 1; /* no atomicity guarantee for transfers > PIPE_BUF */
+
+ if (free >= min) {
+ /* Enough space for the minimal transfer.
+ * Use the kernel buffer.
+ */
+ int offset;
+ ret = 0;
+next_chunk:
+ if (free > count)
+ free = count;
+ offset = PIPE_END(*inode);
+ if (free > PIPE_SIZE-offset)
+ free = PIPE_SIZE-offset;
+
+ if (copy_from_user(PIPE_BASE(*inode)+offset, buf, free)) {
+ if (!ret)
+ ret = -EFAULT;
goto out;
- } else {
- while (PIPE_FREE(*inode) < free) {
- PIPE_WAITING_WRITERS(*inode)++;
- pipe_wait(inode);
- PIPE_WAITING_WRITERS(*inode)--;
- ret = -ERESTARTSYS;
- if (signal_pending(current))
- goto out;
-
- if (!PIPE_READERS(*inode))
- goto sigpipe;
}
- }
-
- /* Copy into available space. */
- ret = -EFAULT;
- while (count > 0) {
- int space;
- char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
- ssize_t chars = PIPE_MAX_WCHUNK(*inode);
-
- if ((space = PIPE_FREE(*inode)) != 0) {
- if (chars > count)
- chars = count;
- if (chars > space)
- chars = space;
-
- if (copy_from_user(pipebuf, buf, chars))
- goto out;
- written += chars;
- PIPE_LEN(*inode) += chars;
- count -= chars;
- buf += chars;
- space = PIPE_FREE(*inode);
- continue;
+ ret += free;
+ PIPE_LEN(*inode) += free;
+ buf += free;
+ count -= free;
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ wake_up_interruptible(PIPE_WAIT(*inode));
+ if (count) {
+ free = PIPE_FREE(*inode);
+ if (free)
+ goto next_chunk;
}
+ } else if (filp->f_flags & O_NONBLOCK) {
+ ret = -EAGAIN;
+ } else {
+ /* Blocking transfer.
+ * Set up single copy and wait until a reader
+ * has eaten the data.
+ */
+ struct pipe_pio pio_desc;
- ret = written;
- if (filp->f_flags & O_NONBLOCK)
- break;
-
- do {
- /*
- * Synchronous wake-up: it knows that this process
- * is going to give up this CPU, so it doesnt have
- * to do idle reschedules.
- */
- wake_up_interruptible_sync(PIPE_WAIT(*inode));
- PIPE_WAITING_WRITERS(*inode)++;
- pipe_wait(inode);
- PIPE_WAITING_WRITERS(*inode)--;
- if (signal_pending(current))
- goto out;
- if (!PIPE_READERS(*inode))
- goto sigpipe;
- } while (!PIPE_FREE(*inode));
ret = -EFAULT;
- }
-
- /* Signal readers asynchronously that there is more data. */
- wake_up_interruptible(PIPE_WAIT(*inode));
+ if(!access_ok(VERIFY_READ, buf, count))
+ goto out;
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- mark_inode_dirty(inode);
+ list_add_tail(&pio_desc.list, &PIPE_PIO(*inode));
+ if (segment_eq(get_fs(),KERNEL_DS))
+ pio_desc.state = ADDR_KERNEL;
+ else
+ pio_desc.state = ADDR_USER;
+ pio_desc.tsk = current;
+ pio_desc.addr = (unsigned long)buf;
+ pio_desc.len = count;
+
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ /*
+ * Synchronous wake-up: this process is going to
+ * give up this CPU, so it doesn't have to do
+ * idle reschedules.
+ */
+ wake_up_interruptible_sync(PIPE_WAIT(*inode));
+ for(;;) {
+ pipe_wait(inode);
+ if (!pio_desc.state) {
+ ret = count;
+ break;
+ }
+ if (pio_desc.state < 0) {
+ /* FIXME:
+ * ENOMEM is not a valid return code
+ * for pipe_write. Under OOM the return
+ * code could return ENOMEM.
+ * do_exit(SIGKILL)?
+ */
+ ret = pio_desc.state;
+ break;
+ }
+ /* No readers yields SIGPIPE. */
+ if (!PIPE_READERS(*inode)) {
+ ret = count - pio_desc.len;
+ if (!ret)
+ ret = -EPIPE;
+ goto unwire;
+ }
+
+ if (signal_pending(current)) {
+ ret = count - pio_desc.len;
+ if (!ret)
+ ret = -ERESTARTSYS;
+unwire:
+ /* unwire single copy transfer */
+ list_del(&pio_desc.list);
+ if (!PIPE_LEN(*inode) && list_empty(&PIPE_PIO(*inode)))
+ wake_up_interruptible(PIPE_WAIT(*inode));
+ break;
+ }
+ }
+ }
out:
up(PIPE_SEM(*inode));
-out_nolock:
- if (written)
- ret = written;
+ if (ret == -EPIPE)
+ send_sig(SIGPIPE, current, 0);
return ret;
-
-sigpipe:
- if (written)
- goto out;
- up(PIPE_SEM(*inode));
- send_sig(SIGPIPE, current, 0);
- return -EPIPE;
}
static loff_t
@@ -264,13 +358,31 @@
return -EBADF;
}
+static inline int pipe_getlen(struct inode *inode)
+{
+ unsigned long long len;
+ struct list_head *walk;
+
+ down(PIPE_SEM(*inode));
+ len = PIPE_LEN(*inode);
+ list_for_each(walk, &PIPE_PIO(*inode)) {
+ struct pipe_pio *pio;
+ pio = list_entry(PIPE_PIO(*inode).next, struct pipe_pio, list);
+ len += pio->len;
+ }
+ up(PIPE_SEM(*inode));
+ if (len > INT_MAX)
+ return INT_MAX;
+ return len;
+}
+
static int
pipe_ioctl(struct inode *pino, struct file *filp,
unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case FIONREAD:
- return put_user(PIPE_LEN(*pino), (int *)arg);
+ return put_user(pipe_getlen(pino), (int *)arg);
default:
return -EINVAL;
}
@@ -287,7 +399,7 @@
/* Reading only -- no need for acquiring the semaphore. */
mask = POLLIN | POLLRDNORM;
- if (PIPE_EMPTY(*inode))
+ if (!PIPE_LEN(*inode) && list_empty(&PIPE_PIO(*inode)))
mask = POLLOUT | POLLWRNORM;
if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
mask |= POLLHUP;
@@ -454,9 +566,9 @@
init_waitqueue_head(PIPE_WAIT(*inode));
PIPE_BASE(*inode) = (char*) page;
+ INIT_LIST_HEAD(&PIPE_PIO(*inode));
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
- PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
return inode;
--- 2.4/include/linux/pipe_fs_i.h Mon May 7 20:43:38 2001
+++ build-2.4/include/linux/pipe_fs_i.h Mon May 7 19:38:12 2001
@@ -5,12 +5,11 @@
struct pipe_inode_info {
wait_queue_head_t wait;
char *base;
- unsigned int len;
+ unsigned int len; /* not including pio buffers */
unsigned int start;
+ struct list_head pio;
unsigned int readers;
unsigned int writers;
- unsigned int waiting_readers;
- unsigned int waiting_writers;
unsigned int r_counter;
unsigned int w_counter;
};
@@ -24,19 +23,14 @@
#define PIPE_BASE(inode) ((inode).i_pipe->base)
#define PIPE_START(inode) ((inode).i_pipe->start)
#define PIPE_LEN(inode) ((inode).i_pipe->len)
+#define PIPE_PIO(inode) ((inode).i_pipe->pio)
#define PIPE_READERS(inode) ((inode).i_pipe->readers)
#define PIPE_WRITERS(inode) ((inode).i_pipe->writers)
-#define PIPE_WAITING_READERS(inode) ((inode).i_pipe->waiting_readers)
-#define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers)
#define PIPE_RCOUNTER(inode) ((inode).i_pipe->r_counter)
#define PIPE_WCOUNTER(inode) ((inode).i_pipe->w_counter)
-#define PIPE_EMPTY(inode) (PIPE_LEN(inode) == 0)
-#define PIPE_FULL(inode) (PIPE_LEN(inode) == PIPE_SIZE)
#define PIPE_FREE(inode) (PIPE_SIZE - PIPE_LEN(inode))
#define PIPE_END(inode) ((PIPE_START(inode) + PIPE_LEN(inode)) & (PIPE_SIZE-1))
-#define PIPE_MAX_RCHUNK(inode) (PIPE_SIZE - PIPE_START(inode))
-#define PIPE_MAX_WCHUNK(inode) (PIPE_SIZE - PIPE_END(inode))
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct inode * inode);
--- 2.4/fs/fifo.c Mon May 7 20:43:38 2001
+++ build-2.4/fs/fifo.c Mon May 7 19:38:12 2001
@@ -32,10 +32,8 @@
{
int ret;
- ret = -ERESTARTSYS;
- lock_kernel();
if (down_interruptible(PIPE_SEM(*inode)))
- goto err_nolock_nocleanup;
+ return -ERESTARTSYS;
if (!inode->i_pipe) {
ret = -ENOMEM;
@@ -116,7 +114,6 @@
/* Ok! */
up(PIPE_SEM(*inode));
- unlock_kernel();
return 0;
err_rd:
@@ -141,9 +138,6 @@
err_nocleanup:
up(PIPE_SEM(*inode));
-
-err_nolock_nocleanup:
- unlock_kernel();
return ret;
}
[-- Attachment #3: patch-copy_user_user --]
[-- Type: text/plain, Size: 5490 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 4
// SUBLEVEL = 4
// EXTRAVERSION =
--- 2.4/kernel/ptrace.c Sat Mar 31 21:49:29 2001
+++ build-2.4/kernel/ptrace.c Fri May 11 18:38:02 2001
@@ -19,13 +19,14 @@
/*
* Access another process' address space, one page at a time.
*/
-static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, size_t len, int write)
{
pgd_t * pgdir;
pmd_t * pgmiddle;
pte_t * pgtable;
char *maddr;
struct page *page;
+ int i;
repeat:
spin_lock(&mm->page_table_lock);
@@ -50,7 +51,7 @@
if (page != ZERO_PAGE(addr) || write) {
if ((!VALID_PAGE(page)) || PageReserved(page)) {
spin_unlock(&mm->page_table_lock);
- return 0;
+ return OTHER_EFAULT;
}
}
get_page(page);
@@ -59,38 +60,40 @@
if (write) {
maddr = kmap(page);
- memcpy(maddr + (addr & ~PAGE_MASK), buf, len);
+ i = copy_from_user(maddr + (addr & ~PAGE_MASK), buf, len);
flush_page_to_ram(page);
flush_icache_page(vma, page);
kunmap(page);
} else {
maddr = kmap(page);
- memcpy(buf, maddr + (addr & ~PAGE_MASK), len);
+ i = copy_to_user(buf, maddr + (addr & ~PAGE_MASK), len);
flush_page_to_ram(page);
kunmap(page);
}
put_page(page);
- return len;
+ return i ? CUR_EFAULT : 0;
fault_in_page:
spin_unlock(&mm->page_table_lock);
/* -1: out of memory. 0 - unmapped page */
- if (handle_mm_fault(mm, vma, addr, write) > 0)
+ i = handle_mm_fault(mm, vma, addr, write);
+ if (i > 0)
goto repeat;
- return 0;
-
+ if (i < 0)
+ return OTHER_ENOMEM;
+ return OTHER_EFAULT;
bad_pgd:
spin_unlock(&mm->page_table_lock);
pgd_ERROR(*pgdir);
- return 0;
+ return OTHER_EFAULT;
bad_pmd:
spin_unlock(&mm->page_table_lock);
pmd_ERROR(*pgmiddle);
- return 0;
+ return OTHER_EFAULT;
}
-static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, size_t len, int write, int *pcopied)
{
int copied = 0;
@@ -102,11 +105,14 @@
if (this_len > len)
this_len = len;
retval = access_one_page(mm, vma, addr, buf, this_len, write);
- copied += retval;
- if (retval != this_len)
- break;
+ if (retval) {
+ if (pcopied)
+ *pcopied = copied;
+ return retval;
+ }
+ copied += this_len;
- len -= retval;
+ len -= this_len;
if (!len)
break;
@@ -115,17 +121,20 @@
if (addr < vma->vm_end)
continue;
- if (!vma->vm_next)
- break;
- if (vma->vm_next->vm_start != vma->vm_end)
- break;
+ if (!vma->vm_next || vma->vm_next->vm_start != vma->vm_end) {
+ if (pcopied)
+ *pcopied = copied;
+ return OTHER_EFAULT;
+ }
vma = vma->vm_next;
}
- return copied;
+ if (pcopied)
+ *pcopied = copied;
+ return 0;
}
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, size_t len, int write)
{
int copied;
struct mm_struct *mm;
@@ -143,12 +152,44 @@
down_read(&mm->mmap_sem);
vma = find_extend_vma(mm, addr);
copied = 0;
- if (vma)
- copied = access_mm(mm, vma, addr, buf, len, write);
+ if (vma) {
+ mm_segment_t old;
+ old = get_fs();
+ set_fs(KERNEL_DS);
+ /* silently ignore failues, perhaps the caller
+ * is interested in partial transfers
+ */
+ access_mm(mm, vma, addr, buf, len, write, &copied);
+ set_fs(old);
+ }
up_read(&mm->mmap_sem);
mmput(mm);
return copied;
+}
+
+/*
+ * transfers 'len' bytes from/to from 'mm:oaddr' to/from
+ * 'current->mm:cbuf'.
+ * The caller must guarantee that mm won't go away.
+ * if do_write is true, then the data is written into 'mm:oaddr'
+ * Return 0 on success.
+ */
+
+
+int copy_user_to_user(char *cbuf, struct mm_struct *mm, unsigned long oaddr, size_t len, int do_write)
+{
+ struct vm_area_struct * vma;
+ int retval;
+ down_read(&mm->mmap_sem);
+ vma = find_extend_vma(mm, oaddr);
+ if (vma)
+ retval = access_mm(mm, vma, oaddr, cbuf, len, do_write, NULL);
+ else
+ retval = OTHER_EFAULT;
+
+ up_read(&mm->mmap_sem);
+ return retval;
}
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len)
--- 2.4/include/linux/mm.h Wed May 9 21:03:43 2001
+++ build-2.4/include/linux/mm.h Fri May 11 17:17:54 2001
@@ -399,9 +399,15 @@
extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
extern int make_pages_present(unsigned long addr, unsigned long end);
-extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, size_t len, int write);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
+
+#define OTHER_EFAULT 1
+#define OTHER_ENOMEM 2
+#define CUR_EFAULT 3
+extern int copy_user_to_user(char *cbuf, struct mm_struct *mm, unsigned long oaddr, size_t len, int do_write);
+
/*
* On a two-level page table, this ends up being trivial. Thus the
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-11 19:00 [PATCH] new version of singlecopy pipe Manfred Spraul
@ 2001-05-12 0:07 ` J . A . Magallon
2001-05-12 0:19 ` J . A . Magallon
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: J . A . Magallon @ 2001-05-12 0:07 UTC (permalink / raw)
To: Manfred Spraul; +Cc: linux-kernel
On 05.11 Manfred Spraul wrote:
>
> Please test it.
> The kernel space part should be ok, but I know that the
> patch can cause deadlocks with buggy user space apps.
>
I tried your patch on 2.4.4-ac8, and something strange happens.
Untarring linux-2.4.4 takes a little time, disk light flashes,
but no files appear on the disk (just 'Makefile', as you will see below).
Doing a separate gunzip - tar xf works fine:
werewolf:~/soft/kernel# gtar zxf linux-2.4.4.tar.gz
werewolf:~/soft/kernel# ls linux-2.4.4
Makefile
werewolf:~/soft/kernel# sync
werewolf:~/soft/kernel# ls linux-2.4.4
Makefile
werewolf:~/soft/kernel# gunzip linux-2.4.4.tar.gz
werewolf:~/soft/kernel# tar xf linux-2.4.4.tar
werewolf:~/soft/kernel# ls linux-2.4.4
COPYING MAINTAINERS REPORTING-BUGS drivers/ init/ lib/ scripts/
CREDITS Makefile Rules.make fs/ ipc/ mm/
Documentation/ README arch/ include/ kernel/ net/
Some buffers get not flushed ???
If they can interact with yours, I have also applied the patches for
- cache-align from Andrea (I suppose it has nothing to do with yours)
- context-switches reduction by Mike Galbraith (perhaps ?)
and a silly couple more (CC:=$(CC) in Makefile, missing return in aic7xxx).
--
J.A. Magallon # Let the source be with you...
mailto:jamagallon@able.es
Linux Mandrake release 8.1 (Cooker) for i586
Linux werewolf 2.4.4-ac8 #1 SMP Sat May 12 01:16:37 CEST 2001 i686
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-12 0:07 ` J . A . Magallon
@ 2001-05-12 0:19 ` J . A . Magallon
2001-05-12 0:25 ` David S. Miller
2001-05-12 8:21 ` Manfred Spraul
2 siblings, 0 replies; 9+ messages in thread
From: J . A . Magallon @ 2001-05-12 0:19 UTC (permalink / raw)
To: J . A . Magallon; +Cc: Manfred Spraul, linux-kernel
On 05.12 J . A . Magallon wrote:
>
> On 05.11 Manfred Spraul wrote:
> >
> > Please test it.
> > The kernel space part should be ok, but I know that the
> > patch can cause deadlocks with buggy user space apps.
> >
>
> I tried your patch on 2.4.4-ac8, and something strange happens.
> Untarring linux-2.4.4 takes a little time, disk light flashes,
> but no files appear on the disk (just 'Makefile', as you will see below).
> Doing a separate gunzip - tar xf works fine:
>
Quoting myself, I have tried with other tar.gz files and only the first
file gets extracted.
With a tar.bz2 file, I get:
werewolf:~/io> gtar jxf src.tar.bz2
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Skipping to next header
gtar: Error exit delayed from previous errors
but all files seem to be there.
--
J.A. Magallon # Let the source be with you...
mailto:jamagallon@able.es
Linux Mandrake release 8.1 (Cooker) for i586
Linux werewolf 2.4.4-ac8 #1 SMP Sat May 12 01:16:37 CEST 2001 i686
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-12 0:07 ` J . A . Magallon
2001-05-12 0:19 ` J . A . Magallon
@ 2001-05-12 0:25 ` David S. Miller
2001-05-12 7:50 ` J . A . Magallon
2001-05-12 8:21 ` Manfred Spraul
2 siblings, 1 reply; 9+ messages in thread
From: David S. Miller @ 2001-05-12 0:25 UTC (permalink / raw)
To: J . A . Magallon; +Cc: Manfred Spraul, linux-kernel
J . A . Magallon writes:
> I tried your patch on 2.4.4-ac8, and something strange happens.
> Untarring linux-2.4.4 takes a little time, disk light flashes,
> but no files appear on the disk (just 'Makefile', as you will see below).
> Doing a separate gunzip - tar xf works fine:
What platform?
Later,
David S. Miller
davem@redhat.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-12 0:25 ` David S. Miller
@ 2001-05-12 7:50 ` J . A . Magallon
2001-05-12 8:23 ` David S. Miller
0 siblings, 1 reply; 9+ messages in thread
From: J . A . Magallon @ 2001-05-12 7:50 UTC (permalink / raw)
To: David S . Miller; +Cc: Manfred Spraul, linux-kernel
On 05.12 David S. Miller wrote:
>
> J . A . Magallon writes:
> > I tried your patch on 2.4.4-ac8, and something strange happens.
> > Untarring linux-2.4.4 takes a little time, disk light flashes,
> > but no files appear on the disk (just 'Makefile', as you will see below).
> > Doing a separate gunzip - tar xf works fine:
>
> What platform?
>
Mandrake 8.1 Cooker. Dual PII@400, 440BX, 256Mb. One aic7890 under Gibbs driver.
gcc 2.96-0.50mdk (matches 84-redhat).
Any more info ?
--
J.A. Magallon # Let the source be with you...
mailto:jamagallon@able.es
Linux Mandrake release 8.1 (Cooker) for i586
Linux werewolf 2.4.4-ac8 #1 SMP Sat May 12 01:16:37 CEST 2001 i686
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-12 0:07 ` J . A . Magallon
2001-05-12 0:19 ` J . A . Magallon
2001-05-12 0:25 ` David S. Miller
@ 2001-05-12 8:21 ` Manfred Spraul
2 siblings, 0 replies; 9+ messages in thread
From: Manfred Spraul @ 2001-05-12 8:21 UTC (permalink / raw)
To: J . A . Magallon; +Cc: linux-kernel
"J . A . Magallon" wrote:
>
> On 05.11 Manfred Spraul wrote:
> >
> > Please test it.
> > The kernel space part should be ok, but I know that the
> > patch can cause deadlocks with buggy user space apps.
> >
>
> I tried your patch on 2.4.4-ac8, and something strange happens.
> Untarring linux-2.4.4 takes a little time, disk light flashes,
> but no files appear on the disk (just 'Makefile', as you will see below).
> Doing a separate gunzip - tar xf works fine:
>
> werewolf:~/soft/kernel# gtar zxf linux-2.4.4.tar.gz
> werewolf:~/soft/kernel# ls linux-2.4.4
> Makefile
Could you try if 'strace -f -F gtar zxf linux-2.4.4.tar.gz > /tmp/trace
2>&1' hangs?
If yes, please send me the strace.
--
Manfred
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-12 7:50 ` J . A . Magallon
@ 2001-05-12 8:23 ` David S. Miller
2001-05-17 20:59 ` Manfred Spraul
0 siblings, 1 reply; 9+ messages in thread
From: David S. Miller @ 2001-05-12 8:23 UTC (permalink / raw)
To: J . A . Magallon; +Cc: Manfred Spraul, linux-kernel
J . A . Magallon writes:
> > What platform?
> Any more info ?
No, I thought it might be some cache flushing issue
on a non-x86 machine.
Later,
David S. Miller
davem@redhat.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-12 8:23 ` David S. Miller
@ 2001-05-17 20:59 ` Manfred Spraul
2001-05-18 11:08 ` J . A . Magallon
0 siblings, 1 reply; 9+ messages in thread
From: Manfred Spraul @ 2001-05-17 20:59 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 306 bytes --]
"David S. Miller" wrote:
>
> J . A . Magallon writes:
> > > What platform?
>
> > Any more info ?
>
> No, I thought it might be some cache flushing issue
> on a non-x86 machine.
>
I found the problem:
I sent out the old patch :-(
Attached is the correct version of patch-copy_user_user.
--
Manfred
[-- Attachment #2: patch-copy_user_user --]
[-- Type: text/plain, Size: 5552 bytes --]
// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 4
// SUBLEVEL = 4
// EXTRAVERSION = -ac8
--- 2.4/kernel/ptrace.c Sat Mar 31 21:49:29 2001
+++ build-2.4/kernel/ptrace.c Thu May 17 19:43:09 2001
@@ -19,13 +19,14 @@
/*
* Access another process' address space, one page at a time.
*/
-static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, size_t len, int write)
{
pgd_t * pgdir;
pmd_t * pgmiddle;
pte_t * pgtable;
char *maddr;
struct page *page;
+ int i;
repeat:
spin_lock(&mm->page_table_lock);
@@ -50,7 +51,7 @@
if (page != ZERO_PAGE(addr) || write) {
if ((!VALID_PAGE(page)) || PageReserved(page)) {
spin_unlock(&mm->page_table_lock);
- return 0;
+ return OTHER_EFAULT;
}
}
get_page(page);
@@ -59,38 +60,40 @@
if (write) {
maddr = kmap(page);
- memcpy(maddr + (addr & ~PAGE_MASK), buf, len);
+ i = copy_from_user(maddr + (addr & ~PAGE_MASK), buf, len);
flush_page_to_ram(page);
flush_icache_page(vma, page);
kunmap(page);
} else {
maddr = kmap(page);
- memcpy(buf, maddr + (addr & ~PAGE_MASK), len);
+ i = copy_to_user(buf, maddr + (addr & ~PAGE_MASK), len);
flush_page_to_ram(page);
kunmap(page);
}
put_page(page);
- return len;
+ return i ? CUR_EFAULT : 0;
fault_in_page:
spin_unlock(&mm->page_table_lock);
/* -1: out of memory. 0 - unmapped page */
- if (handle_mm_fault(mm, vma, addr, write) > 0)
+ i = handle_mm_fault(mm, vma, addr, write);
+ if (i > 0)
goto repeat;
- return 0;
-
+ if (i < 0)
+ return OTHER_ENOMEM;
+ return OTHER_EFAULT;
bad_pgd:
spin_unlock(&mm->page_table_lock);
pgd_ERROR(*pgdir);
- return 0;
+ return OTHER_EFAULT;
bad_pmd:
spin_unlock(&mm->page_table_lock);
pmd_ERROR(*pgmiddle);
- return 0;
+ return OTHER_EFAULT;
}
-static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, size_t len, int write, int *pcopied)
{
int copied = 0;
@@ -102,30 +105,36 @@
if (this_len > len)
this_len = len;
retval = access_one_page(mm, vma, addr, buf, this_len, write);
- copied += retval;
- if (retval != this_len)
- break;
+ if (retval) {
+ if (pcopied)
+ *pcopied = copied;
+ return retval;
+ }
+ copied += this_len;
- len -= retval;
+ len -= this_len;
if (!len)
break;
- addr += retval;
- buf += retval;
+ addr += this_len;
+ buf += this_len;
if (addr < vma->vm_end)
continue;
- if (!vma->vm_next)
- break;
- if (vma->vm_next->vm_start != vma->vm_end)
- break;
+ if (!vma->vm_next || vma->vm_next->vm_start != vma->vm_end) {
+ if (pcopied)
+ *pcopied = copied;
+ return OTHER_EFAULT;
+ }
vma = vma->vm_next;
}
- return copied;
+ if (pcopied)
+ *pcopied = copied;
+ return 0;
}
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, size_t len, int write)
{
int copied;
struct mm_struct *mm;
@@ -143,12 +152,44 @@
down_read(&mm->mmap_sem);
vma = find_extend_vma(mm, addr);
copied = 0;
- if (vma)
- copied = access_mm(mm, vma, addr, buf, len, write);
+ if (vma) {
+ mm_segment_t old;
+ old = get_fs();
+ set_fs(KERNEL_DS);
+ /* silently ignore failues, perhaps the caller
+ * is interested in partial transfers
+ */
+ access_mm(mm, vma, addr, buf, len, write, &copied);
+ set_fs(old);
+ }
up_read(&mm->mmap_sem);
mmput(mm);
return copied;
+}
+
+/*
+ * transfers 'len' bytes from/to from 'mm:oaddr' to/from
+ * 'current->mm:cbuf'.
+ * The caller must guarantee that mm won't go away.
+ * if do_write is true, then the data is written into 'mm:oaddr'
+ * Return 0 on success.
+ */
+
+
+int copy_user_to_user(char *cbuf, struct mm_struct *mm, unsigned long oaddr, size_t len, int do_write)
+{
+ struct vm_area_struct * vma;
+ int retval;
+ down_read(&mm->mmap_sem);
+ vma = find_extend_vma(mm, oaddr);
+ if (vma)
+ retval = access_mm(mm, vma, oaddr, cbuf, len, do_write, NULL);
+ else
+ retval = OTHER_EFAULT;
+
+ up_read(&mm->mmap_sem);
+ return retval;
}
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len)
--- 2.4/include/linux/mm.h Sat May 12 10:03:59 2001
+++ build-2.4/include/linux/mm.h Thu May 17 19:42:38 2001
@@ -440,9 +440,15 @@
extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
extern int make_pages_present(unsigned long addr, unsigned long end);
-extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, size_t len, int write);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
+
+#define OTHER_EFAULT 1
+#define OTHER_ENOMEM 2
+#define CUR_EFAULT 3
+extern int copy_user_to_user(char *cbuf, struct mm_struct *mm, unsigned long oaddr, size_t len, int do_write);
+
/*
* On a two-level page table, this ends up being trivial. Thus the
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] new version of singlecopy pipe
2001-05-17 20:59 ` Manfred Spraul
@ 2001-05-18 11:08 ` J . A . Magallon
0 siblings, 0 replies; 9+ messages in thread
From: J . A . Magallon @ 2001-05-18 11:08 UTC (permalink / raw)
To: Manfred Spraul; +Cc: linux-kernel
On 05.17 Manfred Spraul wrote:
> "David S. Miller" wrote:
> >
> > J . A . Magallon writes:
> > > > What platform?
> >
> > > Any more info ?
> >
> > No, I thought it might be some cache flushing issue
> > on a non-x86 machine.
> >
> I found the problem:
> I sent out the old patch :-(
>
> Attached is the correct version of patch-copy_user_user.
>
Yes, this time it worked fine (against ac11)...
--
J.A. Magallon # Let the source be with you...
mailto:jamagallon@able.es
Linux Mandrake release 8.1 (Cooker) for i586
Linux werewolf 2.4.4-ac11 #2 SMP Fri May 18 12:27:06 CEST 2001 i686
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2001-05-18 11:09 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2001-05-11 19:00 [PATCH] new version of singlecopy pipe Manfred Spraul
2001-05-12 0:07 ` J . A . Magallon
2001-05-12 0:19 ` J . A . Magallon
2001-05-12 0:25 ` David S. Miller
2001-05-12 7:50 ` J . A . Magallon
2001-05-12 8:23 ` David S. Miller
2001-05-17 20:59 ` Manfred Spraul
2001-05-18 11:08 ` J . A . Magallon
2001-05-12 8:21 ` Manfred Spraul
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox