All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yasushi Saito <ysaito@hpl.hp.com>
To: unlisted-recipients:; (no To-header on input)
Cc: suparna@in.ibm.com, Janet Morgan <janetmor@us.ibm.com>,
	ysaito@hpl.hp.com, linux-aio@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 2/2]  aio: add vectored I/O support
Date: Thu, 14 Oct 2004 13:10:28 -0700	[thread overview]
Message-ID: <416EDD34.1020704@hpl.hp.com> (raw)

This is the second part of the vectored I/O patch to libaio.
yaz

Signed-off-by: Yasushi Saito <ysaito@hpl.hp.com>

--- .pc/aio-vector.patch/fs/aio.c    2004-10-14 12:58:39 -07:00
+++ fs/aio.c    2004-10-14 12:58:40 -07:00
@@ -459,6 +459,8 @@ static inline void really_put_req(struct
     req->ki_obj.user = NULL;
     req->ki_dtor = NULL;
     req->private = NULL;
+    if (req->ki_slow_iov)
+        kfree(req->ki_slow_iov);
     kmem_cache_free(kiocb_cachep, req);
     ctx->reqs_active--;
 
@@ -1312,6 +1314,24 @@ asmlinkage long sys_io_destroy(aio_conte
     return -EINVAL;
 }
 
+static void
+aio_increment_iov(struct iovec **iov_ptr, unsigned long *nr_segs, 
size_t nr_bytes)
+{
+    struct iovec *iov = *iov_ptr;
+    while (nr_bytes > 0) {
+        if (iov->iov_len <= nr_bytes) {
+            nr_bytes -= iov->iov_len;
+            iov++;
+            (*nr_segs)--;
+        } else {
+            iov->iov_len -= nr_bytes;
+            iov->iov_base = (char*)iov->iov_base + nr_bytes;
+            break;
+        }
+    }
+    BUG_ON(*nr_segs >= 9999999);
+    *iov_ptr = iov;
+}
 /*
  * Default retry method for aio_read (also used for first time submit)
  * Responsible for updating iocb state as retries progress
@@ -1323,15 +1343,19 @@ static ssize_t aio_pread(struct kiocb *i
     struct inode *inode = mapping->host;
     ssize_t ret = 0;
 
-    ret = file->f_op->aio_read(iocb, iocb->ki_buf,
-        iocb->ki_left, iocb->ki_pos);
-
+     if (iocb->ki_nr_segs == 1) {
+         ret = file->f_op->aio_read(iocb, iocb->ki_iov[0].iov_base,
+                        iocb->ki_iov[0].iov_len,
+                        iocb->ki_pos);
+     } else {
+         ret = file->f_op->aio_readv(iocb);
+     }
     /*
      * Can't just depend on iocb->ki_left to determine
      * whether we are done. This may have been a short read.
      */
     if (ret > 0) {
-        iocb->ki_buf += ret;
+         aio_increment_iov(&iocb->ki_iov, &iocb->ki_nr_segs, ret);
         iocb->ki_left -= ret;
         /*
          * For pipes and sockets we return once we have
@@ -1360,11 +1384,16 @@ static ssize_t aio_pwrite(struct kiocb *
     struct file *file = iocb->ki_filp;
     ssize_t ret = 0;
 
-    ret = file->f_op->aio_write(iocb, iocb->ki_buf,
-        iocb->ki_left, iocb->ki_pos);
+     if (iocb->ki_nr_segs == 1) {
+         ret = file->f_op->aio_write(iocb, iocb->ki_iov[0].iov_base,
+                         iocb->ki_iov[0].iov_len,
+                         iocb->ki_pos);
+     } else {
+         ret = file->f_op->aio_writev(iocb);
+     }
 
     if (ret > 0) {
-        iocb->ki_buf += ret;
+         aio_increment_iov(&iocb->ki_iov, &iocb->ki_nr_segs, ret);
         iocb->ki_left -= ret;
 
         ret = -EIOCBRETRY;
@@ -1398,6 +1427,16 @@ static ssize_t aio_fsync(struct kiocb *i
     return ret;
 }
 
+static int aio_iov_access_ok(int mode, struct kiocb *kiocb)
+{
+     int i;
+     for (i = 0; i < kiocb->ki_nr_segs; i++)
+         if (unlikely(!access_ok(mode, kiocb->ki_iov[i].iov_base,
+                     kiocb->ki_iov[i].iov_len)))
+             return 0;
+     return 1;
+}
+
 /*
  * aio_setup_iocb:
  *    Performs the initial checks and aio retry method
@@ -1410,24 +1449,24 @@ ssize_t aio_setup_iocb(struct kiocb *kio
 
     switch (kiocb->ki_opcode) {
     case IOCB_CMD_PREAD:
+    case IOCB_CMD_PREADV:
         ret = -EBADF;
         if (unlikely(!(file->f_mode & FMODE_READ)))
             break;
         ret = -EFAULT;
-        if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
-            kiocb->ki_left)))
+        if (unlikely(!aio_iov_access_ok(VERIFY_WRITE, kiocb)))
             break;
         ret = -EINVAL;
         if (file->f_op->aio_read)
             kiocb->ki_retry = aio_pread;
         break;
     case IOCB_CMD_PWRITE:
+    case IOCB_CMD_PWRITEV:
         ret = -EBADF;
         if (unlikely(!(file->f_mode & FMODE_WRITE)))
             break;
         ret = -EFAULT;
-        if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
-            kiocb->ki_left)))
+        if (unlikely(!aio_iov_access_ok(VERIFY_READ, kiocb)))
             break;
         ret = -EINVAL;
         if (file->f_op->aio_write)
@@ -1495,16 +1534,6 @@ int fastcall io_submit_one(struct kioctx
         return -EINVAL;
     }
 
-    /* prevent overflows */
-    if (unlikely(
-        (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
-        (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
-        ((ssize_t)iocb->aio_nbytes < 0)
-       )) {
-        pr_debug("EINVAL: io_submit: overflow check\n");
-        return -EINVAL;
-    }
-
     file = fget(iocb->aio_fildes);
     if (unlikely(!file))
         return -EBADF;
@@ -1525,10 +1554,60 @@ int fastcall io_submit_one(struct kioctx
 
     req->ki_obj.user = user_iocb;
     req->ki_user_data = iocb->aio_data;
-    req->ki_pos = iocb->aio_offset;
-
-    req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
-    req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
+
+     req->ki_slow_iov = NULL;
+
+     switch (iocb->aio_lio_opcode) {
+     case IOCB_CMD_PREADV:
+         /* FALLTHROUGH */
+     case IOCB_CMD_PWRITEV:
+         ret = -EINVAL;
+         req->ki_pos = iocb->u.v.offset;
+         req->ki_nr_segs = iocb->u.v.nr;
+         req->ki_iov = &req->ki_fast_iov;
+         if (req->ki_nr_segs > 1) {
+             if (req->ki_nr_segs >= UIO_MAXIOV)
+                 goto out_put_req;
+             req->ki_slow_iov = kmalloc(sizeof(struct iovec) * 
req->ki_nr_segs, GFP_KERNEL);
+             req->ki_iov = req->ki_slow_iov;
+         }
+         ret = -EFAULT;
+         if (unlikely(copy_from_user(req->ki_iov, iocb->u.v.vec,
+                         sizeof(struct iovec) * req->ki_nr_segs)))
+             goto out_put_req;
+         /* Compute the total length; also make sure that the
+            length isn't ridiculously large. */
+         {
+             int i;
+             ssize_t tot_len = 0;
+             ret = -EINVAL;
+             for (i = 0;  i < req->ki_nr_segs; i++) {
+                 ssize_t len = (ssize_t)req->ki_iov[i].iov_len;
+                 tot_len += len;
+                 if (len < 0 || tot_len < 0)   
+                     // overflow
+                     goto out_put_req;
+             }
+             req->ki_nbytes = tot_len;
+         }
+         break;
+     default:
+         /* prevent overflows */
+         ret = -EINVAL;
+         if (unlikely((iocb->u.c.buf != (unsigned long)iocb->u.c.buf) ||
+                  (iocb->u.c.nbytes != (size_t)iocb->u.c.nbytes) ||
+                  ((ssize_t)iocb->u.c.nbytes < 0))) {
+             pr_debug("EINVAL: io_submit: overflow check\n");
+             goto out_put_req;
+         }
+         req->ki_pos = iocb->u.c.offset;
+         req->ki_nr_segs = 1;
+         req->ki_iov = &req->ki_fast_iov;
+         req->ki_iov->iov_base = (char __user*)(unsigned 
long)iocb->u.c.buf;
+         req->ki_iov->iov_len = iocb->u.c.nbytes;
+         req->ki_nbytes = iocb->u.c.nbytes;
+     }
+     req->ki_left = req->ki_nbytes;
     req->ki_opcode = iocb->aio_lio_opcode;
     init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
     INIT_LIST_HEAD(&req->ki_wait.task_list);
--- .pc/aio-vector.patch/fs/bad_inode.c    2004-10-14 12:58:38 -07:00
+++ fs/bad_inode.c    2004-10-14 12:58:40 -07:00
@@ -55,6 +55,9 @@ static struct file_operations bad_file_o
     .writev        = EIO_ERROR,
     .sendfile    = EIO_ERROR,
     .sendpage    = EIO_ERROR,
+    .aio_readv    = EIO_ERROR,
+    .aio_writev    = EIO_ERROR,
+   
     .get_unmapped_area = EIO_ERROR,
 };
 
--- .pc/aio-vector.patch/fs/block_dev.c    2004-10-14 12:58:38 -07:00
+++ fs/block_dev.c    2004-10-14 12:58:40 -07:00
@@ -763,6 +763,10 @@ static ssize_t blkdev_file_aio_write(str
 
     return generic_file_aio_write_nolock(iocb, &local_iov, 1, 
&iocb->ki_pos);
 }
+static ssize_t blkdev_file_aio_writev(struct kiocb *iocb)
+{
+    return generic_file_aio_write_nolock(iocb, iocb->ki_iov, 
iocb->ki_nr_segs, &iocb->ki_pos);
+}
 
 static int block_ioctl(struct inode *inode, struct file *file, unsigned 
cmd,
             unsigned long arg)
@@ -788,6 +792,8 @@ struct file_operations def_blk_fops = {
     .write        = blkdev_file_write,
       .aio_read    = generic_file_aio_read,
       .aio_write    = blkdev_file_aio_write,
+      .aio_readv    = generic_file_aio_readv,
+      .aio_writev    = blkdev_file_aio_writev,
     .mmap        = generic_file_mmap,
     .fsync        = block_fsync,
     .ioctl        = block_ioctl,
--- .pc/aio-vector.patch/fs/ext2/file.c    2004-10-14 12:58:38 -07:00
+++ fs/ext2/file.c    2004-10-14 12:58:40 -07:00
@@ -45,6 +45,8 @@ struct file_operations ext2_file_operati
     .write        = generic_file_write,
     .aio_read    = generic_file_aio_read,
     .aio_write    = generic_file_aio_write,
+    .aio_readv    = generic_file_aio_readv,
+    .aio_writev    = generic_file_aio_writev,
     .ioctl        = ext2_ioctl,
     .mmap        = generic_file_mmap,
     .open        = generic_file_open,
--- .pc/aio-vector.patch/fs/ext3/file.c    2004-10-14 12:58:38 -07:00
+++ fs/ext3/file.c    2004-10-14 12:58:40 -07:00
@@ -58,14 +58,14 @@ static int ext3_open_file (struct inode
 }
 
 static ssize_t
-ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t 
count, loff_t pos)
+ext3_file_writev(struct kiocb *iocb)
 {
     struct file *file = iocb->ki_filp;
     struct inode *inode = file->f_dentry->d_inode;
     ssize_t ret;
     int err;
 
-    ret = generic_file_aio_write(iocb, buf, count, pos);
+    ret = generic_file_aio_writev(iocb);
 
     /*
      * Skip flushing if there was an error, or if nothing was written.
@@ -115,12 +115,24 @@ force_commit:
     return ret;
 }
 
+static ssize_t
+ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t 
count, loff_t pos)
+{
+        /* aio_write is a legacy interface. */
+        BUG_ON(buf != iocb->ki_iov[0].iov_base
+           || count != iocb->ki_iov[0].iov_len
+           || pos != iocb->ki_pos);
+    return ext3_file_writev(iocb);
+}
+
 struct file_operations ext3_file_operations = {
     .llseek        = generic_file_llseek,
     .read        = do_sync_read,
     .write        = do_sync_write,
     .aio_read    = generic_file_aio_read,
     .aio_write    = ext3_file_write,
+    .aio_readv    = generic_file_aio_readv,
+    .aio_writev    = ext3_file_writev,
     .readv        = generic_file_readv,
     .writev        = generic_file_writev,
     .ioctl        = ext3_ioctl,
--- .pc/aio-vector.patch/fs/jfs/file.c    2004-10-14 12:58:38 -07:00
+++ fs/jfs/file.c    2004-10-14 12:58:40 -07:00
@@ -110,6 +110,8 @@ struct file_operations jfs_file_operatio
     .read        = generic_file_read,
     .aio_read    = generic_file_aio_read,
     .aio_write    = generic_file_aio_write,
+    .aio_readv    = generic_file_aio_readv,
+    .aio_writev    = generic_file_aio_writev,
     .mmap        = generic_file_mmap,
     .readv        = generic_file_readv,
     .writev        = generic_file_writev,
--- .pc/aio-vector.patch/fs/nfs/direct.c    2004-10-14 12:58:38 -07:00
+++ fs/nfs/direct.c    2004-10-14 12:58:40 -07:00
@@ -448,11 +448,11 @@ nfs_direct_IO(int rw, struct kiocb *iocb
 }
 
 /**
- * nfs_file_direct_read - file direct read operation for NFS files
+ * nfs_file_direct_readv - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ *
+ * The iovec and its size is passed through iocb->ki_iov and 
iocb->ki_nr_segs.
+ * The read offset is passed through iocb->ki_pos.
  *
  * We use this function for direct reads instead of calling
  * generic_file_aio_read() in order to avoid gfar's check to see if
@@ -469,31 +469,31 @@ nfs_direct_IO(int rw, struct kiocb *iocb
  * cache.
  */
 ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t 
count, loff_t pos)
+nfs_file_direct_readv(struct kiocb *iocb)
 {
     ssize_t retval = -EINVAL;
-    loff_t *ppos = &iocb->ki_pos;
     struct file *file = iocb->ki_filp;
     struct nfs_open_context *ctx =
             (struct nfs_open_context *) file->private_data;
     struct dentry *dentry = file->f_dentry;
     struct address_space *mapping = file->f_mapping;
     struct inode *inode = mapping->host;
-    struct iovec iov = {
-        .iov_base = buf,
-        .iov_len = count,
-    };
+     const struct iovec *iov = iocb->ki_iov;
+     unsigned long nr_segs = iocb->ki_nr_segs;
+     size_t count = iov_length(iov, nr_segs);
+     int i;
 
     dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
         dentry->d_parent->d_name.name, dentry->d_name.name,
-        (unsigned long) count, (unsigned long) pos);
+        (unsigned long) count, (unsigned long) iocb->ki_pos);
 
     if (!is_sync_kiocb(iocb))
         goto out;
     if (count < 0)
         goto out;
     retval = -EFAULT;
-    if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+     for (i = 0; i < nr_segs; i++)
+         if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len))
         goto out;
     retval = 0;
     if (!count)
@@ -507,20 +507,20 @@ nfs_file_direct_read(struct kiocb *iocb,
             goto out;
     }
 
-    retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+    retval = nfs_direct_read(inode, ctx, iov, iocb->ki_pos, nr_segs);
     if (retval > 0)
-        *ppos = pos + retval;
+            iocb->ki_pos += retval;
 
 out:
     return retval;
 }
 
 /**
- * nfs_file_direct_write - file direct write operation for NFS files
+ * nfs_file_direct_writev - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * The iovec and its size is passed through iocb->ki_iov and 
iocb->ki_nr_segs.
+ *
+ * The write offset is passed through iocb->ki_pos.
  *
  * We use this function for direct writes instead of calling
  * generic_file_aio_write() in order to avoid taking the inode
@@ -541,10 +541,10 @@ out:
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
 ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, 
size_t count, loff_t pos)
+nfs_file_direct_writev(struct kiocb *iocb)
 {
     ssize_t retval = -EINVAL;
-    loff_t *ppos = &iocb->ki_pos;
+     loff_t pos = iocb->ki_pos;
     unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
     struct file *file = iocb->ki_filp;
     struct nfs_open_context *ctx =
@@ -552,10 +552,10 @@ nfs_file_direct_write(struct kiocb *iocb
     struct dentry *dentry = file->f_dentry;
     struct address_space *mapping = file->f_mapping;
     struct inode *inode = mapping->host;
-    struct iovec iov = {
-        .iov_base = (char __user *)buf,
-        .iov_len = count,
-    };
+     const struct iovec *iov = iocb->ki_iov;
+     unsigned long nr_segs = iocb->ki_nr_segs;
+     size_t count = iov_length(iov, nr_segs);
+     int i;
 
     dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
         dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -568,7 +568,8 @@ nfs_file_direct_write(struct kiocb *iocb
         if (pos < 0)
         goto out;
     retval = -EFAULT;
-    if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+    for (i = 0; i < nr_segs; i++)
+        if (!access_ok(VERIFY_READ, iov[i].iov_base, iov[i].iov_len))
         goto out;
         if (file->f_error) {
                 retval = file->f_error;
@@ -596,11 +597,11 @@ nfs_file_direct_write(struct kiocb *iocb
             goto out;
     }
 
-    retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+    retval = nfs_direct_write(inode, ctx, iov, pos, nr_segs);
     if (mapping->nrpages)
         invalidate_inode_pages2(mapping);
     if (retval > 0)
-        *ppos = pos + retval;
+         iocb->ki_pos = pos + retval;
 
 out:
     return retval;
--- .pc/aio-vector.patch/fs/nfs/file.c    2004-10-14 12:58:38 -07:00
+++ fs/nfs/file.c    2004-10-14 12:58:40 -07:00
@@ -41,6 +41,8 @@ static int  nfs_file_mmap(struct file *,
 static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, 
read_actor_t, void *);
 static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, 
loff_t);
 static ssize_t nfs_file_write(struct kiocb *, const char __user *, 
size_t, loff_t);
+static ssize_t nfs_file_readv(struct kiocb *);
+static ssize_t nfs_file_writev(struct kiocb *);
 static int  nfs_file_flush(struct file *);
 static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
 static int nfs_check_flags(int flags);
@@ -51,6 +53,8 @@ struct file_operations nfs_file_operatio
     .write        = do_sync_write,
     .aio_read        = nfs_file_read,
     .aio_write        = nfs_file_write,
+    .aio_readv        = nfs_file_readv,
+    .aio_writev        = nfs_file_writev,
     .mmap        = nfs_file_mmap,
     .open        = nfs_file_open,
     .flush        = nfs_file_flush,
@@ -137,7 +141,7 @@ nfs_file_flush(struct file *file)
 }
 
 static ssize_t
-nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, 
loff_t pos)
+nfs_file_readv(struct kiocb *iocb)
 {
     struct dentry * dentry = iocb->ki_filp->f_dentry;
     struct inode * inode = dentry->d_inode;
@@ -145,18 +149,27 @@ nfs_file_read(struct kiocb *iocb, char _
 
 #ifdef CONFIG_NFS_DIRECTIO
     if (iocb->ki_filp->f_flags & O_DIRECT)
-        return nfs_file_direct_read(iocb, buf, count, pos);
+            return nfs_file_direct_readv(iocb);
 #endif
 
     dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
         dentry->d_parent->d_name.name, dentry->d_name.name,
-        (unsigned long) count, (unsigned long) pos);
+         (unsigned long)iov_length(iocb->ki_iov, iocb->ki_nr_segs),
+         (unsigned long)iocb->ki_pos);
 
     result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
     if (!result)
-        result = generic_file_aio_read(iocb, buf, count, pos);
+            result = generic_file_aio_readv(iocb);
     return result;
 }
+static ssize_t
+nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, 
loff_t pos)
+{
+        BUG_ON(buf != iocb->ki_iov[0].iov_base
+           || count != iocb->ki_iov[0].iov_len
+           || pos != iocb->ki_pos);
+    return nfs_file_readv(iocb);
+}
 
 static ssize_t
 nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
@@ -257,20 +270,21 @@ struct address_space_operations nfs_file
  * Write to a file (through the page cache).
  */
 static ssize_t
-nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t 
count, loff_t pos)
+nfs_file_writev(struct kiocb *iocb)
 {
     struct dentry * dentry = iocb->ki_filp->f_dentry;
     struct inode * inode = dentry->d_inode;
+    size_t count = iov_length(iocb->ki_iov, iocb->ki_nr_segs);
     ssize_t result;
 
 #ifdef CONFIG_NFS_DIRECTIO
     if (iocb->ki_filp->f_flags & O_DIRECT)
-        return nfs_file_direct_write(iocb, buf, count, pos);
+        return nfs_file_direct_writev(iocb);
 #endif
 
     dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n",
         dentry->d_parent->d_name.name, dentry->d_name.name,
-        inode->i_ino, (unsigned long) count, (unsigned long) pos);
+         inode->i_ino, (unsigned long)count, (unsigned long)iocb->ki_pos);
 
     result = -EBUSY;
     if (IS_SWAPFILE(inode))
@@ -283,13 +297,22 @@ nfs_file_write(struct kiocb *iocb, const
     if (!count)
         goto out;
 
-    result = generic_file_aio_write(iocb, buf, count, pos);
+    result = generic_file_aio_writev(iocb);
 out:
     return result;
 
 out_swapfile:
     printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
     goto out;
+}
+
+static ssize_t
+nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t 
count, loff_t pos)
+{
+        BUG_ON(buf != iocb->ki_iov[0].iov_base
+           || count != iocb->ki_iov[0].iov_len
+           || pos != iocb->ki_pos);
+    return nfs_file_writev(iocb);
 }
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
--- .pc/aio-vector.patch/fs/read_write.c    2004-10-14 12:58:38 -07:00
+++ fs/read_write.c    2004-10-14 12:58:40 -07:00
@@ -190,6 +190,10 @@ ssize_t do_sync_read(struct file *filp,
 
     init_sync_kiocb(&kiocb, filp);
     kiocb.ki_pos = *ppos;
+    kiocb.ki_iov = &kiocb.ki_fast_iov;
+    kiocb.ki_iov->iov_base = buf;
+    kiocb.ki_iov->iov_len = len;
+    kiocb.ki_nr_segs = 1;
     ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
     if (-EIOCBQUEUED == ret)
         ret = wait_on_sync_kiocb(&kiocb);
@@ -234,6 +238,10 @@ ssize_t do_sync_write(struct file *filp,
 
     init_sync_kiocb(&kiocb, filp);
     kiocb.ki_pos = *ppos;
+    kiocb.ki_iov = &kiocb.ki_fast_iov;
+    kiocb.ki_iov->iov_base = (char __user*)buf;
+    kiocb.ki_iov->iov_len = len;
+    kiocb.ki_nr_segs = 1;
     ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos);
     if (-EIOCBQUEUED == ret)
         ret = wait_on_sync_kiocb(&kiocb);
--- .pc/aio-vector.patch/fs/reiserfs/file.c    2004-10-14 12:58:38 -07:00
+++ fs/reiserfs/file.c    2004-10-14 12:58:40 -07:00
@@ -1380,6 +1380,11 @@ static ssize_t reiserfs_aio_write(struct
 {
     return generic_file_aio_write(iocb, buf, count, pos);
 }
+static ssize_t reiserfs_aio_writev(struct kiocb *iocb)
+{
+    return generic_file_aio_writev(iocb);
+}
+
 
 
 
@@ -1393,6 +1398,8 @@ struct file_operations reiserfs_file_ope
     .sendfile    = generic_file_sendfile,
     .aio_read   = generic_file_aio_read,
     .aio_write  = reiserfs_aio_write,
+    .aio_readv   = generic_file_aio_readv,
+    .aio_writev  = reiserfs_aio_writev,
 };
 
 
--- .pc/aio-vector.patch/include/linux/aio.h    2004-10-14 12:58:38 -07:00
+++ include/linux/aio.h    2004-10-14 12:58:40 -07:00
@@ -4,6 +4,7 @@
 #include <linux/list.h>
 #include <linux/workqueue.h>
 #include <linux/aio_abi.h>
+#include <linux/uio.h>
 
 #include <asm/atomic.h>
 
@@ -67,7 +68,20 @@ struct kiocb {
     /* State that we remember to be able to restart/retry  */
     unsigned short        ki_opcode;
     size_t            ki_nbytes;     /* copy of iocb->aio_nbytes */
-    char             __user *ki_buf;    /* remaining iocb->aio_buf */
+
+        /* Used for PREAD, PWRITE */
+        struct iovec            ki_fast_iov;
+
+     /* Used for PREADV and PWRITEV. iov is kmalloced. */
+        struct iovec            *ki_slow_iov;
+
+     /* ki_iov points to either &ki_fast_iov or ki_slow_iov,
+        depending on the value of ki_nr_segs. Its pointers are
+        incremented as more data is read or written
+        asynchronously. */
+     struct iovec            *ki_iov;
+        unsigned long           ki_nr_segs;     /* number of iovs left. */
+
     size_t            ki_left;     /* remaining bytes */
     wait_queue_t        ki_wait;
     long            ki_retried;     /* just for testing */
--- .pc/aio-vector.patch/include/linux/aio_abi.h    2004-10-14 12:58:38 
-07:00
+++ include/linux/aio_abi.h    2004-10-14 12:58:40 -07:00
@@ -41,6 +41,8 @@ enum {
      * IOCB_CMD_POLL = 5,
      */
     IOCB_CMD_NOOP = 6,
+    IOCB_CMD_PREADV = 7,
+    IOCB_CMD_PWRITEV = 8,
 };
 
 /* read() from /dev/aio returns these structures. */
@@ -65,6 +67,27 @@ struct io_event {
  * proper padding and aio_error abstraction
  */
 
+struct io_iocb_poll {
+    __u32 events;
+};
+
+struct io_iocb_sockaddr {
+    __u64    addr;
+    __u32    len;
+};
+
+struct io_iocb_common {
+    __u64    buf;
+    __u64    nbytes;
+    __s64      offset;
+};
+
+struct io_iocb_vector {
+    struct iovec  __user *vec;
+    __u32    nr;
+    __s64     offset;
+};
+
 struct iocb {
     /* these are internal to the kernel/libc. */
     __u64    aio_data;    /* data to be returned in event's data */
@@ -76,9 +99,12 @@ struct iocb {
     __s16    aio_reqprio;
     __u32    aio_fildes;
 
-    __u64    aio_buf;
-    __u64    aio_nbytes;
-    __s64    aio_offset;
+    union {
+        struct io_iocb_common c;
+        struct io_iocb_vector v;
+        struct io_iocb_poll poll;
+        struct io_iocb_sockaddr    saddr;
+    } u;
 
     /* extra parameters */
     __u64    aio_reserved2;    /* TODO: use this for a (struct sigevent 
*) */
--- .pc/aio-vector.patch/include/linux/fs.h    2004-10-14 12:58:38 -07:00
+++ include/linux/fs.h    2004-10-14 12:58:40 -07:00
@@ -967,6 +967,7 @@ struct file_operations {
     loff_t (*llseek) (struct file *, loff_t, int);
     ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
     ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t);
+
     ssize_t (*write) (struct file *, const char __user *, size_t, 
loff_t *);
     ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, 
loff_t);
     int (*readdir) (struct file *, void *, filldir_t);
@@ -984,6 +985,10 @@ struct file_operations {
     ssize_t (*writev) (struct file *, const struct iovec *, unsigned 
long, loff_t *);
     ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, 
void *);
     ssize_t (*sendpage) (struct file *, struct page *, int, size_t, 
loff_t *, int);
+    /* For aio_readv and aio_writev, the iovec and offset are passed
+       through kiocb->ki_iov, ki_nr_segs, and ki_pos. */
+    ssize_t (*aio_readv) (struct kiocb *);
+        ssize_t (*aio_writev) (struct kiocb *);
     unsigned long (*get_unmapped_area)(struct file *, unsigned long, 
unsigned long, unsigned long, unsigned long);
     int (*check_flags)(int);
     int (*dir_notify)(struct file *filp, unsigned long arg);
@@ -1508,8 +1513,9 @@ extern ssize_t generic_file_read(struct
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, 
int isblk);
 extern ssize_t generic_file_write(struct file *, const char __user *, 
size_t, loff_t *);
 extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, 
size_t, loff_t);
-extern ssize_t __generic_file_aio_read(struct kiocb *, const struct 
iovec *, unsigned long, loff_t *);
+extern ssize_t generic_file_aio_readv(struct kiocb *);
 extern ssize_t generic_file_aio_write(struct kiocb *, const char __user 
*, size_t, loff_t);
+extern ssize_t generic_file_aio_writev(struct kiocb *);
 extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const 
struct iovec *,
         unsigned long, loff_t *);
 extern ssize_t generic_file_direct_write(struct kiocb *, const struct 
iovec *,
--- .pc/aio-vector.patch/include/linux/nfs_fs.h    2004-10-14 12:58:38 
-07:00
+++ include/linux/nfs_fs.h    2004-10-14 12:58:40 -07:00
@@ -337,10 +337,9 @@ static inline struct rpc_cred *nfs_file_
  */
 extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, 
loff_t,
             unsigned long);
-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf,
-            size_t count, loff_t pos);
-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char 
__user *buf,
-            size_t count, loff_t pos);
+/* iov, #iov, and offset are passed through iocb->ki_iov, ki_nr_segs, and ki_pos. */
+extern ssize_t nfs_file_direct_readv(struct kiocb *iocb);
+extern ssize_t nfs_file_direct_writev(struct kiocb *iocb);
 
 /*
  * linux/fs/nfs/dir.c
--- .pc/aio-vector.patch/mm/filemap.c    2004-10-14 12:58:39 -07:00
+++ mm/filemap.c    2004-10-14 12:58:40 -07:00
@@ -998,7 +998,13 @@ generic_file_aio_read(struct kiocb *iocb
     BUG_ON(iocb->ki_pos != pos);
     return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
 }
+EXPORT_SYMBOL(generic_file_aio_readv);
 
+ssize_t
+generic_file_aio_readv(struct kiocb *iocb)
+{
+        return __generic_file_aio_read(iocb, iocb->ki_iov, 
iocb->ki_nr_segs, &iocb->ki_pos);
+}
 EXPORT_SYMBOL(generic_file_aio_read);
 
 ssize_t
@@ -2125,20 +2131,17 @@ generic_file_write_nolock(struct file *f
 
 EXPORT_SYMBOL(generic_file_write_nolock);
 
-ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
-                   size_t count, loff_t pos)
+EXPORT_SYMBOL(generic_file_aio_writev);
+ssize_t generic_file_aio_writev(struct kiocb *iocb)
 {
     struct file *file = iocb->ki_filp;
     struct address_space *mapping = file->f_mapping;
     struct inode *inode = mapping->host;
     ssize_t ret;
-    struct iovec local_iov = { .iov_base = (void __user *)buf,
-                    .iov_len = count };
-
-    BUG_ON(iocb->ki_pos != pos);
+     loff_t pos = iocb->ki_pos;
 
     down(&inode->i_sem);
-    ret = generic_file_aio_write_nolock(iocb, &local_iov, 1,
+    ret = generic_file_aio_write_nolock(iocb, iocb->ki_iov, 
iocb->ki_nr_segs,
                         &iocb->ki_pos);
     up(&inode->i_sem);
 
@@ -2151,7 +2154,17 @@ ssize_t generic_file_aio_write(struct ki
     }
     return ret;
 }
+
 EXPORT_SYMBOL(generic_file_aio_write);
+ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
+                   size_t count, loff_t pos)
+{
+        /* aio_write is a legacy interface. */
+        BUG_ON(buf != iocb->ki_iov[0].iov_base
+           || count != iocb->ki_iov[0].iov_len
+           || pos != iocb->ki_pos);
+    return generic_file_aio_writev(iocb);
+}
 
 ssize_t generic_file_write(struct file *file, const char __user *buf,
                size_t count, loff_t *ppos)
--- .pc/aio-vector.patch/net/socket.c    2004-10-14 12:58:38 -07:00
+++ net/socket.c    2004-10-14 12:58:40 -07:00
@@ -99,6 +99,8 @@ static ssize_t sock_aio_read(struct kioc
              size_t size, loff_t pos);
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
               size_t size, loff_t pos);
+static ssize_t sock_aio_readv(struct kiocb *iocb);
+static ssize_t sock_aio_writev(struct kiocb *iocb);
 static int sock_mmap(struct file *file, struct vm_area_struct * vma);
 
 static int sock_close(struct inode *inode, struct file *file);
@@ -125,6 +127,8 @@ static struct file_operations socket_fil
     .llseek =    no_llseek,
     .aio_read =    sock_aio_read,
     .aio_write =    sock_aio_write,
+    .aio_readv =    sock_aio_readv,
+    .aio_writev =    sock_aio_writev,
     .poll =        sock_poll,
     .ioctl =    sock_ioctl,
     .mmap =        sock_mmap,
@@ -640,15 +644,15 @@ static void sock_aio_dtor(struct kiocb *
  *    area ubuf...ubuf+size-1 is writable before asking the protocol.
  */
 
-static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
-             size_t size, loff_t pos)
+static ssize_t sock_aio_readv(struct kiocb *iocb)
 {
     struct sock_iocb *x, siocb;
     struct socket *sock;
-    int flags;
+    const struct iovec *iov = iocb->ki_iov;
+    unsigned long nr_segs = iocb->ki_nr_segs;
 
-    if (pos != 0)
-        return -ESPIPE;
+    int flags;
+    size_t size = iov_length(iov, nr_segs);
     if (size==0)        /* Match SYS5 behaviour */
         return 0;
 
@@ -666,31 +670,46 @@ static ssize_t sock_aio_read(struct kioc
 
     x->async_msg.msg_name = NULL;
     x->async_msg.msg_namelen = 0;
-    x->async_msg.msg_iov = &x->async_iov;
-    x->async_msg.msg_iovlen = 1;
     x->async_msg.msg_control = NULL;
     x->async_msg.msg_controllen = 0;
-    x->async_iov.iov_base = ubuf;
-    x->async_iov.iov_len = size;
+    if (nr_segs == 1) {
+        // handle sock_aio_read that may pass iov on the stack.
+        x->async_msg.msg_iov = &x->async_iov;
+        x->async_msg.msg_iovlen = 1;
+        x->async_iov.iov_base = iov[0].iov_base;
+        x->async_iov.iov_len = iov[0].iov_len;
+    } else {
+        // we can assume that iov is held in iocb and not
+        // freed until x is freed.
+        x->async_msg.msg_iov = (struct iovec*)iov;
+        x->async_msg.msg_iovlen = nr_segs;
+    }
     flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 
     return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags);
 }
-
-
+static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
+             size_t size, loff_t pos)
+{
+        /* aio_read is a legacy interface. */
+        BUG_ON(ubuf != iocb->ki_iov[0].iov_base
+           || size != iocb->ki_iov[0].iov_len
+           || pos != iocb->ki_pos);
+    return sock_aio_readv(iocb);
+}
 /*
  *    Write data to a socket. We verify that the user area 
ubuf..ubuf+size-1
  *    is readable by the user process.
  */
 
-static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
-              size_t size, loff_t pos)
+static ssize_t sock_aio_writev(struct kiocb *iocb)
 {
     struct sock_iocb *x, siocb;
     struct socket *sock;
-   
-    if (pos != 0)
-        return -ESPIPE;
+    struct iovec *iov = iocb->ki_iov;
+    unsigned long nr_segs = iocb->ki_nr_segs;
+
+    size_t size = iov_length(iov, nr_segs);
     if(size==0)        /* Match SYS5 behaviour */
         return 0;
 
@@ -708,17 +727,34 @@ static ssize_t sock_aio_write(struct kio
 
     x->async_msg.msg_name = NULL;
     x->async_msg.msg_namelen = 0;
-    x->async_msg.msg_iov = &x->async_iov;
-    x->async_msg.msg_iovlen = 1;
     x->async_msg.msg_control = NULL;
     x->async_msg.msg_controllen = 0;
     x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 
: MSG_DONTWAIT;
+    if (nr_segs == 1) {
+        // handle sock_aio_write that may pass iov on the stack.
+        x->async_msg.msg_iov = &x->async_iov;
+        x->async_msg.msg_iovlen = 1;
+        x->async_iov.iov_base = iov[0].iov_base;
+        x->async_iov.iov_len = iov[0].iov_len;
+    } else {
+        // we can assume that iov is held in iocb and not
+        // freed until x is freed.
+        x->async_msg.msg_iov = (struct iovec*)iov;
+        x->async_msg.msg_iovlen = nr_segs;
+    }
     if (sock->type == SOCK_SEQPACKET)
         x->async_msg.msg_flags |= MSG_EOR;
-    x->async_iov.iov_base = (void __user *)ubuf;
-    x->async_iov.iov_len = size;
    
     return __sock_sendmsg(iocb, sock, &x->async_msg, size);
+}
+static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
+             size_t size, loff_t pos)
+{
+        /* aio_write is a legacy interface. */
+        BUG_ON(ubuf != iocb->ki_iov[0].iov_base
+           || size != iocb->ki_iov[0].iov_len
+           || pos != iocb->ki_pos);
+    return sock_aio_writev(iocb);
 }
 
 ssize_t sock_sendpage(struct file *file, struct page *page,


                 reply	other threads:[~2004-10-14 20:16 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=416EDD34.1020704@hpl.hp.com \
    --to=ysaito@hpl.hp.com \
    --cc=janetmor@us.ibm.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=suparna@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.