From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
To: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Cc: David Miller <davem@davemloft.net>,
Ulrich Drepper <drepper@redhat.com>,
Evgeniy Polyakov <johnpol@2ka.mipt.ru>,
netdev <netdev@vger.kernel.org>,
Zach Brown <zach.brown@oracle.com>,
linux-fsdevel@vger.kernel.org, linux-aio@kvack.org
Subject: [take5 2/4] kevent: AIO, aio_sendfile() implementation.
Date: Tue, 8 Aug 2006 11:44:47 +0400 [thread overview]
Message-ID: <1155023087143@2ka.mipt.ru> (raw)
In-Reply-To: <11550230871019@2ka.mipt.ru>
AIO, aio_sendfile() implementation.
This patch includes asynchronous propagation of a file's data into the VFS
cache and an aio_sendfile() implementation.
Network aio_sendfile() works lazily - it asynchronously populates pages
into the VFS cache (which can be used for various tricks with adaptive
readahead) and then uses usual ->sendfile() callback.
This patch contains the AIO and aio_sendfile() implementation on top of the
get_block() abstraction, which it was decided to postpone for a while.
More info can be found in the following thread at linux-kernel@ or netdev@:
http://thread.gmane.org/gmane.linux.kernel/425005
Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
diff --git a/fs/bio.c b/fs/bio.c
index 6a0b9ad..a3ee530 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -119,7 +119,7 @@ void bio_free(struct bio *bio, struct bi
/*
* default destructor for a bio allocated with bio_alloc_bioset()
*/
-static void bio_fs_destructor(struct bio *bio)
+void bio_fs_destructor(struct bio *bio)
{
bio_free(bio, fs_bio_set);
}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fb4d322..9316551 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -685,6 +685,7 @@ ext2_writepages(struct address_space *ma
}
const struct address_space_operations ext2_aops = {
+ .get_block = ext2_get_block,
.readpage = ext2_readpage,
.readpages = ext2_readpages,
.writepage = ext2_writepage,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c5ee9f0..d9210d4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1699,6 +1699,7 @@ static int ext3_journalled_set_page_dirt
}
static const struct address_space_operations ext3_ordered_aops = {
+ .get_block = ext3_get_block,
.readpage = ext3_readpage,
.readpages = ext3_readpages,
.writepage = ext3_ordered_writepage,
diff --git a/fs/file_table.c b/fs/file_table.c
index 0131ba0..7f5b35f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -112,6 +112,10 @@ struct file *get_empty_filp(void)
if (security_file_alloc(f))
goto fail_sec;
+#ifdef CONFIG_KEVENT_POLL
+ kevent_storage_init(f, &f->st);
+ kevent_poll_reinit(f);
+#endif
tsk = current;
INIT_LIST_HEAD(&f->f_u.fu_list);
atomic_set(&f->f_count, 1);
@@ -159,6 +163,9 @@ void fastcall __fput(struct file *file)
might_sleep();
fsnotify_close(file);
+#ifdef CONFIG_KEVENT_POLL
+ kevent_storage_fini(&file->st);
+#endif
/*
* The function eventpoll_release() should be the first called
* in the file cleanup chain.
diff --git a/fs/inode.c b/fs/inode.c
index 0bf9f04..fdbd0ba 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,7 @@ #include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
+#include <linux/kevent.h>
#include <linux/mount.h>
/*
@@ -165,12 +166,18 @@ #endif
}
memset(&inode->u, 0, sizeof(inode->u));
inode->i_mapping = mapping;
+#if defined CONFIG_KEVENT
+ kevent_storage_init(inode, &inode->st);
+#endif
}
return inode;
}
void destroy_inode(struct inode *inode)
{
+#if defined CONFIG_KEVENT_INODE || defined CONFIG_KEVENT_SOCKET
+ kevent_storage_fini(&inode->st);
+#endif
BUG_ON(inode_has_buffers(inode));
security_inode_free(inode);
if (inode->i_sb->s_op->destroy_inode)
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 12dfdcf..f8dca72 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3001,6 +3001,7 @@ int reiserfs_setattr(struct dentry *dent
}
const struct address_space_operations reiserfs_address_space_operations = {
+ .get_block = reiserfs_get_block,
.writepage = reiserfs_writepage,
.readpage = reiserfs_readpage,
.readpages = reiserfs_readpages,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2561020..65eb438 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -240,6 +240,9 @@ #include <linux/mutex.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include <asm/byteorder.h>
+#ifdef CONFIG_KEVENT
+#include <linux/kevent_storage.h>
+#endif
struct hd_geometry;
struct iovec;
@@ -352,6 +355,8 @@ struct address_space;
struct writeback_control;
struct address_space_operations {
+ int (*get_block)(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
void (*sync_page)(struct page *);
@@ -546,6 +551,10 @@ #ifdef CONFIG_INOTIFY
struct mutex inotify_mutex; /* protects the watches list */
#endif
+#ifdef CONFIG_KEVENT_INODE
+ struct kevent_storage st;
+#endif
+
unsigned long i_state;
unsigned long dirtied_when; /* jiffies of first dirtying */
@@ -698,6 +707,9 @@ #ifdef CONFIG_EPOLL
struct list_head f_ep_links;
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
+#ifdef CONFIG_KEVENT_POLL
+ struct kevent_storage st;
+#endif
struct address_space *f_mapping;
};
extern spinlock_t files_lock;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index cc5dec7..0acc8db 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -15,6 +15,7 @@ #ifdef __KERNEL__
#include <linux/dnotify.h>
#include <linux/inotify.h>
+#include <linux/kevent.h>
#include <linux/audit.h>
/*
@@ -79,6 +80,7 @@ static inline void fsnotify_nameremove(s
isdir = IN_ISDIR;
dnotify_parent(dentry, DN_DELETE);
inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name);
+ kevent_inode_notify_parent(dentry, KEVENT_INODE_REMOVE);
}
/*
@@ -88,6 +90,7 @@ static inline void fsnotify_inoderemove(
{
inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
inotify_inode_is_dead(inode);
+ kevent_inode_remove(inode);
}
/*
@@ -96,6 +99,7 @@ static inline void fsnotify_inoderemove(
static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
{
inode_dir_notify(inode, DN_CREATE);
+ kevent_inode_notify(inode, KEVENT_INODE_CREATE);
inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
dentry->d_inode);
audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino);
@@ -107,6 +111,7 @@ static inline void fsnotify_create(struc
static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
{
inode_dir_notify(inode, DN_CREATE);
+ kevent_inode_notify(inode, KEVENT_INODE_CREATE);
inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0,
dentry->d_name.name, dentry->d_inode);
audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino);
diff --git a/kernel/kevent/kevent_aio.c b/kernel/kevent/kevent_aio.c
new file mode 100644
index 0000000..332629b
--- /dev/null
+++ b/kernel/kevent/kevent_aio.c
@@ -0,0 +1,584 @@
+/*
+ * kevent_aio.c
+ *
+ * 2006 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/kevent.h>
+
+#include <net/sock.h>
+
+#define KEVENT_AIO_DEBUG
+
+#ifdef KEVENT_AIO_DEBUG
+#define dprintk(f, a...) printk(f, ##a)
+#else
+#define dprintk(f, a...) do {} while (0)
+#endif
+/*
+ * Per-request state of one aio_sendfile() kevent. Allocated in
+ * kevent_aio_enqueue(), stored in k->priv and freed in kevent_aio_dequeue().
+ */
+struct kevent_aio_private
+{
+ int pg_num; /* number of pages handled per pass; set in kevent_aio_enqueue() */
+ size_t size; /* bytes still to be sent; reduced by what the actor consumed */
+ loff_t offset; /* file offset of the next page to add to the page cache */
+ loff_t processed; /* file offset up to which data was handed to the socket */
+ atomic_t bio_page_num; /* pages still outstanding in the current read pass */
+ struct completion bio_complete; /* completed when bio_page_num drops to zero */
+ struct file *file, *sock; /* source file and destination, both taken with fget() */
+ struct work_struct work; /* runs kevent_aio_work() */
+};
+
+static int kevent_aio_dequeue(struct kevent *k);
+static int kevent_aio_enqueue(struct kevent *k);
+static int kevent_aio_callback(struct kevent *k);
+
+extern void bio_fs_destructor(struct bio *bio);
+
+/*
+ * Destructor installed on every bio built by kevent_mpage_readpage().
+ * It schedules the request's work item (kevent_aio_work) and then hands
+ * the bio to the regular bio_fs_destructor() for release.
+ */
+static void kevent_aio_bio_destructor(struct bio *bio)
+{
+	struct kevent *kev = bio->bi_private;
+	struct kevent_aio_private *aio = kev->priv;
+
+	dprintk("%s: bio=%p, num=%u, k=%p, inode=%p.\n", __func__, bio, bio->bi_vcnt, kev, kev->st->origin);
+
+	/* The bio is still intact here, so it is released only after the work is queued. */
+	schedule_work(&aio->work);
+	bio_fs_destructor(bio);
+}
+
+/*
+ * Drops one outstanding-page reference of the request; when the counter
+ * reaches zero the bio_complete completion is signalled.
+ */
+static void kevent_aio_bio_put(struct kevent *k)
+{
+	struct kevent_aio_private *aio = k->priv;
+
+	if (!atomic_dec_and_test(&aio->bio_page_num))
+		return;
+
+	complete(&aio->bio_complete);
+}
+/*
+ * Read-completion handler installed as bi_end_io by kevent_mpage_bio_submit().
+ * While the bio still has bytes pending (bi_size != 0) it returns 1 and does
+ * nothing. Once the bio is finished it walks the bio's pages from the last
+ * vector to the first, marks each page up to date or in error according to
+ * BIO_UPTODATE, unlocks it and drops one outstanding-page count of the kevent,
+ * then puts the bio and returns 0. bytes_done and err are not examined.
+ */
+static int kevent_mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
+{
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; /* start at the last vector */
+ struct kevent *k = bio->bi_private;
+
+ if (bio->bi_size)
+ return 1;
+
+ do {
+ struct page *page = bvec->bv_page;
+
+ if (--bvec >= bio->bi_io_vec) /* step back one vector; prefetch its page flags if it exists */
+ prefetchw(&bvec->bv_page->flags);
+
+ if (uptodate) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+
+ unlock_page(page);
+ kevent_aio_bio_put(k); /* one page of this pass is done */
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+ return 0;
+}
+
+/*
+ * Submits @bio (if any) as a read with kevent_mpage_end_io_read() as its
+ * completion handler. Always returns NULL so callers can write
+ * "bio = kevent_mpage_bio_submit(...)" to forget the submitted bio.
+ * Note that @rw is not used: the bio is always submitted with READ.
+ */
+static inline struct bio *kevent_mpage_bio_submit(int rw, struct bio *bio)
+{
+	if (!bio)
+		return NULL;
+
+	bio->bi_end_io = kevent_mpage_end_io_read;
+	dprintk("%s: bio=%p, num=%u.\n", __func__, bio, bio->bi_vcnt);
+	submit_bio(READ, bio);
+
+	return NULL;
+}
+/*
+ * Maps one page-cache page to disk blocks through get_block() and appends it
+ * to the read bio under construction, submitting the current bio and
+ * allocating a new one when needed. Returns the bio still being built, or
+ * NULL if it has been submitted. When the page is queued, *offset is advanced
+ * by the number of bytes added to the bio. Block layouts this function does
+ * not handle are routed to the "confused" label.
+ */
+static struct bio *kevent_mpage_readpage(struct kevent *k, struct bio *bio,
+ struct page *page, unsigned nr_pages, get_block_t get_block,
+ loff_t *offset, sector_t *last_block_in_bio)
+{
+ struct inode *inode = k->st->origin;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ struct block_device *bdev = NULL;
+ unsigned first_hole = blocks_per_page; /* blocks_per_page means "no hole seen" */
+ unsigned page_block;
+ sector_t blocks[MAX_BUF_PER_PAGE];
+ struct buffer_head bh;
+ int fully_mapped = 1, length;
+
+ block_in_file = (*offset + blocksize - 1) >> blkbits; /* NOTE(review): rounds *offset up to the next block boundary - confirm intended */
+ last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; /* first block past end of file */
+
+ bh.b_page = page;
+ for (page_block = 0; page_block < blocks_per_page; page_block++, block_in_file++) {
+ bh.b_state = 0;
+ if (block_in_file < last_block) {
+ if (get_block(inode, block_in_file, &bh, 0))
+ goto confused;
+ }
+
+ if (!buffer_mapped(&bh)) {
+ fully_mapped = 0;
+ if (first_hole == blocks_per_page)
+ first_hole = page_block;
+ continue;
+ }
+
+ /* some filesystems will copy data into the page during
+ * the get_block call, in which case we don't want to
+ * read it again. map_buffer_to_page copies the data
+ * we just collected from get_block into the page's buffers
+ * so readpage doesn't have to repeat the get_block call
+ */
+ if (buffer_uptodate(&bh)) {
+ BUG(); /* buffers already up to date after get_block() are not supported here */
+ //map_buffer_to_page(page, &bh, page_block);
+ goto confused;
+ }
+
+ if (first_hole != blocks_per_page)
+ goto confused; /* hole -> non-hole */
+
+ /* Contiguous blocks? */
+ if (page_block && blocks[page_block-1] != bh.b_blocknr-1)
+ goto confused;
+ blocks[page_block] = bh.b_blocknr;
+ bdev = bh.b_bdev;
+ }
+
+ if (!bdev) /* no block of this page was mapped */
+ goto confused;
+
+ if (first_hole != blocks_per_page) {
+ char *kaddr = kmap_atomic(page, KM_USER0);
+ memset(kaddr + (first_hole << blkbits), 0,
+ PAGE_CACHE_SIZE - (first_hole << blkbits)); /* zero everything from the first hole to the end of the page */
+ flush_dcache_page(page);
+ kunmap_atomic(kaddr, KM_USER0);
+ if (first_hole == 0) {
+ SetPageUptodate(page);
+ goto out;
+ }
+ } else if (fully_mapped) {
+ SetPageMappedToDisk(page);
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this BIO off first?
+ */
+ if (bio && (*last_block_in_bio != blocks[0] - 1))
+ bio = kevent_mpage_bio_submit(READ, bio);
+
+alloc_new:
+ if (bio == NULL) {
+ nr_pages = min_t(unsigned, nr_pages, bio_get_nr_vecs(bdev));
+ bio = bio_alloc(GFP_KERNEL, nr_pages);
+ if (bio == NULL)
+ goto confused;
+
+ bio->bi_destructor = kevent_aio_bio_destructor;
+ bio->bi_bdev = bdev;
+ bio->bi_sector = blocks[0] << (blkbits - 9); /* filesystem block number converted to 512-byte sectors */
+ bio->bi_private = k;
+ }
+
+ length = first_hole << blkbits; /* bytes of this page that precede the first hole */
+ if (bio_add_page(bio, page, length, 0) < length) {
+ bio = kevent_mpage_bio_submit(READ, bio);
+ dprintk("%s: Failed to add a page: nr_pages=%d, length=%d, page=%p.\n",
+ __func__, nr_pages, length, page);
+ goto alloc_new;
+ }
+
+ dprintk("%s: bio=%p, b=%d, m=%d, u=%d, nr_pages=%d, offset=%Lu, "
+ "size=%Lu. page_block=%u, page=%p.\n",
+ __func__, bio, buffer_boundary(&bh), buffer_mapped(&bh),
+ buffer_uptodate(&bh), nr_pages, *offset, i_size_read(inode),
+ page_block, page);
+
+ *offset = *offset + length;
+
+ if (buffer_boundary(&bh) || (first_hole != blocks_per_page))
+ bio = kevent_mpage_bio_submit(READ, bio);
+ else
+ *last_block_in_bio = blocks[blocks_per_page - 1];
+
+out:
+ return bio;
+
+confused:
+ dprintk("%s: confused. bio=%p, nr_pages=%d.\n", __func__, bio, nr_pages);
+ if (bio)
+ bio = kevent_mpage_bio_submit(READ, bio);
+ kevent_aio_bio_put(k); /* this page will not go through the bio completion handler, so account for it here */
+ SetPageUptodate(page); /* NOTE(review): the page is marked up to date although nothing was read into it, and this path never calls unlock_page() - confirm intended */
+
+ if (nr_pages == 1) { /* the caller passes pg_num - i, so 1 presumably means the last page of the pass - TODO confirm */
+ struct kevent_aio_private *priv = k->priv;
+
+ wait_for_completion(&priv->bio_complete);
+ kevent_storage_ready(k->st, NULL, KEVENT_AIO_BIO);
+ init_completion(&priv->bio_complete);
+ complete(&priv->bio_complete);
+ }
+ goto out;
+}
+
+/*
+ * Allocates a page and inserts it into the source file's page cache at the
+ * index derived from priv->offset.
+ *
+ * Returns 0 and stores the page in *cached_page on success. Returns 0 and
+ * leaves *cached_page untouched when add_to_page_cache_lru() reports -EEXIST
+ * (the index is already populated). Returns -ENOMEM or the insertion error
+ * otherwise.
+ */
+static int kevent_aio_alloc_cached_page(struct kevent *k, struct page **cached_page)
+{
+	struct kevent_aio_private *aio = k->priv;
+	struct address_space *mapping = aio->file->f_mapping;
+	pgoff_t index = aio->offset >> PAGE_CACHE_SHIFT;
+	struct page *page;
+	int err;
+
+	page = page_cache_alloc_cold(mapping);
+	if (!page)
+		return -ENOMEM;
+
+	err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+	if (err) {
+		page_cache_release(page);
+		/* An already cached page is not an error for the caller. */
+		return (err == -EEXIST) ? 0 : err;
+	}
+
+	dprintk("%s: page=%p, offset=%Lu, processed=%Lu, index=%lu, size=%zu.\n",
+			__func__, page, aio->offset, aio->processed, index, aio->size);
+
+	*cached_page = page;
+	return 0;
+}
+
+/*
+ * Starts one read pass: for page slots @first .. pg_num-1 it adds a fresh
+ * page to the page cache and queues it for reading through
+ * kevent_mpage_readpage(), then submits whatever bio is left over.
+ * Returns 0, or the error from kevent_aio_alloc_cached_page().
+ *
+ * NOTE(review): the outstanding-page counter is set to pg_num even when
+ * @first > 0, and neither the counter nor priv->offset is adjusted for pages
+ * that are skipped because they are already cached - confirm that the
+ * completion can still fire in those cases.
+ */
+static int kevent_mpage_readpages(struct kevent *k, int first,
+		int (* get_block)(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create))
+{
+	struct kevent_aio_private *aio = k->priv;
+	struct bio *bio = NULL;
+	sector_t last_block_in_bio = 0;
+	int idx, err = 0;
+
+	atomic_set(&aio->bio_page_num, aio->pg_num);
+
+	for (idx = first; idx < aio->pg_num; ++idx) {
+		struct page *page = NULL;
+
+		err = kevent_aio_alloc_cached_page(k, &page);
+		if (err)
+			break;
+
+		/*
+		 * No error but no page either: somebody else has already
+		 * added a page at this index to the VFS cache. We do not
+		 * process it, since whoever added the page is responsible
+		 * for reading its data from disk.
+		 */
+		if (page == NULL)
+			continue;
+
+		bio = kevent_mpage_readpage(k, bio, page, aio->pg_num - idx,
+				get_block, &aio->offset, &last_block_in_bio);
+	}
+
+	/* Flush the partially filled bio, if any. */
+	if (bio)
+		kevent_mpage_bio_submit(READ, bio);
+
+	return err;
+}
+
+/*
+ * Actor used by kevent_aio_vfs_read(): pushes the first @len bytes of @kpage
+ * to the destination file through its ->sendpage() method and returns that
+ * method's result.
+ */
+static size_t kevent_aio_vfs_read_actor(struct kevent *k, struct page *kpage, size_t len)
+{
+	struct kevent_aio_private *aio = k->priv;
+	struct file *dst = aio->sock;
+	size_t sent;
+
+	sent = dst->f_op->sendpage(dst, kpage, 0, len, &dst->f_pos, 1);
+
+	dprintk("%s: k=%p, page=%p, len=%zu, ret=%zd.\n",
+			__func__, k, kpage, len, sent);
+
+	return sent;
+}
+
+/*
+ * Sends as many already cached, up-to-date pages as possible to the socket.
+ *
+ * Starting at priv->processed, up to pg_num pages are looked up in the page
+ * cache and passed to @actor. The walk stops at the first page that is
+ * missing or not up to date, or when @actor reports an error.
+ *
+ * Returns the number of pages walked; pg_num is returned when either all
+ * pg_num pages were handled or nothing is left to send. When the walk
+ * stopped early, priv->offset is rewound to priv->processed so that the next
+ * read pass starts at the first page that was not sent.
+ */
+static int kevent_aio_vfs_read(struct kevent *k,
+		size_t (*actor)(struct kevent *, struct page *, size_t))
+{
+	struct kevent_aio_private *priv = k->priv;
+	struct address_space *mapping;
+	size_t isize;
+	/*
+	 * Must be signed: the actor hands back the ->sendpage() result, which
+	 * is a negative errno on failure. With an unsigned type the error
+	 * check below could never trigger and the errno would be accounted
+	 * as sent bytes.
+	 */
+	ssize_t actor_size;
+	int i;
+
+	mapping = priv->file->f_mapping;
+	isize = i_size_read(priv->file->f_dentry->d_inode);
+
+	dprintk("%s: start: size_left=%zd, offset=%Lu, processed=%Lu, isize=%zu, pg_num=%d.\n",
+			__func__, priv->size, priv->offset, priv->processed, isize, priv->pg_num);
+
+	for (i=0; i<priv->pg_num && priv->size; ++i) {
+		struct page *page;
+		size_t nr = PAGE_CACHE_SIZE;
+
+		cond_resched();
+		page = find_get_page(mapping, priv->processed >> PAGE_CACHE_SHIFT);
+		if (unlikely(page == NULL))
+			break;
+		if (!PageUptodate(page)) {
+			dprintk("%s: %2d: page=%p, processed=%Lu, size=%zu not uptodate.\n",
+					__func__, i, page, priv->processed, priv->size);
+			page_cache_release(page);
+			break;
+		}
+
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+
+		mark_page_accessed(page);
+
+		/* Never send past end of file or past the requested amount. */
+		if (nr + priv->processed > isize)
+			nr = isize - priv->processed;
+		if (nr > priv->size)
+			nr = priv->size;
+
+		actor_size = actor(k, page, nr);
+		page_cache_release(page);
+		if (actor_size < 0)
+			break;
+
+		priv->processed += actor_size;
+		priv->size -= actor_size;
+	}
+
+	/* Nothing left to send: report the pass as complete. */
+	if (!priv->size)
+		i = priv->pg_num;
+
+	if (i != priv->pg_num)
+		priv->offset = priv->processed;
+
+	dprintk("%s: end: next=%d, num=%d, left=%zu, offset=%Lu, procesed=%Lu, ret=%d.\n",
+			__func__, i, priv->pg_num,
+			priv->size, priv->offset, priv->processed, i);
+
+	return i;
+}
+
+static int kevent_aio_callback(struct kevent *k)
+{
+ return 1; /* unconditionally 1; presumably tells the kevent core the event is ready - TODO confirm against the core */
+}
+
+/*
+ * Work item driving one aio_sendfile() request (@data is the kevent).
+ *
+ * First the pages already in the page cache are sent. If a full pass of
+ * pg_num pages went out, the work either reschedules itself (data left) or
+ * marks the kevent ready (all sent), and signals bio_complete. Otherwise a
+ * read pass is started for the missing pages; if starting it fails the
+ * kevent is broken and marked ready.
+ */
+static void kevent_aio_work(void *data)
+{
+	struct kevent *k = data;
+	struct kevent_aio_private *aio = k->priv;
+	struct inode *inode = k->st->origin;
+	struct address_space *mapping = aio->file->f_mapping;
+	int err, ready, num;
+
+	dprintk("%s: k=%p, priv=%p, inode=%p.\n", __func__, k, aio, inode);
+
+	init_completion(&aio->bio_complete);
+
+	num = kevent_aio_vfs_read(k, &kevent_aio_vfs_read_actor);
+
+	/* Only a full pass counts as "ready"; a partial one needs disk reads. */
+	ready = num;
+	if (ready > 0 && ready != aio->pg_num)
+		ready = 0;
+
+	dprintk("%s: k=%p, ready=%d, size=%zd.\n", __func__, k, ready, aio->size);
+
+	if (ready) {
+		dprintk("%s: next k=%p, size=%zd.\n", __func__, k, aio->size);
+
+		if (aio->size)
+			schedule_work(&aio->work);
+		else
+			kevent_storage_ready(k->st, NULL, KEVENT_MASK_ALL);
+
+		complete(&aio->bio_complete);
+		return;
+	}
+
+	err = kevent_mpage_readpages(k, num, mapping->a_ops->get_block);
+	if (!err)
+		return;
+
+	dprintk("%s: kevent_mpage_readpages failed: err=%d, k=%p, size=%zd.\n",
+			__func__, err, k, aio->size);
+	kevent_break(k);
+	kevent_storage_ready(k->st, NULL, KEVENT_MASK_ALL);
+}
+
+/*
+ * Enqueue callback for KEVENT_AIO: sets up an aio_sendfile() request.
+ *
+ * The request is described by the ukevent: id.raw[0] is the source file
+ * descriptor, id.raw[1] the number of pages to handle per pass and
+ * ret_data[0] the destination descriptor. On success the kevent holds
+ * references to both files and to the source inode (dropped again in
+ * kevent_aio_dequeue()), is linked into the inode's kevent storage and its
+ * work item has been scheduled.
+ *
+ * Returns 0 on success, -ENODEV if a descriptor cannot be resolved, -EINVAL
+ * if the page count is not positive, the source has no inode or no
+ * ->get_block(), or the destination has no inode or no ->sendpage(),
+ * -ENOMEM on allocation failure, or the error from kevent_storage_enqueue().
+ */
+static int kevent_aio_enqueue(struct kevent *k)
+{
+	int err;
+	struct file *file, *sock;
+	struct inode *inode;
+	struct kevent_aio_private *priv;
+	struct address_space *mapping;
+	int fd = k->event.id.raw[0];
+	int num = k->event.id.raw[1];
+	int s = k->event.ret_data[0];
+	size_t size;
+	unsigned long pages;
+
+	err = -ENODEV;
+	file = fget(fd);
+	if (!file)
+		goto err_out_exit;
+
+	sock = fget(s);
+	if (!sock)
+		goto err_out_fput_file;
+
+	mapping = file->f_mapping;
+
+	err = -EINVAL;
+	/* The ukevent comes from userspace, so the page count is untrusted. */
+	if (num <= 0)
+		goto err_out_fput;
+	if (!file->f_dentry || !file->f_dentry->d_inode || !mapping->a_ops->get_block)
+		goto err_out_fput;
+	if (!sock->f_dentry || !sock->f_dentry->d_inode)
+		goto err_out_fput;
+	/* kevent_aio_vfs_read_actor() calls ->sendpage() unconditionally. */
+	if (!sock->f_op || !sock->f_op->sendpage)
+		goto err_out_fput;
+
+	inode = igrab(file->f_dentry->d_inode);
+	if (!inode)
+		goto err_out_fput;
+
+	size = i_size_read(inode);
+
+	/*
+	 * Clamp the per-pass page count to the file size. The size is
+	 * rounded up to whole pages and at least one page is kept, so that
+	 * files shorter than a page (or empty ones) do not end up with a
+	 * zero page count, which would leave the request without progress.
+	 */
+	pages = (size >> PAGE_SHIFT) + ((size & (PAGE_SIZE - 1)) ? 1 : 0);
+	if (!pages)
+		pages = 1;
+	if ((unsigned long)num > pages)
+		num = pages;
+
+	err = -ENOMEM;
+	priv = kzalloc(sizeof(struct kevent_aio_private), GFP_KERNEL);
+	if (!priv)
+		goto err_out_iput;
+
+	priv->pg_num = num;
+	priv->size = size;
+	priv->offset = 0;
+	priv->file = file;
+	priv->sock = sock;
+	INIT_WORK(&priv->work, kevent_aio_work, k);
+	k->priv = priv;
+
+	dprintk("%s: read: k=%p, priv=%p, inode=%p, num=%u, size=%zu, off=%Lu.\n",
+			__func__, k, priv, inode, priv->pg_num, priv->size, priv->offset);
+
+	init_completion(&priv->bio_complete);
+	err = kevent_storage_enqueue(&inode->st, k);
+	if (err)
+		goto err_out_free;
+
+	/* Only start the work once the kevent is linked to its storage. */
+	schedule_work(&priv->work);
+
+	return 0;
+
+err_out_free:
+	k->priv = NULL;
+	kfree(priv);
+err_out_iput:
+	iput(inode);
+err_out_fput:
+	fput(sock);
+err_out_fput_file:
+	fput(file);
+err_out_exit:
+
+	return err;
+}
+
+/*
+ * Dequeue callback for KEVENT_AIO: unlinks the kevent from its storage,
+ * waits for scheduled work and for the current read pass to finish, then
+ * frees the private state and drops the inode and file references taken in
+ * kevent_aio_enqueue(). Always returns 0.
+ */
+static int kevent_aio_dequeue(struct kevent *k)
+{
+	struct kevent_aio_private *aio = k->priv;
+	struct file *src = aio->file;
+	struct file *dst = aio->sock;
+	/* Read before dequeueing; k->st is what the dequeue operates on. */
+	struct inode *inode = k->st->origin;
+
+	kevent_storage_dequeue(k->st, k);
+	flush_scheduled_work();
+	wait_for_completion(&aio->bio_complete);
+
+	k->priv = NULL;
+	kfree(aio);
+
+	iput(inode);
+	fput(src);
+	fput(dst);
+
+	return 0;
+}
+
+/*
+ * aio_sendfile() system call: queues an asynchronous transfer of file @fd to
+ * @s on the kevent control descriptor @ctl_fd.
+ *
+ * The low three bits of @flags select the number of pages handled per pass
+ * (0 selects 8). @size is currently not used by this implementation.
+ *
+ * Returns the result of kevent_user_add_ukevent(), -ENODEV if @ctl_fd is not
+ * an open descriptor, or -EINVAL if it carries no private data.
+ *
+ * NOTE(review): file->private_data is taken to be a struct kevent_user
+ * without checking that @ctl_fd really is a kevent control file - confirm
+ * that this is validated elsewhere.
+ */
+asmlinkage long sys_aio_sendfile(int ctl_fd, int fd, int s,
+		size_t size, unsigned flags)
+{
+	struct ukevent ukread;
+	struct kevent_user *u;
+	struct file *file;
+	int err;
+	int num = (flags & 7)?(flags & 7):8;
+
+	memset(&ukread, 0, sizeof(struct ukevent));
+
+	ukread.type = KEVENT_AIO;
+	ukread.event = KEVENT_AIO_BIO;
+
+	ukread.id.raw[0] = fd;
+	ukread.id.raw[1] = num;
+	ukread.ret_data[0] = s;
+
+	dprintk("%s: fd=%d, s=%d, num=%d.\n", __func__, fd, s, num);
+
+	file = fget(ctl_fd);
+	if (!file)
+		return -ENODEV;
+
+	u = file->private_data;
+	if (u)
+		err = kevent_user_add_ukevent(&ukread, u);
+	else
+		err = -EINVAL;
+
+	fput(file);
+	return err;
+}
+
+/*
+ * Registers the AIO enqueue/dequeue/callback handlers in the KEVENT_AIO slot
+ * of kevent_registered_callbacks. Run as a late initcall; always returns 0.
+ */
+static int __init kevent_init_aio(void)
+{
+	struct kevent_callbacks *cb = &kevent_registered_callbacks[KEVENT_AIO];
+
+	cb->callback = kevent_aio_callback;
+	cb->enqueue = kevent_aio_enqueue;
+	cb->dequeue = kevent_aio_dequeue;
+
+	return 0;
+}
+late_initcall(kevent_init_aio);
diff --git a/kernel/kevent/kevent_inode.c b/kernel/kevent/kevent_inode.c
new file mode 100644
index 0000000..00e59f3
--- /dev/null
+++ b/kernel/kevent/kevent_inode.c
@@ -0,0 +1,114 @@
+/*
+ * kevent_inode.c
+ *
+ * 2006 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/file.h>
+#include <linux/kevent.h>
+#include <linux/fs.h>
+
+/*
+ * Enqueue callback for KEVENT_INODE: resolves the descriptor in id.raw[0],
+ * grabs a reference to its inode and links the kevent into the inode's
+ * kevent storage. The file reference is only held for the duration of the
+ * call; the inode reference is kept until kevent_inode_dequeue().
+ *
+ * Returns 0 on success, -ENODEV for a bad descriptor, -EINVAL if the file
+ * has no inode or the inode cannot be grabbed, or the error from
+ * kevent_storage_enqueue().
+ */
+static int kevent_inode_enqueue(struct kevent *k)
+{
+	struct file *file;
+	struct inode *inode;
+	int err = -EINVAL;
+
+	file = fget(k->event.id.raw[0]);
+	if (!file)
+		return -ENODEV;
+
+	if (file->f_dentry && file->f_dentry->d_inode) {
+		inode = igrab(file->f_dentry->d_inode);
+		if (inode) {
+			err = kevent_storage_enqueue(&inode->st, k);
+			/* Keep the inode reference only if the kevent was queued. */
+			if (err)
+				iput(inode);
+		}
+	}
+
+	fput(file);
+	return err;
+}
+
+/*
+ * Dequeue callback for KEVENT_INODE: unlinks the kevent from its storage and
+ * drops the inode reference taken in kevent_inode_enqueue(). Always
+ * returns 0.
+ */
+static int kevent_inode_dequeue(struct kevent *k)
+{
+	/* Fetch the inode first; it is reached through k->st. */
+	struct inode *owner = k->st->origin;
+
+	kevent_storage_dequeue(k->st, k);
+	iput(owner);
+
+	return 0;
+}
+
+static int kevent_inode_callback(struct kevent *k)
+{
+ return 1; /* unconditionally 1; presumably tells the kevent core the event is ready - TODO confirm against the core */
+}
+
+/*
+ * Delivers @event to the kevent storage of the inode of @dentry's parent
+ * directory.
+ *
+ * The parent dentry and its inode are looked up under dentry->d_lock, and a
+ * reference on the parent is held while the notification is delivered.
+ */
+void kevent_inode_notify_parent(struct dentry *dentry, u32 event)
+{
+	struct dentry *parent;
+	struct inode *inode;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	inode = parent->d_inode;
+
+	dget(parent);
+	spin_unlock(&dentry->d_lock);
+
+	/* Forward the caller's event instead of a hard-coded one. */
+	kevent_inode_notify(inode, event);
+	dput(parent);
+}
+
+void kevent_inode_remove(struct inode *inode)
+{
+ kevent_storage_fini(&inode->st); /* NOTE(review): destroy_inode() in this patch also calls kevent_storage_fini() on the same storage - confirm a second call is safe */
+}
+
+void kevent_inode_notify(struct inode *inode, u32 event)
+{
+ kevent_storage_ready(&inode->st, NULL, event); /* pass @event on to the kevents queued in this inode's storage */
+}
+
+/*
+ * Registers the inode enqueue/dequeue/callback handlers in the KEVENT_INODE
+ * slot of kevent_registered_callbacks. Run as a late initcall; always
+ * returns 0.
+ */
+static int __init kevent_init_inode(void)
+{
+	struct kevent_callbacks *cb = &kevent_registered_callbacks[KEVENT_INODE];
+
+	cb->callback = kevent_inode_callback;
+	cb->enqueue = kevent_inode_enqueue;
+	cb->dequeue = kevent_inode_dequeue;
+
+	return 0;
+}
+late_initcall(kevent_init_inode);
next prev parent reply other threads:[~2006-08-08 7:20 UTC|newest]
Thread overview: 182+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-07-09 13:24 [RFC 1/4] kevent: core files Evgeniy Polyakov
2006-07-09 14:59 ` Pekka Enberg
2006-07-09 15:08 ` Evgeniy Polyakov
2006-07-25 6:17 ` David Miller
2006-07-25 6:26 ` Evgeniy Polyakov
2006-07-27 19:18 ` Zach Brown
2006-07-27 20:06 ` Evgeniy Polyakov
2006-07-27 21:32 ` Zach Brown
2006-07-28 5:23 ` Evgeniy Polyakov
2006-07-28 18:33 ` Zach Brown
2006-07-28 18:44 ` Evgeniy Polyakov
2006-07-28 19:10 ` Zach Brown
2006-07-29 3:38 ` Ulrich Drepper
2006-07-29 4:32 ` Nicholas Miell
2006-07-29 15:48 ` Evgeniy Polyakov
2006-07-29 20:54 ` Nicholas Miell
2006-07-30 8:08 ` Ulrich Drepper
2006-07-29 15:44 ` Evgeniy Polyakov
2006-07-29 16:18 ` Ulrich Drepper
2006-07-29 16:36 ` Hans Henrik Happe
2006-07-31 10:33 ` Evgeniy Polyakov
2006-07-31 10:35 ` Herbert Xu
2006-07-31 10:50 ` Evgeniy Polyakov
2006-07-31 10:57 ` David Miller
2006-07-31 10:59 ` Herbert Xu
2006-08-01 7:53 ` Ulrich Drepper
2006-08-01 7:58 ` David Miller
2006-07-31 19:41 ` Evgeniy Polyakov
2006-07-31 22:00 ` David Miller
2006-07-31 22:16 ` Brent Cook
2006-07-31 22:20 ` David Miller
2006-08-01 6:24 ` Evgeniy Polyakov
2006-07-31 22:46 ` Zach Brown
2006-08-01 9:34 ` [take2 0/4] kevent: introduction Evgeniy Polyakov
2006-08-01 9:34 ` [take2 1/4] kevent: core files Evgeniy Polyakov
2006-08-01 9:34 ` [take2 2/4] kevent: network AIO, socket notifications Evgeniy Polyakov
2006-08-01 9:34 ` [take2 4/4] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-01 9:34 ` [take2 3/4] kevent: AIO, aio_sendfile() implementation Evgeniy Polyakov
2006-08-01 13:46 ` [take2 1/4] kevent: core files James Morris
2006-08-01 13:55 ` Evgeniy Polyakov
2006-08-01 14:27 ` James Morris
2006-08-01 14:34 ` Evgeniy Polyakov
2006-08-01 23:56 ` Zach Brown
2006-08-02 0:01 ` David Miller
2006-08-02 6:43 ` Evgeniy Polyakov
2006-08-02 6:39 ` Evgeniy Polyakov
2006-08-02 7:25 ` David Miller
2006-08-02 7:46 ` Evgeniy Polyakov
2006-08-03 9:45 ` [take3 0/4] kevent: Generic event handling mechanism Evgeniy Polyakov
2006-08-03 9:40 ` Evgeniy Polyakov
2006-08-03 9:46 ` [take3 1/4] kevent: Core files Evgeniy Polyakov
2006-08-03 9:46 ` [take3 2/4] kevent: AIO, aio_sendfile() implementation Evgeniy Polyakov
2006-08-03 9:46 ` [take3 3/4] kevent: Network AIO, socket notifications Evgeniy Polyakov
2006-08-03 9:46 ` [take3 4/4] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-03 9:43 ` Eric Dumazet
2006-08-03 9:48 ` Evgeniy Polyakov
2006-08-03 9:54 ` [take3 3/4] kevent: Network AIO, socket notifications Eric Dumazet
2006-08-03 10:13 ` Evgeniy Polyakov
2006-08-03 17:04 ` [take3 2/4] kevent: AIO, aio_sendfile() implementation Badari Pulavarty
2006-08-03 17:13 ` Evgeniy Polyakov
2006-08-03 14:40 ` [take3 1/4] kevent: Core files Eric Dumazet
2006-08-03 14:55 ` Evgeniy Polyakov
2006-08-03 15:11 ` Eric Dumazet
2006-08-03 15:21 ` Evgeniy Polyakov
2006-08-03 21:37 ` David Miller
2006-08-05 13:02 ` [take4 0/4] kevent: Generic event handling mechanism Evgeniy Polyakov
2006-08-05 13:02 ` [take4 1/4] kevent: Core files Evgeniy Polyakov
2006-08-05 13:02 ` [take4 2/4] kevent: AIO, aio_sendfile() implementation Evgeniy Polyakov
2006-08-05 13:02 ` [take4 3/4] kevent: Network AIO, socket notifications Evgeniy Polyakov
2006-08-05 13:02 ` [take4 4/4] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-05 17:57 ` [take4 1/4] kevent: Core files Greg KH
2006-08-05 18:10 ` Evgeniy Polyakov
2006-08-08 7:44 ` [take5 0/4] kevent: Generic event handling mechanism Evgeniy Polyakov
2006-08-08 7:44 ` [take5 1/4] kevent: Core files Evgeniy Polyakov
2006-08-08 7:44 ` Evgeniy Polyakov [this message]
2006-08-08 7:44 ` [take5 3/4] kevent: Network AIO, socket notifications Evgeniy Polyakov
2006-08-08 7:44 ` [take5 4/4] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-08 9:52 ` [take5 3/4] kevent: Network AIO, socket notifications Eric Dumazet
2006-08-08 10:02 ` Evgeniy Polyakov
2006-08-08 22:02 ` [take5 1/4] kevent: Core files Zach Brown
2006-08-09 5:22 ` Evgeniy Polyakov
2006-08-08 21:32 ` [take5 0/4] kevent: Generic event handling mechanism Zach Brown
2006-08-09 5:31 ` Evgeniy Polyakov
2006-08-09 5:52 ` David Miller
2006-08-09 6:11 ` Evgeniy Polyakov
2006-08-09 6:25 ` Evgeniy Polyakov
2006-08-09 6:31 ` David Miller
2006-08-09 6:49 ` Evgeniy Polyakov
2006-08-09 6:57 ` Ulrich Drepper
2006-08-09 7:00 ` David Miller
2006-08-09 7:00 ` Evgeniy Polyakov
2006-08-09 8:34 ` Christoph Hellwig
2006-08-09 8:45 ` Andrew Morton
2006-08-09 8:02 ` [take6 0/3] " Evgeniy Polyakov
2006-08-09 7:58 ` David Miller
2006-08-09 8:07 ` Evgeniy Polyakov
2006-08-09 8:20 ` David Miller
2006-08-09 8:24 ` Evgeniy Polyakov
2006-08-09 8:02 ` [take6 1/3] kevent: Core files Evgeniy Polyakov
2006-08-09 8:02 ` [take6 3/3] kevent: Network AIO, socket notifications Evgeniy Polyakov
2006-08-09 8:02 ` [take6 2/3] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-09 17:47 ` [take6 1/3] kevent: Core files Stephen Hemminger
2006-08-09 19:17 ` Evgeniy Polyakov
2006-08-10 0:04 ` David Miller
2006-08-09 22:21 ` Andrew Morton
2006-08-10 6:14 ` Evgeniy Polyakov
2006-08-10 6:42 ` David Miller
2006-08-10 6:48 ` Evgeniy Polyakov
2006-08-10 7:18 ` Andrew Morton
2006-08-10 7:50 ` Evgeniy Polyakov
2006-08-10 8:02 ` Andrew Morton
2006-08-10 8:22 ` Evgeniy Polyakov
2006-08-11 0:56 ` Andrew Morton
2006-08-11 6:15 ` Evgeniy Polyakov
2006-08-11 6:23 ` Andrew Morton
2006-08-11 6:30 ` Evgeniy Polyakov
2006-08-11 7:04 ` Andrew Morton
2006-08-11 7:27 ` Evgeniy Polyakov
2006-08-11 6:25 ` Ulrich Drepper
2006-08-11 6:33 ` Evgeniy Polyakov
2006-08-11 6:38 ` David Miller
2006-08-11 6:55 ` Evgeniy Polyakov
2006-08-10 12:12 ` [take7 0/1] kevent: generic event handling mechanism Evgeniy Polyakov
2006-08-10 12:16 ` [take7 1/1] kevent: core files and timer/poll notifications Evgeniy Polyakov
2006-08-10 12:22 ` Evgeniy Polyakov
2006-08-11 8:40 ` [take8 0/2] kevent: Generic event handling mechanism Evgeniy Polyakov
2006-08-11 8:40 ` [take8 1/2] kevent: Core files Evgeniy Polyakov
2006-08-11 8:40 ` [take8 2/2] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-11 15:45 ` Andrew Morton
2006-08-12 8:18 ` Evgeniy Polyakov
2006-08-12 8:38 ` Andrew Morton
2006-08-12 8:55 ` Evgeniy Polyakov
2006-08-13 0:51 ` [take8 1/2] kevent: Core files Jeff Carr
2006-08-13 9:04 ` Evgeniy Polyakov
2006-08-14 6:20 ` [take8 0/2] kevent: Generic event handling mechanism Evgeniy Polyakov
2006-08-14 6:20 ` [take8 1/2] kevent: Core files Evgeniy Polyakov
2006-08-14 6:20 ` [take8 2/2] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-14 6:21 ` [take9 0/2] kevent: Generic event handling mechanism Evgeniy Polyakov
2006-08-14 6:21 ` [take9 1/2] kevent: Core files Evgeniy Polyakov
2006-08-14 6:21 ` [take9 2/2] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-16 13:30 ` Christoph Hellwig
2006-08-16 13:40 ` Evgeniy Polyakov
2006-08-18 10:41 ` Christoph Hellwig
2006-08-18 10:59 ` Evgeniy Polyakov
2006-08-21 11:01 ` Christoph Hellwig
2006-08-21 11:26 ` Evgeniy Polyakov
2006-08-22 14:35 ` Davide Libenzi
2006-08-16 13:45 ` [take9 1/2] kevent: Core files Christoph Hellwig
2006-08-16 13:56 ` Evgeniy Polyakov
2006-08-16 18:08 ` Zach Brown
2006-08-16 19:24 ` Evgeniy Polyakov
2006-08-16 19:45 ` David Miller
2006-08-16 20:06 ` Evgeniy Polyakov
2006-08-18 10:46 ` Christoph Hellwig
2006-08-18 11:23 ` Evgeniy Polyakov
2006-08-21 10:56 ` Christoph Hellwig
2006-08-21 11:13 ` Evgeniy Polyakov
2006-08-21 12:53 ` Bernd Petrovitsch
2006-08-21 13:01 ` Evgeniy Polyakov
2006-08-21 13:49 ` Bernd Petrovitsch
2006-08-21 19:09 ` David Miller
2006-08-16 13:26 ` [take9 0/2] kevent: Generic event handling mechanism Christoph Hellwig
2006-08-16 13:38 ` Evgeniy Polyakov
2006-08-16 18:10 ` Zach Brown
2006-08-16 12:34 ` [take10 " Evgeniy Polyakov
2006-08-16 12:34 ` [take10 1/2] kevent: Core files Evgeniy Polyakov
2006-08-16 12:34 ` [take10 2/2] kevent: poll/select() notifications. Timer notifications Evgeniy Polyakov
2006-08-16 12:37 ` [take10 1/2] kevent: Core files Mika Penttilä
2006-08-16 12:44 ` Evgeniy Polyakov
2006-08-18 9:35 ` Joe Jin
2006-08-18 10:10 ` Evgeniy Polyakov
2006-08-01 1:05 ` [RFC 1/4] kevent: core files David Miller
2006-07-27 20:58 ` Benjamin LaHaise
2006-07-27 21:44 ` Zach Brown
2006-07-27 22:02 ` Benjamin LaHaise
2006-07-28 5:39 ` Evgeniy Polyakov
2006-07-28 19:01 ` Zach Brown
2006-07-28 19:24 ` Evgeniy Polyakov
2006-07-28 19:34 ` Zach Brown
2006-07-28 19:37 ` Zach Brown
2006-08-01 1:02 ` David Miller
2006-08-01 17:02 ` Zach Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1155023087143@2ka.mipt.ru \
--to=johnpol@2ka.mipt.ru \
--cc=davem@davemloft.net \
--cc=drepper@redhat.com \
--cc=linux-aio@kvack.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=zach.brown@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.