linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tao Ma <tm@tao.ma>
To: linux-ext4@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Subject: [PATCH V4 05/22] ext4: Add normal write support for inline data.
Date: Mon, 20 Feb 2012 15:01:39 +0800	[thread overview]
Message-ID: <1329721316-4955-5-git-send-email-tm@tao.ma> (raw)
In-Reply-To: <1329721316-4955-1-git-send-email-tm@tao.ma>

From: Tao Ma <boyu.mt@taobao.com>

For a normal write case(not journalled write, not delayed allocation),
we write to the inline if the file is small and convert it to an extent
based file when the write is larger than the max inline size.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
---
 fs/ext4/ext4.h    |   11 +++
 fs/ext4/extents.c |    9 ++-
 fs/ext4/inline.c  |  232 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/inode.c   |   59 +++++++++-----
 fs/ext4/xattr.h   |   26 ++++++
 5 files changed, 316 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 18254c0..6d5a350 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1874,8 +1874,19 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *,
 						ext4_lblk_t, int, int *);
 struct buffer_head *ext4_bread(handle_t *, struct inode *,
 						ext4_lblk_t, int, int *);
+int ext4_get_block_write(struct inode *inode, sector_t iblock,
+			 struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
 				struct buffer_head *bh_result, int create);
+int walk_page_buffers(handle_t *handle,
+			     struct buffer_head *head,
+			     unsigned from,
+			     unsigned to,
+			     int *partial,
+			     int (*fn)(handle_t *handle,
+				       struct buffer_head *bh));
+int do_journal_get_write_access(handle_t *handle,
+				struct buffer_head *bh);
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
 extern int  ext4_write_inode(struct inode *, struct writeback_control *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74f23c2..c861727 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -41,6 +41,7 @@
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
 #include "ext4_jbd2.h"
+#include "xattr.h"
 
 #include <trace/events/ext4.h>
 
@@ -2177,7 +2178,13 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	int index;
-	int depth = ext_depth(inode);
+	int depth;
+
+	/* If we are converting the inline data, only one is needed here. */
+	if (ext4_has_inline_data(inode))
+		return 1;
+
+	depth = ext_depth(inode);
 
 	if (chunk)
 		index = depth * 2;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 97ca5de..4946a24 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -14,6 +14,7 @@
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
+#include "truncate.h"
 
 #define EXT4_XATTR_SYSTEM_DATA_NAME	"data"
 #define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -505,6 +506,237 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
 	return ret >= 0 ? 0 : ret;
 }
 
+static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
+					      struct inode *inode,
+					      unsigned flags)
+{
+	int ret, needed_blocks;
+	handle_t *handle = NULL;
+	int retries = 0, sem_held = 0;
+	struct page *page = NULL;
+	unsigned from, to;
+	struct ext4_iloc iloc;
+
+	if (!ext4_has_inline_data(inode)) {
+		/*
+		 * clear the flag so that no new write
+		 * will trap here again.
+		 */
+		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+		return 0;
+	}
+
+	needed_blocks = ext4_writepage_trans_blocks(inode);
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+retry:
+	handle = ext4_journal_start(inode, needed_blocks);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	/* We cannot recurse into the filesystem as the transaction is already
+	 * started */
+	flags |= AOP_FLAG_NOFS;
+
+	page = grab_cache_page_write_begin(mapping, 0, flags);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	sem_held = 1;
+	/* If some one has already done this for us, just exit. */
+	if (!ext4_has_inline_data(inode)) {
+		ret = 0;
+		goto out;
+	}
+
+	from = 0;
+	to = ext4_get_inline_size(inode);
+	if (!PageUptodate(page)) {
+		ret = ext4_read_inline_page(inode, page);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = ext4_destroy_inline_data_nolock(handle, inode);
+	if (ret)
+		goto out;
+
+	if (ext4_should_dioread_nolock(inode))
+		ret = __block_write_begin(page, from, to, ext4_get_block_write);
+	else
+		ret = __block_write_begin(page, from, to, ext4_get_block);
+
+	if (!ret && ext4_should_journal_data(inode)) {
+		ret = walk_page_buffers(handle, page_buffers(page),
+				from, to, NULL, do_journal_get_write_access);
+	}
+
+	if (ret) {
+		unlock_page(page);
+		page_cache_release(page);
+		ext4_orphan_add(handle, inode);
+		up_write(&EXT4_I(inode)->xattr_sem);
+		sem_held = 0;
+		ext4_journal_stop(handle);
+		handle = NULL;
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If truncate failed early the inode might
+		 * still be on the orphan list; we need to
+		 * make sure the inode is removed from the
+		 * orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+
+	block_commit_write(page, from, to);
+out:
+	if (page) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	if (sem_held)
+		up_write(&EXT4_I(inode)->xattr_sem);
+	if (handle)
+		ext4_journal_stop(handle);
+	brelse(iloc.bh);
+	return ret;
+}
+
+/*
+ * Try to write data in the inode.
+ * If the inode has inline data, check whether the new write can be
+ * in the inode also. If not, create the page the handle, move the data
+ * to the page make it update and let the later codes create extent for it.
+ */
+int ext4_try_to_write_inline_data(struct address_space *mapping,
+				  struct inode *inode,
+				  loff_t pos, unsigned len,
+				  unsigned flags,
+				  struct page **pagep)
+{
+	int ret;
+	handle_t *handle;
+	struct page *page;
+	struct ext4_iloc iloc;
+
+	if (pos + len > ext4_get_max_inline_size(inode))
+		goto convert;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	/*
+	 * The possible write could happen in the inode,
+	 * so try to reserve the space in inode first.
+	 */
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	ret = ext4_prepare_inline_data(handle, inode, pos + len);
+	if (ret && ret != -ENOSPC)
+		goto out;
+
+	/* We don't have space in inline inode, so convert it to extent. */
+	if (ret == -ENOSPC) {
+		ext4_journal_stop(handle);
+		brelse(iloc.bh);
+		goto convert;
+	}
+
+	flags |= AOP_FLAG_NOFS;
+
+	page = grab_cache_page_write_begin(mapping, 0, flags);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	*pagep = page;
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		ret = 0;
+		unlock_page(page);
+		page_cache_release(page);
+		goto out_up_read;
+	}
+
+	if (!PageUptodate(page)) {
+		ret = ext4_read_inline_page(inode, page);
+		if (ret < 0)
+			goto out_up_read;
+	}
+
+	ret = 1;
+	handle = NULL;
+out_up_read:
+	up_read(&EXT4_I(inode)->xattr_sem);
+out:
+	if (handle)
+		ext4_journal_stop(handle);
+	brelse(iloc.bh);
+	return ret;
+convert:
+	return ext4_convert_inline_data_to_extent(mapping,
+						  inode, flags);
+}
+
+int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
+			       unsigned copied, struct page *page)
+{
+	int ret;
+	void *kaddr;
+	struct ext4_iloc iloc;
+
+	if (unlikely(copied < len)) {
+		if (!PageUptodate(page)) {
+			copied = 0;
+			goto out;
+		}
+	}
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret) {
+		ext4_std_error(inode->i_sb, ret);
+		copied = 0;
+		goto out;
+	}
+
+	down_write(&EXT4_I(inode)->xattr_sem);
+	BUG_ON(!ext4_has_inline_data(inode));
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
+	kunmap_atomic(kaddr, KM_USER0);
+	SetPageUptodate(page);
+	/* clear page dirty so that writepages wouldn't work for us. */
+	ClearPageDirty(page);
+
+	up_write(&EXT4_I(inode)->xattr_sem);
+	brelse(iloc.bh);
+out:
+	return copied;
+}
+
+
 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
 {
 	int ret;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f631070..bae79b4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -713,7 +713,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
 	return NULL;
 }
 
-static int walk_page_buffers(handle_t *handle,
+int walk_page_buffers(handle_t *handle,
 			     struct buffer_head *head,
 			     unsigned from,
 			     unsigned to,
@@ -769,8 +769,8 @@ static int walk_page_buffers(handle_t *handle,
  * is elevated.  We'll still have enough credits for the tiny quotafile
  * write.
  */
-static int do_journal_get_write_access(handle_t *handle,
-				       struct buffer_head *bh)
+int do_journal_get_write_access(handle_t *handle,
+				struct buffer_head *bh)
 {
 	int dirty = buffer_dirty(bh);
 	int ret;
@@ -793,8 +793,6 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ret;
 }
 
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
-		   struct buffer_head *bh_result, int create);
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
@@ -817,6 +815,17 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
+	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+		ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
+						    flags, pagep);
+		if (ret < 0)
+			goto out;
+		if (ret == 1) {
+			ret = 0;
+			goto out;
+		}
+	}
+
 retry:
 	handle = ext4_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
@@ -834,6 +843,7 @@ retry:
 		ret = -ENOMEM;
 		goto out;
 	}
+
 	*pagep = page;
 
 	if (ext4_should_dioread_nolock(inode))
@@ -898,7 +908,12 @@ static int ext4_generic_write_end(struct file *file,
 	struct inode *inode = mapping->host;
 	handle_t *handle = ext4_journal_current_handle();
 
-	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (ext4_has_inline_data(inode))
+		copied = ext4_write_inline_data_end(inode, pos, len,
+						    copied, page);
+	else
+		copied = block_write_end(file, mapping, pos,
+					 len, copied, page, fsdata);
 
 	/*
 	 * No need to use i_size_read() here, the i_size
@@ -2764,7 +2779,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
  * We allocate an uinitialized extent if blocks haven't been allocated.
  * The extent will be converted to initialized after the IO is complete.
  */
-static int ext4_get_block_write(struct inode *inode, sector_t iblock,
+int ext4_get_block_write(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create)
 {
 	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
@@ -3648,7 +3663,8 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
 	if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
 		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
 		ext4_find_inline_data(inode);
-	}
+	} else
+		EXT4_I(inode)->i_inline_off = 0;
 }
 
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
@@ -3786,17 +3802,19 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 				 ei->i_file_acl);
 		ret = -EIO;
 		goto bad_inode;
-	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		    (S_ISLNK(inode->i_mode) &&
-		     !ext4_inode_is_fast_symlink(inode)))
-			/* Validate extent which is part of inode */
-			ret = ext4_ext_check_inode(inode);
-	} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		   (S_ISLNK(inode->i_mode) &&
-		    !ext4_inode_is_fast_symlink(inode))) {
-		/* Validate block references which are part of inode */
-		ret = ext4_ind_check_inode(inode);
+	} else if (!ext4_has_inline_data(inode)) {
+		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+			if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+			    (S_ISLNK(inode->i_mode) &&
+			     !ext4_inode_is_fast_symlink(inode))))
+				/* Validate extent which is part of inode */
+				ret = ext4_ext_check_inode(inode);
+		} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+			   (S_ISLNK(inode->i_mode) &&
+			    !ext4_inode_is_fast_symlink(inode))) {
+			/* Validate block references which are part of inode */
+			ret = ext4_ind_check_inode(inode);
+		}
 	}
 	if (ret)
 		goto bad_inode;
@@ -3979,9 +3997,10 @@ static int ext4_do_update_inode(handle_t *handle,
 				cpu_to_le32(new_encode_dev(inode->i_rdev));
 			raw_inode->i_block[2] = 0;
 		}
-	} else
+	} else if (!ext4_has_inline_data(inode)) {
 		for (block = 0; block < EXT4_N_BLOCKS; block++)
 			raw_inode->i_block[block] = ei->i_data[block];
+	}
 
 	raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
 	if (ei->i_extra_isize) {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 8971d06..b9e5cec 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -139,6 +139,15 @@ extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
 extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
 
 extern int ext4_readpage_inline(struct inode *inode, struct page *page);
+extern int ext4_try_to_write_inline_data(struct address_space *mapping,
+					 struct inode *inode,
+					 loff_t pos, unsigned len,
+					 unsigned flags,
+					 struct page **pagep);
+extern int ext4_write_inline_data_end(struct inode *inode,
+				      loff_t pos, unsigned len,
+				      unsigned copied,
+				      struct page *page);
 # else  /* CONFIG_EXT4_FS_XATTR */
 
 static inline int
@@ -259,6 +268,23 @@ static inline int ext4_readpage_inline(struct inode *inode, struct page *page)
 {
 	return 0;
 }
+
+static inline int ext4_try_to_write_inline_data(struct address_space *mapping,
+						struct inode *inode,
+						loff_t pos, unsigned len,
+						unsigned flags,
+						struct page **pagep)
+{
+	return 0;
+}
+
+static inline int ext4_write_inline_data_end(struct inode *inode,
+					     loff_t pos, unsigned len,
+					     unsigned copied,
+					     struct page *page)
+{
+	return 0;
+}
 # endif  /* CONFIG_EXT4_FS_XATTR */
 
 #ifdef CONFIG_EXT4_FS_SECURITY
-- 
1.7.0.4


  parent reply	other threads:[~2012-02-20  7:02 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-20  7:00 [PATCH V4 00/22] ext4: Add inline data support Tao Ma
2012-02-20  7:01 ` [PATCH V4 01/22] ext4: Move extra inode read to a new function Tao Ma
2012-02-20  7:01   ` [PATCH V4 02/22] ext4: export inline xattr functions Tao Ma
2012-02-21 22:45     ` Andreas Dilger
2012-02-20  7:01   ` [PATCH V4 03/22] ext4: Add the basic function for inline data support Tao Ma
2012-04-07  0:21     ` Andreas Dilger
2012-04-13  2:59       ` Tao Ma
2012-02-20  7:01   ` [PATCH V4 04/22] ext4: Add read support for inline data Tao Ma
2012-02-20  7:01   ` Tao Ma [this message]
2012-02-20  7:01   ` [PATCH V4 06/22] ext4: Add journalled write " Tao Ma
2012-02-20  7:01   ` [PATCH V4 07/22] ext4: Add delalloc " Tao Ma
2012-02-20  7:01   ` [PATCH V4 08/22] ext4: Create a new function ext4_init_new_dir Tao Ma
2012-02-20  7:01   ` [PATCH V4 09/22] ext4: Refactor __ext4_check_dir_entry to accepts start and size Tao Ma
2012-02-20  7:01   ` [PATCH V4 10/22] ext4: Create __ext4_insert_dentry for dir entry insertion Tao Ma
2012-02-20  7:01   ` [PATCH V4 11/22] ext4: let add_dir_entry handle inline data properly Tao Ma
2012-02-20  7:01   ` [PATCH V4 12/22] ext4: Let ext4_readdir handle inline data Tao Ma
2012-02-20  7:01   ` [PATCH V4 13/22] ext4: Create a new function search_dir Tao Ma
2012-02-20  7:01   ` [PATCH V4 14/22] ext4: let ext4_find_entry handle inline data Tao Ma
2012-02-20  7:01   ` [PATCH V4 15/22] ext4: make ext4_delete_entry generic Tao Ma
2012-02-20  7:01   ` [PATCH V4 16/22] ext4: let ext4_delete_entry handle inline data Tao Ma
2012-02-20  7:01   ` [PATCH V4 17/22] ext4: let empty_dir handle inline dir Tao Ma
2012-04-07  0:21     ` Andreas Dilger
2012-04-17  3:55       ` Tao Ma
2012-02-20  7:01   ` [PATCH V4 18/22] ext4: let ext4_rename " Tao Ma
2012-02-20  7:01   ` [PATCH V4 19/22] ext4: Let fiemap work with inline data Tao Ma
2012-02-20  7:01   ` [PATCH V4 20/22] ext4: Evict inline data out if we needs to strore xattr in inode Tao Ma
2012-02-20  7:01   ` [PATCH V4 21/22] ext4: let ext4_truncate handle inline data correctly Tao Ma
2012-02-20  7:01   ` [PATCH V4 22/22] ext4: Enable ext4 inline support Tao Ma
2012-02-21  6:51   ` [PATCH V4 01/22] ext4: Move extra inode read to a new function Andreas Dilger
2012-02-21  8:13     ` Tao Ma
2012-02-21 23:44 ` [PATCH V4 00/22] ext4: Add inline data support Andreas Dilger
2012-02-22  1:34   ` Tao Ma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1329721316-4955-5-git-send-email-tm@tao.ma \
    --to=tm@tao.ma \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).