[PATCH 10/38] whiteout: Split of ext2_append_link() from ext2_add

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 10/38] whiteout: Split of ext2_append_link() from ext2_add_link()
       [not found] <1276627208-17242-1-git-send-email-vaurora@redhat.com>
@ 2010-06-15 18:39 ` Valerie Aurora
  2010-06-15 18:39 ` [PATCH 11/38] whiteout: ext2 whiteout support Valerie Aurora
  2010-06-15 18:39 ` [PATCH 14/38] fallthru: ext2 fallthru support Valerie Aurora
  2 siblings, 0 replies; 26+ messages in thread
From: Valerie Aurora @ 2010-06-15 18:39 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Miklos Szeredi, Jan Blunck, Christoph Hellwig, linux-kernel,
	linux-fsdevel, Valerie Aurora, Theodore Tso, linux-ext4

From: Jan Blunck <jblunck@suse.de>

ext2_append_entry() is later used to find or append a directory
entry when creating a whiteout.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Valerie Aurora <vaurora@redhat.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org
---
 fs/ext2/dir.c |   70 ++++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 7516957..57207a9 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -472,9 +472,10 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 }
 
 /*
- *	Parent is locked.
+ * Find or append a given dentry to the parent directory
  */
-int ext2_add_link (struct dentry *dentry, struct inode *inode)
+static ext2_dirent * ext2_append_entry(struct dentry * dentry,
+				       struct page ** page)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
 	const char *name = dentry->d_name.name;
@@ -482,13 +483,10 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 	unsigned chunk_size = ext2_chunk_size(dir);
 	unsigned reclen = EXT2_DIR_REC_LEN(namelen);
 	unsigned short rec_len, name_len;
-	struct page *page = NULL;
-	ext2_dirent * de;
+	ext2_dirent * de = NULL;
 	unsigned long npages = dir_pages(dir);
 	unsigned long n;
 	char *kaddr;
-	loff_t pos;
-	int err;
 
 	/*
 	 * We take care of directory expansion in the same loop.
@@ -498,20 +496,19 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 	for (n = 0; n <= npages; n++) {
 		char *dir_end;
 
-		page = ext2_get_page(dir, n, 0);
-		err = PTR_ERR(page);
-		if (IS_ERR(page))
+		*page = ext2_get_page(dir, n, 0);
+		de = ERR_PTR(PTR_ERR(*page));
+		if (IS_ERR(*page))
 			goto out;
-		lock_page(page);
-		kaddr = page_address(page);
+		lock_page(*page);
+		kaddr = page_address(*page);
 		dir_end = kaddr + ext2_last_byte(dir, n);
 		de = (ext2_dirent *)kaddr;
 		kaddr += PAGE_CACHE_SIZE - reclen;
 		while ((char *)de <= kaddr) {
 			if ((char *)de == dir_end) {
 				/* We hit i_size */
-				name_len = 0;
-				rec_len = chunk_size;
+				de->name_len = 0;
 				de->rec_len = ext2_rec_len_to_disk(chunk_size);
 				de->inode = 0;
 				goto got_it;
@@ -519,12 +516,11 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 			if (de->rec_len == 0) {
 				ext2_error(dir->i_sb, __func__,
 					"zero-length directory entry");
-				err = -EIO;
+				de = ERR_PTR(-EIO);
 				goto out_unlock;
 			}
-			err = -EEXIST;
 			if (ext2_match (namelen, name, de))
-				goto out_unlock;
+				goto got_it;
 			name_len = EXT2_DIR_REC_LEN(de->name_len);
 			rec_len = ext2_rec_len_from_disk(de->rec_len);
 			if (!de->inode && rec_len >= reclen)
@@ -533,13 +529,48 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 				goto got_it;
 			de = (ext2_dirent *) ((char *) de + rec_len);
 		}
-		unlock_page(page);
-		ext2_put_page(page);
+		unlock_page(*page);
+		ext2_put_page(*page);
 	}
+
 	BUG();
-	return -EINVAL;
 
 got_it:
+	return de;
+	/* OFFSET_CACHE */
+out_unlock:
+	unlock_page(*page);
+	ext2_put_page(*page);
+out:
+	return de;
+}
+
+/*
+ *	Parent is locked.
+ */
+int ext2_add_link (struct dentry *dentry, struct inode *inode)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned short rec_len, name_len;
+	ext2_dirent * de;
+	struct page *page;
+	loff_t pos;
+	int err;
+
+	de = ext2_append_entry(dentry, &page);
+	if (IS_ERR(de))
+		return PTR_ERR(de);
+
+	err = -EEXIST;
+	if (ext2_match (namelen, name, de))
+		goto out_unlock;
+
+got_it:
+	name_len = EXT2_DIR_REC_LEN(de->name_len);
+	rec_len = ext2_rec_len_from_disk(de->rec_len);
+
 	pos = page_offset(page) +
 		(char*)de - (char*)page_address(page);
 	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
@@ -563,7 +594,6 @@ got_it:
 	/* OFFSET_CACHE */
 out_put:
 	ext2_put_page(page);
-out:
 	return err;
 out_unlock:
 	unlock_page(page);
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 11/38] whiteout: ext2 whiteout support
       [not found] <1276627208-17242-1-git-send-email-vaurora@redhat.com>
  2010-06-15 18:39 ` [PATCH 10/38] whiteout: Split of ext2_append_link() from ext2_add_link() Valerie Aurora
@ 2010-06-15 18:39 ` Valerie Aurora
  2010-07-13  4:24   ` Ian Kent
  2010-06-15 18:39 ` [PATCH 14/38] fallthru: ext2 fallthru support Valerie Aurora
  2 siblings, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-06-15 18:39 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Miklos Szeredi, Jan Blunck, Christoph Hellwig, linux-kernel,
	linux-fsdevel, Valerie Aurora, Theodore Tso, linux-ext4

From: Jan Blunck <jblunck@suse.de>

This patch adds whiteout support to EXT2. A whiteout is an empty directory
entry (inode == 0) with the file type set to EXT2_FT_WHT, so it still
occupies space in its directory. Because whiteouts are implemented as a file
type, the EXT2_FEATURE_INCOMPAT_FILETYPE flag must be set.

XXX - Needs serious review.  Al wonders: What happens with a delete at
the beginning of a block?  Will we find the matching dentry or the
first empty space?

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Valerie Aurora <vaurora@redhat.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org
---
 fs/ext2/dir.c           |   96 +++++++++++++++++++++++++++++++++++++++++++++--
 fs/ext2/ext2.h          |    3 +
 fs/ext2/inode.c         |   11 ++++-
 fs/ext2/namei.c         |   67 +++++++++++++++++++++++++++++++-
 fs/ext2/super.c         |    6 +++
 include/linux/ext2_fs.h |    4 ++
 6 files changed, 177 insertions(+), 10 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 57207a9..030bd46 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -219,7 +219,7 @@ static inline int ext2_match (int len, const char * const name,
 {
 	if (len != de->name_len)
 		return 0;
-	if (!de->inode)
+	if (!de->inode && (de->file_type != EXT2_FT_WHT))
 		return 0;
 	return !memcmp(name, de->name, len);
 }
@@ -255,6 +255,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
 	[EXT2_FT_FIFO]		= DT_FIFO,
 	[EXT2_FT_SOCK]		= DT_SOCK,
 	[EXT2_FT_SYMLINK]	= DT_LNK,
+	[EXT2_FT_WHT]		= DT_WHT,
 };
 
 #define S_SHIFT 12
@@ -448,6 +449,26 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
 	return res;
 }
 
+/* Special version for filetype based whiteout support */
+ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
+{
+	ino_t res = 0;
+	struct ext2_dir_entry_2 *de;
+	struct page *page;
+
+	de = ext2_find_entry (dir, &dentry->d_name, &page);
+	if (de) {
+		res = le32_to_cpu(de->inode);
+		if (!res && de->file_type == EXT2_FT_WHT) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_flags |= DCACHE_WHITEOUT;
+			spin_unlock(&dentry->d_lock);
+		}
+		ext2_put_page(page);
+	}
+	return res;
+}
+
 /* Releases the page */
 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 		   struct page *page, struct inode *inode, int update_times)
@@ -523,7 +544,8 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 				goto got_it;
 			name_len = EXT2_DIR_REC_LEN(de->name_len);
 			rec_len = ext2_rec_len_from_disk(de->rec_len);
-			if (!de->inode && rec_len >= reclen)
+			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
+			    (rec_len >= reclen))
 				goto got_it;
 			if (rec_len >= name_len + reclen)
 				goto got_it;
@@ -564,8 +586,11 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 		return PTR_ERR(de);
 
 	err = -EEXIST;
-	if (ext2_match (namelen, name, de))
+	if (ext2_match (namelen, name, de)) {
+		if (de->file_type == EXT2_FT_WHT)
+			goto got_it;
 		goto out_unlock;
+	}
 
 got_it:
 	name_len = EXT2_DIR_REC_LEN(de->name_len);
@@ -577,7 +602,8 @@ got_it:
 							&page, NULL);
 	if (err)
 		goto out_unlock;
-	if (de->inode) {
+	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
+			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
 		de->rec_len = ext2_rec_len_to_disk(name_len);
@@ -646,6 +672,68 @@ out:
 	return err;
 }
 
+int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
+			 struct ext2_dir_entry_2 * de, struct page * page)
+{
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned short rec_len, name_len;
+	loff_t pos;
+	int err;
+
+	if (!de) {
+		de = ext2_append_entry(dentry, &page);
+		BUG_ON(!de);
+	}
+
+	err = -EEXIST;
+	if (ext2_match (namelen, name, de) &&
+	    (de->file_type == EXT2_FT_WHT)) {
+		ext2_error(dir->i_sb, __func__,
+			   "entry is already a whiteout in directory #%lu",
+			   dir->i_ino);
+		goto out_unlock;
+	}
+
+	name_len = EXT2_DIR_REC_LEN(de->name_len);
+	rec_len = ext2_rec_len_from_disk(de->rec_len);
+
+	pos = page_offset(page) +
+		(char*)de - (char*)page_address(page);
+	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
+							&page, NULL);
+	if (err)
+		goto out_unlock;
+	/*
+	 * We whiteout an existing entry. Do what ext2_delete_entry() would do,
+	 * except that we don't need to merge with the previous entry since
+	 * we are going to reuse it.
+	 */
+	if (ext2_match (namelen, name, de))
+		de->inode = 0;
+	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
+		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+		de->rec_len = ext2_rec_len_to_disk(name_len);
+		de = de1;
+	}
+	de->name_len = namelen;
+	memcpy(de->name, name, namelen);
+	de->inode = 0;
+	de->file_type = EXT2_FT_WHT;
+	err = ext2_commit_chunk(page, pos, rec_len);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	/* OFFSET_CACHE */
+out_put:
+	ext2_put_page(page);
+	return err;
+out_unlock:
+	unlock_page(page);
+	goto out_put;
+}
+
 /*
  * Set the first fragment of directory.
  */
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 0b038e4..44d190c 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -102,9 +102,12 @@ extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_wind
 /* dir.c */
 extern int ext2_add_link (struct dentry *, struct inode *);
 extern ino_t ext2_inode_by_name(struct inode *, struct qstr *);
+extern ino_t ext2_inode_by_dentry(struct inode *, struct dentry *);
 extern int ext2_make_empty(struct inode *, struct inode *);
 extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **);
 extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_whiteout_entry (struct inode *, struct dentry *,
+				struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fc13cc1..5ad2cbb 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1184,7 +1184,8 @@ void ext2_set_inode_flags(struct inode *inode)
 {
 	unsigned int flags = EXT2_I(inode)->i_flags;
 
-	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
+			    S_OPAQUE);
 	if (flags & EXT2_SYNC_FL)
 		inode->i_flags |= S_SYNC;
 	if (flags & EXT2_APPEND_FL)
@@ -1195,6 +1196,8 @@ void ext2_set_inode_flags(struct inode *inode)
 		inode->i_flags |= S_NOATIME;
 	if (flags & EXT2_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
+	if (flags & EXT2_OPAQUE_FL)
+		inode->i_flags |= S_OPAQUE;
 }
 
 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
@@ -1202,8 +1205,8 @@ void ext2_get_inode_flags(struct ext2_inode_info *ei)
 {
 	unsigned int flags = ei->vfs_inode.i_flags;
 
-	ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
-			EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
+	ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|EXT2_IMMUTABLE_FL|
+			 EXT2_NOATIME_FL|EXT2_DIRSYNC_FL|EXT2_OPAQUE_FL);
 	if (flags & S_SYNC)
 		ei->i_flags |= EXT2_SYNC_FL;
 	if (flags & S_APPEND)
@@ -1214,6 +1217,8 @@ void ext2_get_inode_flags(struct ext2_inode_info *ei)
 		ei->i_flags |= EXT2_NOATIME_FL;
 	if (flags & S_DIRSYNC)
 		ei->i_flags |= EXT2_DIRSYNC_FL;
+	if (flags & S_OPAQUE)
+		ei->i_flags |= EXT2_OPAQUE_FL;
 }
 
 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 71efb0e..12195a5 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -55,15 +55,16 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
  * Methods themselves.
  */
 
-static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry,
+				  struct nameidata *nd)
 {
 	struct inode * inode;
 	ino_t ino;
-	
+
 	if (dentry->d_name.len > EXT2_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	ino = ext2_inode_by_name(dir, &dentry->d_name);
+	ino = ext2_inode_by_dentry(dir, dentry);
 	inode = NULL;
 	if (ino) {
 		inode = ext2_iget(dir->i_sb, ino);
@@ -242,6 +243,10 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	else
 		inode->i_mapping->a_ops = &ext2_aops;
 
+	/* if we call mkdir on a whiteout create an opaque directory */
+	if (dentry->d_flags & DCACHE_WHITEOUT)
+		inode->i_flags |= S_OPAQUE;
+
 	inode_inc_link_count(inode);
 
 	err = ext2_make_empty(inode, dir);
@@ -307,6 +312,61 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
 	return err;
 }
 
+/*
+ * Create a whiteout for the dentry
+ */
+static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
+			 struct dentry *new_dentry)
+{
+	struct inode * inode = dentry->d_inode;
+	struct ext2_dir_entry_2 * de = NULL;
+	struct page * page;
+	int err = -ENOTEMPTY;
+
+	if (!EXT2_HAS_INCOMPAT_FEATURE(dir->i_sb,
+				       EXT2_FEATURE_INCOMPAT_FILETYPE)) {
+		ext2_error (dir->i_sb, "ext2_whiteout",
+			    "can't set whiteout filetype");
+		err = -EPERM;
+		goto out;
+	}
+
+	dquot_initialize(dir);
+
+	if (inode) {
+		if (S_ISDIR(inode->i_mode) && !ext2_empty_dir(inode))
+			goto out;
+
+		err = -ENOENT;
+		de = ext2_find_entry (dir, &dentry->d_name, &page);
+		if (!de)
+			goto out;
+		lock_page(page);
+	}
+
+	err = ext2_whiteout_entry (dir, dentry, de, page);
+	if (err)
+		goto out;
+
+	spin_lock(&new_dentry->d_lock);
+	new_dentry->d_flags |= DCACHE_WHITEOUT;
+	spin_unlock(&new_dentry->d_lock);
+	d_add(new_dentry, NULL);
+
+	if (inode) {
+		inode->i_ctime = dir->i_ctime;
+		inode_dec_link_count(inode);
+		if (S_ISDIR(inode->i_mode)) {
+			inode->i_size = 0;
+			inode_dec_link_count(inode);
+			inode_dec_link_count(dir);
+		}
+	}
+	err = 0;
+out:
+	return err;
+}
+
 static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	struct inode * new_dir,	struct dentry * new_dentry )
 {
@@ -409,6 +469,7 @@ const struct inode_operations ext2_dir_inode_operations = {
 	.mkdir		= ext2_mkdir,
 	.rmdir		= ext2_rmdir,
 	.mknod		= ext2_mknod,
+	.whiteout	= ext2_whiteout,
 	.rename		= ext2_rename,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 42e4a30..000ee17 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1079,6 +1079,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
 		ext2_msg(sb, KERN_WARNING,
 			"warning: mounting ext3 filesystem as ext2");
+	/*
+	 * Whiteouts (and fallthrus) require explicit whiteout support.
+	 */
+	if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_WHITEOUT))
+		sb->s_flags |= MS_WHITEOUT;
+
 	ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
 	return 0;
 
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 2dfa707..20468bd 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -189,6 +189,7 @@ struct ext2_group_desc
 #define EXT2_NOTAIL_FL			FS_NOTAIL_FL	/* file tail should not be merged */
 #define EXT2_DIRSYNC_FL			FS_DIRSYNC_FL	/* dirsync behaviour (directories only) */
 #define EXT2_TOPDIR_FL			FS_TOPDIR_FL	/* Top of directory hierarchies*/
+#define EXT2_OPAQUE_FL			0x00040000
 #define EXT2_RESERVED_FL		FS_RESERVED_FL	/* reserved for ext2 lib */
 
 #define EXT2_FL_USER_VISIBLE		FS_FL_USER_VISIBLE	/* User visible flags */
@@ -503,10 +504,12 @@ struct ext2_super_block {
 #define EXT3_FEATURE_INCOMPAT_RECOVER		0x0004
 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008
 #define EXT2_FEATURE_INCOMPAT_META_BG		0x0010
+#define EXT2_FEATURE_INCOMPAT_WHITEOUT		0x0020
 #define EXT2_FEATURE_INCOMPAT_ANY		0xffffffff
 
 #define EXT2_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT2_FEATURE_INCOMPAT_SUPP	(EXT2_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT2_FEATURE_INCOMPAT_WHITEOUT| \
 					 EXT2_FEATURE_INCOMPAT_META_BG)
 #define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
@@ -573,6 +576,7 @@ enum {
 	EXT2_FT_FIFO		= 5,
 	EXT2_FT_SOCK		= 6,
 	EXT2_FT_SYMLINK		= 7,
+	EXT2_FT_WHT		= 8,
 	EXT2_FT_MAX
 };
 
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 14/38] fallthru: ext2 fallthru support
       [not found] <1276627208-17242-1-git-send-email-vaurora@redhat.com>
  2010-06-15 18:39 ` [PATCH 10/38] whiteout: Split of ext2_append_link() from ext2_add_link() Valerie Aurora
  2010-06-15 18:39 ` [PATCH 11/38] whiteout: ext2 whiteout support Valerie Aurora
@ 2010-06-15 18:39 ` Valerie Aurora
  2010-07-13  4:30   ` Ian Kent
  2010-08-04 14:44   ` Miklos Szeredi
  2 siblings, 2 replies; 26+ messages in thread
From: Valerie Aurora @ 2010-06-15 18:39 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Miklos Szeredi, Jan Blunck, Christoph Hellwig, linux-kernel,
	linux-fsdevel, Valerie Aurora, Theodore Tso, linux-ext4

Add support for fallthru directory entries to ext2.

XXX What to do for d_ino for fallthrus?  If we return the inode from
the underlying file system, it comes from a different inode
"namespace" and that will produce spurious matches.  This argues for
implementation of fallthrus as symlinks because they have to allocate
an inode (and inode number) anyway, and we can later reuse it if we
copy the file up.

Cc: Theodore Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Valerie Aurora <vaurora@redhat.com>
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
 fs/ext2/dir.c           |   92 ++++++++++++++++++++++++++++++++++++++++++++--
 fs/ext2/ext2.h          |    1 +
 fs/ext2/namei.c         |   22 +++++++++++
 include/linux/ext2_fs.h |    1 +
 4 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 030bd46..f3b4aff 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -219,7 +219,8 @@ static inline int ext2_match (int len, const char * const name,
 {
 	if (len != de->name_len)
 		return 0;
-	if (!de->inode && (de->file_type != EXT2_FT_WHT))
+	if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
+			   (de->file_type != EXT2_FT_FALLTHRU)))
 		return 0;
 	return !memcmp(name, de->name, len);
 }
@@ -256,6 +257,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
 	[EXT2_FT_SOCK]		= DT_SOCK,
 	[EXT2_FT_SYMLINK]	= DT_LNK,
 	[EXT2_FT_WHT]		= DT_WHT,
+	[EXT2_FT_FALLTHRU]	= DT_UNKNOWN,
 };
 
 #define S_SHIFT 12
@@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 					ext2_put_page(page);
 					return 0;
 				}
+			} else if (de->file_type == EXT2_FT_FALLTHRU) {
+				int over;
+				unsigned char d_type = DT_UNKNOWN;
+
+				offset = (char *)de - kaddr;
+				/* XXX We don't know the inode number
+				 * of the directory entry in the
+				 * underlying file system.  Should
+				 * look it up, either on fallthru
+				 * creation at first readdir or now at
+				 * filldir time. */
+				over = filldir(dirent, de->name, de->name_len,
+					       (n<<PAGE_CACHE_SHIFT) | offset,
+					       123 /* Made up ino */, d_type);
+				if (over) {
+					ext2_put_page(page);
+					return 0;
+				}
 			}
 			filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
 		}
@@ -463,6 +483,10 @@ ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags |= DCACHE_WHITEOUT;
 			spin_unlock(&dentry->d_lock);
+		} else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_flags |= DCACHE_FALLTHRU;
+			spin_unlock(&dentry->d_lock);
 		}
 		ext2_put_page(page);
 	}
@@ -532,6 +556,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 				de->name_len = 0;
 				de->rec_len = ext2_rec_len_to_disk(chunk_size);
 				de->inode = 0;
+				de->file_type = 0;
 				goto got_it;
 			}
 			if (de->rec_len == 0) {
@@ -545,6 +570,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 			name_len = EXT2_DIR_REC_LEN(de->name_len);
 			rec_len = ext2_rec_len_from_disk(de->rec_len);
 			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
+			    (de->file_type != EXT2_FT_FALLTHRU) &&
 			    (rec_len >= reclen))
 				goto got_it;
 			if (rec_len >= name_len + reclen)
@@ -587,7 +613,8 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 
 	err = -EEXIST;
 	if (ext2_match (namelen, name, de)) {
-		if (de->file_type == EXT2_FT_WHT)
+		if ((de->file_type == EXT2_FT_WHT) ||
+		    (de->file_type == EXT2_FT_FALLTHRU))
 			goto got_it;
 		goto out_unlock;
 	}
@@ -602,7 +629,8 @@ got_it:
 							&page, NULL);
 	if (err)
 		goto out_unlock;
-	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
+	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+			   (de->file_type == EXT2_FT_FALLTHRU)) &&
 			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
@@ -627,6 +655,60 @@ out_unlock:
 }
 
 /*
+ * Create a fallthru entry.
+ */
+int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
+{
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned short rec_len, name_len;
+	ext2_dirent * de;
+	struct page *page;
+	loff_t pos;
+	int err;
+
+	de = ext2_append_entry(dentry, &page);
+	if (IS_ERR(de))
+		return PTR_ERR(de);
+
+	err = -EEXIST;
+	if (ext2_match (namelen, name, de))
+		goto out_unlock;
+
+	name_len = EXT2_DIR_REC_LEN(de->name_len);
+	rec_len = ext2_rec_len_from_disk(de->rec_len);
+
+	pos = page_offset(page) +
+		(char*)de - (char*)page_address(page);
+	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
+							&page, NULL);
+	if (err)
+		goto out_unlock;
+	if (de->inode || (de->file_type == EXT2_FT_WHT) ||
+	    (de->file_type == EXT2_FT_FALLTHRU)) {
+		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+		de->rec_len = ext2_rec_len_to_disk(name_len);
+		de = de1;
+	}
+	de->name_len = namelen;
+	memcpy(de->name, name, namelen);
+	de->inode = 0;
+	de->file_type = EXT2_FT_FALLTHRU;
+	err = ext2_commit_chunk(page, pos, rec_len);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	/* OFFSET_CACHE */
+out_put:
+	ext2_put_page(page);
+	return err;
+out_unlock:
+	unlock_page(page);
+	goto out_put;
+}
+
+/*
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry. Page is up-to-date. Releases the page.
  */
@@ -711,7 +793,9 @@ int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
 	 */
 	if (ext2_match (namelen, name, de))
 		de->inode = 0;
-	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
+	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+			   (de->file_type == EXT2_FT_FALLTHRU)) &&
+			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
 		de->rec_len = ext2_rec_len_to_disk(name_len);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 44d190c..2fa32b3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -108,6 +108,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
 extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_whiteout_entry (struct inode *, struct dentry *,
 				struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_fallthru_entry (struct inode *, struct dentry *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 12195a5..f28154c 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -349,6 +349,7 @@ static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	spin_lock(&new_dentry->d_lock);
+	new_dentry->d_flags &= ~DCACHE_FALLTHRU;
 	new_dentry->d_flags |= DCACHE_WHITEOUT;
 	spin_unlock(&new_dentry->d_lock);
 	d_add(new_dentry, NULL);
@@ -367,6 +368,26 @@ out:
 	return err;
 }
 
+/*
+ * Create a fallthru entry.
+ */
+static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
+{
+	int err;
+
+	dquot_initialize(dir);
+
+	err = ext2_fallthru_entry(dir, dentry);
+	if (err)
+		return err;
+
+	d_instantiate(dentry, NULL);
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_FALLTHRU;
+	spin_unlock(&dentry->d_lock);
+	return 0;
+}
+
 static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	struct inode * new_dir,	struct dentry * new_dentry )
 {
@@ -470,6 +491,7 @@ const struct inode_operations ext2_dir_inode_operations = {
 	.rmdir		= ext2_rmdir,
 	.mknod		= ext2_mknod,
 	.whiteout	= ext2_whiteout,
+	.fallthru	= ext2_fallthru,
 	.rename		= ext2_rename,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 20468bd..cb3d400 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -577,6 +577,7 @@ enum {
 	EXT2_FT_SOCK		= 6,
 	EXT2_FT_SYMLINK		= 7,
 	EXT2_FT_WHT		= 8,
+	EXT2_FT_FALLTHRU	= 9,
 	EXT2_FT_MAX
 };
 
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 14/38] fallthru: ext2 fallthru support
       [not found] <1277492728-11446-1-git-send-email-vaurora@redhat.com>
@ 2010-06-25 19:05 ` Valerie Aurora
  0 siblings, 0 replies; 26+ messages in thread
From: Valerie Aurora @ 2010-06-25 19:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Miklos Szeredi, Jan Blunck, Christoph Hellwig, linux-kernel,
	linux-fsdevel, Valerie Aurora, Theodore Tso, linux-ext4

Add support for fallthru directory entries to ext2.

XXX What to do for d_ino for fallthrus?  If we return the inode from
the underlying file system, it comes from a different inode
"namespace" and that will produce spurious matches.  This argues for
implementation of fallthrus as symlinks because they have to allocate
an inode (and inode number) anyway, and we can later reuse it if we
copy the file up.

Cc: Theodore Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Valerie Aurora <vaurora@redhat.com>
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
 fs/ext2/dir.c           |   92 ++++++++++++++++++++++++++++++++++++++++++++--
 fs/ext2/ext2.h          |    1 +
 fs/ext2/namei.c         |   22 +++++++++++
 include/linux/ext2_fs.h |    1 +
 4 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 030bd46..f3b4aff 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -219,7 +219,8 @@ static inline int ext2_match (int len, const char * const name,
 {
 	if (len != de->name_len)
 		return 0;
-	if (!de->inode && (de->file_type != EXT2_FT_WHT))
+	if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
+			   (de->file_type != EXT2_FT_FALLTHRU)))
 		return 0;
 	return !memcmp(name, de->name, len);
 }
@@ -256,6 +257,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
 	[EXT2_FT_SOCK]		= DT_SOCK,
 	[EXT2_FT_SYMLINK]	= DT_LNK,
 	[EXT2_FT_WHT]		= DT_WHT,
+	[EXT2_FT_FALLTHRU]	= DT_UNKNOWN,
 };
 
 #define S_SHIFT 12
@@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 					ext2_put_page(page);
 					return 0;
 				}
+			} else if (de->file_type == EXT2_FT_FALLTHRU) {
+				int over;
+				unsigned char d_type = DT_UNKNOWN;
+
+				offset = (char *)de - kaddr;
+				/* XXX We don't know the inode number
+				 * of the directory entry in the
+				 * underlying file system.  Should
+				 * look it up, either on fallthru
+				 * creation at first readdir or now at
+				 * filldir time. */
+				over = filldir(dirent, de->name, de->name_len,
+					       (n<<PAGE_CACHE_SHIFT) | offset,
+					       123 /* Made up ino */, d_type);
+				if (over) {
+					ext2_put_page(page);
+					return 0;
+				}
 			}
 			filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
 		}
@@ -463,6 +483,10 @@ ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags |= DCACHE_WHITEOUT;
 			spin_unlock(&dentry->d_lock);
+		} else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_flags |= DCACHE_FALLTHRU;
+			spin_unlock(&dentry->d_lock);
 		}
 		ext2_put_page(page);
 	}
@@ -532,6 +556,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 				de->name_len = 0;
 				de->rec_len = ext2_rec_len_to_disk(chunk_size);
 				de->inode = 0;
+				de->file_type = 0;
 				goto got_it;
 			}
 			if (de->rec_len == 0) {
@@ -545,6 +570,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 			name_len = EXT2_DIR_REC_LEN(de->name_len);
 			rec_len = ext2_rec_len_from_disk(de->rec_len);
 			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
+			    (de->file_type != EXT2_FT_FALLTHRU) &&
 			    (rec_len >= reclen))
 				goto got_it;
 			if (rec_len >= name_len + reclen)
@@ -587,7 +613,8 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 
 	err = -EEXIST;
 	if (ext2_match (namelen, name, de)) {
-		if (de->file_type == EXT2_FT_WHT)
+		if ((de->file_type == EXT2_FT_WHT) ||
+		    (de->file_type == EXT2_FT_FALLTHRU))
 			goto got_it;
 		goto out_unlock;
 	}
@@ -602,7 +629,8 @@ got_it:
 							&page, NULL);
 	if (err)
 		goto out_unlock;
-	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
+	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+			   (de->file_type == EXT2_FT_FALLTHRU)) &&
 			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
@@ -627,6 +655,60 @@ out_unlock:
 }
 
 /*
+ * Create a fallthru entry.
+ */
+int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
+{
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned short rec_len, name_len;
+	ext2_dirent * de;
+	struct page *page;
+	loff_t pos;
+	int err;
+
+	de = ext2_append_entry(dentry, &page);
+	if (IS_ERR(de))
+		return PTR_ERR(de);
+
+	err = -EEXIST;
+	if (ext2_match (namelen, name, de))
+		goto out_unlock;
+
+	name_len = EXT2_DIR_REC_LEN(de->name_len);
+	rec_len = ext2_rec_len_from_disk(de->rec_len);
+
+	pos = page_offset(page) +
+		(char*)de - (char*)page_address(page);
+	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
+							&page, NULL);
+	if (err)
+		goto out_unlock;
+	if (de->inode || (de->file_type == EXT2_FT_WHT) ||
+	    (de->file_type == EXT2_FT_FALLTHRU)) {
+		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+		de->rec_len = ext2_rec_len_to_disk(name_len);
+		de = de1;
+	}
+	de->name_len = namelen;
+	memcpy(de->name, name, namelen);
+	de->inode = 0;
+	de->file_type = EXT2_FT_FALLTHRU;
+	err = ext2_commit_chunk(page, pos, rec_len);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	/* OFFSET_CACHE */
+out_put:
+	ext2_put_page(page);
+	return err;
+out_unlock:
+	unlock_page(page);
+	goto out_put;
+}
+
+/*
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry. Page is up-to-date. Releases the page.
  */
@@ -711,7 +793,9 @@ int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
 	 */
 	if (ext2_match (namelen, name, de))
 		de->inode = 0;
-	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
+	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+			   (de->file_type == EXT2_FT_FALLTHRU)) &&
+			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
 		de->rec_len = ext2_rec_len_to_disk(name_len);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 44d190c..2fa32b3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -108,6 +108,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
 extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_whiteout_entry (struct inode *, struct dentry *,
 				struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_fallthru_entry (struct inode *, struct dentry *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 12195a5..f28154c 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -349,6 +349,7 @@ static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	spin_lock(&new_dentry->d_lock);
+	new_dentry->d_flags &= ~DCACHE_FALLTHRU;
 	new_dentry->d_flags |= DCACHE_WHITEOUT;
 	spin_unlock(&new_dentry->d_lock);
 	d_add(new_dentry, NULL);
@@ -367,6 +368,26 @@ out:
 	return err;
 }
 
+/*
+ * Create a fallthru entry.
+ */
+static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
+{
+	int err;
+
+	dquot_initialize(dir);
+
+	err = ext2_fallthru_entry(dir, dentry);
+	if (err)
+		return err;
+
+	d_instantiate(dentry, NULL);
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_FALLTHRU;
+	spin_unlock(&dentry->d_lock);
+	return 0;
+}
+
 static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	struct inode * new_dir,	struct dentry * new_dentry )
 {
@@ -470,6 +491,7 @@ const struct inode_operations ext2_dir_inode_operations = {
 	.rmdir		= ext2_rmdir,
 	.mknod		= ext2_mknod,
 	.whiteout	= ext2_whiteout,
+	.fallthru	= ext2_fallthru,
 	.rename		= ext2_rename,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 20468bd..cb3d400 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -577,6 +577,7 @@ enum {
 	EXT2_FT_SOCK		= 6,
 	EXT2_FT_SYMLINK		= 7,
 	EXT2_FT_WHT		= 8,
+	EXT2_FT_FALLTHRU	= 9,
 	EXT2_FT_MAX
 };
 
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/38] whiteout: ext2 whiteout support
  2010-06-15 18:39 ` [PATCH 11/38] whiteout: ext2 whiteout support Valerie Aurora
@ 2010-07-13  4:24   ` Ian Kent
  2010-07-19 22:14     ` Valerie Aurora
  0 siblings, 1 reply; 26+ messages in thread
From: Ian Kent @ 2010-07-13  4:24 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: Alexander Viro, Miklos Szeredi, Jan Blunck, Christoph Hellwig,
	linux-kernel, linux-fsdevel, Theodore Tso, linux-ext4

On Tue, Jun 15, 2010 at 11:39:41AM -0700, Valerie Aurora wrote:
> From: Jan Blunck <jblunck@suse.de>
> 
> This patch adds whiteout support to EXT2. A whiteout is an empty directory
> entry (inode == 0) with the file type set to EXT2_FT_WHT. Therefore it
> allocates space in directories. Due to being implemented as a filetype it is
> necessary to have the EXT2_FEATURE_INCOMPAT_FILETYPE flag set.
> 
> XXX - Needs serious review.  Al wonders: What happens with a delete at
> the beginning of a block?  Will we find the matching dentry or the
> first empty space?
> 
> Signed-off-by: Jan Blunck <jblunck@suse.de>
> Signed-off-by: Valerie Aurora <vaurora@redhat.com>
> Cc: Theodore Tso <tytso@mit.edu>
> Cc: linux-ext4@vger.kernel.org
> ---
>  fs/ext2/dir.c           |   96 +++++++++++++++++++++++++++++++++++++++++++++--
>  fs/ext2/ext2.h          |    3 +
>  fs/ext2/inode.c         |   11 ++++-
>  fs/ext2/namei.c         |   67 +++++++++++++++++++++++++++++++-
>  fs/ext2/super.c         |    6 +++
>  include/linux/ext2_fs.h |    4 ++
>  6 files changed, 177 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
> index 57207a9..030bd46 100644
> --- a/fs/ext2/dir.c
> +++ b/fs/ext2/dir.c
> @@ -219,7 +219,7 @@ static inline int ext2_match (int len, const char * const name,
>  {
>  	if (len != de->name_len)
>  		return 0;
> -	if (!de->inode)
> +	if (!de->inode && (de->file_type != EXT2_FT_WHT))
>  		return 0;
>  	return !memcmp(name, de->name, len);
>  }
> @@ -255,6 +255,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
>  	[EXT2_FT_FIFO]		= DT_FIFO,
>  	[EXT2_FT_SOCK]		= DT_SOCK,
>  	[EXT2_FT_SYMLINK]	= DT_LNK,
> +	[EXT2_FT_WHT]		= DT_WHT,
>  };
>  
>  #define S_SHIFT 12
> @@ -448,6 +449,26 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
>  	return res;
>  }
>  
> +/* Special version for filetype based whiteout support */
> +ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
> +{
> +	ino_t res = 0;
> +	struct ext2_dir_entry_2 *de;
> +	struct page *page;
> +
> +	de = ext2_find_entry (dir, &dentry->d_name, &page);
> +	if (de) {
> +		res = le32_to_cpu(de->inode);
> +		if (!res && de->file_type == EXT2_FT_WHT) {
> +			spin_lock(&dentry->d_lock);
> +			dentry->d_flags |= DCACHE_WHITEOUT;
> +			spin_unlock(&dentry->d_lock);
> +		}
> +		ext2_put_page(page);
> +	}
> +	return res;
> +}
> +
>  /* Releases the page */
>  void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
>  		   struct page *page, struct inode *inode, int update_times)
> @@ -523,7 +544,8 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
>  				goto got_it;
>  			name_len = EXT2_DIR_REC_LEN(de->name_len);
>  			rec_len = ext2_rec_len_from_disk(de->rec_len);
> -			if (!de->inode && rec_len >= reclen)
> +			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
> +			    (rec_len >= reclen))
>  				goto got_it;
>  			if (rec_len >= name_len + reclen)
>  				goto got_it;
> @@ -564,8 +586,11 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
>  		return PTR_ERR(de);
>  
>  	err = -EEXIST;
> -	if (ext2_match (namelen, name, de))
> +	if (ext2_match (namelen, name, de)) {
> +		if (de->file_type == EXT2_FT_WHT)
> +			goto got_it;
>  		goto out_unlock;
> +	}
>  
>  got_it:
>  	name_len = EXT2_DIR_REC_LEN(de->name_len);
> @@ -577,7 +602,8 @@ got_it:
>  							&page, NULL);
>  	if (err)
>  		goto out_unlock;
> -	if (de->inode) {
> +	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
> +			  !ext2_match (namelen, name, de))) {
>  		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
>  		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
>  		de->rec_len = ext2_rec_len_to_disk(name_len);
> @@ -646,6 +672,68 @@ out:
>  	return err;
>  }
>  
> +int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
> +			 struct ext2_dir_entry_2 * de, struct page * page)
> +{
> +	const char *name = dentry->d_name.name;
> +	int namelen = dentry->d_name.len;
> +	unsigned short rec_len, name_len;
> +	loff_t pos;
> +	int err;
> +
> +	if (!de) {
> +		de = ext2_append_entry(dentry, &page);
> +		BUG_ON(!de);
> +	}
> +
> +	err = -EEXIST;
> +	if (ext2_match (namelen, name, de) &&
> +	    (de->file_type == EXT2_FT_WHT)) {
> +		ext2_error(dir->i_sb, __func__,
> +			   "entry is already a whiteout in directory #%lu",
> +			   dir->i_ino);
> +		goto out_unlock;
> +	}
> +
> +	name_len = EXT2_DIR_REC_LEN(de->name_len);
> +	rec_len = ext2_rec_len_from_disk(de->rec_len);
> +
> +	pos = page_offset(page) +
> +		(char*)de - (char*)page_address(page);
> +	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
> +							&page, NULL);
> +	if (err)
> +		goto out_unlock;
> +	/*
> +	 * We whiteout an existing entry. Do what ext2_delete_entry() would do,
> +	 * except that we don't need to merge with the previous entry since
> +	 * we are going to reuse it.
> +	 */
> +	if (ext2_match (namelen, name, de))
> +		de->inode = 0;
> +	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
> +		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> +		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> +		de->rec_len = ext2_rec_len_to_disk(name_len);
> +		de = de1;
> +	}

This looks odd; can someone tell me what's actually going on with de and
de1 here, please?

> +	de->name_len = namelen;
> +	memcpy(de->name, name, namelen);
> +	de->inode = 0;
> +	de->file_type = EXT2_FT_WHT;
> +	err = ext2_commit_chunk(page, pos, rec_len);
> +	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
> +	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
> +	mark_inode_dirty(dir);
> +	/* OFFSET_CACHE */
> +out_put:
> +	ext2_put_page(page);
> +	return err;
> +out_unlock:
> +	unlock_page(page);
> +	goto out_put;
> +}
> +
>  /*
>   * Set the first fragment of directory.
>   */
> diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
> index 0b038e4..44d190c 100644
> --- a/fs/ext2/ext2.h
> +++ b/fs/ext2/ext2.h
> @@ -102,9 +102,12 @@ extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_wind
>  /* dir.c */
>  extern int ext2_add_link (struct dentry *, struct inode *);
>  extern ino_t ext2_inode_by_name(struct inode *, struct qstr *);
> +extern ino_t ext2_inode_by_dentry(struct inode *, struct dentry *);
>  extern int ext2_make_empty(struct inode *, struct inode *);
>  extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **);
>  extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
> +extern int ext2_whiteout_entry (struct inode *, struct dentry *,
> +				struct ext2_dir_entry_2 *, struct page *);
>  extern int ext2_empty_dir (struct inode *);
>  extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
>  extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index fc13cc1..5ad2cbb 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -1184,7 +1184,8 @@ void ext2_set_inode_flags(struct inode *inode)
>  {
>  	unsigned int flags = EXT2_I(inode)->i_flags;
>  
> -	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
> +	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
> +			    S_OPAQUE);
>  	if (flags & EXT2_SYNC_FL)
>  		inode->i_flags |= S_SYNC;
>  	if (flags & EXT2_APPEND_FL)
> @@ -1195,6 +1196,8 @@ void ext2_set_inode_flags(struct inode *inode)
>  		inode->i_flags |= S_NOATIME;
>  	if (flags & EXT2_DIRSYNC_FL)
>  		inode->i_flags |= S_DIRSYNC;
> +	if (flags & EXT2_OPAQUE_FL)
> +		inode->i_flags |= S_OPAQUE;
>  }
>  
>  /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
> @@ -1202,8 +1205,8 @@ void ext2_get_inode_flags(struct ext2_inode_info *ei)
>  {
>  	unsigned int flags = ei->vfs_inode.i_flags;
>  
> -	ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
> -			EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
> +	ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|EXT2_IMMUTABLE_FL|
> +			 EXT2_NOATIME_FL|EXT2_DIRSYNC_FL|EXT2_OPAQUE_FL);
>  	if (flags & S_SYNC)
>  		ei->i_flags |= EXT2_SYNC_FL;
>  	if (flags & S_APPEND)
> @@ -1214,6 +1217,8 @@ void ext2_get_inode_flags(struct ext2_inode_info *ei)
>  		ei->i_flags |= EXT2_NOATIME_FL;
>  	if (flags & S_DIRSYNC)
>  		ei->i_flags |= EXT2_DIRSYNC_FL;
> +	if (flags & S_OPAQUE)
> +		ei->i_flags |= EXT2_OPAQUE_FL;
>  }
>  
>  struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
> diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
> index 71efb0e..12195a5 100644
> --- a/fs/ext2/namei.c
> +++ b/fs/ext2/namei.c
> @@ -55,15 +55,16 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
>   * Methods themselves.
>   */
>  
> -static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
> +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry,
> +				  struct nameidata *nd)
>  {
>  	struct inode * inode;
>  	ino_t ino;
> -	
> +
>  	if (dentry->d_name.len > EXT2_NAME_LEN)
>  		return ERR_PTR(-ENAMETOOLONG);
>  
> -	ino = ext2_inode_by_name(dir, &dentry->d_name);
> +	ino = ext2_inode_by_dentry(dir, dentry);
>  	inode = NULL;
>  	if (ino) {
>  		inode = ext2_iget(dir->i_sb, ino);
> @@ -242,6 +243,10 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
>  	else
>  		inode->i_mapping->a_ops = &ext2_aops;
>  
> +	/* if we call mkdir on a whiteout create an opaque directory */
> +	if (dentry->d_flags & DCACHE_WHITEOUT)
> +		inode->i_flags |= S_OPAQUE;
> +
>  	inode_inc_link_count(inode);
>  
>  	err = ext2_make_empty(inode, dir);
> @@ -307,6 +312,61 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
>  	return err;
>  }
>  
> +/*
> + * Create a whiteout for the dentry
> + */
> +static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
> +			 struct dentry *new_dentry)
> +{
> +	struct inode * inode = dentry->d_inode;
> +	struct ext2_dir_entry_2 * de = NULL;
> +	struct page * page;
> +	int err = -ENOTEMPTY;
> +
> +	if (!EXT2_HAS_INCOMPAT_FEATURE(dir->i_sb,
> +				       EXT2_FEATURE_INCOMPAT_FILETYPE)) {
> +		ext2_error (dir->i_sb, "ext2_whiteout",
> +			    "can't set whiteout filetype");
> +		err = -EPERM;
> +		goto out;
> +	}
> +
> +	dquot_initialize(dir);
> +
> +	if (inode) {
> +		if (S_ISDIR(inode->i_mode) && !ext2_empty_dir(inode))
> +			goto out;
> +
> +		err = -ENOENT;
> +		de = ext2_find_entry (dir, &dentry->d_name, &page);
> +		if (!de)
> +			goto out;
> +		lock_page(page);
> +	}

Is page "always" set in ext2_find_entry()? I couldn't quite make that out.
If dentry is negative, isn't this a use of page without initialization in
ext2_whiteout_entry()?

> +
> +	err = ext2_whiteout_entry (dir, dentry, de, page);
> +	if (err)
> +		goto out;
> +
> +	spin_lock(&new_dentry->d_lock);
> +	new_dentry->d_flags |= DCACHE_WHITEOUT;
> +	spin_unlock(&new_dentry->d_lock);
> +	d_add(new_dentry, NULL);
> +
> +	if (inode) {
> +		inode->i_ctime = dir->i_ctime;
> +		inode_dec_link_count(inode);
> +		if (S_ISDIR(inode->i_mode)) {
> +			inode->i_size = 0;
> +			inode_dec_link_count(inode);
> +			inode_dec_link_count(dir);
> +		}
> +	}
> +	err = 0;
> +out:
> +	return err;
> +}
> +
>  static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
>  	struct inode * new_dir,	struct dentry * new_dentry )
>  {
> @@ -409,6 +469,7 @@ const struct inode_operations ext2_dir_inode_operations = {
>  	.mkdir		= ext2_mkdir,
>  	.rmdir		= ext2_rmdir,
>  	.mknod		= ext2_mknod,
> +	.whiteout	= ext2_whiteout,
>  	.rename		= ext2_rename,
>  #ifdef CONFIG_EXT2_FS_XATTR
>  	.setxattr	= generic_setxattr,
> diff --git a/fs/ext2/super.c b/fs/ext2/super.c
> index 42e4a30..000ee17 100644
> --- a/fs/ext2/super.c
> +++ b/fs/ext2/super.c
> @@ -1079,6 +1079,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
>  	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
>  		ext2_msg(sb, KERN_WARNING,
>  			"warning: mounting ext3 filesystem as ext2");
> +	/*
> +	 * Whiteouts (and fallthrus) require explicit whiteout support.
> +	 */
> +	if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_WHITEOUT))
> +		sb->s_flags |= MS_WHITEOUT;
> +
>  	ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
>  	return 0;
>  
> diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
> index 2dfa707..20468bd 100644
> --- a/include/linux/ext2_fs.h
> +++ b/include/linux/ext2_fs.h
> @@ -189,6 +189,7 @@ struct ext2_group_desc
>  #define EXT2_NOTAIL_FL			FS_NOTAIL_FL	/* file tail should not be merged */
>  #define EXT2_DIRSYNC_FL			FS_DIRSYNC_FL	/* dirsync behaviour (directories only) */
>  #define EXT2_TOPDIR_FL			FS_TOPDIR_FL	/* Top of directory hierarchies*/
> +#define EXT2_OPAQUE_FL			0x00040000
>  #define EXT2_RESERVED_FL		FS_RESERVED_FL	/* reserved for ext2 lib */
>  
>  #define EXT2_FL_USER_VISIBLE		FS_FL_USER_VISIBLE	/* User visible flags */
> @@ -503,10 +504,12 @@ struct ext2_super_block {
>  #define EXT3_FEATURE_INCOMPAT_RECOVER		0x0004
>  #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008
>  #define EXT2_FEATURE_INCOMPAT_META_BG		0x0010
> +#define EXT2_FEATURE_INCOMPAT_WHITEOUT		0x0020
>  #define EXT2_FEATURE_INCOMPAT_ANY		0xffffffff
>  
>  #define EXT2_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
>  #define EXT2_FEATURE_INCOMPAT_SUPP	(EXT2_FEATURE_INCOMPAT_FILETYPE| \
> +					 EXT2_FEATURE_INCOMPAT_WHITEOUT| \
>  					 EXT2_FEATURE_INCOMPAT_META_BG)
>  #define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
>  					 EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
> @@ -573,6 +576,7 @@ enum {
>  	EXT2_FT_FIFO		= 5,
>  	EXT2_FT_SOCK		= 6,
>  	EXT2_FT_SYMLINK		= 7,
> +	EXT2_FT_WHT		= 8,
>  	EXT2_FT_MAX
>  };
>  
> -- 
> 1.6.3.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-06-15 18:39 ` [PATCH 14/38] fallthru: ext2 fallthru support Valerie Aurora
@ 2010-07-13  4:30   ` Ian Kent
  2010-08-04 14:44   ` Miklos Szeredi
  1 sibling, 0 replies; 26+ messages in thread
From: Ian Kent @ 2010-07-13  4:30 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: Alexander Viro, Miklos Szeredi, Jan Blunck, Christoph Hellwig,
	linux-kernel, linux-fsdevel, Theodore Tso, linux-ext4

On Tue, Jun 15, 2010 at 11:39:44AM -0700, Valerie Aurora wrote:
> Add support for fallthru directory entries to ext2.
> 
> XXX What to do for d_ino for fallthrus?  If we return the inode from
> the underlying file system, it comes from a different inode
> "namespace" and that will produce spurious matches.  This argues for
> implementation of fallthrus as symlinks because they have to allocate
> an inode (and inode number) anyway, and we can later reuse it if we
> copy the file up.
> 
> Cc: Theodore Tso <tytso@mit.edu>
> Cc: linux-ext4@vger.kernel.org
> Signed-off-by: Valerie Aurora <vaurora@redhat.com>
> Signed-off-by: Jan Blunck <jblunck@suse.de>
> ---
>  fs/ext2/dir.c           |   92 ++++++++++++++++++++++++++++++++++++++++++++--
>  fs/ext2/ext2.h          |    1 +
>  fs/ext2/namei.c         |   22 +++++++++++
>  include/linux/ext2_fs.h |    1 +
>  4 files changed, 112 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
> index 030bd46..f3b4aff 100644
> --- a/fs/ext2/dir.c
> +++ b/fs/ext2/dir.c
> @@ -219,7 +219,8 @@ static inline int ext2_match (int len, const char * const name,
>  {
>  	if (len != de->name_len)
>  		return 0;
> -	if (!de->inode && (de->file_type != EXT2_FT_WHT))
> +	if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
> +			   (de->file_type != EXT2_FT_FALLTHRU)))
>  		return 0;
>  	return !memcmp(name, de->name, len);
>  }
> @@ -256,6 +257,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
>  	[EXT2_FT_SOCK]		= DT_SOCK,
>  	[EXT2_FT_SYMLINK]	= DT_LNK,
>  	[EXT2_FT_WHT]		= DT_WHT,
> +	[EXT2_FT_FALLTHRU]	= DT_UNKNOWN,
>  };
>  
>  #define S_SHIFT 12
> @@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
>  					ext2_put_page(page);
>  					return 0;
>  				}
> +			} else if (de->file_type == EXT2_FT_FALLTHRU) {
> +				int over;
> +				unsigned char d_type = DT_UNKNOWN;
> +
> +				offset = (char *)de - kaddr;
> +				/* XXX We don't know the inode number
> +				 * of the directory entry in the
> +				 * underlying file system.  Should
> +				 * look it up, either on fallthru
> +				 * creation at first readdir or now at
> +				 * filldir time. */
> +				over = filldir(dirent, de->name, de->name_len,
> +					       (n<<PAGE_CACHE_SHIFT) | offset,
> +					       123 /* Made up ino */, d_type);
> +				if (over) {
> +					ext2_put_page(page);
> +					return 0;
> +				}
>  			}
>  			filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
>  		}
> @@ -463,6 +483,10 @@ ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
>  			spin_lock(&dentry->d_lock);
>  			dentry->d_flags |= DCACHE_WHITEOUT;
>  			spin_unlock(&dentry->d_lock);
> +		} else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
> +			spin_lock(&dentry->d_lock);
> +			dentry->d_flags |= DCACHE_FALLTHRU;
> +			spin_unlock(&dentry->d_lock);
>  		}
>  		ext2_put_page(page);
>  	}
> @@ -532,6 +556,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
>  				de->name_len = 0;
>  				de->rec_len = ext2_rec_len_to_disk(chunk_size);
>  				de->inode = 0;
> +				de->file_type = 0;
>  				goto got_it;
>  			}
>  			if (de->rec_len == 0) {
> @@ -545,6 +570,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
>  			name_len = EXT2_DIR_REC_LEN(de->name_len);
>  			rec_len = ext2_rec_len_from_disk(de->rec_len);
>  			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
> +			    (de->file_type != EXT2_FT_FALLTHRU) &&
>  			    (rec_len >= reclen))
>  				goto got_it;
>  			if (rec_len >= name_len + reclen)
> @@ -587,7 +613,8 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
>  
>  	err = -EEXIST;
>  	if (ext2_match (namelen, name, de)) {
> -		if (de->file_type == EXT2_FT_WHT)
> +		if ((de->file_type == EXT2_FT_WHT) ||
> +		    (de->file_type == EXT2_FT_FALLTHRU))
>  			goto got_it;
>  		goto out_unlock;
>  	}
> @@ -602,7 +629,8 @@ got_it:
>  							&page, NULL);
>  	if (err)
>  		goto out_unlock;
> -	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
> +	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
> +			   (de->file_type == EXT2_FT_FALLTHRU)) &&
>  			  !ext2_match (namelen, name, de))) {
>  		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
>  		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> @@ -627,6 +655,60 @@ out_unlock:
>  }
>  
>  /*
> + * Create a fallthru entry.
> + */
> +int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
> +{
> +	const char *name = dentry->d_name.name;
> +	int namelen = dentry->d_name.len;
> +	unsigned short rec_len, name_len;
> +	ext2_dirent * de;
> +	struct page *page;
> +	loff_t pos;
> +	int err;
> +
> +	de = ext2_append_entry(dentry, &page);
> +	if (IS_ERR(de))
> +		return PTR_ERR(de);
> +
> +	err = -EEXIST;
> +	if (ext2_match (namelen, name, de))
> +		goto out_unlock;
> +
> +	name_len = EXT2_DIR_REC_LEN(de->name_len);
> +	rec_len = ext2_rec_len_from_disk(de->rec_len);
> +
> +	pos = page_offset(page) +
> +		(char*)de - (char*)page_address(page);
> +	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
> +							&page, NULL);
> +	if (err)
> +		goto out_unlock;
> +	if (de->inode || (de->file_type == EXT2_FT_WHT) ||
> +	    (de->file_type == EXT2_FT_FALLTHRU)) {
> +		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> +		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> +		de->rec_len = ext2_rec_len_to_disk(name_len);
> +		de = de1;
> +	}

And again, what's going on here?

> +	de->name_len = namelen;
> +	memcpy(de->name, name, namelen);
> +	de->inode = 0;
> +	de->file_type = EXT2_FT_FALLTHRU;
> +	err = ext2_commit_chunk(page, pos, rec_len);
> +	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
> +	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
> +	mark_inode_dirty(dir);
> +	/* OFFSET_CACHE */
> +out_put:
> +	ext2_put_page(page);
> +	return err;
> +out_unlock:
> +	unlock_page(page);
> +	goto out_put;
> +}
> +
> +/*
>   * ext2_delete_entry deletes a directory entry by merging it with the
>   * previous entry. Page is up-to-date. Releases the page.
>   */
> @@ -711,7 +793,9 @@ int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
>  	 */
>  	if (ext2_match (namelen, name, de))
>  		de->inode = 0;
> -	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
> +	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
> +			   (de->file_type == EXT2_FT_FALLTHRU)) &&
> +			  !ext2_match (namelen, name, de))) {
>  		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
>  		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
>  		de->rec_len = ext2_rec_len_to_disk(name_len);
> diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
> index 44d190c..2fa32b3 100644
> --- a/fs/ext2/ext2.h
> +++ b/fs/ext2/ext2.h
> @@ -108,6 +108,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
>  extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
>  extern int ext2_whiteout_entry (struct inode *, struct dentry *,
>  				struct ext2_dir_entry_2 *, struct page *);
> +extern int ext2_fallthru_entry (struct inode *, struct dentry *);
>  extern int ext2_empty_dir (struct inode *);
>  extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
>  extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
> diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
> index 12195a5..f28154c 100644
> --- a/fs/ext2/namei.c
> +++ b/fs/ext2/namei.c
> @@ -349,6 +349,7 @@ static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
>  		goto out;
>  
>  	spin_lock(&new_dentry->d_lock);
> +	new_dentry->d_flags &= ~DCACHE_FALLTHRU;
>  	new_dentry->d_flags |= DCACHE_WHITEOUT;
>  	spin_unlock(&new_dentry->d_lock);
>  	d_add(new_dentry, NULL);
> @@ -367,6 +368,26 @@ out:
>  	return err;
>  }
>  
> +/*
> + * Create a fallthru entry.
> + */
> +static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
> +{
> +	int err;
> +
> +	dquot_initialize(dir);
> +
> +	err = ext2_fallthru_entry(dir, dentry);
> +	if (err)
> +		return err;
> +
> +	d_instantiate(dentry, NULL);
> +	spin_lock(&dentry->d_lock);
> +	dentry->d_flags |= DCACHE_FALLTHRU;
> +	spin_unlock(&dentry->d_lock);
> +	return 0;
> +}
> +
>  static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
>  	struct inode * new_dir,	struct dentry * new_dentry )
>  {
> @@ -470,6 +491,7 @@ const struct inode_operations ext2_dir_inode_operations = {
>  	.rmdir		= ext2_rmdir,
>  	.mknod		= ext2_mknod,
>  	.whiteout	= ext2_whiteout,
> +	.fallthru	= ext2_fallthru,
>  	.rename		= ext2_rename,
>  #ifdef CONFIG_EXT2_FS_XATTR
>  	.setxattr	= generic_setxattr,
> diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
> index 20468bd..cb3d400 100644
> --- a/include/linux/ext2_fs.h
> +++ b/include/linux/ext2_fs.h
> @@ -577,6 +577,7 @@ enum {
>  	EXT2_FT_SOCK		= 6,
>  	EXT2_FT_SYMLINK		= 7,
>  	EXT2_FT_WHT		= 8,
> +	EXT2_FT_FALLTHRU	= 9,
>  	EXT2_FT_MAX
>  };
>  
> -- 
> 1.6.3.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 11/38] whiteout: ext2 whiteout support
  2010-07-13  4:24   ` Ian Kent
@ 2010-07-19 22:14     ` Valerie Aurora
  0 siblings, 0 replies; 26+ messages in thread
From: Valerie Aurora @ 2010-07-19 22:14 UTC (permalink / raw)
  To: Ian Kent
  Cc: Alexander Viro, Miklos Szeredi, Jan Blunck, Christoph Hellwig,
	linux-kernel, linux-fsdevel, Theodore Tso, linux-ext4

On Tue, Jul 13, 2010 at 12:24:20PM +0800, Ian Kent wrote:
> On Tue, Jun 15, 2010 at 11:39:41AM -0700, Valerie Aurora wrote:
> > From: Jan Blunck <jblunck@suse.de>
> > 
> > This patch adds whiteout support to EXT2. A whiteout is an empty directory
> > entry (inode == 0) with the file type set to EXT2_FT_WHT. Therefore it
> > allocates space in directories. Due to being implemented as a filetype it is
> > necessary to have the EXT2_FEATURE_INCOMPAT_FILETYPE flag set.
> > 
> > XXX - Needs serious review.  Al wonders: What happens with a delete at
> > the beginning of a block?  Will we find the matching dentry or the
> > first empty space?
> > 
> > Signed-off-by: Jan Blunck <jblunck@suse.de>
> > Signed-off-by: Valerie Aurora <vaurora@redhat.com>
> > Cc: Theodore Tso <tytso@mit.edu>
> > Cc: linux-ext4@vger.kernel.org
> > ---
> >  fs/ext2/dir.c           |   96 +++++++++++++++++++++++++++++++++++++++++++++--
> >  fs/ext2/ext2.h          |    3 +
> >  fs/ext2/inode.c         |   11 ++++-
> >  fs/ext2/namei.c         |   67 +++++++++++++++++++++++++++++++-
> >  fs/ext2/super.c         |    6 +++
> >  include/linux/ext2_fs.h |    4 ++
> >  6 files changed, 177 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
> > index 57207a9..030bd46 100644
> > --- a/fs/ext2/dir.c
> > +++ b/fs/ext2/dir.c
> > @@ -219,7 +219,7 @@ static inline int ext2_match (int len, const char * const name,
> >  {
> >  	if (len != de->name_len)
> >  		return 0;
> > -	if (!de->inode)
> > +	if (!de->inode && (de->file_type != EXT2_FT_WHT))
> >  		return 0;
> >  	return !memcmp(name, de->name, len);
> >  }
> > @@ -255,6 +255,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
> >  	[EXT2_FT_FIFO]		= DT_FIFO,
> >  	[EXT2_FT_SOCK]		= DT_SOCK,
> >  	[EXT2_FT_SYMLINK]	= DT_LNK,
> > +	[EXT2_FT_WHT]		= DT_WHT,
> >  };
> >  
> >  #define S_SHIFT 12
> > @@ -448,6 +449,26 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
> >  	return res;
> >  }
> >  
> > +/* Special version for filetype based whiteout support */
> > +ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
> > +{
> > +	ino_t res = 0;
> > +	struct ext2_dir_entry_2 *de;
> > +	struct page *page;
> > +
> > +	de = ext2_find_entry (dir, &dentry->d_name, &page);
> > +	if (de) {
> > +		res = le32_to_cpu(de->inode);
> > +		if (!res && de->file_type == EXT2_FT_WHT) {
> > +			spin_lock(&dentry->d_lock);
> > +			dentry->d_flags |= DCACHE_WHITEOUT;
> > +			spin_unlock(&dentry->d_lock);
> > +		}
> > +		ext2_put_page(page);
> > +	}
> > +	return res;
> > +}
> > +
> >  /* Releases the page */
> >  void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
> >  		   struct page *page, struct inode *inode, int update_times)
> > @@ -523,7 +544,8 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
> >  				goto got_it;
> >  			name_len = EXT2_DIR_REC_LEN(de->name_len);
> >  			rec_len = ext2_rec_len_from_disk(de->rec_len);
> > -			if (!de->inode && rec_len >= reclen)
> > +			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
> > +			    (rec_len >= reclen))
> >  				goto got_it;
> >  			if (rec_len >= name_len + reclen)
> >  				goto got_it;
> > @@ -564,8 +586,11 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
> >  		return PTR_ERR(de);
> >  
> >  	err = -EEXIST;
> > -	if (ext2_match (namelen, name, de))
> > +	if (ext2_match (namelen, name, de)) {
> > +		if (de->file_type == EXT2_FT_WHT)
> > +			goto got_it;
> >  		goto out_unlock;
> > +	}
> >  
> >  got_it:
> >  	name_len = EXT2_DIR_REC_LEN(de->name_len);
> > @@ -577,7 +602,8 @@ got_it:
> >  							&page, NULL);
> >  	if (err)
> >  		goto out_unlock;
> > -	if (de->inode) {
> > +	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
> > +			  !ext2_match (namelen, name, de))) {
> >  		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> >  		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> >  		de->rec_len = ext2_rec_len_to_disk(name_len);
> > @@ -646,6 +672,68 @@ out:
> >  	return err;
> >  }
> >  
> > +int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
> > +			 struct ext2_dir_entry_2 * de, struct page * page)
> > +{
> > +	const char *name = dentry->d_name.name;
> > +	int namelen = dentry->d_name.len;
> > +	unsigned short rec_len, name_len;
> > +	loff_t pos;
> > +	int err;
> > +
> > +	if (!de) {
> > +		de = ext2_append_entry(dentry, &page);
> > +		BUG_ON(!de);
> > +	}
> > +
> > +	err = -EEXIST;
> > +	if (ext2_match (namelen, name, de) &&
> > +	    (de->file_type == EXT2_FT_WHT)) {
> > +		ext2_error(dir->i_sb, __func__,
> > +			   "entry is already a whiteout in directory #%lu",
> > +			   dir->i_ino);
> > +		goto out_unlock;
> > +	}
> > +
> > +	name_len = EXT2_DIR_REC_LEN(de->name_len);
> > +	rec_len = ext2_rec_len_from_disk(de->rec_len);
> > +
> > +	pos = page_offset(page) +
> > +		(char*)de - (char*)page_address(page);
> > +	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
> > +							&page, NULL);
> > +	if (err)
> > +		goto out_unlock;
> > +	/*
> > +	 * We whiteout an existing entry. Do what ext2_delete_entry() would do,
> > +	 * except that we don't need to merge with the previous entry since
> > +	 * we are going to reuse it.
> > +	 */
> > +	if (ext2_match (namelen, name, de))
> > +		de->inode = 0;
> > +	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
> > +		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> > +		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> > +		de->rec_len = ext2_rec_len_to_disk(name_len);
> > +		de = de1;
> > +	}
> 
> This looks odd, can someone tell me what's actually going on with de and de1
> here please?

This patch needs serious review (as noted).  I will corner some ext*
developer at some point to look at it.

-VAL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-06-15 18:39 ` [PATCH 14/38] fallthru: ext2 fallthru support Valerie Aurora
  2010-07-13  4:30   ` Ian Kent
@ 2010-08-04 14:44   ` Miklos Szeredi
  2010-08-04 22:48     ` Valerie Aurora
  2010-08-04 23:04     ` Valerie Aurora
  1 sibling, 2 replies; 26+ messages in thread
From: Miklos Szeredi @ 2010-08-04 14:44 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: viro, miklos, jblunck, hch, linux-kernel, linux-fsdevel, vaurora,
	tytso, linux-ext4

On Tue, 15 Jun 2010, Valerie Aurora wrote:
> Add support for fallthru directory entries to ext2.

If a previously used ext2 filesystem is mounted again then
fallthroughs don't appear to work as expected.  Stat returns ENOENT
for these entries.

Can't see anything obviously wrong with the code.

> 
> XXX What to do for d_ino for fallthrus?  If we return the inode from
> the the underlying file system, it comes from a different inode
> "namespace" and that will produce spurious matches.  This argues for
> implementation of fallthrus as symlinks because they have to allocate
> an inode (and inode number) anyway, and we can later reuse it if we
> copy the file up.

That's an idea, but I guess it won't make everyone happy since it
wastes both disk space and memory.

One of the key differentiators of the union mounts concept was that it
doesn't duplicate inodes and dentries from the layers.  With the
directory copyup on lookup that's already partially lost, but that can
be justified by the fact that non-directories usually far outnumber
directories.

Another idea is to use an internal inode and make all fallthroughs be
hard links to that.

I think the same would work for whiteouts as well.  I don't like the
fact that whiteouts are invisible even when not mounted as part of a
union.

Miklos

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-04 14:44   ` Miklos Szeredi
@ 2010-08-04 22:48     ` Valerie Aurora
  2010-08-05 10:36       ` Miklos Szeredi
  2010-08-04 23:04     ` Valerie Aurora
  1 sibling, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-04 22:48 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

[-- Attachment #1: Type: text/plain, Size: 504 bytes --]

On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > Add support for fallthru directory entries to ext2.
> 
> If a previously used ext2 filesystem with is mounted again then
> fallthroughs don't appear to work as expected.  Stat returns ENOENT
> for these entries.
> 
> Can't see anything obviously wrong with the code.

Hm, I wrote one test case for this that worked (attached).  Can you
give me more details on your test case?  Thanks,

-VAL

[-- Attachment #2: ext2_remount_fallthrus.sh --]
[-- Type: application/x-sh, Size: 1305 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-04 14:44   ` Miklos Szeredi
  2010-08-04 22:48     ` Valerie Aurora
@ 2010-08-04 23:04     ` Valerie Aurora
  2010-08-05 11:13       ` Miklos Szeredi
  1 sibling, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-04 23:04 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > Add support for fallthru directory entries to ext2.
> 
> If a previously used ext2 filesystem with is mounted again then
> fallthroughs don't appear to work as expected.  Stat returns ENOENT
> for these entries.
> 
> Can't see anything obviously wrong with the code.
> 
> > 
> > XXX What to do for d_ino for fallthrus?  If we return the inode from
> > the the underlying file system, it comes from a different inode
> > "namespace" and that will produce spurious matches.  This argues for
> > implementation of fallthrus as symlinks because they have to allocate
> > an inode (and inode number) anyway, and we can later reuse it if we
> > copy the file up.
> 
> That's an idea, but I guess it won't make everyone happy since it
> wastes both disk space and memory.

Hm, I should probably remove this comment - I've talked over the
symlink implementation with a few people and it seems like it
introduces more problems than it solves.

> One of the key differentiators for union mounts concept was that it
> doesn't duplicate inodes and dentries from the layers.  With the
> directory copyup on lookup that's already partially lost, but that can
> be justified by the fact that non-directories usually far outnumber
> directories.

And it solves all the readdir() problems in one go. :)

> Another idea is to use an internal inode and make all fallthroughs be
> hard links to that.
> 
> I think the same would work for whiteouts as well.  I don't like the
> fact that whiteouts are invisible even when not mounted as part of a
> union.

I don't know if this helps, but I just wrote support for removing ext2
whiteouts and fallthrus using tune2fs and e2fsck.  I think this does
what people want from a "visible" whiteout feature without adding more
complexity to the VFS.  It also takes away all consideration of race
conditions and dentry conversion that happens with online removal of
whiteouts and fallthrus.

What are your thoughts on what a visible whiteout/fallthru would look
like?

Thanks,

-VAL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-04 22:48     ` Valerie Aurora
@ 2010-08-05 10:36       ` Miklos Szeredi
  2010-08-05 23:30         ` Valerie Aurora
  0 siblings, 1 reply; 26+ messages in thread
From: Miklos Szeredi @ 2010-08-05 10:36 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: miklos, viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Wed, 4 Aug 2010, Valerie Aurora wrote:
> --zYM0uCDKw75PZbzx
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> 
> On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > Add support for fallthru directory entries to ext2.
> > 
> > If a previously used ext2 filesystem with is mounted again then
> > fallthroughs don't appear to work as expected.  Stat returns ENOENT
> > for these entries.
> > 
> > Can't see anything obviously wrong with the code.
> 
> Hm, I wrote one test case for this that worked (attached).  Can you
> give me more details on your test case?  Thanks,

uml:~# mount -oloop -r ext3-2.img /mnt/img/
uml:~# mount -oloop -r ext3.img /mnt/img/
uml:~# losetup -f ovl.img 
uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
uml:~# "ls" /mnt/img
bunion  lost+found  union
uml:~# "ls" /mnt/img/union
1  2  3
uml:~# "ls" /mnt/img/union/1
a  x
uml:~# umount /mnt/img/
uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
uml:~# ls -l /mnt/img/  
total 14
drwxr-xr-x 2 root root  1024 Aug  5 09:56 bunion
drwx------ 2 root root 12288 Aug  5 09:41 lost+found
drwxr-xr-x 3 root root  1024 Aug  5 09:56 union
uml:~# ls -l /mnt/img/union/
ls: cannot access /mnt/img/union/3: No such file or directory
ls: cannot access /mnt/img/union/2: No such file or directory
total 1
drwxr-xr-x 2 root root 1024 Aug  5 09:56 1
?????????? ? ?    ?       ?            ? 2
?????????? ? ?    ?       ?            ? 3
uml:~# ls -l /mnt/img/union/1
ls: cannot access /mnt/img/union/1/a: No such file or directory
ls: cannot access /mnt/img/union/1/x: No such file or directory
total 0
?????????? ? ? ? ?            ? a
?????????? ? ? ? ?            ? x
uml:~# 

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-04 23:04     ` Valerie Aurora
@ 2010-08-05 11:13       ` Miklos Szeredi
  2010-08-06 17:12         ` Valerie Aurora
  2010-08-17 22:27         ` Valerie Aurora
  0 siblings, 2 replies; 26+ messages in thread
From: Miklos Szeredi @ 2010-08-05 11:13 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: miklos, viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > Another idea is to use an internal inode and make all fallthroughs be
> > hard links to that.
> > 
> > I think the same would work for whiteouts as well.  I don't like the
> > fact that whiteouts are invisible even when not mounted as part of a
> > union.
> 
> I don't know if this helps, but I just wrote support for removing ext2
> whiteouts and fallthrus using tune2fs and e2fsck.  I think this does
> what people want from a "visible" whiteout feature without adding more
> complexity to the VFS.  It also takes away all consideration of race
> conditions and dentry conversion that happens with online removal of
> whiteouts and fallthrus.
> 
> What are your thoughts on what a visible whiteout/fallthru would look
> like?

Best would be if it didn't need any modification to filesystems.  All
this having to upgrade util-linux, e2fsprogs, having incompatible
filesystem features is a pain for users (just been through that).

What we already have in most filesystems:

 - extended attributes, e.g. use the system.union.* namespace and
   denote whiteouts and fallthroughs with such an attribute

 - hard links to make sure a separate inode is not necessary for each
   whiteout/fallthrough entry

 - some way for the user to easily identify such files when not
   mounted as part of a union e.g. make it a symlink pointing to
   "(deleted)" or whatever

Later the extended attributes can also be used for other things like
e.g. chmod()/chown() only copying up metadata, not data, and
indicating that data is still found on the lower layers.

Miklos

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-05 10:36       ` Miklos Szeredi
@ 2010-08-05 23:30         ` Valerie Aurora
  2010-08-06  8:15           ` Miklos Szeredi
  0 siblings, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-05 23:30 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Thu, Aug 05, 2010 at 12:36:55PM +0200, Miklos Szeredi wrote:
> On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > --zYM0uCDKw75PZbzx
> > Content-Type: text/plain; charset=us-ascii
> > Content-Disposition: inline
> > 
> > On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > > Add support for fallthru directory entries to ext2.
> > > 
> > > If a previously used ext2 filesystem with is mounted again then
> > > fallthroughs don't appear to work as expected.  Stat returns ENOENT
> > > for these entries.
> > > 
> > > Can't see anything obviously wrong with the code.
> > 
> > Hm, I wrote one test case for this that worked (attached).  Can you
> > give me more details on your test case?  Thanks,

Cool, thanks.  Yes, I suppose the fallthrus should be ignored if they
don't fall through to anything.  If I do a proper lookup for d_ino, I
can kill two birds with one stone, since that will tell us whether
there is anything below the fallthru and thus whether to return this
directory entry.

-VAL

> uml:~# mount -oloop -r ext3-2.img /mnt/img/
> uml:~# mount -oloop -r ext3.img /mnt/img/
> uml:~# losetup -f ovl.img 
> uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> uml:~# "ls" /mnt/img
> bunion  lost+found  union
> uml:~# "ls" /mnt/img/union
> 1  2  3
> uml:~# "ls" /mnt/img/union/1
> a  x
> uml:~# umount /mnt/img/
> uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> uml:~# ls -l /mnt/img/  
> total 14
> drwxr-xr-x 2 root root  1024 Aug  5 09:56 bunion
> drwx------ 2 root root 12288 Aug  5 09:41 lost+found
> drwxr-xr-x 3 root root  1024 Aug  5 09:56 union
> uml:~# ls -l /mnt/img/union/
> ls: cannot access /mnt/img/union/3: No such file or directory
> ls: cannot access /mnt/img/union/2: No such file or directory
> total 1
> drwxr-xr-x 2 root root 1024 Aug  5 09:56 1
> ?????????? ? ?    ?       ?            ? 2
> ?????????? ? ?    ?       ?            ? 3
> uml:~# ls -l /mnt/img/union/1
> ls: cannot access /mnt/img/union/1/a: No such file or directory
> ls: cannot access /mnt/img/union/1/x: No such file or directory
> total 0
> ?????????? ? ? ? ?            ? a
> ?????????? ? ? ? ?            ? x
> uml:~# 
> 
> Thanks,
> Miklos

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-05 23:30         ` Valerie Aurora
@ 2010-08-06  8:15           ` Miklos Szeredi
  2010-08-06 17:16             ` Valerie Aurora
  0 siblings, 1 reply; 26+ messages in thread
From: Miklos Szeredi @ 2010-08-06  8:15 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: miklos, viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Thu, 5 Aug 2010, Valerie Aurora wrote:
> On Thu, Aug 05, 2010 at 12:36:55PM +0200, Miklos Szeredi wrote:
> > On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > --zYM0uCDKw75PZbzx
> > > Content-Type: text/plain; charset=us-ascii
> > > Content-Disposition: inline
> > > 
> > > On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > > > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > > > Add support for fallthru directory entries to ext2.
> > > > 
> > > > If a previously used ext2 filesystem with is mounted again then
> > > > fallthroughs don't appear to work as expected.  Stat returns ENOENT
> > > > for these entries.
> > > > 
> > > > Can't see anything obviously wrong with the code.
> > > 
> > > Hm, I wrote one test case for this that worked (attached).  Can you
> > > give me more details on your test case?  Thanks,
> 
> Cool, thanks.  Yes, I suppose the fallthrus should be ignored if they
> don't fall through to anything.

That's not the case here.  They _should_ fall through to ext3.img, yet
apparently after ovl.img being mounted again they don't seem to work.

Note, ovl.img starts out as a completely empty filesystem.

The difference between your test case and mine might be that yours
not only reads the directory but stats the files as well, while mine
does not.

Thanks,
Miklos


> 
> > uml:~# mount -oloop -r ext3-2.img /mnt/img/
> > uml:~# mount -oloop -r ext3.img /mnt/img/
> > uml:~# losetup -f ovl.img 
> > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > uml:~# "ls" /mnt/img
> > bunion  lost+found  union
> > uml:~# "ls" /mnt/img/union
> > 1  2  3
> > uml:~# "ls" /mnt/img/union/1
> > a  x
> > uml:~# umount /mnt/img/
> > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > uml:~# ls -l /mnt/img/  
> > total 14
> > drwxr-xr-x 2 root root  1024 Aug  5 09:56 bunion
> > drwx------ 2 root root 12288 Aug  5 09:41 lost+found
> > drwxr-xr-x 3 root root  1024 Aug  5 09:56 union
> > uml:~# ls -l /mnt/img/union/
> > ls: cannot access /mnt/img/union/3: No such file or directory
> > ls: cannot access /mnt/img/union/2: No such file or directory
> > total 1
> > drwxr-xr-x 2 root root 1024 Aug  5 09:56 1
> > ?????????? ? ?    ?       ?            ? 2
> > ?????????? ? ?    ?       ?            ? 3
> > uml:~# ls -l /mnt/img/union/1
> > ls: cannot access /mnt/img/union/1/a: No such file or directory
> > ls: cannot access /mnt/img/union/1/x: No such file or directory
> > total 0
> > ?????????? ? ? ? ?            ? a
> > ?????????? ? ? ? ?            ? x
> > uml:~# 
> > 
> > Thanks,
> > Miklos
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-05 11:13       ` Miklos Szeredi
@ 2010-08-06 17:12         ` Valerie Aurora
  2010-08-17 22:27         ` Valerie Aurora
  1 sibling, 0 replies; 26+ messages in thread
From: Valerie Aurora @ 2010-08-06 17:12 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Thu, Aug 05, 2010 at 01:13:55PM +0200, Miklos Szeredi wrote:
> On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > Another idea is to use an internal inode and make all fallthroughs be
> > > hard links to that.
> > > 
> > > I think the same would work for whiteouts as well.  I don't like the
> > > fact that whiteouts are invisible even when not mounted as part of a
> > > union.
> > 
> > I don't know if this helps, but I just wrote support for removing ext2
> > whiteouts and fallthrus using tune2fs and e2fsck.  I think this does
> > what people want from a "visible" whiteout feature without adding more
> > complexity to the VFS.  It also takes away all consideration of race
> > conditions and dentry conversion that happens with online removal of
> > whiteouts and fallthrus.
> > 
> > What are your thoughts on what a visible whiteout/fallthru would look
> > like?
> 
> Best would be if it didn't need any modification to filesystems.  All
> this having to upgrade util-linux, e2fsprogs, having incompatible
> filesystem features is a pain for users (just been through that).
> 
> What we already have in most filesystems:
> 
>  - extended attributes, e.g. use the system.union.* namespace and
>    denote whiteouts and falltroughs with such an attribute
> 
>  - hard links to make sure a separate inode is not necessary for each
>    whiteout/fallthrough entry
> 
>  - some way for the user to easily identify such files when not
>    mounted as part of a union e.g. make it a symlink pointing to
>    "(deleted)" or whatever
> 
> Later the extended attributes can also be used for other things like
> e.g. chmod()/chown() only copying up metadata, not data, and
> indicating that data is still found on the lower layers.

Just a quick note to say that my explicit design was to do as much as
possible in the VFS, except when adding a little support to the
low-level fs would make it significantly faster, simpler, and more
correct.  I think for union mounts to perform moderately well, and to
avoid namespace problems, we can't build it 100% out of existing file
system parts like xattrs.  However, I could be wrong and I will
definitely give any other implementation serious consideration.

-VAL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-06  8:15           ` Miklos Szeredi
@ 2010-08-06 17:16             ` Valerie Aurora
  2010-08-06 17:44               ` Miklos Szeredi
  0 siblings, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-06 17:16 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Fri, Aug 06, 2010 at 10:15:14AM +0200, Miklos Szeredi wrote:
> On Thu, 5 Aug 2010, Valerie Aurora wrote:
> > On Thu, Aug 05, 2010 at 12:36:55PM +0200, Miklos Szeredi wrote:
> > > On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > > --zYM0uCDKw75PZbzx
> > > > Content-Type: text/plain; charset=us-ascii
> > > > Content-Disposition: inline
> > > > 
> > > > On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > > > > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > > > > Add support for fallthru directory entries to ext2.
> > > > > 
> > > > > If a previously used ext2 filesystem with is mounted again then
> > > > > fallthroughs don't appear to work as expected.  Stat returns ENOENT
> > > > > for these entries.
> > > > > 
> > > > > Can't see anything obviously wrong with the code.
> > > > 
> > > > Hm, I wrote one test case for this that worked (attached).  Can you
> > > > give me more details on your test case?  Thanks,
> > 
> > Cool, thanks.  Yes, I suppose the fallthrus should be ignored if they
> > don't fall through to anything.
> 
> That's not the case here.  They _should_ fall through to ext3.img, yet
> apparently after ovl.img being mounted again they don't seem to work.

Oh, "mmount -b 8" == "mount -o union".  Is this the mmount from mtools
or something else?

> Note, ovl.img starts out as a completely empty filesystem.
> 
> The difference between your test case and mine mitght be that yours
> not only reads the directory but stats the files as well, while mine
> does not.

Okay, I'll experiment more and see what I can do.

-VAL

> Thanks,
> Miklos
> 
> 
> > 
> > > uml:~# mount -oloop -r ext3-2.img /mnt/img/
> > > uml:~# mount -oloop -r ext3.img /mnt/img/
> > > uml:~# losetup -f ovl.img 
> > > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > > uml:~# "ls" /mnt/img
> > > bunion  lost+found  union
> > > uml:~# "ls" /mnt/img/union
> > > 1  2  3
> > > uml:~# "ls" /mnt/img/union/1
> > > a  x
> > > uml:~# umount /mnt/img/
> > > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > > uml:~# ls -l /mnt/img/  
> > > total 14
> > > drwxr-xr-x 2 root root  1024 Aug  5 09:56 bunion
> > > drwx------ 2 root root 12288 Aug  5 09:41 lost+found
> > > drwxr-xr-x 3 root root  1024 Aug  5 09:56 union
> > > uml:~# ls -l /mnt/img/union/
> > > ls: cannot access /mnt/img/union/3: No such file or directory
> > > ls: cannot access /mnt/img/union/2: No such file or directory
> > > total 1
> > > drwxr-xr-x 2 root root 1024 Aug  5 09:56 1
> > > ?????????? ? ?    ?       ?            ? 2
> > > ?????????? ? ?    ?       ?            ? 3
> > > uml:~# ls -l /mnt/img/union/1
> > > ls: cannot access /mnt/img/union/1/a: No such file or directory
> > > ls: cannot access /mnt/img/union/1/x: No such file or directory
> > > total 0
> > > ?????????? ? ? ? ?            ? a
> > > ?????????? ? ? ? ?            ? x
> > > uml:~# 
> > > 
> > > Thanks,
> > > Miklos
> > 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-06 17:16             ` Valerie Aurora
@ 2010-08-06 17:44               ` Miklos Szeredi
  0 siblings, 0 replies; 26+ messages in thread
From: Miklos Szeredi @ 2010-08-06 17:44 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: miklos, viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Fri, 6 Aug 2010, Valerie Aurora wrote:
> Oh, "mmount -b 8" == "mount -o union".

Right.

>   Is this the mmount from mtools
> or something else?

It's a primitive utility that basically just wraps the mount(2) syscall
without any fstab/mtab support:

  http://www.kernel.org/pub/linux/kernel/people/mszeredi/mmount/

Miklos

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 14/38] fallthru: ext2 fallthru support
       [not found] <1281134124-17041-1-git-send-email-vaurora@redhat.com>
@ 2010-08-06 22:35 ` Valerie Aurora
  2010-08-07  0:28   ` Andreas Dilger
  0 siblings, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-06 22:35 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Miklos Szeredi, Jan Blunck, Christoph Hellwig, linux-kernel,
	linux-fsdevel, Valerie Aurora, Theodore Tso, linux-ext4

Add support for fallthru directory entries to ext2.

XXX What to do for d_ino for fallthrus?  If we return the inode from
the underlying file system, it comes from a different inode
"namespace" and that will produce spurious matches.  This argues for
implementation of fallthrus as symlinks because they have to allocate
an inode (and inode number) anyway, and we can later reuse it if we
copy the file up.

Cc: Theodore Tso <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Valerie Aurora <vaurora@redhat.com>
Signed-off-by: Jan Blunck <jblunck@suse.de>
---
 fs/ext2/dir.c           |   92 ++++++++++++++++++++++++++++++++++++++++++++--
 fs/ext2/ext2.h          |    1 +
 fs/ext2/namei.c         |   22 +++++++++++
 fs/ext2/super.c         |    2 +
 include/linux/ext2_fs.h |    4 ++
 5 files changed, 117 insertions(+), 4 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 030bd46..f3b4aff 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -219,7 +219,8 @@ static inline int ext2_match (int len, const char * const name,
 {
 	if (len != de->name_len)
 		return 0;
-	if (!de->inode && (de->file_type != EXT2_FT_WHT))
+	if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
+			   (de->file_type != EXT2_FT_FALLTHRU)))
 		return 0;
 	return !memcmp(name, de->name, len);
 }
@@ -256,6 +257,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
 	[EXT2_FT_SOCK]		= DT_SOCK,
 	[EXT2_FT_SYMLINK]	= DT_LNK,
 	[EXT2_FT_WHT]		= DT_WHT,
+	[EXT2_FT_FALLTHRU]	= DT_UNKNOWN,
 };
 
 #define S_SHIFT 12
@@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 					ext2_put_page(page);
 					return 0;
 				}
+			} else if (de->file_type == EXT2_FT_FALLTHRU) {
+				int over;
+				unsigned char d_type = DT_UNKNOWN;
+
+				offset = (char *)de - kaddr;
+				/* XXX We don't know the inode number
+				 * of the directory entry in the
+				 * underlying file system.  Should
+				 * look it up, either on fallthru
+				 * creation at first readdir or now at
+				 * filldir time. */
+				over = filldir(dirent, de->name, de->name_len,
+					       (n<<PAGE_CACHE_SHIFT) | offset,
+					       123 /* Made up ino */, d_type);
+				if (over) {
+					ext2_put_page(page);
+					return 0;
+				}
 			}
 			filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
 		}
@@ -463,6 +483,10 @@ ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
 			spin_lock(&dentry->d_lock);
 			dentry->d_flags |= DCACHE_WHITEOUT;
 			spin_unlock(&dentry->d_lock);
+		} else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_flags |= DCACHE_FALLTHRU;
+			spin_unlock(&dentry->d_lock);
 		}
 		ext2_put_page(page);
 	}
@@ -532,6 +556,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 				de->name_len = 0;
 				de->rec_len = ext2_rec_len_to_disk(chunk_size);
 				de->inode = 0;
+				de->file_type = 0;
 				goto got_it;
 			}
 			if (de->rec_len == 0) {
@@ -545,6 +570,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
 			name_len = EXT2_DIR_REC_LEN(de->name_len);
 			rec_len = ext2_rec_len_from_disk(de->rec_len);
 			if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
+			    (de->file_type != EXT2_FT_FALLTHRU) &&
 			    (rec_len >= reclen))
 				goto got_it;
 			if (rec_len >= name_len + reclen)
@@ -587,7 +613,8 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 
 	err = -EEXIST;
 	if (ext2_match (namelen, name, de)) {
-		if (de->file_type == EXT2_FT_WHT)
+		if ((de->file_type == EXT2_FT_WHT) ||
+		    (de->file_type == EXT2_FT_FALLTHRU))
 			goto got_it;
 		goto out_unlock;
 	}
@@ -602,7 +629,8 @@ got_it:
 							&page, NULL);
 	if (err)
 		goto out_unlock;
-	if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
+	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+			   (de->file_type == EXT2_FT_FALLTHRU)) &&
 			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
@@ -627,6 +655,60 @@ out_unlock:
 }
 
 /*
+ * Create a fallthru entry.
+ */
+int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
+{
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	unsigned short rec_len, name_len;
+	ext2_dirent * de;
+	struct page *page;
+	loff_t pos;
+	int err;
+
+	de = ext2_append_entry(dentry, &page);
+	if (IS_ERR(de))
+		return PTR_ERR(de);
+
+	err = -EEXIST;
+	if (ext2_match (namelen, name, de))
+		goto out_unlock;
+
+	name_len = EXT2_DIR_REC_LEN(de->name_len);
+	rec_len = ext2_rec_len_from_disk(de->rec_len);
+
+	pos = page_offset(page) +
+		(char*)de - (char*)page_address(page);
+	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
+							&page, NULL);
+	if (err)
+		goto out_unlock;
+	if (de->inode || (de->file_type == EXT2_FT_WHT) ||
+	    (de->file_type == EXT2_FT_FALLTHRU)) {
+		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+		de->rec_len = ext2_rec_len_to_disk(name_len);
+		de = de1;
+	}
+	de->name_len = namelen;
+	memcpy(de->name, name, namelen);
+	de->inode = 0;
+	de->file_type = EXT2_FT_FALLTHRU;
+	err = ext2_commit_chunk(page, pos, rec_len);
+	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+	mark_inode_dirty(dir);
+	/* OFFSET_CACHE */
+out_put:
+	ext2_put_page(page);
+	return err;
+out_unlock:
+	unlock_page(page);
+	goto out_put;
+}
+
+/*
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry. Page is up-to-date. Releases the page.
  */
@@ -711,7 +793,9 @@ int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
 	 */
 	if (ext2_match (namelen, name, de))
 		de->inode = 0;
-	if (de->inode || (de->file_type == EXT2_FT_WHT)) {
+	if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+			   (de->file_type == EXT2_FT_FALLTHRU)) &&
+			  !ext2_match (namelen, name, de))) {
 		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
 		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
 		de->rec_len = ext2_rec_len_to_disk(name_len);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 89ab2f7..1504814 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -108,6 +108,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
 extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_whiteout_entry (struct inode *, struct dentry *,
 				struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_fallthru_entry (struct inode *, struct dentry *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 8f92dd0..af4052f 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -345,6 +345,7 @@ static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	spin_lock(&new_dentry->d_lock);
+	new_dentry->d_flags &= ~DCACHE_FALLTHRU;
 	new_dentry->d_flags |= DCACHE_WHITEOUT;
 	spin_unlock(&new_dentry->d_lock);
 	d_add(new_dentry, NULL);
@@ -363,6 +364,26 @@ out:
 	return err;
 }
 
+/*
+ * Create a fallthru entry.
+ */
+static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
+{
+	int err;
+
+	dquot_initialize(dir);
+
+	err = ext2_fallthru_entry(dir, dentry);
+	if (err)
+		return err;
+
+	d_instantiate(dentry, NULL);
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags |= DCACHE_FALLTHRU;
+	spin_unlock(&dentry->d_lock);
+	return 0;
+}
+
 static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	struct inode * new_dir,	struct dentry * new_dentry )
 {
@@ -466,6 +487,7 @@ const struct inode_operations ext2_dir_inode_operations = {
 	.rmdir		= ext2_rmdir,
 	.mknod		= ext2_mknod,
 	.whiteout	= ext2_whiteout,
+	.fallthru	= ext2_fallthru,
 	.rename		= ext2_rename,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 704521b..76eba1e 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1095,6 +1095,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_WHITEOUT))
 		sb->s_flags |= MS_WHITEOUT;
+	if (EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FALLTHRU))
+		sb->s_flags |= MS_FALLTHRU;
 
 	if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY))
 		sb->s_flags |= MS_RDONLY;
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index b0fb356..1a6f929 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -505,11 +505,14 @@ struct ext2_super_block {
 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008
 #define EXT2_FEATURE_INCOMPAT_META_BG		0x0010
 #define EXT2_FEATURE_INCOMPAT_WHITEOUT		0x0020
+/* ext3/4 incompat flags take up the intervening constants */
+#define EXT2_FEATURE_INCOMPAT_FALLTHRU		0x2000
 #define EXT2_FEATURE_INCOMPAT_ANY		0xffffffff
 
 #define EXT2_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT2_FEATURE_INCOMPAT_SUPP	(EXT2_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT2_FEATURE_INCOMPAT_WHITEOUT| \
+					 EXT2_FEATURE_INCOMPAT_FALLTHRU| \
 					 EXT2_FEATURE_INCOMPAT_META_BG)
 #define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
@@ -577,6 +580,7 @@ enum {
 	EXT2_FT_SOCK		= 6,
 	EXT2_FT_SYMLINK		= 7,
 	EXT2_FT_WHT		= 8,
+	EXT2_FT_FALLTHRU	= 9,
 	EXT2_FT_MAX
 };
 
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-06 22:35 ` Valerie Aurora
@ 2010-08-07  0:28   ` Andreas Dilger
  2010-08-08 16:40     ` Valerie Aurora
  0 siblings, 1 reply; 26+ messages in thread
From: Andreas Dilger @ 2010-08-07  0:28 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: Alexander Viro, Miklos Szeredi, Jan Blunck, Christoph Hellwig,
	linux-kernel, linux-fsdevel, Theodore Tso, linux-ext4

On 2010-08-06, at 16:35, Valerie Aurora wrote:
> XXX What to do for d_ino for fallthrus?  If we return the inode from
> the underlying file system, it comes from a different inode
> "namespace" and that will produce spurious matches.  This argues for
> implementation of fallthrus as symlinks because they have to allocate
> an inode (and inode number) anyway, and we can later reuse it if we
> copy the file up.
> 
> @@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, +				/* XXX We don't know the inode number
> +				 * of the directory entry in the
> +				 * underlying file system.  Should
> +				 * look it up, either on fallthru
> +				 * creation at first readdir or now at
> +				 * filldir time. */
> +				over = filldir(dirent, de->name, de->name_len,
> +					       (n<<PAGE_CACHE_SHIFT) | offset,
> +					       123 /* Made up ino */, d_type);

I don't think it makes sense to use "123" for the inode number.  This is a valid inode number, and almost certainly one that will be in use in most filesystems.  One option for extN is to use EXT2_BAD_INO (1).

Cheers, Andreas






^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-07  0:28   ` Andreas Dilger
@ 2010-08-08 16:40     ` Valerie Aurora
  0 siblings, 0 replies; 26+ messages in thread
From: Valerie Aurora @ 2010-08-08 16:40 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Alexander Viro, Miklos Szeredi, Jan Blunck, Christoph Hellwig,
	linux-kernel, linux-fsdevel, Theodore Tso, linux-ext4

On Fri, Aug 06, 2010 at 06:28:29PM -0600, Andreas Dilger wrote:
> On 2010-08-06, at 16:35, Valerie Aurora wrote:
> > XXX What to do for d_ino for fallthrus?  If we return the inode from
> > the underlying file system, it comes from a different inode
> > "namespace" and that will produce spurious matches.  This argues for
> > implementation of fallthrus as symlinks because they have to allocate
> > an inode (and inode number) anyway, and we can later reuse it if we
> > copy the file up.
> > 
> > @@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, +				/* XXX We don't know the inode number
> > +				 * of the directory entry in the
> > +				 * underlying file system.  Should
> > +				 * look it up, either on fallthru
> > +				 * creation at first readdir or now at
> > +				 * filldir time. */
> > +				over = filldir(dirent, de->name, de->name_len,
> > +					       (n<<PAGE_CACHE_SHIFT) | offset,
> > +					       123 /* Made up ino */, d_type);
> 
> I don't think it makes sense to use "123" for the inode number.  This is a valid inode number, and almost certainly one that will be in use in most filesystems.  One option for extN is to use EXT2_BAD_INO (1).

The next version (Subject: Union mounts - return d_ino from lower fs)
fixed this.  Take a look and tell me what you think?

-VAL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-05 11:13       ` Miklos Szeredi
  2010-08-06 17:12         ` Valerie Aurora
@ 2010-08-17 22:27         ` Valerie Aurora
  2010-08-18  8:26           ` Miklos Szeredi
  1 sibling, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-17 22:27 UTC (permalink / raw)
  To: Miklos Szeredi, Jan Kara, Andreas Gruenbacher
  Cc: viro, jblunck, hch, linux-kernel, linux-fsdevel, tytso,
	linux-ext4

On Thu, Aug 05, 2010 at 01:13:55PM +0200, Miklos Szeredi wrote:
> On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > Another idea is to use an internal inode and make all fallthroughs be
> > > hard links to that.
> > > 
> > > I think the same would work for whiteouts as well.  I don't like the
> > > fact that whiteouts are invisible even when not mounted as part of a
> > > union.
> > 
> > I don't know if this helps, but I just wrote support for removing ext2
> > whiteouts and fallthrus using tune2fs and e2fsck.  I think this does
> > what people want from a "visible" whiteout feature without adding more
> > complexity to the VFS.  It also takes away all consideration of race
> > conditions and dentry conversion that happens with online removal of
> > whiteouts and fallthrus.
> > 
> > What are your thoughts on what a visible whiteout/fallthru would look
> > like?
> 
> Best would be if it didn't need any modification to filesystems.  All
> this having to upgrade util-linux, e2fsprogs, having incompatible
> filesystem features is a pain for users (just been through that).
> 
> What we already have in most filesystems:
> 
>  - extended attributes, e.g. use the system.union.* namespace and
>    denote whiteouts and fallthroughs with such an attribute

Jan Kara helped convince me this might be better than fs-specific
fallthrus and whiteouts.  See my email on get_unlinked_inode().

>  - hard links to make sure a separate inode is not necessary for each
>    whiteout/fallthrough entry

The problem with hard links is that you run into hard link limits.  I
don't think we can do hard links for whiteouts and fallthrus.  Each
whiteout or fallthru will cost an inode if we implement them as
extended attributes.  This cost has to be balanced against the cost of
implementing them as dentries, which is mainly code complexity in
individual file systems.

>  - some way for the user to easily identify such files when not
>    mounted as part of a union e.g. make it a symlink pointing to
>    "(deleted)" or whatever

Perhaps we can simply not interpret the whiteout/fallthru extended
attributes when the file system is not unioned and let userland
operate on them via getxattr()/setxattr().

> Later the extended attributes can also be used for other things like
> e.g. chmod()/chown() only copying up metadata, not data, and
> indicating that data is still found on the lower layers.

It would certainly be more extensible than in-dentry flags.

-VAL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-17 22:27         ` Valerie Aurora
@ 2010-08-18  8:26           ` Miklos Szeredi
  0 siblings, 0 replies; 26+ messages in thread
From: Miklos Szeredi @ 2010-08-18  8:26 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: miklos, jack, agruen, viro, jblunck, hch, linux-kernel,
	linux-fsdevel, tytso, linux-ext4

On Tue, 17 Aug 2010, Valerie Aurora wrote:
> >  - hard links to make sure a separate inode is not necessary for each
> >    whiteout/fallthrough entry
> 
> The problem with hard links is that you run into hard link limits.  I
> don't think we can do hard links for whiteouts and fallthrus.  Each
> whiteout or fallthru will cost an inode if we implement them as
> extended attributes.  This cost has to be balanced against the cost of
> implementing them as dentries, which is mainly code complexity in
> individual file systems.

get_unlinked_inode() is a great idea.  But I feel that individual
inodes for each fallthrough is excessive.  It'll make the first
readdir() really really expensive and wastes a lot of disk and memory
for no good reason.

Not sure how to fix the hard link limits problem though...

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
       [not found]           ` <fiMc1-6ip-7@gated-at.bofh.it>
@ 2010-08-18 23:24             ` Bodo Eggert
  2010-08-19  2:03               ` J. R. Okajima
  2010-08-24 17:21               ` Valerie Aurora
  0 siblings, 2 replies; 26+ messages in thread
From: Bodo Eggert @ 2010-08-18 23:24 UTC (permalink / raw)
  To: Miklos Szeredi, miklos, jack, agruen, viro, jblunck, hch,
	linux-kernel, linux-f

Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Tue, 17 Aug 2010, Valerie Aurora wrote:

>> >  - hard links to make sure a separate inode is not necessary for each
>> >    whiteout/fallthrough entry
>> 
>> The problem with hard links is that you run into hard link limits.  I
>> don't think we can do hard links for whiteouts and fallthrus.  Each
>> whiteout or fallthru will cost an inode if we implement them as
>> extended attributes.  This cost has to be balanced against the cost of
>> implementing them as dentries, which is mainly code complexity in
>> individual file systems.

Not knowing the details, I'd suggest to implement a generic function to
create an attributed inode and let the fs override it to create an
unlinked-file-dentry instead.

Benefit: All fs supporting extended attributes will be able to support
whiteout. If the fs has other means of supporting whiteout, they may fake
the attribute.

Possible problems:
- Having two ways of reporting a whiteout? Or can it be reported using a
  (static) fake inode?
- How do you un-whiteout while (not) having an overlaying fs?

> get_unlinked_inode() is a great idea.  But I feel that individual
> inodes for each fallthrough is excessive.  It'll make the first
> readdir() really really expensive and wastes a lot of disk and memory
> for no good reason.
> 
> Not sure how to fix the hard link limits problem though...

Do a hardlink if you can create a hard link, otherwise use a fresh inode
and use that for the next hardlink(s).



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-18 23:24             ` Bodo Eggert
@ 2010-08-19  2:03               ` J. R. Okajima
  2010-08-24 17:21               ` Valerie Aurora
  1 sibling, 0 replies; 26+ messages in thread
From: J. R. Okajima @ 2010-08-19  2:03 UTC (permalink / raw)
  To: 7eggert
  Cc: Miklos Szeredi, jack, agruen, viro, jblunck, hch, linux-kernel,
	linux-fsdevel, tytso, linux-ext4, Valerie Aurora


Bodo Eggert:
> Do a hardlink if you can create a hard link, otherwise use a fresh inode
> and use that for the next hardlink(s).

Exactly.
That's the approach aufs takes for whiteout and its brothers.


J. R. Okajima

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-18 23:24             ` Bodo Eggert
  2010-08-19  2:03               ` J. R. Okajima
@ 2010-08-24 17:21               ` Valerie Aurora
  2010-08-26  9:53                 ` Bodo Eggert
  1 sibling, 1 reply; 26+ messages in thread
From: Valerie Aurora @ 2010-08-24 17:21 UTC (permalink / raw)
  To: 7eggert, David Woodhouse
  Cc: Miklos Szeredi, jack, agruen, viro, jblunck, hch, linux-kernel,
	linux-fsdevel, tytso, linux-ext4

On Thu, Aug 19, 2010 at 01:24:07AM +0200, Bodo Eggert wrote:
> Miklos Szeredi <miklos@szeredi.hu> wrote:
> > On Tue, 17 Aug 2010, Valerie Aurora wrote:
> 
> >> >  - hard links to make sure a separate inode is not necessary for each
> >> >    whiteout/fallthrough entry
> >> 
> >> The problem with hard links is that you run into hard link limits.  I
> >> don't think we can do hard links for whiteouts and fallthrus.  Each
> >> whiteout or fallthru will cost an inode if we implement them as
> >> extended attributes.  This cost has to be balanced against the cost of
> >> implementing them as dentries, which is mainly code complexity in
> >> individual file systems.
> 
> Not knowing the details, I'd suggest to implement a generic function to
> create an attributed inode and let the fs override it to create an
> unlinked-file-dentry instead.
> 
> Benefit: All fs supporting extended attributes will be able to support
> whiteout. If the fs has other means of supporting whiteout, they may fake
> the attribute.

Yeah, I think that's the way to go.

> Possible problems:
> - Having two ways of reporting a whiteout? Or can it be reported using a
>   (static) fake inode?

They are going to look the same at the VFS level and higher.

> - How do you un-whiteout while (not) having an overlaying fs?

The current version of whiteout support always hides DT_WHT dentries
from userspace.  Perhaps a start is to only hide DT_WHT entries when
the file system is union mounted.  Applications usually ignore all
dentries with d_ino == 0 so it might not cause problems.

Right now, you have to remove whiteouts offline using fsck.

> > get_unlinked_inode() is a great idea.  But I feel that individual
> > inodes for each fallthrough is excessive.  It'll make the first
> > readdir() really really expensive and wastes a lot of disk and memory
> > for no good reason.
> > 
> > Not sure how to fix the hard link limits problem though...
> 
> Do a hardlink if you can create a hard link, otherwise use a fresh inode
> and use that for the next hardlink(s).

Bleah!  Then you have a code path that is only tested when you hit
LINK_MAX.  Sounds like a recipe for bugs to me.

-VAL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/38] fallthru: ext2 fallthru support
  2010-08-24 17:21               ` Valerie Aurora
@ 2010-08-26  9:53                 ` Bodo Eggert
  0 siblings, 0 replies; 26+ messages in thread
From: Bodo Eggert @ 2010-08-26  9:53 UTC (permalink / raw)
  To: Valerie Aurora
  Cc: 7eggert, David Woodhouse, Miklos Szeredi, jack, agruen, viro,
	jblunck, hch, linux-kernel, linux-fsdevel, tytso, linux-ext4

On Tue, 24 Aug 2010, Valerie Aurora wrote:
> On Thu, Aug 19, 2010 at 01:24:07AM +0200, Bodo Eggert wrote:
>> Miklos Szeredi <miklos@szeredi.hu> wrote:
>>> On Tue, 17 Aug 2010, Valerie Aurora wrote:

>>> get_unlinked_inode() is a great idea.  But I feel that individual
>>> inodes for each fallthrough is excessive.  It'll make the first
>>> readdir() really really expensive and wastes a lot of disk and memory
>>> for no good reason.
>>>
>>> Not sure how to fix the hard link limits problem though...
>>
>> Do a hardlink if you can create a hard link, otherwise use a fresh inode
>> and use that for the next hardlink(s).
>
> Bleah!  Then you have a code path that is only tested when you hit
> LINK_MAX.  Sounds like a recipe for bugs to me.

You'll also hit it while creating the first whiteout, maybe on creating 
the first whiteout since mounting, and on filesystems not supporting 
hardlinks (are there some that support attributes but not hardlinks?).
Maybe it will be possible to create immutable whiteout inodes, too.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2010-08-26  9:53 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1276627208-17242-1-git-send-email-vaurora@redhat.com>
2010-06-15 18:39 ` [PATCH 10/38] whiteout: Split of ext2_append_link() from ext2_add_link() Valerie Aurora
2010-06-15 18:39 ` [PATCH 11/38] whiteout: ext2 whiteout support Valerie Aurora
2010-07-13  4:24   ` Ian Kent
2010-07-19 22:14     ` Valerie Aurora
2010-06-15 18:39 ` [PATCH 14/38] fallthru: ext2 fallthru support Valerie Aurora
2010-07-13  4:30   ` Ian Kent
2010-08-04 14:44   ` Miklos Szeredi
2010-08-04 22:48     ` Valerie Aurora
2010-08-05 10:36       ` Miklos Szeredi
2010-08-05 23:30         ` Valerie Aurora
2010-08-06  8:15           ` Miklos Szeredi
2010-08-06 17:16             ` Valerie Aurora
2010-08-06 17:44               ` Miklos Szeredi
2010-08-04 23:04     ` Valerie Aurora
2010-08-05 11:13       ` Miklos Szeredi
2010-08-06 17:12         ` Valerie Aurora
2010-08-17 22:27         ` Valerie Aurora
2010-08-18  8:26           ` Miklos Szeredi
     [not found] <1277492728-11446-1-git-send-email-vaurora@redhat.com>
2010-06-25 19:05 ` Valerie Aurora
     [not found] <1281134124-17041-1-git-send-email-vaurora@redhat.com>
2010-08-06 22:35 ` Valerie Aurora
2010-08-07  0:28   ` Andreas Dilger
2010-08-08 16:40     ` Valerie Aurora
     [not found] <eVJmW-3Lf-15@gated-at.bofh.it>
     [not found] ` <eVJmW-3Lf-19@gated-at.bofh.it>
     [not found]   ` <fdNs6-5F1-7@gated-at.bofh.it>
     [not found]     ` <fdVfY-pv-23@gated-at.bofh.it>
     [not found]       ` <fe6Ep-cD-13@gated-at.bofh.it>
     [not found]         ` <fiCPo-73x-17@gated-at.bofh.it>
     [not found]           ` <fiMc1-6ip-7@gated-at.bofh.it>
2010-08-18 23:24             ` Bodo Eggert
2010-08-19  2:03               ` J. R. Okajima
2010-08-24 17:21               ` Valerie Aurora
2010-08-26  9:53                 ` Bodo Eggert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).