* [RFC/PATCH 2/3] ext4: add shutdown bit and check for it
2017-02-02 22:59 [RFC/PATCH 0/3] Implement XFS's GOINGDOWN ioctl for ext4 Theodore Ts'o
2017-02-02 22:59 ` [RFC/PATCH 1/3] ext4: rename s_resize_flags to s_ext4_flags Theodore Ts'o
@ 2017-02-02 22:59 ` Theodore Ts'o
2017-02-02 22:59 ` [RFC/PATCH 3/3] ext4: add EXT4_IOC_GOINGDOWN ioctl Theodore Ts'o
2017-02-03 7:05 ` [RFC/PATCH 0/3] Implement XFS's GOINGDOWN ioctl for ext4 Amir Goldstein
3 siblings, 0 replies; 8+ messages in thread
From: Theodore Ts'o @ 2017-02-02 22:59 UTC (permalink / raw)
To: Ext4 Developers List; +Cc: Theodore Ts'o
Add a shutdown bit that will cause ext4 processing to fail immediately
with EIO.
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
fs/ext4/ext4.h | 6 ++++++
fs/ext4/ext4_jbd2.c | 2 ++
fs/ext4/file.c | 12 ++++++++++++
fs/ext4/fsync.c | 3 +++
fs/ext4/ialloc.c | 3 +++
fs/ext4/inode.c | 25 +++++++++++++++++++++++++
fs/ext4/namei.c | 12 ++++++++++++
fs/ext4/xattr.c | 3 +++
8 files changed, 66 insertions(+)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d5b7b2ccb302..94064b7ce16c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1836,6 +1836,12 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
* Superblock flags
*/
#define EXT4_FLAGS_RESIZING 0
+#define EXT4_FLAGS_SHUTDOWN 1
+
+static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
+{
+ return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+}
/*
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index e770c1ee4613..8ea22d066f5c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,8 @@ static int ext4_journal_check_start(struct super_block *sb)
journal_t *journal;
might_sleep();
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ return -EIO;
if (sb->s_flags & MS_RDONLY)
return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d663d3d7c81c..ff3f6107b0ba 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -57,6 +57,9 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+ return -EIO;
+
if (!iov_iter_count(to))
return 0; /* skip atime */
@@ -213,6 +216,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
int overwrite = 0;
ssize_t ret;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
#ifdef CONFIG_FS_DAX
if (IS_DAX(inode))
return ext4_dax_write_iter(iocb, from);
@@ -348,6 +354,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (ext4_encrypted_inode(inode)) {
int err = fscrypt_get_encryption_info(inode);
if (err)
@@ -375,6 +384,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
char buf[64], *cp;
int ret;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
!(sb->s_flags & MS_RDONLY))) {
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 88effb1053c7..9d549608fd30 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,6 +100,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
tid_t commit_tid;
bool needs_barrier = false;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
J_ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file_enter(file, datasync);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f372fc431b8e..b14bae2598bc 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -764,6 +764,9 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!dir || !dir->i_nlink)
return ERR_PTR(-EPERM);
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return ERR_PTR(-EIO);
+
if ((ext4_encrypted_inode(dir) ||
DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) &&
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 918d351d5b94..e4416e94fb92 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1189,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
@@ -2037,6 +2040,9 @@ static int ext4_writepage(struct page *page,
struct ext4_io_submit io_submit;
bool keep_towrite = false;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
trace_ext4_writepage(page);
size = i_size_read(inode);
if (page->index == size >> PAGE_SHIFT)
@@ -2634,6 +2640,9 @@ static int ext4_writepages(struct address_space *mapping,
struct blk_plug plug;
bool give_up_on_write = false;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
percpu_down_read(&sbi->s_journal_flag_rwsem);
trace_ext4_writepages(inode, wbc);
@@ -2895,6 +2904,9 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
handle_t *handle;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
index = pos >> PAGE_SHIFT;
if (ext4_nonda_switch(inode->i_sb) ||
@@ -3199,6 +3211,9 @@ static int ext4_readpage(struct file *file, struct page *page)
int ret = -EAGAIN;
struct inode *inode = page->mapping->host;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
trace_ext4_readpage(page);
if (ext4_has_inline_data(inode))
@@ -3216,6 +3231,9 @@ ext4_readpages(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
/* If the file has inline data, no need to do readpages. */
if (ext4_has_inline_data(inode))
return 0;
@@ -5202,6 +5220,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
error = setattr_prepare(dentry, attr);
if (error)
return error;
@@ -5488,6 +5509,8 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
@@ -5511,6 +5534,8 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
int err;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
err = ext4_get_inode_loc(inode, iloc);
if (!err) {
BUFFER_TRACE(iloc->bh, "get_write_access");
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index af6fa5949193..32906ea1dc6b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2939,6 +2939,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
/* Initialize quotas before so that eventual writes go in
* separate transaction */
retval = dquot_initialize(dir);
@@ -3012,6 +3015,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
trace_ext4_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
@@ -3082,6 +3088,9 @@ static int ext4_symlink(struct inode *dir,
struct fscrypt_str disk_link;
struct fscrypt_symlink_data *sd = NULL;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ return -EIO;
+
disk_link.len = len + 1;
disk_link.name = (char *) symname;
@@ -3886,6 +3895,9 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
+ return -EIO;
+
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c40bd55b6400..67636acf7624 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -411,6 +411,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
{
int error;
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ return -EIO;
+
if (strlen(name) > 255)
return -ERANGE;
--
2.11.0.rc0.7.gbe5a750
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC/PATCH 3/3] ext4: add EXT4_IOC_GOINGDOWN ioctl
2017-02-02 22:59 [RFC/PATCH 0/3] Implement XFS's GOINGDOWN ioctl for ext4 Theodore Ts'o
2017-02-02 22:59 ` [RFC/PATCH 1/3] ext4: rename s_resize_flags to s_ext4_flags Theodore Ts'o
2017-02-02 22:59 ` [RFC/PATCH 2/3] ext4: add shutdown bit and check for it Theodore Ts'o
@ 2017-02-02 22:59 ` Theodore Ts'o
2017-02-03 0:02 ` Darrick J. Wong
2017-02-03 7:05 ` [RFC/PATCH 0/3] Implement XFS's GOINGDOWN ioctl for ext4 Amir Goldstein
3 siblings, 1 reply; 8+ messages in thread
From: Theodore Ts'o @ 2017-02-02 22:59 UTC (permalink / raw)
To: Ext4 Developers List; +Cc: Theodore Ts'o
This ioctl is modeled after the xfs's XFS_IOC_GOINGDOWN ioctl. (In
fact, it uses the same code points.)
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
fs/ext4/ext4.h | 10 ++++++++++
fs/ext4/ioctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++
fs/ext4/super.c | 2 +-
3 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94064b7ce16c..eae92f1a52e1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -679,6 +679,16 @@ struct fsxattr {
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+#define EXT4_IOC_GOiNGDOWN _IOR ('X', 125, __u32)
+
+/*
+ * Flags for going down operation
+ */
+#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */
+#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
+#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d534399cf607..6d5443cf4a47 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -442,6 +442,45 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
return iflags;
}
+int ext4_goingdown(struct super_block *sb, unsigned long arg)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ __u32 flags;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, (__u32 __user *)arg))
+ return -EFAULT;
+
+ if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
+ return -EINVAL;
+
+ if (ext4_forced_shutdown(sbi))
+ return 0;
+
+ switch (flags) {
+ case EXT4_GOING_FLAGS_DEFAULT:
+ freeze_bdev(sb->s_bdev);
+ set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ thaw_bdev(sb->s_bdev, sb);
+ break;
+ case EXT4_GOING_FLAGS_LOGFLUSH:
+ if (sbi->s_journal && !is_journal_aborted(sbi->s_journal))
+ (void) ext4_force_commit(sb);
+ /* fall through */
+ case EXT4_GOING_FLAGS_NOLOGFLUSH:
+ if (sbi->s_journal && !is_journal_aborted(sbi->s_journal))
+ (void) jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -893,6 +932,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return 0;
}
+ case EXT4_IOC_GOiNGDOWN:
+ return ext4_goingdown(sb, arg);
default:
return -ENOTTY;
}
@@ -959,6 +1000,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY:
+ case EXT4_IOC_GOiNGDOWN:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 829e4a7b59e4..8db181c4dad6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4797,7 +4797,7 @@ static int ext4_freeze(struct super_block *sb)
*/
static int ext4_unfreeze(struct super_block *sb)
{
- if (sb->s_flags & MS_RDONLY)
+ if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb)))
return 0;
if (EXT4_SB(sb)->s_journal) {
--
2.11.0.rc0.7.gbe5a750
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [RFC/PATCH 0/3] Implement XFS's GOINGDOWN ioctl for ext4
2017-02-02 22:59 [RFC/PATCH 0/3] Implement XFS's GOINGDOWN ioctl for ext4 Theodore Ts'o
` (2 preceding siblings ...)
2017-02-02 22:59 ` [RFC/PATCH 3/3] ext4: add EXT4_IOC_GOINGDOWN ioctl Theodore Ts'o
@ 2017-02-03 7:05 ` Amir Goldstein
2017-02-03 15:09 ` Theodore Ts'o
3 siblings, 1 reply; 8+ messages in thread
From: Amir Goldstein @ 2017-02-03 7:05 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List, fstests
On Fri, Feb 3, 2017 at 12:59 AM, Theodore Ts'o <tytso@mit.edu> wrote:
> This is a proof of concept implementation of XFS's GOINGDOWN ioctl for
> ext4.
>
> I've tried to replicate XFS's semantics (as much as they can translate
> to ext4). This test series is currently *not* passing xfstests.
> Specifically, the following tests are failing:
>
> generic/042 generic/044 generic/045 generic/046
>
> As near as I can tell, these tests are sensitive to how the file
> system implements and handles delayed allocation. In particular,
> generic/04[456] assumes that if you do a delayed allocation write of
> 64k, and then truncate the file to 64k or 32k, the file will either be
> zero length, or i_size is non-zero, it MUST NOT have no extents.
>
> It's not clear to me why this needs to be true. The test description
> says "test for NULL files problem". But since POSIX states that how
> truncate will handle truncates beyond i_size is unspecified, and what
> happens after a crash w/o an fsync() is similarly unspecified, it's
> not clear what is the best way to deal with this.
>
> One is to simply use a different ioctl code point, to avoid enabling
> the xfstests tests. Another to modify the tests to skip them for
> ext4. Or I can teach kvm-xfstests and gce-xfstests to ignore these
> test failures by skipping the tests in my test framework.
>
> Comments, thoughts?
I have a naive question about generic implementation:
We already have mnt_want_write() hooks in generic vfs code,
so it should be easy enough to set the shutdown bit on sb and return -EIO there.
Wouldn't it be better to add mnt_want_read() hooks in vfs helpers
instead of duplicating
the fs specific hooks? I hear f2fs is yet another potential customer??
I realize this needs more thinking and more work, so
no intention of blocking your immediate production needs, just wondering
about future implementation that is more robust and fs agnostic.
>
> - Ted
>
> P.S. So I'm not implementing this just for increased xfstests
> coverage; I have an operational need for this functionality on
> production systems. The short version is if you are tearing down a
> container, and you don't care about its scratch space, being able to
> drop all of the writes from being sent to the storage device (which
> might be over the network, say using iSCSI), is a Good Thing.
>
> Theodore Ts'o (3):
> ext4: rename s_resize_flags to s_ext4_flags
> ext4: add shutdown bit and check for it
> ext4: add EXT4_IOC_GOINGDOWN ioctl
>
> fs/ext4/ext4.h | 26 +++++++++++++++++++++++---
> fs/ext4/ext4_jbd2.c | 2 ++
> fs/ext4/file.c | 12 ++++++++++++
> fs/ext4/fsync.c | 3 +++
> fs/ext4/ialloc.c | 3 +++
> fs/ext4/inode.c | 25 +++++++++++++++++++++++++
> fs/ext4/ioctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> fs/ext4/namei.c | 12 ++++++++++++
> fs/ext4/resize.c | 5 +++--
> fs/ext4/super.c | 2 +-
> fs/ext4/xattr.c | 3 +++
> 11 files changed, 129 insertions(+), 6 deletions(-)
>
> --
> 2.11.0.rc0.7.gbe5a750
>
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 8+ messages in thread