From: "Jörn Engel" <joern@wohnheim.fh-wedel.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH COW] MAD COW
Date: Thu, 6 May 2004 15:21:05 +0200 [thread overview]
Message-ID: <20040506132105.GF7930@wohnheim.fh-wedel.de> (raw)
In-Reply-To: <20040506131731.GA7930@wohnheim.fh-wedel.de>
Patch 5
Jörn
--
A victorious army first wins and then seeks battle.
-- Sun Tzu
Allow COW behaviour for hard links, depending on an inode flag.
Semantics:
o Files with S_MADCOWLINK do cow. (yes, really ;)
o Directories with S_MADCOWLINK pass the flag on to new files and subdirectories.
o If in doubt, return -EMLINK and let the user sort it out:
- When linking non-cow files to cow directories.
- When moving non-cow files/directories to cow directories.
- When moving cow files/directories to non-cow directories.
Thanks to Sytse and Andrew for tips.
fs/ext2/inode.c | 11 ++++++
fs/ext2/super.c | 2 -
fs/ext3/inode.c | 11 ++++++
fs/ext3/super.c | 2 -
fs/fcntl.c | 24 ++++++++++++++
fs/namei.c | 76 +++++++++++++++++++++++++++++++++++++++++++++-
fs/open.c | 79 +++++++++++++++++++++++++++++++++++++++++++++---
include/linux/ext2_fs.h | 1
include/linux/ext3_fs.h | 1
include/linux/fcntl.h | 3 +
include/linux/fs.h | 4 ++
11 files changed, 205 insertions(+), 9 deletions(-)
--- linux-2.6.5cow/fs/open.c~break_madcow 2004-05-04 16:31:50.000000000 +0200
+++ linux-2.6.5cow/fs/open.c 2004-05-06 01:02:57.000000000 +0200
@@ -723,6 +723,71 @@
return error;
}
+/*
+ * index - find the first occurrence of a character in a string
+ * @s: NUL-terminated string to search
+ * @c: character to look for; converted to char before comparing
+ *
+ * The terminating NUL counts as part of the string, so index(s, '\0')
+ * returns a pointer to the terminator, as strchr() does.
+ *
+ * Returns a pointer to the first match, or NULL if @c does not occur.
+ */
+char *index(const char *s, int c)
+{
+	const char ch = (char) c;
+
+	for (;; s++) {
+		if (*s == ch)
+			return (char *) s;
+		/* checked after the compare so that ch == '\0' is found */
+		if (!*s)
+			return NULL;
+	}
+}
+
+/*
+ * rindex - find the last occurrence of a character in a string
+ * @s: NUL-terminated string to search
+ * @c: character to look for; converted to char before comparing
+ *
+ * The terminating NUL counts as part of the string, so rindex(s, '\0')
+ * returns a pointer to the terminator, as strrchr() does.
+ *
+ * Returns a pointer to the last match, or NULL if @c does not occur.
+ */
+char *rindex(const char *s, int c)
+{
+	const char ch = (char) c;
+	const char *last = NULL;
+
+	/* do/while so the terminator itself is compared before we stop */
+	do {
+		if (*s == ch)
+			last = s;
+	} while (*s++);
+
+	return (char *) last;
+}
+
+/*
+ * madcow_temp_name - build the temporary path used while breaking a link
+ * @name: path of the file whose link is being broken
+ *
+ * The result is the directory part of @name (everything up to and
+ * including the last '/', or nothing when @name contains no '/') followed
+ * by a fixed file name.
+ *
+ * Returns a kmalloc()ed string, or NULL if the allocation failed.
+ */
+char *madcow_temp_name(const char *name)
+{
+	const char temp[] = "__MADCOW_BREAK_LINK"; /* FIXME: add random part */
+	char *slash = rindex(name, '/');
+	size_t prefix = 0;
+	char *buf;
+
+	if (slash)
+		prefix = slash + 1 - name;
+
+	/* sizeof(temp) already accounts for the trailing NUL */
+	buf = kmalloc(sizeof(temp) + prefix, GFP_KERNEL);
+	if (buf) {
+		memcpy(buf, name, prefix);
+		memcpy(buf + prefix, temp, sizeof(temp));
+	}
+	return buf;
+}
+
+/*
+ * Forward declarations of helpers implemented outside this file.
+ * do_rename() is defined in fs/namei.c, where this patch removes its
+ * "static inline" so that it can be called from here.
+ * NOTE(review): do_copyfile() presumably comes from the copyfile patch of
+ * this series, and vfs_rename_other() is not called in the code added
+ * here - confirm, and consider moving these into a shared header.
+ */
+int vfs_rename_other(struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
+int do_copyfile(const char *from, const char *to, int mode);
+int do_rename(const char * oldname, const char * newname);
+
+/*
+ * madcow_break_link - give the path @from a private copy of its file
+ * @from: path of the file whose hard link should be broken
+ *
+ * Copies @from to a temporary name in the same directory and then renames
+ * that copy over @from.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary name cannot be allocated,
+ * and -EMLINK if either the copy or the rename fails.
+ */
+int madcow_break_link(const char *from)
+{
+	int err, ret = -EMLINK;
+	char *to = madcow_temp_name(from);
+
+	/* madcow_temp_name() returns NULL when its kmalloc() fails */
+	if (!to)
+		return -ENOMEM;
+
+	printk("break link '%s' -> '%s'\n", from, to);
+	err = do_copyfile(from, to, -1);
+	printk("do_copyfile returned %d\n", err);
+	if (err)
+		goto out;
+
+	/* NOTE(review): if this fails, the temporary copy is left behind */
+	err = do_rename(to, from);
+	printk("do_rename returned %d\n", err);
+	if (err)
+		goto out;
+
+	ret = 0;
+out:
+	kfree(to);
+	return ret;
+}
+
/*
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
@@ -746,13 +811,19 @@
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;
if (namei_flags & O_TRUNC)
- namei_flags |= 2;
+ namei_flags |= FMODE_WRITE;
error = open_namei(filename, namei_flags, mode, &nd);
- if (!error)
- return dentry_open(nd.dentry, nd.mnt, flags);
- return ERR_PTR(error);
+ if (error == -EMLINK) {
+ error = madcow_break_link(filename);
+ if (!error) /*retry*/
+ error = open_namei(filename, namei_flags, mode, &nd);
+ }
+ if (error)
+ return ERR_PTR(error);
+
+ return dentry_open(nd.dentry, nd.mnt, flags);
}
EXPORT_SYMBOL(filp_open);
--- linux-2.6.5cow/fs/ext2/inode.c~madcow 2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/fs/ext2/inode.c 2004-04-27 17:02:33.000000000 +0200
@@ -1021,7 +1021,8 @@
unsigned int flags = EXT2_I(inode)->i_flags;
spin_lock(inode->i_lock);
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME
+ | S_DIRSYNC | S_MADCOWLINK);
if (flags & EXT2_SYNC_FL)
inode->i_flags |= S_SYNC;
if (flags & EXT2_APPEND_FL)
@@ -1032,6 +1033,8 @@
inode->i_flags |= S_NOATIME;
if (flags & EXT2_DIRSYNC_FL)
inode->i_flags |= S_DIRSYNC;
+ if (flags & EXT2_MADCOWLINK_FL)
+ inode->i_flags |= S_MADCOWLINK;
spin_unlock(inode->i_lock);
}
@@ -1159,6 +1162,12 @@
if (IS_ERR(raw_inode))
return -EIO;
+ /* vfs inode holds the current MADCOWLINK flag, so we have to update
+ * ei->i_flags first */
+ ei->i_flags &= ~EXT2_MADCOWLINK_FL;
+ if (inode_flags(inode, S_MADCOWLINK))
+ ei->i_flags |= EXT2_MADCOWLINK_FL;
+
/* For fields not not tracking in the in-memory inode,
* initialise them to zero for new inodes. */
if (ei->i_state & EXT2_STATE_NEW)
--- linux-2.6.5cow/fs/ext2/super.c~madcow 2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/fs/ext2/super.c 2004-04-27 17:03:10.000000000 +0200
@@ -1015,7 +1015,7 @@
.name = "ext2",
.get_sb = ext2_get_sb,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_MADCOW,
};
static int __init init_ext2_fs(void)
--- linux-2.6.5cow/fs/ext3/inode.c~madcow 2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/fs/ext3/inode.c 2004-04-27 17:04:41.000000000 +0200
@@ -2448,7 +2448,8 @@
unsigned int flags = EXT3_I(inode)->i_flags;
spin_lock(inode->i_lock);
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME
+ | S_DIRSYNC | S_MADCOWLINK);
if (flags & EXT3_SYNC_FL)
inode->i_flags |= S_SYNC;
if (flags & EXT3_APPEND_FL)
@@ -2459,6 +2460,8 @@
inode->i_flags |= S_NOATIME;
if (flags & EXT3_DIRSYNC_FL)
inode->i_flags |= S_DIRSYNC;
+ if (flags & EXT3_MADCOWLINK_FL)
+ inode->i_flags |= S_MADCOWLINK;
spin_unlock(inode->i_lock);
}
@@ -2594,6 +2597,12 @@
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ /* vfs inode holds the current MADCOWLINK flag, so we have to update
+ * ei->i_flags first */
+ ei->i_flags &= ~EXT3_MADCOWLINK_FL;
+ if (inode_flags(inode, S_MADCOWLINK))
+ ei->i_flags |= EXT3_MADCOWLINK_FL;
+
/* For fields not not tracking in the in-memory inode,
* initialise them to zero for new inodes. */
if (ei->i_state & EXT3_STATE_NEW)
--- linux-2.6.5cow/fs/ext3/super.c~madcow 2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/fs/ext3/super.c 2004-04-27 17:05:22.000000000 +0200
@@ -2004,7 +2004,7 @@
.name = "ext3",
.get_sb = ext3_get_sb,
.kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV | FS_MADCOW,
};
static int __init init_ext3_fs(void)
--- linux-2.6.5cow/fs/fcntl.c~madcow 2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/fs/fcntl.c 2004-04-27 16:55:42.000000000 +0200
@@ -282,6 +282,24 @@
EXPORT_SYMBOL(f_delown);
+/*
+ * fcntl_setmadcow - set or clear S_MADCOWLINK on the inode behind @filp
+ * @filp: open file whose inode is modified
+ * @arg: non-zero sets the flag, zero clears it
+ *
+ * Returns 0 on success, -EINVAL if the filesystem type does not advertise
+ * FS_MADCOW, and -EPERM if the caller neither owns the inode nor has
+ * CAP_FOWNER.
+ */
+static long fcntl_setmadcow(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	if (!(inode->i_sb->s_type->fs_flags & FS_MADCOW))
+		return -EINVAL;
+
+	/*
+	 * An open descriptor alone must not be enough to change an
+	 * attribute of somebody else's inode.
+	 */
+	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	spin_lock(&inode->i_lock);
+	if (arg)
+		inode->i_flags |= S_MADCOWLINK;
+	else
+		inode->i_flags &= ~S_MADCOWLINK;
+	spin_unlock(&inode->i_lock);
+
+	/* done after dropping i_lock; queues the inode for writeback */
+	mark_inode_dirty(inode);
+	return 0;
+}
+
static long do_fcntl(unsigned int fd, unsigned int cmd,
unsigned long arg, struct file * filp)
{
@@ -346,6 +364,12 @@
case F_NOTIFY:
err = fcntl_dirnotify(fd, filp, arg);
break;
+ case F_SETMADCOW:
+ err = fcntl_setmadcow(filp, arg);
+ break;
+ case F_GETMADCOW:
+ err = !!inode_flags(filp->f_dentry->d_inode, S_MADCOWLINK);
+ break;
default:
break;
}
--- linux-2.6.5cow/fs/namei.c~madcow 2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/fs/namei.c 2004-05-02 15:07:57.000000000 +0200
@@ -223,6 +223,41 @@
return security_inode_permission(inode, mask, nd);
}
+/*
+ * Set S_MADCOWLINK on @inode. The flag is changed under inode->i_lock and
+ * the inode is marked dirty after the lock has been dropped.
+ */
+static inline void set_madcowflag(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ inode->i_flags |= S_MADCOWLINK;
+ spin_unlock(&inode->i_lock);
+ mark_inode_dirty(inode);
+}
+
+/*
+ * A regular file that carries S_MADCOWLINK and has more than one hard
+ * link must not be written to through any of its names.
+ *
+ * The long-term plan is to copy the inode here, attach the copy to the
+ * dentry and drop one reference on the old inode. Until then this only
+ * reports the situation with -EMLINK and leaves the messy part to the
+ * caller. That is not really copy-on-write, but it is far better than
+ * silently writing to fifty files at once and noticing months later.
+ *
+ * Yes, this breaks the kernel interface and is simply wrong. That is
+ * intentional, so that the code is not merged before it is complete.
+ *
+ * Returns -EMLINK for a shared madcow file, 0 in every other case.
+ */
+static int break_madcow_link(struct inode *inode)
+{
+	int shared_cow_file = inode_flags(inode, S_MADCOWLINK) &&
+			      S_ISREG(inode->i_mode) &&
+			      inode->i_nlink >= 2;
+
+	/* TODO: As soon as sendfile can do normal file copies, use that
+	 * and always return 0 */
+	return shared_cow_file ? -EMLINK : 0;
+}
+
/*
* get_write_access() gets write permission for a file.
* put_write_access() releases this write permission.
@@ -243,6 +278,10 @@
int get_write_access(struct inode * inode)
{
+ int error = break_madcow_link(inode);
+ if (error)
+ return error;
+
spin_lock(&inode->i_lock);
if (atomic_read(&inode->i_writecount) < 0) {
spin_unlock(&inode->i_lock);
@@ -1146,6 +1185,8 @@
DQUOT_INIT(dir);
error = dir->i_op->create(dir, dentry, mode, nd);
if (!error) {
+ if (inode_flags(dir, S_MADCOWLINK))
+ set_madcowflag(dentry->d_inode);
inode_dir_notify(dir, DN_CREATE);
security_inode_post_create(dir, dentry, mode);
}
@@ -1520,6 +1561,8 @@
DQUOT_INIT(dir);
error = dir->i_op->mkdir(dir, dentry, mode);
if (!error) {
+ if (inode_flags(dir, S_MADCOWLINK))
+ set_madcowflag(dentry->d_inode);
inode_dir_notify(dir, DN_CREATE);
security_inode_post_mkdir(dir,dentry, mode);
}
@@ -1823,6 +1866,13 @@
return -EXDEV;
/*
+ * Madcowlink attribute is inherited from directory, but here,
+ * the inode already has one. If they don't match, bail out.
+ */
+ if (inode_flags(dir, S_MADCOWLINK) != inode_flags(inode, S_MADCOWLINK))
+ return -EMLINK;
+
+ /*
* A link to an append-only or immutable file cannot be created.
*/
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
@@ -2003,6 +2053,26 @@
return error;
}
+/*
+ * madcow_allow_rename - check the madcowlink flag before a rename
+ * @old_dir: directory that currently holds @old_dentry
+ * @old_dentry: entry being moved
+ * @new_dir: directory the entry is being moved into
+ *
+ * Children inherit the flag from their directory at creation time, so
+ * moving a regular file or directory under a directory whose flag differs
+ * from its own is refused.
+ *
+ * Returns 0 if the rename may proceed, -EMLINK otherwise.
+ */
+static int madcow_allow_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+
+	/* source and target share directory: allow */
+	if (old_dir == new_dir)
+		return 0;
+	/* moved inode and target directory have identical madcowlink flag: allow */
+	if (inode_flags(old_inode, S_MADCOWLINK) == inode_flags(new_dir, S_MADCOWLINK))
+		return 0;
+	/*
+	 * The flags differ. We could always fail here, but the madcowlink
+	 * flag is only defined for regular files and directories, so refuse
+	 * only those and let special files through.
+	 */
+	if (S_ISREG(old_inode->i_mode) || S_ISDIR(old_inode->i_mode))
+		return -EMLINK;
+	return 0;
+}
+
int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -2026,6 +2096,10 @@
if (!old_dir->i_op || !old_dir->i_op->rename)
return -EPERM;
+ error = madcow_allow_rename(old_dir, old_dentry, new_dir);
+ if (error)
+ return error;
+
DQUOT_INIT(old_dir);
DQUOT_INIT(new_dir);
@@ -2118,7 +2118,7 @@
return error;
}
-static inline int do_rename(const char * oldname, const char * newname)
+int do_rename(const char * oldname, const char * newname)
{
int error = 0;
struct dentry * old_dir, * new_dir;
--- linux-2.6.5cow/include/linux/ext2_fs.h~madcow 2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/include/linux/ext2_fs.h 2004-04-27 17:01:30.000000000 +0200
@@ -192,6 +192,7 @@
#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define EXT2_MADCOWLINK_FL 0x00040000 /* COW behaviour for hard links */
#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
--- linux-2.6.5cow/include/linux/ext3_fs.h~madcow 2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/include/linux/ext3_fs.h 2004-04-27 17:03:46.000000000 +0200
@@ -185,6 +185,7 @@
#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define EXT3_MADCOWLINK_FL 0x00040000 /* COW behaviour for hard links */
#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
--- linux-2.6.5cow/include/linux/fcntl.h~madcow 2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/include/linux/fcntl.h 2004-04-27 16:49:45.000000000 +0200
@@ -23,6 +23,9 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+#define F_SETMADCOW (F_LINUX_SPECIFIC_BASE+3)
+#define F_GETMADCOW (F_LINUX_SPECIFIC_BASE+4)
+
#ifdef __KERNEL__
#if BITS_PER_LONG == 32
--- linux-2.6.5cow/include/linux/fs.h~madcow 2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/include/linux/fs.h 2004-04-30 08:40:04.000000000 +0200
@@ -90,6 +90,7 @@
/* public flags for file_system_type */
#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
+#define FS_MADCOW 4
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
* as nfs_rename() will be cleaned up
@@ -139,6 +140,9 @@
#define S_NOQUOTA 64 /* Inode is not counted to quota */
#define S_DIRSYNC 128 /* Directory modifications are synchronous */
#define S_NOCMTIME 256 /* Do not update file c/mtime */
+#define S_MADCOWLINK 512 /* Hard links have copy-on-write semantics.
+ * This flag has no meaning for directories,
+ * but is inherited by directory children */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
next prev parent reply other threads:[~2004-05-06 13:27 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-05-06 13:17 [ANNOUNCEMENT PATCH COW] proof of concept impementation of cowlinks Jörn Engel
2004-05-06 13:18 ` [PATCH COW] generic_sendpage Jörn Engel
2004-05-06 13:19 ` [PATCH COW] sendfile Jörn Engel
2004-05-06 13:19 ` [PATCH COW] copyfile Jörn Engel
2004-05-06 13:20 ` [PATCH COW] lock_flags Jörn Engel
2004-05-06 13:21 ` Jörn Engel [this message]
2004-05-08 13:45 ` [ANNOUNCEMENT PATCH COW] proof of concept impementation of cowlinks Denis Vlasenko
2004-05-08 22:10 ` Pavel Machek
2004-05-09 14:09 ` Denis Vlasenko
2004-05-09 21:53 ` Pavel Machek
2004-05-10 15:44 ` Jörn Engel
2004-05-10 15:51 ` Pavel Machek
2004-05-10 15:56 ` Jörn Engel
2004-05-12 0:26 ` Jamie Lokier
2004-05-13 10:56 ` Jörn Engel
2004-05-12 20:29 ` Rob Landley
2004-05-08 22:48 ` Pavel Machek
2004-05-10 15:53 ` Jörn Engel
2004-05-10 19:26 ` Jan Harkes
2004-05-11 10:02 ` Jörn Engel
2004-05-11 14:08 ` Jan Harkes
2004-05-11 14:18 ` Jan Harkes
2004-05-11 14:33 ` Jörn Engel
2004-05-21 23:23 ` Rob Landley
2004-05-25 22:46 ` Jan Harkes
2004-05-11 15:40 ` Steve French
2004-05-11 15:58 ` Jörn Engel
2004-05-10 5:15 ` Eric W. Biederman
2004-05-10 15:59 ` Jörn Engel
2004-05-12 16:39 ` Rob Landley
2004-05-20 13:49 ` Pavel Machek
2004-05-25 21:55 ` Rob Landley
2004-05-25 22:08 ` Pavel Machek
2004-05-25 23:16 ` Rob Landley
2004-05-26 0:16 ` Ian Stirling
2004-05-26 9:52 ` Jörn Engel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040506132105.GF7930@wohnheim.fh-wedel.de \
--to=joern@wohnheim.fh-wedel.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.