All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jörn Engel" <joern@wohnheim.fh-wedel.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH COW] MAD COW
Date: Thu, 6 May 2004 15:21:05 +0200	[thread overview]
Message-ID: <20040506132105.GF7930@wohnheim.fh-wedel.de> (raw)
In-Reply-To: <20040506131731.GA7930@wohnheim.fh-wedel.de>

Patch 5

Jörn

-- 
A victorious army first wins and then seeks battle.
-- Sun Tzu

Allow COW behaviour for hard links, depending on an inode flag.

Semantics:
o Files with S_MADCOWLINK do cow. (yes, really ;)
o Directories with S_MADCOWLINK pass the flag on to new files and subdirectories.
o If in doubt, return -EMLINK and let the user sort it out:
  - When linking non-cow files to cow directories.
  - When moving non-cow files/directories to cow directories.
  - When moving cow files/directories to non-cow directories.

Thanks to Sytse and Andrew for tips.

 fs/ext2/inode.c         |   11 ++++++
 fs/ext2/super.c         |    2 -
 fs/ext3/inode.c         |   11 ++++++
 fs/ext3/super.c         |    2 -
 fs/fcntl.c              |   24 ++++++++++++++
 fs/namei.c              |   76 +++++++++++++++++++++++++++++++++++++++++++++-
 fs/open.c               |   79 +++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/ext2_fs.h |    1 
 include/linux/ext3_fs.h |    1 
 include/linux/fcntl.h   |    3 +
 include/linux/fs.h      |    4 ++
 11 files changed, 205 insertions(+), 9 deletions(-)


--- linux-2.6.5cow/fs/open.c~break_madcow	2004-05-04 16:31:50.000000000 +0200
+++ linux-2.6.5cow/fs/open.c	2004-05-06 01:02:57.000000000 +0200
@@ -723,6 +723,71 @@
 	return error;
 }
 
+char *index(const char *s, int c)
+{
+	/* Same contract as strchr(): c is compared as a char, and c == 0
+	 * yields a pointer to the terminating NUL instead of NULL. */
+	for (; *s != (char) c; s++)
+		if (!*s)
+			return NULL;
+	return (char *) s;
+}
+
+char *rindex(const char *s, int c)
+{
+	const char *last = NULL;
+	/* As strrchr(): compare as char, and c == 0 matches the final NUL. */
+	do {
+		if (*s == (char) c)
+			last = s;
+	} while (*s++);
+	return (char *) last;
+}
+
+/* Return name with its last path component replaced by the temp name. */
+char *madcow_temp_name(const char *name)
+{
+	const char temp[] = "__MADCOW_BREAK_LINK"; /* FIXME: add random part */
+	char *slash = rindex(name, '/');
+	/* keep everything up to and including the last '/', if there is one */
+	size_t prefix = slash ? (size_t)(slash - name) + 1 : 0;
+	char *buf = kmalloc(prefix + sizeof(temp), GFP_KERNEL);
+
+	if (buf) {
+		/* sizeof(temp) counts the NUL, so buf ends up terminated */
+		memcpy(buf, name, prefix);
+		memcpy(buf + prefix, temp, sizeof(temp));
+	}
+	return buf;	/* NULL if kmalloc failed; the caller kfree()s it */
+}
+
+int vfs_rename_other(struct inode *, struct dentry *,
+		struct inode *, struct dentry *); /* NOTE(review): not called in this hunk */
+int do_copyfile(const char *from, const char *to, int mode); /* presumably from the copyfile patch - confirm */
+int do_rename(const char * oldname, const char * newname); /* fs/namei.c; this patch drops its 'static inline' */
+
+/* Break the hard link behind 'from': do_copyfile() it to a temporary name
+ * in the same directory, then do_rename() that copy over the original name.
+ * Returns 0, -ENOMEM, or -EMLINK if either the copy or the rename fails. */
+int madcow_break_link(const char *from)
+{
+	int err;
+	char *to = madcow_temp_name(from);
+
+	if (!to)	/* no temp name: don't pass NULL to printk/do_copyfile */
+		return -ENOMEM;
+	printk("break link '%s' -> '%s'\n", from, to);
+	err = do_copyfile(from, to, -1);
+	printk("do_copyfile returned %d\n", err);
+	if (!err) {
+		err = do_rename(to, from);
+		printk("do_rename returned %d\n", err);
+	}
+	/* NOTE: a failed rename leaves the temporary copy behind. */
+	kfree(to);
+	return err ? -EMLINK : 0;
+}
+
 /*
  * Note that while the flag value (low two bits) for sys_open means:
  *	00 - read-only
@@ -746,13 +811,19 @@
 	if ((namei_flags+1) & O_ACCMODE)
 		namei_flags++;
 	if (namei_flags & O_TRUNC)
-		namei_flags |= 2;
+		namei_flags |= FMODE_WRITE;
 
 	error = open_namei(filename, namei_flags, mode, &nd);
-	if (!error)
-		return dentry_open(nd.dentry, nd.mnt, flags);
 
-	return ERR_PTR(error);
+	if (error == -EMLINK) {
+		error = madcow_break_link(filename);
+		if (!error) /*retry*/
+			error = open_namei(filename, namei_flags, mode, &nd);
+	}
+	if (error)
+		return ERR_PTR(error);
+
+	return dentry_open(nd.dentry, nd.mnt, flags);
 }
 
 EXPORT_SYMBOL(filp_open);
--- linux-2.6.5cow/fs/ext2/inode.c~madcow	2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/fs/ext2/inode.c	2004-04-27 17:02:33.000000000 +0200
@@ -1021,7 +1021,8 @@
 	unsigned int flags = EXT2_I(inode)->i_flags;
 
 	spin_lock(inode->i_lock);
-	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME
+			| S_DIRSYNC | S_MADCOWLINK);
 	if (flags & EXT2_SYNC_FL)
 		inode->i_flags |= S_SYNC;
 	if (flags & EXT2_APPEND_FL)
@@ -1032,6 +1033,8 @@
 		inode->i_flags |= S_NOATIME;
 	if (flags & EXT2_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
+	if (flags & EXT2_MADCOWLINK_FL)
+		inode->i_flags |= S_MADCOWLINK;
 	spin_unlock(inode->i_lock);
 }
 
@@ -1159,6 +1162,12 @@
 	if (IS_ERR(raw_inode))
  		return -EIO;
 
+	/* vfs inode holds the current MADCOWLINK flag, so we have to update
+	 * ei->i_flags first */
+	ei->i_flags &= ~EXT2_MADCOWLINK_FL;
+	if (inode_flags(inode, S_MADCOWLINK))
+		ei->i_flags |= EXT2_MADCOWLINK_FL;
+
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
 	if (ei->i_state & EXT2_STATE_NEW)
--- linux-2.6.5cow/fs/ext2/super.c~madcow	2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/fs/ext2/super.c	2004-04-27 17:03:10.000000000 +0200
@@ -1015,7 +1015,7 @@
 	.name		= "ext2",
 	.get_sb		= ext2_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_MADCOW,
 };
 
 static int __init init_ext2_fs(void)
--- linux-2.6.5cow/fs/ext3/inode.c~madcow	2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/fs/ext3/inode.c	2004-04-27 17:04:41.000000000 +0200
@@ -2448,7 +2448,8 @@
 	unsigned int flags = EXT3_I(inode)->i_flags;
 
 	spin_lock(inode->i_lock);
-	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME
+			| S_DIRSYNC | S_MADCOWLINK);
 	if (flags & EXT3_SYNC_FL)
 		inode->i_flags |= S_SYNC;
 	if (flags & EXT3_APPEND_FL)
@@ -2459,6 +2460,8 @@
 		inode->i_flags |= S_NOATIME;
 	if (flags & EXT3_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
+	if (flags & EXT3_MADCOWLINK_FL)
+		inode->i_flags |= S_MADCOWLINK;
 	spin_unlock(inode->i_lock);
 }
 
@@ -2594,6 +2597,12 @@
 	struct buffer_head *bh = iloc->bh;
 	int err = 0, rc, block;
 
+	/* vfs inode holds the current MADCOWLINK flag, so we have to update
+	 * ei->i_flags first */
+	ei->i_flags &= ~EXT3_MADCOWLINK_FL;
+	if (inode_flags(inode, S_MADCOWLINK))
+		ei->i_flags |= EXT3_MADCOWLINK_FL;
+
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
 	if (ei->i_state & EXT3_STATE_NEW)
--- linux-2.6.5cow/fs/ext3/super.c~madcow	2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/fs/ext3/super.c	2004-04-27 17:05:22.000000000 +0200
@@ -2004,7 +2004,7 @@
 	.name		= "ext3",
 	.get_sb		= ext3_get_sb,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.fs_flags	= FS_REQUIRES_DEV | FS_MADCOW,
 };
 
 static int __init init_ext3_fs(void)
--- linux-2.6.5cow/fs/fcntl.c~madcow	2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/fs/fcntl.c	2004-04-27 16:55:42.000000000 +0200
@@ -282,6 +282,24 @@
 
 EXPORT_SYMBOL(f_delown);
 
+/* F_SETMADCOW: set (arg != 0) or clear (arg == 0) S_MADCOWLINK on filp. */
+static long fcntl_setmadcow(struct file *filp, unsigned long arg)
+{
+	struct inode *target = filp->f_dentry->d_inode;
+
+	/* Only filesystem types that advertise FS_MADCOW are accepted. */
+	if (!(target->i_sb->s_type->fs_flags & FS_MADCOW))
+		return -EINVAL;
+	/* FIXME: no permission check yet; should this return -EPERM? */
+
+	spin_lock(&target->i_lock);
+	target->i_flags = arg ? (target->i_flags | S_MADCOWLINK)
+			      : (target->i_flags & ~S_MADCOWLINK);
+	spin_unlock(&target->i_lock);
+	mark_inode_dirty(target);
+	return 0;
+}
+
 static long do_fcntl(unsigned int fd, unsigned int cmd,
 		     unsigned long arg, struct file * filp)
 {
@@ -346,6 +364,12 @@
 		case F_NOTIFY:
 			err = fcntl_dirnotify(fd, filp, arg);
 			break;
+		case F_SETMADCOW:
+			err = fcntl_setmadcow(filp, arg);
+			break;
+		case F_GETMADCOW:
+			err = !!inode_flags(filp->f_dentry->d_inode, S_MADCOWLINK);
+			break;
 		default:
 			break;
 	}
--- linux-2.6.5cow/fs/namei.c~madcow	2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/fs/namei.c	2004-05-02 15:07:57.000000000 +0200
@@ -223,6 +223,41 @@
 	return security_inode_permission(inode, mask, nd);
 }
 
+static inline void set_madcowflag(struct inode *inode) /* turn on S_MADCOWLINK */
+{
+	spin_lock(&inode->i_lock);	/* i_flags is modified under i_lock */
+	inode->i_flags |= S_MADCOWLINK;
+	spin_unlock(&inode->i_lock);
+	mark_inode_dirty(inode);	/* so the fs writes the new flag back */
+}
+
+/*
+ * Files with the S_MADCOWLINK flag set cannot be written to, if more
+ * than one hard link to them exists.  Ultimately, this function
+ * should copy the inode, assign the copy to the dentry and lower use
+ * count of the old inode - one day.
+ * For now, it is sufficient to return an error and let userspace
+ * deal with the messy part.  Not exactly the meaning of
+ * copy-on-write, but much better than writing to fifty files at once
+ * and noticing months later.
+ *
+ * Yes, this breaks the kernel interface and is simply wrong.  This
+ * is intended behaviour, so Linus will not merge the code before
+ * it is complete.  Or will he?
+ */
+static int break_madcow_link(struct inode *inode)
+{
+	/* Only a regular file that carries the flag and has more than one
+	 * hard link is refused; the tests run in this order and stop early. */
+	int must_break = inode_flags(inode, S_MADCOWLINK) &&
+			 S_ISREG(inode->i_mode) &&
+			 inode->i_nlink >= 2;
+
+	/* TODO: As soon as sendfile can do normal file copies, use that
+	 * and always return 0 */
+	return must_break ? -EMLINK : 0;
+}
+
 /*
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
@@ -243,6 +278,10 @@
 
 int get_write_access(struct inode * inode)
 {
+	int error = break_madcow_link(inode);
+	if (error)
+		return error;
+
 	spin_lock(&inode->i_lock);
 	if (atomic_read(&inode->i_writecount) < 0) {
 		spin_unlock(&inode->i_lock);
@@ -1146,6 +1185,8 @@
 	DQUOT_INIT(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
 	if (!error) {
+		if (inode_flags(dir, S_MADCOWLINK))
+			set_madcowflag(dentry->d_inode);
 		inode_dir_notify(dir, DN_CREATE);
 		security_inode_post_create(dir, dentry, mode);
 	}
@@ -1520,6 +1561,8 @@
 	DQUOT_INIT(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
 	if (!error) {
+		if (inode_flags(dir, S_MADCOWLINK))
+			set_madcowflag(dentry->d_inode);
 		inode_dir_notify(dir, DN_CREATE);
 		security_inode_post_mkdir(dir,dentry, mode);
 	}
@@ -1823,6 +1866,13 @@
 		return -EXDEV;
 
 	/*
+	 * Madcowlink attribute is inherited from directory, but here,
+	 * the inode already has one.  If they don't match, bail out.
+	 */
+	if (inode_flags(dir, S_MADCOWLINK) != inode_flags(inode, S_MADCOWLINK))
+		return -EMLINK;
+
+	/*
 	 * A link to an append-only or immutable file cannot be created.
 	 */
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
@@ -2003,6 +2053,26 @@
 	return error;
 }
 
+/* Decide whether a rename may cross a cow/non-cow boundary: 0 or -EMLINK. */
+static int madcow_allow_rename(struct inode *old_dir, struct dentry *old_dentry,
+			       struct inode *new_dir)
+{
+	struct inode *old_inode = old_dentry->d_inode;
+
+	/* source and target share directory: allow */
+	if (old_dir == new_dir)
+		return 0;
+	/* moved inode and target directory agree on madcowlink flag: allow */
+	if (inode_flags(old_inode, S_MADCOWLINK) == inode_flags(new_dir, S_MADCOWLINK))
+		return 0;
+	/* The flag is only defined for regular files and directories, so
+	 * refuse those on a mismatch; special files (neither regular nor
+	 * directory) may still be moved. */
+	if (S_ISREG(old_inode->i_mode) || S_ISDIR(old_inode->i_mode))
+		return -EMLINK;
+	return 0;
+}
+
 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	       struct inode *new_dir, struct dentry *new_dentry)
 {
@@ -2026,6 +2096,10 @@
 	if (!old_dir->i_op || !old_dir->i_op->rename)
 		return -EPERM;
 
+	error = madcow_allow_rename(old_dir, old_dentry, new_dir);
+	if (error)
+		return error;
+
 	DQUOT_INIT(old_dir);
 	DQUOT_INIT(new_dir);
 
@@ -2118,7 +2118,7 @@
 	return error;
 }
 
-static inline int do_rename(const char * oldname, const char * newname)
+int do_rename(const char * oldname, const char * newname)
 {
 	int error = 0;
 	struct dentry * old_dir, * new_dir;
--- linux-2.6.5cow/include/linux/ext2_fs.h~madcow	2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/include/linux/ext2_fs.h	2004-04-27 17:01:30.000000000 +0200
@@ -192,6 +192,7 @@
 #define EXT2_NOTAIL_FL			0x00008000 /* file tail should not be merged */
 #define EXT2_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
 #define EXT2_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
+#define EXT2_MADCOWLINK_FL		0x00040000 /* COW behaviour for hard links */
 #define EXT2_RESERVED_FL		0x80000000 /* reserved for ext2 lib */
 
 #define EXT2_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
--- linux-2.6.5cow/include/linux/ext3_fs.h~madcow	2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/include/linux/ext3_fs.h	2004-04-27 17:03:46.000000000 +0200
@@ -185,6 +185,7 @@
 #define EXT3_NOTAIL_FL			0x00008000 /* file tail should not be merged */
 #define EXT3_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
 #define EXT3_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
+#define EXT3_MADCOWLINK_FL		0x00040000 /* COW behaviour for hard links */
 #define EXT3_RESERVED_FL		0x80000000 /* reserved for ext3 lib */
 
 #define EXT3_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
--- linux-2.6.5cow/include/linux/fcntl.h~madcow	2004-04-27 16:34:42.000000000 +0200
+++ linux-2.6.5cow/include/linux/fcntl.h	2004-04-27 16:49:45.000000000 +0200
@@ -23,6 +23,9 @@
 #define DN_ATTRIB	0x00000020	/* File changed attibutes */
 #define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
 
+#define F_SETMADCOW	(F_LINUX_SPECIFIC_BASE+3)	/* arg != 0 sets, 0 clears S_MADCOWLINK */
+#define F_GETMADCOW	(F_LINUX_SPECIFIC_BASE+4)	/* returns 1 if S_MADCOWLINK is set, else 0 */
+
 #ifdef __KERNEL__
 
 #if BITS_PER_LONG == 32
--- linux-2.6.5cow/include/linux/fs.h~madcow	2004-04-27 16:48:55.000000000 +0200
+++ linux-2.6.5cow/include/linux/fs.h	2004-04-30 08:40:04.000000000 +0200
@@ -90,6 +90,7 @@
 /* public flags for file_system_type */
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
+#define FS_MADCOW	4	/* F_SETMADCOW is accepted on this fs type */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_ODD_RENAME	32768	/* Temporary stuff; will go away as soon
 				  * as nfs_rename() will be cleaned up
@@ -139,6 +140,9 @@
 #define S_NOQUOTA	64	/* Inode is not counted to quota */
 #define S_DIRSYNC	128	/* Directory modifications are synchronous */
 #define S_NOCMTIME	256	/* Do not update file c/mtime */
+#define S_MADCOWLINK	512	/* Hard links have copy-on-write semantics.
+				 * The flag has no effect on a directory
+				 * itself, but new children inherit it */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system

  parent reply	other threads:[~2004-05-06 13:27 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-05-06 13:17 [ANNOUNCEMENT PATCH COW] proof of concept impementation of cowlinks Jörn Engel
2004-05-06 13:18 ` [PATCH COW] generic_sendpage Jörn Engel
2004-05-06 13:19 ` [PATCH COW] sendfile Jörn Engel
2004-05-06 13:19 ` [PATCH COW] copyfile Jörn Engel
2004-05-06 13:20 ` [PATCH COW] lock_flags Jörn Engel
2004-05-06 13:21 ` Jörn Engel [this message]
2004-05-08 13:45 ` [ANNOUNCEMENT PATCH COW] proof of concept impementation of cowlinks Denis Vlasenko
2004-05-08 22:10   ` Pavel Machek
2004-05-09 14:09     ` Denis Vlasenko
2004-05-09 21:53       ` Pavel Machek
2004-05-10 15:44         ` Jörn Engel
2004-05-10 15:51           ` Pavel Machek
2004-05-10 15:56             ` Jörn Engel
2004-05-12  0:26           ` Jamie Lokier
2004-05-13 10:56             ` Jörn Engel
2004-05-12 20:29         ` Rob Landley
2004-05-08 22:48 ` Pavel Machek
2004-05-10 15:53   ` Jörn Engel
2004-05-10 19:26     ` Jan Harkes
2004-05-11 10:02       ` Jörn Engel
2004-05-11 14:08         ` Jan Harkes
2004-05-11 14:18           ` Jan Harkes
2004-05-11 14:33           ` Jörn Engel
2004-05-21 23:23           ` Rob Landley
2004-05-25 22:46             ` Jan Harkes
2004-05-11 15:40         ` Steve French
2004-05-11 15:58           ` Jörn Engel
2004-05-10  5:15 ` Eric W. Biederman
2004-05-10 15:59   ` Jörn Engel
2004-05-12 16:39 ` Rob Landley
2004-05-20 13:49   ` Pavel Machek
2004-05-25 21:55     ` Rob Landley
2004-05-25 22:08       ` Pavel Machek
2004-05-25 23:16         ` Rob Landley
2004-05-26  0:16           ` Ian Stirling
2004-05-26  9:52           ` Jörn Engel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040506132105.GF7930@wohnheim.fh-wedel.de \
    --to=joern@wohnheim.fh-wedel.de \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.