* [PATCH 01/10] sysfs: Support for preventing unmounts.
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
@ 2008-04-29 17:10 ` Benjamin Thery
2008-04-29 17:10 ` [PATCH 02/10] sysfs: sysfs_get_dentry add a sb parameter Benjamin Thery
` (9 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:10 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: Support for preventing unmounts.
To support mounting multiple instances of sysfs occassionally I
need to walk through all of the currently present sysfs super blocks.
To allow this iteration this patch adds sysfs_grab_supers
and sysfs_release_supers. While a piece of code is in
a section surrounded by these no more sysfs super blocks
will be either created or destroyed.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/mount.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------
fs/sysfs/sysfs.h | 10 ++++++
2 files changed, 81 insertions(+), 8 deletions(-)
Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -41,47 +41,110 @@ struct sysfs_dirent sysfs_root = {
static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
{
- struct inode *inode;
- struct dentry *root;
+ struct sysfs_super_info *info = NULL;
+ struct inode *inode = NULL;
+ struct dentry *root = NULL;
+ int error;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = SYSFS_MAGIC;
sb->s_op = &sysfs_ops;
sb->s_time_gran = 1;
- sysfs_sb = sb;
+ if (!sysfs_sb)
+ sysfs_sb = sb;
+
+ error = -ENOMEM;
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto out_err;
/* get root inode, initialize and unlock it */
+ error = -ENOMEM;
inode = sysfs_get_inode(&sysfs_root);
if (!inode) {
pr_debug("sysfs: could not get root inode\n");
- return -ENOMEM;
+ goto out_err;
}
/* instantiate and link root dentry */
+ error = -ENOMEM;
root = d_alloc_root(inode);
if (!root) {
pr_debug("%s: could not get root dentry!\n",__func__);
- iput(inode);
- return -ENOMEM;
+ goto out_err;
}
root->d_fsdata = &sysfs_root;
sb->s_root = root;
+ sb->s_fs_info = info;
return 0;
+
+out_err:
+ dput(root);
+ iput(inode);
+ kfree(info);
+ if (sysfs_sb == sb)
+ sysfs_sb = NULL;
+ return error;
}
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ int rc;
+ mutex_lock(&sysfs_rename_mutex);
+ rc = get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ mutex_unlock(&sysfs_rename_mutex);
+ return rc;
}
-static struct file_system_type sysfs_fs_type = {
+struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
.kill_sb = kill_anon_super,
};
+void sysfs_grab_supers(void)
+{
+ /* must hold sysfs_rename_mutex */
+ struct super_block *sb;
+ /* Loop until I have taken s_umount on all sysfs superblocks */
+restart:
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ if (sysfs_info(sb)->grabbed)
+ continue;
+ /* Wait for unmount activity to complete. */
+ if (sb->s_count < S_BIAS) {
+ sb->s_count += 1;
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount);
+ drop_super(sb);
+ goto restart;
+ }
+ atomic_inc(&sb->s_active);
+ sysfs_info(sb)->grabbed = 1;
+ }
+ spin_unlock(&sb_lock);
+}
+
+void sysfs_release_supers(void)
+{
+ /* must hold sysfs_rename_mutex */
+ struct super_block *sb;
+restart:
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ if (!sysfs_info(sb)->grabbed)
+ continue;
+ sysfs_info(sb)->grabbed = 0;
+ spin_unlock(&sb_lock);
+ deactivate_super(sb);
+ goto restart;
+ }
+ spin_unlock(&sb_lock);
+}
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
Index: linux-mm/fs/sysfs/sysfs.h
===================================================================
--- linux-mm.orig/fs/sysfs/sysfs.h
+++ linux-mm/fs/sysfs/sysfs.h
@@ -85,6 +85,12 @@ struct sysfs_addrm_cxt {
int cnt;
};
+struct sysfs_super_info {
+ int grabbed;
+};
+
+#define sysfs_info(SB) ((struct sysfs_super_info *)(SB)->s_fs_info)
+
/*
* mount.c
*/
@@ -92,6 +98,10 @@ extern struct sysfs_dirent sysfs_root;
extern struct super_block *sysfs_sb;
extern struct kmem_cache *sysfs_dir_cachep;
extern struct vfsmount *sysfs_mount;
+extern struct file_system_type sysfs_fs_type;
+
+void sysfs_grab_supers(void);
+void sysfs_release_supers(void);
/*
* dir.c
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 02/10] sysfs: sysfs_get_dentry add a sb parameter
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
2008-04-29 17:10 ` [PATCH 01/10] sysfs: Support for preventing unmounts Benjamin Thery
@ 2008-04-29 17:10 ` Benjamin Thery
2008-04-29 17:10 ` [PATCH 03/10] sysfs: Implement __sysfs_get_dentry Benjamin Thery
` (8 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:10 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: sysfs_get_dentry add a sb parameter
In preparation for multiple mounts of sysfs add a superblock parameter to
sysfs_get_dentry.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 11 ++++++-----
fs/sysfs/file.c | 2 +-
fs/sysfs/sysfs.h | 2 +-
3 files changed, 8 insertions(+), 7 deletions(-)
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -87,6 +87,7 @@ static void sysfs_unlink_sibling(struct
/**
* sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
* @sd: sysfs_dirent of interest
*
* Get dentry for @sd. Dentry is looked up if currently not
@@ -99,9 +100,9 @@ static void sysfs_unlink_sibling(struct
* RETURNS:
* Pointer to found dentry on success, ERR_PTR() value on error.
*/
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd)
{
- struct dentry *dentry = dget(sysfs_sb->s_root);
+ struct dentry *dentry = dget(sb->s_root);
while (dentry->d_fsdata != sd) {
struct sysfs_dirent *cur;
@@ -790,7 +791,7 @@ int sysfs_rename_dir(struct kobject * ko
goto out; /* nothing to rename */
/* get the original dentry */
- old_dentry = sysfs_get_dentry(sd);
+ old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
old_dentry = NULL;
@@ -858,7 +859,7 @@ int sysfs_move_dir(struct kobject *kobj,
goto out; /* nothing to move */
/* get dentries */
- old_dentry = sysfs_get_dentry(sd);
+ old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
old_dentry = NULL;
@@ -866,7 +867,7 @@ int sysfs_move_dir(struct kobject *kobj,
}
old_parent = old_dentry->d_parent;
- new_parent = sysfs_get_dentry(new_parent_sd);
+ new_parent = sysfs_get_dentry(sysfs_sb, new_parent_sd);
if (IS_ERR(new_parent)) {
error = PTR_ERR(new_parent);
new_parent = NULL;
Index: linux-mm/fs/sysfs/file.c
===================================================================
--- linux-mm.orig/fs/sysfs/file.c
+++ linux-mm/fs/sysfs/file.c
@@ -578,7 +578,7 @@ int sysfs_chmod_file(struct kobject *kob
goto out;
mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(victim_sd);
+ victim = sysfs_get_dentry(sysfs_sb, victim_sd);
mutex_unlock(&sysfs_rename_mutex);
if (IS_ERR(victim)) {
rc = PTR_ERR(victim);
Index: linux-mm/fs/sysfs/sysfs.h
===================================================================
--- linux-mm.orig/fs/sysfs/sysfs.h
+++ linux-mm/fs/sysfs/sysfs.h
@@ -113,7 +113,7 @@ extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
+struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
void sysfs_put_active_two(struct sysfs_dirent *sd);
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 03/10] sysfs: Implement __sysfs_get_dentry
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
2008-04-29 17:10 ` [PATCH 01/10] sysfs: Support for preventing unmounts Benjamin Thery
2008-04-29 17:10 ` [PATCH 02/10] sysfs: sysfs_get_dentry add a sb parameter Benjamin Thery
@ 2008-04-29 17:10 ` Benjamin Thery
2008-04-29 17:10 ` [PATCH 04/10] sysfs: Rename Support multiple superblocks Benjamin Thery
` (7 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:10 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: Implement __sysfs_get_dentry
This function is similar but much simpler to sysfs_get_dentry
returns a sysfs dentry if one curently exists.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -776,6 +776,44 @@ void sysfs_remove_dir(struct kobject * k
__sysfs_remove_dir(sd);
}
+/**
+ * __sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
+ * @sd: sysfs_dirent of interest
+ *
+ * Get dentry for @sd. Only return a dentry if one currently
+ * exists.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * Pointer to found dentry on success, NULL on failure.
+ */
+static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd)
+{
+ struct inode *inode;
+ struct dentry *dentry = NULL;
+
+ inode = ilookup5_nowait(sysfs_sb, sd->s_ino, sysfs_ilookup_test, sd);
+ if (inode && !(inode->i_state & I_NEW)) {
+ struct dentry *alias;
+ spin_lock(&dcache_lock);
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ if (!IS_ROOT(alias) && d_unhashed(alias))
+ continue;
+ if (alias->d_sb != sb)
+ continue;
+ dentry = alias;
+ dget_locked(dentry);
+ break;
+ }
+ spin_unlock(&dcache_lock);
+ }
+ iput(inode);
+ return dentry;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd = kobj->sd;
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 04/10] sysfs: Rename Support multiple superblocks
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (2 preceding siblings ...)
2008-04-29 17:10 ` [PATCH 03/10] sysfs: Implement __sysfs_get_dentry Benjamin Thery
@ 2008-04-29 17:10 ` Benjamin Thery
2008-04-29 17:11 ` [PATCH 05/10] sysfs: sysfs_chmod_file handle " Benjamin Thery
` (6 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:10 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: Rename Support multiple superblocks
This patch modifies the sysfs_rename_dir and sysfs_move_dir routines
to support multiple sysfs dentry tries rooted in different
sysfs superblocks.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 194 +++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 136 insertions(+), 58 deletions(-)
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -814,43 +814,112 @@ static struct dentry *__sysfs_get_dentry
return dentry;
}
+struct sysfs_rename_struct {
+ struct list_head list;
+ struct dentry *old_dentry;
+ struct dentry *new_dentry;
+ struct dentry *old_parent;
+ struct dentry *new_parent;
+};
+
+static void post_rename(struct list_head *head)
+{
+ struct sysfs_rename_struct *srs;
+ while (!list_empty(head)) {
+ srs = list_entry(head->next, struct sysfs_rename_struct, list);
+ dput(srs->old_dentry);
+ dput(srs->new_dentry);
+ dput(srs->old_parent);
+ dput(srs->new_parent);
+ list_del(&srs->list);
+ kfree(srs);
+ }
+}
+
+static int prep_rename(struct list_head *head,
+ struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
+ const char *name)
+{
+ struct sysfs_rename_struct *srs;
+ struct super_block *sb;
+ struct dentry *dentry;
+ int error;
+
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ dentry = sysfs_get_dentry(sb, sd);
+ if (dentry == ERR_PTR(-EXDEV))
+ continue;
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto err_out;
+ }
+
+ srs = kzalloc(sizeof(*srs), GFP_KERNEL);
+ if (!srs) {
+ dput(dentry);
+ goto err_out;
+ }
+
+ INIT_LIST_HEAD(&srs->list);
+ list_add(head, &srs->list);
+ srs->old_dentry = dentry;
+ srs->old_parent = dget(dentry->d_parent);
+
+ dentry = sysfs_get_dentry(sb, new_parent_sd);
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto err_out;
+ }
+ srs->new_parent = dentry;
+
+ error = -ENOMEM;
+ dentry = d_alloc_name(srs->new_parent, name);
+ if (!dentry)
+ goto err_out;
+ srs->new_dentry = dentry;
+ }
+ return 0;
+
+err_out:
+ post_rename(head);
+ return error;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd = kobj->sd;
- struct dentry *parent = NULL;
- struct dentry *old_dentry = NULL, *new_dentry = NULL;
+ struct list_head todo;
+ struct sysfs_rename_struct *srs;
+ struct inode *parent_inode = NULL;
const char *dup_name = NULL;
int error;
+ INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
error = 0;
if (strcmp(sd->s_name, new_name) == 0)
goto out; /* nothing to rename */
- /* get the original dentry */
- old_dentry = sysfs_get_dentry(sysfs_sb, sd);
- if (IS_ERR(old_dentry)) {
- error = PTR_ERR(old_dentry);
- old_dentry = NULL;
- goto out;
- }
+ sysfs_grab_supers();
+ error = prep_rename(&todo, sd, sd->s_parent, new_name);
+ if (error)
+ goto out_release;
- parent = old_dentry->d_parent;
+ error = -ENOMEM;
+ mutex_lock(&sysfs_mutex);
+ parent_inode = sysfs_get_inode(sd->s_parent);
+ mutex_unlock(&sysfs_mutex);
+ if (!parent_inode)
+ goto out_release;
- /* lock parent and get dentry for new name */
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&parent_inode->i_mutex);
mutex_lock(&sysfs_mutex);
error = -EEXIST;
if (sysfs_find_dirent(sd->s_parent, new_name))
goto out_unlock;
- error = -ENOMEM;
- new_dentry = d_alloc_name(parent, new_name);
- if (!new_dentry)
- goto out_unlock;
-
/* rename kobject and sysfs_dirent */
error = -ENOMEM;
new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
@@ -865,17 +934,21 @@ int sysfs_rename_dir(struct kobject * ko
sd->s_name = new_name;
/* rename */
- d_add(new_dentry, NULL);
- d_move(old_dentry, new_dentry);
+ list_for_each_entry(srs, &todo, list) {
+ d_add(srs->new_dentry, NULL);
+ d_move(srs->old_dentry, srs->new_dentry);
+ }
error = 0;
- out_unlock:
+out_unlock:
mutex_unlock(&sysfs_mutex);
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&parent_inode->i_mutex);
kfree(dup_name);
- dput(old_dentry);
- dput(new_dentry);
- out:
+out_release:
+ iput(parent_inode);
+ post_rename(&todo);
+ sysfs_release_supers();
+out:
mutex_unlock(&sysfs_rename_mutex);
return error;
}
@@ -884,10 +957,12 @@ int sysfs_move_dir(struct kobject *kobj,
{
struct sysfs_dirent *sd = kobj->sd;
struct sysfs_dirent *new_parent_sd;
- struct dentry *old_parent, *new_parent = NULL;
- struct dentry *old_dentry = NULL, *new_dentry = NULL;
+ struct list_head todo;
+ struct sysfs_rename_struct *srs;
+ struct inode *old_parent_inode = NULL, *new_parent_inode = NULL;
int error;
+ INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
@@ -896,26 +971,29 @@ int sysfs_move_dir(struct kobject *kobj,
if (sd->s_parent == new_parent_sd)
goto out; /* nothing to move */
- /* get dentries */
- old_dentry = sysfs_get_dentry(sysfs_sb, sd);
- if (IS_ERR(old_dentry)) {
- error = PTR_ERR(old_dentry);
- old_dentry = NULL;
- goto out;
- }
- old_parent = old_dentry->d_parent;
-
- new_parent = sysfs_get_dentry(sysfs_sb, new_parent_sd);
- if (IS_ERR(new_parent)) {
- error = PTR_ERR(new_parent);
- new_parent = NULL;
- goto out;
- }
+ sysfs_grab_supers();
+ error = prep_rename(&todo, sd, new_parent_sd, sd->s_name);
+ if (error)
+ goto out_release;
+
+ error = -ENOMEM;
+ mutex_lock(&sysfs_mutex);
+ old_parent_inode = sysfs_get_inode(sd->s_parent);
+ mutex_unlock(&sysfs_mutex);
+ if (!old_parent_inode)
+ goto out_release;
+
+ error = -ENOMEM;
+ mutex_lock(&sysfs_mutex);
+ new_parent_inode = sysfs_get_inode(new_parent_sd);
+ mutex_unlock(&sysfs_mutex);
+ if (!new_parent_inode)
+ goto out_release;
again:
- mutex_lock(&old_parent->d_inode->i_mutex);
- if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
- mutex_unlock(&old_parent->d_inode->i_mutex);
+ mutex_lock(&old_parent_inode->i_mutex);
+ if (!mutex_trylock(&new_parent_inode->i_mutex)) {
+ mutex_unlock(&old_parent_inode->i_mutex);
goto again;
}
mutex_lock(&sysfs_mutex);
@@ -924,14 +1002,11 @@ again:
if (sysfs_find_dirent(new_parent_sd, sd->s_name))
goto out_unlock;
- error = -ENOMEM;
- new_dentry = d_alloc_name(new_parent, sd->s_name);
- if (!new_dentry)
- goto out_unlock;
-
error = 0;
- d_add(new_dentry, NULL);
- d_move(old_dentry, new_dentry);
+ list_for_each_entry(srs, &todo, list) {
+ d_add(srs->new_dentry, NULL);
+ d_move(srs->old_dentry, srs->new_dentry);
+ }
/* Remove from old parent's list and insert into new parent's list. */
sysfs_unlink_sibling(sd);
@@ -940,14 +1015,17 @@ again:
sd->s_parent = new_parent_sd;
sysfs_link_sibling(sd);
- out_unlock:
+out_unlock:
mutex_unlock(&sysfs_mutex);
- mutex_unlock(&new_parent->d_inode->i_mutex);
- mutex_unlock(&old_parent->d_inode->i_mutex);
- out:
- dput(new_parent);
- dput(old_dentry);
- dput(new_dentry);
+ mutex_unlock(&new_parent_inode->i_mutex);
+ mutex_unlock(&old_parent_inode->i_mutex);
+
+out_release:
+ iput(new_parent_inode);
+ iput(old_parent_inode);
+ post_rename(&todo);
+ sysfs_release_supers();
+out:
mutex_unlock(&sysfs_rename_mutex);
return error;
}
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 05/10] sysfs: sysfs_chmod_file handle multiple superblocks
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (3 preceding siblings ...)
2008-04-29 17:10 ` [PATCH 04/10] sysfs: Rename Support multiple superblocks Benjamin Thery
@ 2008-04-29 17:11 ` Benjamin Thery
2008-04-29 17:11 ` [PATCH 06/10] sysfs: Implement sysfs tagged directory support Benjamin Thery
` (5 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:11 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: sysfs_chmod_file handle multiple superblocks
Teach sysfs_chmod_file how to handle multiple sysfs
superblocks.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/file.c | 53 +++++++++++++++++++++++++++++------------------------
1 file changed, 29 insertions(+), 24 deletions(-)
Index: linux-mm/fs/sysfs/file.c
===================================================================
--- linux-mm.orig/fs/sysfs/file.c
+++ linux-mm/fs/sysfs/file.c
@@ -567,7 +567,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_grou
int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
{
struct sysfs_dirent *victim_sd = NULL;
- struct dentry *victim = NULL;
+ struct super_block *sb;
+ struct dentry *victim;
struct inode * inode;
struct iattr newattrs;
int rc;
@@ -578,31 +579,35 @@ int sysfs_chmod_file(struct kobject *kob
goto out;
mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(sysfs_sb, victim_sd);
- mutex_unlock(&sysfs_rename_mutex);
- if (IS_ERR(victim)) {
- rc = PTR_ERR(victim);
- victim = NULL;
- goto out;
- }
-
- inode = victim->d_inode;
+ sysfs_grab_supers();
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ victim = sysfs_get_dentry(sb, victim_sd);
+ if (victim == ERR_PTR(-EXDEV))
+ continue;
+ if (IS_ERR(victim)) {
+ rc = PTR_ERR(victim);
+ victim = NULL;
+ goto out_unlock;
+ }
+
+ inode = victim->d_inode;
+ mutex_lock(&inode->i_mutex);
+ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+ rc = notify_change(victim, &newattrs);
+ if (rc == 0) {
+ mutex_lock(&sysfs_mutex);
+ victim_sd->s_mode = newattrs.ia_mode;
+ mutex_unlock(&sysfs_mutex);
+ }
+ mutex_unlock(&inode->i_mutex);
- mutex_lock(&inode->i_mutex);
-
- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- rc = notify_change(victim, &newattrs);
-
- if (rc == 0) {
- mutex_lock(&sysfs_mutex);
- victim_sd->s_mode = newattrs.ia_mode;
- mutex_unlock(&sysfs_mutex);
+ dput(victim);
}
-
- mutex_unlock(&inode->i_mutex);
- out:
- dput(victim);
+out_unlock:
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+out:
sysfs_put(victim_sd);
return rc;
}
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 06/10] sysfs: Implement sysfs tagged directory support.
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (4 preceding siblings ...)
2008-04-29 17:11 ` [PATCH 05/10] sysfs: sysfs_chmod_file handle " Benjamin Thery
@ 2008-04-29 17:11 ` Benjamin Thery
2008-04-29 17:11 ` [PATCH 07/10] sysfs: Implement sysfs_delete_link and sysfs_rename_link Benjamin Thery
` (4 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:11 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: Implement sysfs tagged directory support.
The problem. When implementing a network namespace I need to be able
to have multiple network devices with the same name. Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.
What this patch does is to add an additional tag field to the
sysfs dirent structure. For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible. Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.
I am calling the concept of a single directory that looks like multiple
directories all at the same path in the filesystem tagged directories.
For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded. Which means I need
a simple race free way to setup those directories as tagged.
To achieve a race free design all tagged directories are created
and managed by sysfs itself. The upper level code that knows what
tagged directories we need provides just two methods that enable
this:
sb_tag() - that returns a "void *" tag that identifies the context of
the process that mounted sysfs.
kobject_tag(kobj) - that returns a "void *" tag that identifies the context
a kobject should be in.
Everything else is left up to sysfs.
For the network namespace sb_tag and kobject_tag are essentially
one line functions, and look to remain that.
The work needed in sysfs is more extensive. At each directory
or symlink creating I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent. Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.
Currently only directories which hold kobjects, and
symlinks are supported. There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on which makes it useless, and there are
no potential users which makes it an uninteresting problem
to solve.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/bin.c | 2
fs/sysfs/dir.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++----
fs/sysfs/file.c | 8 +-
fs/sysfs/group.c | 2
fs/sysfs/inode.c | 6 +
fs/sysfs/mount.c | 44 ++++++++++--
fs/sysfs/symlink.c | 2
fs/sysfs/sysfs.h | 16 ++++
include/linux/sysfs.h | 16 ++++
9 files changed, 250 insertions(+), 28 deletions(-)
Index: linux-mm/fs/sysfs/bin.c
===================================================================
--- linux-mm.orig/fs/sysfs/bin.c
+++ linux-mm/fs/sysfs/bin.c
@@ -252,7 +252,7 @@ int sysfs_create_bin_file(struct kobject
void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->attr.name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -102,8 +102,17 @@ static void sysfs_unlink_sibling(struct
*/
struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd)
{
- struct dentry *dentry = dget(sb->s_root);
+ struct dentry *dentry;
+
+ /* Bail if this sd won't show up in this superblock */
+ if (sd->s_parent && sd->s_parent->s_flags & SYSFS_FLAG_TAGGED) {
+ const void *tag;
+ tag = sysfs_lookup_tag(sd->s_parent, sb);
+ if (sd->s_tag.tag != tag)
+ return ERR_PTR(-EXDEV);
+ }
+ dentry = dget(sb->s_root);
while (dentry->d_fsdata != sd) {
struct sysfs_dirent *cur;
struct dentry *parent;
@@ -422,7 +431,11 @@ void sysfs_addrm_start(struct sysfs_addr
*/
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
- if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) {
+ const void *tag = NULL;
+
+ tag = sysfs_creation_tag(acxt->parent_sd, sd);
+
+ if (sysfs_find_dirent(acxt->parent_sd, tag, sd->s_name)) {
printk(KERN_WARNING "sysfs: duplicate filename '%s' "
"can not be created\n", sd->s_name);
WARN_ON(1);
@@ -438,6 +451,9 @@ int sysfs_add_one(struct sysfs_addrm_cxt
sd->s_parent = sysfs_get(acxt->parent_sd);
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
+
if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
inc_nlink(acxt->parent_inode);
@@ -584,13 +600,18 @@ void sysfs_addrm_finish(struct sysfs_add
* Pointer to sysfs_dirent if found, NULL if not.
*/
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name)
{
struct sysfs_dirent *sd;
- for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
+ for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (sd->s_tag.tag != tag))
+ continue;
if (!strcmp(sd->s_name, name))
return sd;
+ }
return NULL;
}
@@ -614,7 +635,7 @@ struct sysfs_dirent *sysfs_get_dirent(st
struct sysfs_dirent *sd;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, name);
+ sd = sysfs_find_dirent(parent_sd, NULL, name);
sysfs_get(sd);
mutex_unlock(&sysfs_mutex);
@@ -680,13 +701,16 @@ static struct dentry * sysfs_lookup(stru
struct nameidata *nd)
{
struct dentry *ret = NULL;
- struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+ struct dentry *parent = dentry->d_parent;
+ struct sysfs_dirent *parent_sd = parent->d_fsdata;
struct sysfs_dirent *sd;
struct inode *inode;
+ const void *tag;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+ sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
/* no such entry */
if (!sd) {
@@ -892,19 +916,24 @@ int sysfs_rename_dir(struct kobject * ko
struct sysfs_rename_struct *srs;
struct inode *parent_inode = NULL;
const char *dup_name = NULL;
+ const void *old_tag, *tag;
int error;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
+ old_tag = sysfs_dirent_tag(sd);
+ tag = sysfs_creation_tag(sd->s_parent, sd);
error = 0;
- if (strcmp(sd->s_name, new_name) == 0)
+ if ((old_tag == tag) && (strcmp(sd->s_name, new_name) == 0))
goto out; /* nothing to rename */
sysfs_grab_supers();
- error = prep_rename(&todo, sd, sd->s_parent, new_name);
- if (error)
- goto out_release;
+ if (old_tag == tag) {
+ error = prep_rename(&todo, sd, sd->s_parent, new_name);
+ if (error)
+ goto out_release;
+ }
error = -ENOMEM;
mutex_lock(&sysfs_mutex);
@@ -917,7 +946,7 @@ int sysfs_rename_dir(struct kobject * ko
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(sd->s_parent, new_name))
+ if (sysfs_find_dirent(sd->s_parent, tag, new_name))
goto out_unlock;
/* rename kobject and sysfs_dirent */
@@ -932,6 +961,8 @@ int sysfs_rename_dir(struct kobject * ko
dup_name = sd->s_name;
sd->s_name = new_name;
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
/* rename */
list_for_each_entry(srs, &todo, list) {
@@ -939,6 +970,20 @@ int sysfs_rename_dir(struct kobject * ko
d_move(srs->old_dentry, srs->new_dentry);
}
+ /* If we are moving across superblocks drop the dcache entries */
+ if (old_tag != tag) {
+ struct super_block *sb;
+ struct dentry *dentry;
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ dentry = __sysfs_get_dentry(sb, sd);
+ if (!dentry)
+ continue;
+ shrink_dcache_parent(dentry);
+ d_drop(dentry);
+ dput(dentry);
+ }
+ }
+
error = 0;
out_unlock:
mutex_unlock(&sysfs_mutex);
@@ -961,11 +1006,13 @@ int sysfs_move_dir(struct kobject *kobj,
struct sysfs_rename_struct *srs;
struct inode *old_parent_inode = NULL, *new_parent_inode = NULL;
int error;
+ const void *tag;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+ tag = sysfs_dirent_tag(sd);
error = 0;
if (sd->s_parent == new_parent_sd)
@@ -999,7 +1046,7 @@ again:
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, sd->s_name))
+ if (sysfs_find_dirent(new_parent_sd, tag, sd->s_name))
goto out_unlock;
error = 0;
@@ -1038,10 +1085,11 @@ static inline unsigned char dt_type(stru
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+ struct dentry *parent = filp->f_path.dentry;
+ struct sysfs_dirent * parent_sd = parent->d_fsdata;
struct sysfs_dirent *pos;
ino_t ino;
+ const void *tag;
if (filp->f_pos == 0) {
ino = parent_sd->s_ino;
@@ -1059,6 +1107,8 @@ static int sysfs_readdir(struct file * f
if ((filp->f_pos > 1) && (filp->f_pos < INT_MAX)) {
mutex_lock(&sysfs_mutex);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+
/* Skip the dentries we have already reported */
pos = parent_sd->s_dir.children;
while (pos && (filp->f_pos > pos->s_ino))
@@ -1068,6 +1118,10 @@ static int sysfs_readdir(struct file * f
const char * name;
int len;
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (pos->s_tag.tag != tag))
+ continue;
+
name = pos->s_name;
len = strlen(name);
filp->f_pos = ino = pos->s_ino;
@@ -1088,3 +1142,104 @@ const struct file_operations sysfs_dir_o
.read = generic_read_dir,
.readdir = sysfs_readdir,
};
+
+const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd, struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (parent_sd->s_flags & SYSFS_FLAG_TAGGED) {
+ struct kobject *kobj;
+ switch (sysfs_type(sd)) {
+ case SYSFS_DIR:
+ kobj = sd->s_dir.kobj;
+ break;
+ case SYSFS_KOBJ_LINK:
+ kobj = sd->s_symlink.target_sd->s_dir.kobj;
+ break;
+ default:
+ BUG();
+ }
+ tag = parent_sd->s_tag.ops->kobject_tag(kobj);
+ }
+ return tag;
+}
+
+const void *sysfs_removal_tag(struct kobject *kobj, struct sysfs_dirent *dir_sd)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = kobj->sd->s_tag.tag;
+
+ return tag;
+}
+
+const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd, struct super_block *sb)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = dir_sd->s_tag.ops->sb_tag(&sysfs_info(sb)->tag);
+
+ return tag;
+}
+
+const void *sysfs_dirent_tag(struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (sd->s_parent && (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED))
+ tag = sd->s_tag.tag;
+
+ return tag;
+}
+
+/**
+ * sysfs_enable_tagging - Automatically tag all of the children in a
+ * directory.
+ * @kobj: object whose children should be filtered by tags
+ *
+ * Once tagging has been enabled on a directory the contents
+ * of the directory become dependent upon context captured when
+ * sysfs was mounted.
+ *
+ * tag_ops->sb_tag() returns the context for a given superblock.
+ *
+ * tag_ops->kobject_tag() returns the context that a given kobj
+ * resides in.
+ *
+ * Using those methods the sysfs code on tagged directories
+ * carefully stores the files so that when we lookup files
+ * we get the proper answer for our context.
+ *
+ * If the context of a kobject is changed it is expected that
+ * the kobject will be renamed so the appopriate sysfs data structures
+ * can be updated.
+ */
+int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ struct sysfs_dirent *sd;
+ int err;
+
+ err = -ENOENT;
+ sd = kobj->sd;
+
+ mutex_lock(&sysfs_mutex);
+ err = -EINVAL;
+ /* We can only enable tagging on empty directories
+ * where tagging is not already enabled, and
+ * who are not subdirectories of directories where tagging is
+ * enabled.
+ */
+ if (!sd->s_dir.children && (sysfs_type(sd) == SYSFS_DIR) &&
+ !(sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ sd->s_parent &&
+ !(sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)) {
+ err = 0;
+ sd->s_flags |= SYSFS_FLAG_TAGGED;
+ sd->s_tag.ops = tag_ops;
+ }
+ mutex_unlock(&sysfs_mutex);
+ return err;
+}
Index: linux-mm/fs/sysfs/file.c
===================================================================
--- linux-mm.orig/fs/sysfs/file.c
+++ linux-mm/fs/sysfs/file.c
@@ -460,9 +460,9 @@ void sysfs_notify(struct kobject *k, cha
mutex_lock(&sysfs_mutex);
if (sd && dir)
- sd = sysfs_find_dirent(sd, dir);
+ sd = sysfs_find_dirent(sd, NULL, dir);
if (sd && attr)
- sd = sysfs_find_dirent(sd, attr);
+ sd = sysfs_find_dirent(sd, NULL, attr);
if (sd) {
struct sysfs_open_dirent *od;
@@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->name);
}
@@ -644,7 +644,7 @@ void sysfs_remove_file_from_group(struct
else
dir_sd = sysfs_get(kobj->sd);
if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, attr->name);
+ sysfs_hash_and_remove(kobj, dir_sd, attr->name);
sysfs_put(dir_sd);
}
}
Index: linux-mm/fs/sysfs/group.c
===================================================================
--- linux-mm.orig/fs/sysfs/group.c
+++ linux-mm/fs/sysfs/group.c
@@ -25,7 +25,7 @@ static void remove_files(struct sysfs_di
for (i = 0, attr = grp->attrs; *attr; i++, attr++)
if (!grp->is_visible ||
grp->is_visible(kobj, *attr, i))
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
}
static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
Index: linux-mm/fs/sysfs/inode.c
===================================================================
--- linux-mm.orig/fs/sysfs/inode.c
+++ linux-mm/fs/sysfs/inode.c
@@ -215,17 +215,19 @@ struct inode * sysfs_get_inode(struct sy
return inode;
}
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name)
{
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
+ const void *tag;
if (!dir_sd)
return -ENOENT;
sysfs_addrm_start(&acxt, dir_sd);
+ tag = sysfs_removal_tag(kobj, dir_sd);
- sd = sysfs_find_dirent(dir_sd, name);
+ sd = sysfs_find_dirent(dir_sd, tag, name);
if (sd)
sysfs_remove_one(&acxt, sd);
Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -75,6 +75,7 @@ static int sysfs_fill_super(struct super
goto out_err;
}
root->d_fsdata = &sysfs_root;
+ root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
return 0;
@@ -88,20 +89,55 @@ out_err:
return error;
}
+static int sysfs_test_super(struct super_block *sb, void *ptr)
+{
+ struct task_struct *task = ptr;
+ struct sysfs_super_info *info = sysfs_info(sb);
+ int found = 1;
+
+ return found;
+}
+
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- int rc;
+ struct super_block *sb;
+ int error;
mutex_lock(&sysfs_rename_mutex);
- rc = get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ sb = sget(fs_type, sysfs_test_super, set_anon_super, current);
+ if (IS_ERR(sb)) {
+ error = PTR_ERR(sb);
+ goto out;
+ }
+ if (!sb->s_root) {
+ sb->s_flags = flags;
+ error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+ if (error) {
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+ goto out;
+ }
+ sb->s_flags |= MS_ACTIVE;
+ }
+ do_remount_sb(sb, flags, data, 0);
+ error = simple_set_mnt(mnt, sb);
+out:
mutex_unlock(&sysfs_rename_mutex);
- return rc;
+ return error;
+}
+
+static void sysfs_kill_sb(struct super_block *sb)
+{
+ struct sysfs_super_info *info = sysfs_info(sb);
+
+ kill_anon_super(sb);
+ kfree(info);
}
struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = sysfs_kill_sb,
};
void sysfs_grab_supers(void)
Index: linux-mm/fs/sysfs/symlink.c
===================================================================
--- linux-mm.orig/fs/sysfs/symlink.c
+++ linux-mm/fs/sysfs/symlink.c
@@ -94,7 +94,7 @@ void sysfs_remove_link(struct kobject *
else
parent_sd = kobj->sd;
- sysfs_hash_and_remove(parent_sd, name);
+ sysfs_hash_and_remove(kobj, parent_sd, name);
}
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
Index: linux-mm/fs/sysfs/sysfs.h
===================================================================
--- linux-mm.orig/fs/sysfs/sysfs.h
+++ linux-mm/fs/sysfs/sysfs.h
@@ -46,6 +46,10 @@ struct sysfs_dirent {
const char *s_name;
union {
+ const struct sysfs_tagged_dir_operations *ops;
+ const void *tag;
+ } s_tag;
+ union {
struct sysfs_elem_dir s_dir;
struct sysfs_elem_symlink s_symlink;
struct sysfs_elem_attr s_attr;
@@ -69,6 +73,7 @@ struct sysfs_dirent {
#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
#define SYSFS_FLAG_REMOVED 0x0200
+#define SYSFS_FLAG_TAGGED 0x0400
static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
{
@@ -87,6 +92,7 @@ struct sysfs_addrm_cxt {
struct sysfs_super_info {
int grabbed;
+ struct sysfs_tag_info tag;
};
#define sysfs_info(SB) ((struct sysfs_super_info *)(SB)->s_fs_info)
@@ -113,6 +119,13 @@ extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
+extern const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd);
+extern const void *sysfs_removal_tag(struct kobject *kobj,
+ struct sysfs_dirent *dir_sd);
+extern const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb);
+extern const void *sysfs_dirent_tag(struct sysfs_dirent *sd);
struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
void sysfs_put_active_two(struct sysfs_dirent *sd);
@@ -123,6 +136,7 @@ void sysfs_remove_one(struct sysfs_addrm
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name);
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
const unsigned char *name);
@@ -154,7 +168,7 @@ static inline void sysfs_put(struct sysf
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name);
int sysfs_inode_init(void);
/*
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -78,6 +78,14 @@ struct sysfs_ops {
ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
};
+struct sysfs_tag_info {
+};
+
+struct sysfs_tagged_dir_operations {
+ const void *(*sb_tag)(struct sysfs_tag_info *info);
+ const void *(*kobject_tag)(struct kobject *kobj);
+};
+
#ifdef CONFIG_SYSFS
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -115,6 +123,8 @@ void sysfs_remove_file_from_group(struct
void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
void sysfs_printk_last_file(void);
+int sysfs_enable_tagging(struct kobject *, const struct sysfs_tagged_dir_operations *);
+
extern int __must_check sysfs_init(void);
#else /* CONFIG_SYSFS */
@@ -209,6 +219,12 @@ static inline void sysfs_notify(struct k
{
}
+static inline int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ return 0;
+}
+
static inline int __must_check sysfs_init(void)
{
return 0;
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 07/10] sysfs: Implement sysfs_delete_link and sysfs_rename_link
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (5 preceding siblings ...)
2008-04-29 17:11 ` [PATCH 06/10] sysfs: Implement sysfs tagged directory support Benjamin Thery
@ 2008-04-29 17:11 ` Benjamin Thery
2008-04-29 17:11 ` [PATCH 08/10] driver core: Implement tagged directory support for device classes Benjamin Thery
` (3 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:11 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
sysfs: Implement sysfs_delete_link and sysfs_rename_link
When removing a symlink sysfs_remove_link does not provide
enough information to figure out which tagged directory the symlink
falls in. So I need sysfs_delete_link which is passed the target
of the symlink to delete.
Further half the time when we are removing a symlink the code is
actually renaming the symlink but not doing so explicitly because
we don't have a symlink rename method. So I have added sysfs_rename_link
as well.
Both of these functions now have enough information to find a symlink
in a tagged directory. The only restriction is that they must be called
before the target kobject is renamed or deleted. If they are called
later I loose track of which tag the target kobject was marked with
and can no longer find the old symlink to remove it.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/symlink.c | 31 +++++++++++++++++++++++++++++++
include/linux/sysfs.h | 17 +++++++++++++++++
2 files changed, 48 insertions(+)
Index: linux-mm/fs/sysfs/symlink.c
===================================================================
--- linux-mm.orig/fs/sysfs/symlink.c
+++ linux-mm/fs/sysfs/symlink.c
@@ -80,6 +80,21 @@ int sysfs_create_link(struct kobject * k
}
/**
+ * sysfs_delete_link - remove symlink in object's directory.
+ * @kobj: object we're acting for.
+ * @targ: object we're pointing to.
+ * @name: name of the symlink to remove.
+ *
+ * Unlike sysfs_remove_link sysfs_delete_link has enough information
+ * to successfully delete symlinks in tagged directories.
+ */
+void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
+ const char *name)
+{
+ sysfs_hash_and_remove(targ, kobj->sd, name);
+}
+
+/**
* sysfs_remove_link - remove symlink in object's directory.
* @kobj: object we're acting for.
* @name: name of the symlink to remove.
@@ -97,6 +112,22 @@ void sysfs_remove_link(struct kobject *
sysfs_hash_and_remove(kobj, parent_sd, name);
}
+/**
+ * sysfs_rename_link - rename symlink in object's directory.
+ * @kobj: object we're acting for.
+ * @targ: object we're pointing to.
+ * @old: previous name of the symlink.
+ * @new: new name of the symlink.
+ *
+ * A helper function for the common rename symlink idiom.
+ */
+int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
+ const char *old, const char *new)
+{
+ sysfs_delete_link(kobj, targ, old);
+ return sysfs_create_link(kobj, targ, new);
+}
+
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
struct sysfs_dirent *target_sd, char *path)
{
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -111,6 +111,12 @@ int __must_check sysfs_create_link(struc
const char *name);
void sysfs_remove_link(struct kobject *kobj, const char *name);
+int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
+ const char *old_name, const char *new_name);
+
+void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
+ const char *name);
+
int __must_check sysfs_create_group(struct kobject *kobj,
const struct attribute_group *grp);
void sysfs_remove_group(struct kobject *kobj,
@@ -193,6 +199,17 @@ static inline void sysfs_remove_link(str
{
}
+static inline int sysfs_rename_link(struct kobject * k, struct kobject *t,
+ const char *old_name, const char * new_name)
+{
+ return 0;
+}
+
+static inline void sysfs_delete_link(struct kobject *k, struct kobject *t,
+ const char *name)
+{
+}
+
static inline int sysfs_create_group(struct kobject *kobj,
const struct attribute_group *grp)
{
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 08/10] driver core: Implement tagged directory support for device classes.
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (6 preceding siblings ...)
2008-04-29 17:11 ` [PATCH 07/10] sysfs: Implement sysfs_delete_link and sysfs_rename_link Benjamin Thery
@ 2008-04-29 17:11 ` Benjamin Thery
2008-04-29 17:11 ` [PATCH 09/10] net: netns: Enable tagging for net_class directories in sysfs Benjamin Thery
` (2 subsequent siblings)
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:11 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
driver core: Implement tagged directory support for device classes.
This patch enables tagging on every class directory if struct class
has tag_ops.
In addition device_del and device_rename were modified to use
sysfs_delete_link and sysfs_rename_link respectively to ensure
when these operations happen on devices whose classes have
tag_ops that they work properly.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
drivers/base/class.c | 30 ++++++++++++++++++++++++++----
drivers/base/core.c | 41 ++++++++++++++++++++++-------------------
include/linux/device.h | 2 ++
3 files changed, 50 insertions(+), 23 deletions(-)
Index: linux-mm/drivers/base/class.c
===================================================================
--- linux-mm.orig/drivers/base/class.c
+++ linux-mm/drivers/base/class.c
@@ -134,6 +134,17 @@ static void remove_class_attrs(struct cl
}
}
+static int class_setup_tagging(struct class *cls)
+{
+ const struct sysfs_tagged_dir_operations *tag_ops;
+
+ tag_ops = cls->tag_ops;
+ if (!tag_ops)
+ return 0;
+
+ return sysfs_enable_tagging(&cls->subsys.kobj, tag_ops);
+}
+
int class_register(struct class *cls)
{
int error;
@@ -159,11 +170,22 @@ int class_register(struct class *cls)
cls->subsys.kobj.ktype = &class_ktype;
error = kset_register(&cls->subsys);
- if (!error) {
- error = add_class_attrs(class_get(cls));
- class_put(cls);
- }
+ if (error)
+ goto out;
+
+ error = class_setup_tagging(cls);
+ if (error)
+ goto out_unregister;
+
+ error = add_class_attrs(cls);
+ if (error)
+ goto out_unregister;
+
+out:
return error;
+out_unregister:
+ kset_unregister(&cls->subsys);
+ goto out;
}
void class_unregister(struct class *cls)
Index: linux-mm/drivers/base/core.c
===================================================================
--- linux-mm.orig/drivers/base/core.c
+++ linux-mm/drivers/base/core.c
@@ -618,6 +618,11 @@ static struct kobject *get_device_parent
kobject_put(k);
return NULL;
}
+ /* If we created a new class-directory setup tagging */
+ if (dev->class->tag_ops) {
+ sysfs_enable_tagging(k, dev->class->tag_ops);
+ }
+
/* do not emit an uevent for this simple "glue" directory */
return k;
}
@@ -752,12 +757,12 @@ static void device_remove_class_symlinks
if (dev->kobj.parent != &dev->class->subsys.kobj &&
device_is_not_partition(dev))
- sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
+ sysfs_delete_link(&dev->class->subsys.kobj, &dev->kobj, dev->bus_id);
#else
if (dev->parent && device_is_not_partition(dev))
sysfs_remove_link(&dev->kobj, "device");
- sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
+ sysfs_delete_link(&dev->class->subsys.kobj, &dev->kobj, dev->bus_id);
#endif
sysfs_remove_link(&dev->kobj, "subsystem");
@@ -1237,6 +1242,15 @@ int device_rename(struct device *dev, ch
strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE);
strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
+#ifndef CONFIG_SYSFS_DEPRECATED
+ if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) {
+ error = sysfs_rename_link(&dev->class->subsys.kobj,
+ &dev->kobj, old_device_name, new_name);
+ if (error)
+ goto out;
+ }
+#endif
+
error = kobject_rename(&dev->kobj, new_name);
if (error) {
strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE);
@@ -1245,24 +1259,13 @@ int device_rename(struct device *dev, ch
#ifdef CONFIG_SYSFS_DEPRECATED
if (old_class_name) {
+ error = -ENOMEM;
new_class_name = make_class_name(dev->class->name, &dev->kobj);
- if (new_class_name) {
- error = sysfs_create_link(&dev->parent->kobj,
- &dev->kobj, new_class_name);
- if (error)
- goto out;
- sysfs_remove_link(&dev->parent->kobj, old_class_name);
- }
- }
-#else
- if (dev->class) {
- sysfs_remove_link(&dev->class->subsys.kobj, old_device_name);
- error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
- dev->bus_id);
- if (error) {
- dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n",
- __func__, error);
- }
+ if (new_class_name)
+ error = sysfs_rename_link(&dev->parent->kobj,
+ &dev->kobj,
+ old_class_name,
+ new_class_name);
}
#endif
Index: linux-mm/include/linux/device.h
===================================================================
--- linux-mm.orig/include/linux/device.h
+++ linux-mm/include/linux/device.h
@@ -208,6 +208,8 @@ struct class {
int (*resume)(struct device *dev);
struct pm_ops *pm;
+
+ const struct sysfs_tagged_dir_operations *tag_ops;
};
extern int __must_check class_register(struct class *class);
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 09/10] net: netns: Enable tagging for net_class directories in sysfs
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (7 preceding siblings ...)
2008-04-29 17:11 ` [PATCH 08/10] driver core: Implement tagged directory support for device classes Benjamin Thery
@ 2008-04-29 17:11 ` Benjamin Thery
2008-04-29 17:11 ` [PATCH 10/10] sysfs: user namespaces: add ns to user_struct Benjamin Thery
2008-04-29 17:36 ` [PATCH 00/10] sysfs tagged directories Greg KH
10 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:11 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Benjamin Thery
net: Enable tagging for net_class directories in sysfs
The problem. Network devices show up in sysfs and with the network
namespace active multiple devices with the same name can show up in
the same directory, ouch!
To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.
By using the network namespace pointers as tags to separate out the
the sysfs directory entries we ensure that we don't have conflicts
in the directories and applications only see a limited set of
the network devices.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/mount.c | 36 ++++++++++++++++++++++++++++++++++++
include/linux/sysfs.h | 2 ++
net/Kconfig | 2 +-
net/core/net-sysfs.c | 20 ++++++++++++++++++++
4 files changed, 59 insertions(+), 1 deletion(-)
Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -16,6 +16,8 @@
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
#include "sysfs.h"
@@ -78,6 +80,7 @@ static int sysfs_fill_super(struct super
root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
+ info->tag.net_ns = hold_net(current->nsproxy->net_ns);
return 0;
out_err:
@@ -95,6 +98,9 @@ static int sysfs_test_super(struct super
struct sysfs_super_info *info = sysfs_info(sb);
int found = 1;
+ if (task->nsproxy->net_ns != info->tag.net_ns)
+ found = 0;
+
return found;
}
@@ -131,6 +137,8 @@ static void sysfs_kill_sb(struct super_b
struct sysfs_super_info *info = sysfs_info(sb);
kill_anon_super(sb);
+ if (info->tag.net_ns)
+ release_net(info->tag.net_ns);
kfree(info);
}
@@ -181,6 +189,31 @@ restart:
spin_unlock(&sb_lock);
}
+#ifdef CONFIG_NET
+static void sysfs_net_exit(struct net *net)
+{
+ /* Allow the net namespace to go away while sysfs is still mounted. */
+ struct super_block *sb;
+ mutex_lock(&sysfs_rename_mutex);
+ sysfs_grab_supers();
+ mutex_lock(&sysfs_mutex);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ struct sysfs_super_info *info = sysfs_info(sb);
+ if (info->tag.net_ns != net)
+ continue;
+ release_net(info->tag.net_ns);
+ info->tag.net_ns = NULL;
+ }
+ mutex_unlock(&sysfs_mutex);
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+ .exit = sysfs_net_exit,
+};
+#endif
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
@@ -205,6 +238,9 @@ int __init sysfs_init(void)
unregister_filesystem(&sysfs_fs_type);
goto out_err;
}
+#ifdef CONFIG_NET
+ register_pernet_subsys(&sysfs_net_ops);
+#endif
} else
goto out_err;
out:
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -19,6 +19,7 @@
struct kobject;
struct module;
+struct net;
/* FIXME
* The *owner field is no longer used, but leave around
@@ -79,6 +80,7 @@ struct sysfs_ops {
};
struct sysfs_tag_info {
+ struct net *net_ns;
};
struct sysfs_tagged_dir_operations {
Index: linux-mm/net/Kconfig
===================================================================
--- linux-mm.orig/net/Kconfig
+++ linux-mm/net/Kconfig
@@ -30,7 +30,7 @@ menu "Networking options"
config NET_NS
bool "Network namespace support"
default n
- depends on EXPERIMENTAL && !SYSFS && NAMESPACES
+ depends on EXPERIMENTAL && NAMESPACES
help
Allow user space to create what appear to be multiple instances
of the network stack.
Index: linux-mm/net/core/net-sysfs.c
===================================================================
--- linux-mm.orig/net/core/net-sysfs.c
+++ linux-mm/net/core/net-sysfs.c
@@ -13,7 +13,9 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
+#include <linux/nsproxy.h>
#include <net/sock.h>
+#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#include <net/iw_handler.h>
@@ -419,6 +421,23 @@ static void netdev_release(struct device
kfree((char *)dev - dev->padded);
}
+static const void *net_sb_tag(struct sysfs_tag_info *info)
+{
+ return info->net_ns;
+}
+
+static const void *net_kobject_tag(struct kobject *kobj)
+{
+ struct net_device *dev;
+ dev = container_of(kobj, struct net_device, dev.kobj);
+ return dev_net(dev);
+}
+
+static const struct sysfs_tagged_dir_operations net_tagged_dir_operations = {
+ .sb_tag = net_sb_tag,
+ .kobject_tag = net_kobject_tag,
+};
+
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
@@ -428,6 +447,7 @@ static struct class net_class = {
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
+ .tag_ops = &net_tagged_dir_operations,
};
/* Delete sysfs entries but hold kobject reference until after all
--
^ permalink raw reply [flat|nested] 30+ messages in thread* [PATCH 10/10] sysfs: user namespaces: add ns to user_struct
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (8 preceding siblings ...)
2008-04-29 17:11 ` [PATCH 09/10] net: netns: Enable tagging for net_class directories in sysfs Benjamin Thery
@ 2008-04-29 17:11 ` Benjamin Thery
2008-04-29 19:51 ` Eric W. Biederman
2008-04-29 17:36 ` [PATCH 00/10] sysfs tagged directories Greg KH
10 siblings, 1 reply; 30+ messages in thread
From: Benjamin Thery @ 2008-04-29 17:11 UTC (permalink / raw)
To: linux-kernel, Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev
Add the user_namespace to user_struct.
Add a user_namespace to the sysfs_tag struct.
Mark the /sys/kernel/uids directory to be tagged so that processes in
different user namespaces can remount /sys and see their own uid
listings.
Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces
unusable, because when you
clone(CLONE_NEWUSER)
it will auto-create the root userid and try to create
/sys/kernel/uids/0. Since that already exists from the parent user
namespace, the create fails, and the clone misleadingly ends up
returning -ENOMEM.
This patch fixes the issue by allowing each user namespace to remount
/sys, and having /sys filter the /sys/kernel/uid/ entries by user
namespace.
Signed-off-by: Serge Hallyn <serue@us.ibm.com>
---
fs/dquot.c | 2 -
fs/ioprio.c | 2 -
fs/sysfs/mount.c | 25 +++++++++++++++++++++
include/linux/sched.h | 3 +-
include/linux/sysfs.h | 9 +++++++
include/linux/types.h | 5 ++++
include/linux/user_namespace.h | 5 ++++
kernel/user.c | 48 ++++++++++++++++++++++++++++++++++-------
kernel/user_namespace.c | 3 +-
security/keys/process_keys.c | 14 +++++------
10 files changed, 97 insertions(+), 19 deletions(-)
Index: linux-mm/fs/dquot.c
===================================================================
--- linux-mm.orig/fs/dquot.c
+++ linux-mm/fs/dquot.c
@@ -968,7 +968,7 @@ static void send_warning(const struct dq
MINOR(dquot->dq_sb->s_dev));
if (ret)
goto attr_err_out;
- ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
+ ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid.uid);
if (ret)
goto attr_err_out;
genlmsg_end(skb, msg_head);
Index: linux-mm/fs/ioprio.c
===================================================================
--- linux-mm.orig/fs/ioprio.c
+++ linux-mm/fs/ioprio.c
@@ -226,7 +226,7 @@ asmlinkage long sys_ioprio_get(int which
break;
do_each_thread(g, p) {
- if (p->uid != user->uid)
+ if (!task_user_equiv(p, user))
continue;
tmpio = get_task_ioprio(p);
if (tmpio < 0)
Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -17,6 +17,7 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/nsproxy.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include "sysfs.h"
@@ -81,6 +82,7 @@ static int sysfs_fill_super(struct super
sb->s_root = root;
sb->s_fs_info = info;
info->tag.net_ns = hold_net(current->nsproxy->net_ns);
+ info->tag.user_ns = current->nsproxy->user_ns;
return 0;
out_err:
@@ -100,6 +102,8 @@ static int sysfs_test_super(struct super
if (task->nsproxy->net_ns != info->tag.net_ns)
found = 0;
+ if (task->nsproxy->user_ns != info->tag.user_ns)
+ found = 0;
return found;
}
@@ -214,6 +218,27 @@ static struct pernet_operations sysfs_ne
};
#endif
+#ifdef CONFIG_USER_NS
+void sysfs_userns_exit(struct user_namespace *user_ns)
+{
+ /* Allow the net namespace to go away while sysfs is still mounted. */
+ struct super_block *sb;
+ printk(KERN_NOTICE "sysfs: user namespace exiting\n");
+ mutex_lock(&sysfs_rename_mutex);
+ sysfs_grab_supers();
+ mutex_lock(&sysfs_mutex);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ struct sysfs_super_info *info = sysfs_info(sb);
+ if (info->tag.user_ns != user_ns)
+ continue;
+ info->tag.user_ns = NULL;
+ }
+ mutex_unlock(&sysfs_mutex);
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+}
+#endif
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
Index: linux-mm/include/linux/sched.h
===================================================================
--- linux-mm.orig/include/linux/sched.h
+++ linux-mm/include/linux/sched.h
@@ -598,7 +598,7 @@ struct user_struct {
/* Hash table maintenance information */
struct hlist_node uidhash_node;
- uid_t uid;
+ struct k_uid_t uid;
#ifdef CONFIG_USER_SCHED
struct task_group *tg;
@@ -1675,6 +1675,7 @@ static inline struct user_struct *get_ui
extern void free_uid(struct user_struct *);
extern void switch_uid(struct user_struct *);
extern void release_uids(struct user_namespace *ns);
+extern int task_user_equiv(struct task_struct *tsk, struct user_struct *u);
#include <asm/current.h>
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -20,6 +20,7 @@
struct kobject;
struct module;
struct net;
+struct user_namespace;
/* FIXME
* The *owner field is no longer used, but leave around
@@ -81,6 +82,7 @@ struct sysfs_ops {
struct sysfs_tag_info {
struct net *net_ns;
+ struct user_namespace *user_ns;
};
struct sysfs_tagged_dir_operations {
@@ -135,6 +137,9 @@ int sysfs_enable_tagging(struct kobject
extern int __must_check sysfs_init(void);
+struct user_namespace;
+void sysfs_userns_exit(struct user_namespace *user_ns);
+
#else /* CONFIG_SYSFS */
static inline int sysfs_schedule_callback(struct kobject *kobj,
@@ -253,6 +258,10 @@ static inline void sysfs_printk_last_fil
{
}
+static inline void sysfs_userns_exit(struct user_namespace *user_ns)
+{
+}
+
#endif /* CONFIG_SYSFS */
#endif /* _SYSFS_H_ */
Index: linux-mm/include/linux/types.h
===================================================================
--- linux-mm.orig/include/linux/types.h
+++ linux-mm/include/linux/types.h
@@ -37,6 +37,11 @@ typedef __kernel_gid32_t gid_t;
typedef __kernel_uid16_t uid16_t;
typedef __kernel_gid16_t gid16_t;
+struct k_uid_t {
+ uid_t uid;
+ struct user_namespace *ns;
+};
+
typedef unsigned long uintptr_t;
#ifdef CONFIG_UID16
Index: linux-mm/include/linux/user_namespace.h
===================================================================
--- linux-mm.orig/include/linux/user_namespace.h
+++ linux-mm/include/linux/user_namespace.h
@@ -12,10 +12,15 @@
struct user_namespace {
struct kref kref;
struct hlist_head uidhash_table[UIDHASH_SZ];
+ struct kset *kset;
struct user_struct *root_user;
};
extern struct user_namespace init_user_ns;
+extern int register_user_ns_kobj(struct user_namespace *ns);
+extern void unregister_user_ns_kobj(struct user_namespace *ns);
+extern int register_user_ns_kobj(struct user_namespace *ns);
+extern void unregister_user_ns_kobj(struct user_namespace *ns);
#ifdef CONFIG_USER_NS
Index: linux-mm/kernel/user.c
===================================================================
--- linux-mm.orig/kernel/user.c
+++ linux-mm/kernel/user.c
@@ -53,6 +53,10 @@ struct user_struct root_user = {
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
+ .uid = {
+ .uid = 0,
+ .ns = &init_user_ns,
+ },
#ifdef CONFIG_USER_SCHED
.tg = &init_task_group,
#endif
@@ -71,13 +75,23 @@ static void uid_hash_remove(struct user_
hlist_del_init(&up->uidhash_node);
}
-static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+int task_user_equiv(struct task_struct *tsk, struct user_struct *u)
+{
+ if (tsk->uid != u->uid.uid)
+ return 0;
+ if (tsk->nsproxy->user_ns != u->uid.ns)
+ return 0;
+ return 1;
+}
+
+static struct user_struct *uid_hash_find(uid_t uid,
+ struct hlist_head *hashent)
{
struct user_struct *user;
struct hlist_node *h;
hlist_for_each_entry(user, h, hashent, uidhash_node) {
- if (user->uid == uid) {
+ if (user->uid.uid == uid) {
atomic_inc(&user->__count);
return user;
}
@@ -236,6 +250,23 @@ static void uids_release(struct kobject
return;
}
+static const void *userns_sb_tag(struct sysfs_tag_info *info)
+{
+ return info->user_ns;
+}
+
+static const void *userns_kobject_tag(struct kobject *kobj)
+{
+ struct user_struct *up;
+ up = container_of(kobj, struct user_struct, kobj);
+ return up->uid.ns;
+}
+
+static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = {
+ .sb_tag = userns_sb_tag,
+ .kobject_tag = userns_kobject_tag,
+};
+
static struct kobj_type uids_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_attrs = uids_attributes,
@@ -246,19 +277,19 @@ static struct kobj_type uids_ktype = {
static int uids_user_create(struct user_struct *up)
{
struct kobject *kobj = &up->kobj;
- int error;
+ int err;
memset(kobj, 0, sizeof(struct kobject));
kobj->kset = uids_kset;
- error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
- if (error) {
+ err = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid.uid);
+ if (err) {
kobject_put(kobj);
goto done;
}
kobject_uevent(kobj, KOBJ_ADD);
done:
- return error;
+ return err;
}
/* create these entries in sysfs:
@@ -271,7 +302,7 @@ int __init uids_sysfs_init(void)
uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
if (!uids_kset)
return -ENOMEM;
-
+ sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations);
return uids_user_create(&root_user);
}
@@ -403,7 +434,8 @@ struct user_struct *alloc_uid(struct use
if (!new)
goto out_unlock;
- new->uid = uid;
+ new->uid.uid = uid;
+ new->uid.ns = ns;
atomic_set(&new->__count, 1);
if (sched_create_user(new) < 0)
Index: linux-mm/kernel/user_namespace.c
===================================================================
--- linux-mm.orig/kernel/user_namespace.c
+++ linux-mm/kernel/user_namespace.c
@@ -22,7 +22,7 @@ static struct user_namespace *clone_user
struct user_struct *new_user;
int n;
- ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
+ ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL);
if (!ns)
return ERR_PTR(-ENOMEM);
@@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref)
struct user_namespace *ns;
ns = container_of(kref, struct user_namespace, kref);
+ sysfs_userns_exit(ns);
release_uids(ns);
kfree(ns);
}
Index: linux-mm/security/keys/process_keys.c
===================================================================
--- linux-mm.orig/security/keys/process_keys.c
+++ linux-mm/security/keys/process_keys.c
@@ -47,7 +47,7 @@ static int install_user_keyrings(struct
char buf[20];
int ret;
- kenter("%p{%u}", user, user->uid);
+ kenter("%p{%u}", user, user->uid.uid);
if (user->uid_keyring) {
kleave(" = 0 [exist]");
@@ -62,13 +62,13 @@ static int install_user_keyrings(struct
* - there may be one in existence already as it may have been
* pinned by a session, but the user_struct pointing to it
* may have been destroyed by setuid */
- sprintf(buf, "_uid.%u", user->uid);
+ sprintf(buf, "_uid.%u", user->uid.uid);
uid_keyring = find_keyring_by_name(buf, true);
if (IS_ERR(uid_keyring)) {
- uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
- tsk, KEY_ALLOC_IN_QUOTA,
- NULL);
+ uid_keyring = keyring_alloc(buf, user->uid.uid,
+ (gid_t) -1, tsk,
+ KEY_ALLOC_IN_QUOTA, NULL);
if (IS_ERR(uid_keyring)) {
ret = PTR_ERR(uid_keyring);
goto error;
@@ -77,12 +77,12 @@ static int install_user_keyrings(struct
/* get a default session keyring (which might also exist
* already) */
- sprintf(buf, "_uid_ses.%u", user->uid);
+ sprintf(buf, "_uid_ses.%u", user->uid.uid);
session_keyring = find_keyring_by_name(buf, true);
if (IS_ERR(session_keyring)) {
session_keyring =
- keyring_alloc(buf, user->uid, (gid_t) -1,
+ keyring_alloc(buf, user->uid.uid, (gid_t) -1,
tsk, KEY_ALLOC_IN_QUOTA, NULL);
if (IS_ERR(session_keyring)) {
ret = PTR_ERR(session_keyring);
--
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 10/10] sysfs: user namespaces: add ns to user_struct
2008-04-29 17:11 ` [PATCH 10/10] sysfs: user namespaces: add ns to user_struct Benjamin Thery
@ 2008-04-29 19:51 ` Eric W. Biederman
2008-04-29 23:18 ` Serge E. Hallyn
0 siblings, 1 reply; 30+ messages in thread
From: Eric W. Biederman @ 2008-04-29 19:51 UTC (permalink / raw)
To: Benjamin Thery
Cc: linux-kernel, Tejun Heo, Greg Kroah-Hartman, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
Benjamin Thery <benjamin.thery@bull.net> writes:
> Add the user_namespace to user_struct.
>
> Add a user_namespace to the sysfs_tag struct.
>
> Mark the /sys/kernel/uids directory to be tagged so that processes in
> different user namespaces can remount /sys and see their own uid
> listings.
>
> Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces
> unusable, because when you
> clone(CLONE_NEWUSER)
> it will auto-create the root userid and try to create
> /sys/kernel/uids/0. Since that already exists from the parent user
> namespace, the create fails, and the clone misleadingly ends up
> returning -ENOMEM.
>
> This patch fixes the issue by allowing each user namespace to remount
> /sys, and having /sys filter the /sys/kernel/uid/ entries by user
> namespace.
There is a lot of promising code in this patch. This patch appears to
be building up a lot of userns infrastructure that is only indirectly
related to sysfs, and it still appears to have some debugging code
present.
As a proof of concept patch (which I asked for) this looks good.
I do think this patch should get split into logical changes
and cleaned up a bit before it gets merged into mainline.
More comments inline below.
Eric
> Index: linux-mm/fs/sysfs/mount.c
> ===================================================================
> --- linux-mm.orig/fs/sysfs/mount.c
> +++ linux-mm/fs/sysfs/mount.c
> @@ -17,6 +17,7 @@
> #include <linux/pagemap.h>
> #include <linux/init.h>
> #include <linux/nsproxy.h>
> +#include <linux/user_namespace.h>
> #include <net/net_namespace.h>
>
> #include "sysfs.h"
> @@ -81,6 +82,7 @@ static int sysfs_fill_super(struct super
> sb->s_root = root;
> sb->s_fs_info = info;
> info->tag.net_ns = hold_net(current->nsproxy->net_ns);
> + info->tag.user_ns = current->nsproxy->user_ns;
> return 0;
>
> out_err:
> @@ -100,6 +102,8 @@ static int sysfs_test_super(struct super
>
> if (task->nsproxy->net_ns != info->tag.net_ns)
> found = 0;
> + if (task->nsproxy->user_ns != info->tag.user_ns)
> + found = 0;
>
> return found;
> }
> @@ -214,6 +218,27 @@ static struct pernet_operations sysfs_ne
> };
> #endif
>
> +#ifdef CONFIG_USER_NS
> +void sysfs_userns_exit(struct user_namespace *user_ns)
> +{
> + /* Allow the net namespace to go away while sysfs is still mounted. */
> + struct super_block *sb;
> + printk(KERN_NOTICE "sysfs: user namespace exiting\n");
Debugging?
> + mutex_lock(&sysfs_rename_mutex);
> + sysfs_grab_supers();
> + mutex_lock(&sysfs_mutex);
> + list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
> + struct sysfs_super_info *info = sysfs_info(sb);
> + if (info->tag.user_ns != user_ns)
> + continue;
> + info->tag.user_ns = NULL;
> + }
> + mutex_unlock(&sysfs_mutex);
> + sysfs_release_supers();
> + mutex_unlock(&sysfs_rename_mutex);
> +}
> +#endif
> +
> int __init sysfs_init(void)
> {
> int err = -ENOMEM;
> Index: linux-mm/include/linux/sched.h
> ===================================================================
> --- linux-mm.orig/include/linux/sched.h
> +++ linux-mm/include/linux/sched.h
> @@ -598,7 +598,7 @@ struct user_struct {
>
> /* Hash table maintenance information */
> struct hlist_node uidhash_node;
> - uid_t uid;
> + struct k_uid_t uid;
If we are going to go this direction my inclination
is to include an array of a single element in user_struct.
Maybe that makes sense. I just know we need to talk about
how a user maps into different user namespaces. As that
is a real concept that really occurs in real filesystems
like nfsv4 and p9fs, and having infrastructure that can
deal with the concept (even if it doesn't support it yet) would be
useful.
> #endif /* _SYSFS_H_ */
> Index: linux-mm/include/linux/types.h
> ===================================================================
> --- linux-mm.orig/include/linux/types.h
> +++ linux-mm/include/linux/types.h
> @@ -37,6 +37,11 @@ typedef __kernel_gid32_t gid_t;
> typedef __kernel_uid16_t uid16_t;
> typedef __kernel_gid16_t gid16_t;
>
> +struct k_uid_t {
> + uid_t uid;
> + struct user_namespace *ns;
> +};
Weird a _t suffix in a structure name. Usually _t indicates
that something is a typedef.
> +
> typedef unsigned long uintptr_t;
>
> #ifdef CONFIG_UID16
> Index: linux-mm/include/linux/user_namespace.h
> ===================================================================
> --- linux-mm.orig/include/linux/user_namespace.h
> +++ linux-mm/include/linux/user_namespace.h
> @@ -12,10 +12,15 @@
> struct user_namespace {
> struct kref kref;
> struct hlist_head uidhash_table[UIDHASH_SZ];
> + struct kset *kset;
> struct user_struct *root_user;
> };
>
> extern struct user_namespace init_user_ns;
> +extern int register_user_ns_kobj(struct user_namespace *ns);
> +extern void unregister_user_ns_kobj(struct user_namespace *ns);
> +extern int register_user_ns_kobj(struct user_namespace *ns);
> +extern void unregister_user_ns_kobj(struct user_namespace *ns);
>
> #ifdef CONFIG_USER_NS
>
> Index: linux-mm/kernel/user.c
> ===================================================================
> --- linux-mm.orig/kernel/user.c
> +++ linux-mm/kernel/user.c
> @@ -53,6 +53,10 @@ struct user_struct root_user = {
> .files = ATOMIC_INIT(0),
> .sigpending = ATOMIC_INIT(0),
> .locked_shm = 0,
> + .uid = {
> + .uid = 0,
> + .ns = &init_user_ns,
> + },
> #ifdef CONFIG_USER_SCHED
> .tg = &init_task_group,
> #endif
> @@ -71,13 +75,23 @@ static void uid_hash_remove(struct user_
> hlist_del_init(&up->uidhash_node);
> }
>
> -static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
> +int task_user_equiv(struct task_struct *tsk, struct user_struct *u)
> +{
> + if (tsk->uid != u->uid.uid)
> + return 0;
> + if (tsk->nsproxy->user_ns != u->uid.ns)
> + return 0;
> + return 1;
> +}
> +
> +static struct user_struct *uid_hash_find(uid_t uid,
> + struct hlist_head *hashent)
> {
> struct user_struct *user;
> struct hlist_node *h;
>
> hlist_for_each_entry(user, h, hashent, uidhash_node) {
> - if (user->uid == uid) {
> + if (user->uid.uid == uid) {
> atomic_inc(&user->__count);
> return user;
> }
> @@ -236,6 +250,23 @@ static void uids_release(struct kobject
> return;
> }
>
> +static const void *userns_sb_tag(struct sysfs_tag_info *info)
> +{
> + return info->user_ns;
> +}
> +
> +static const void *userns_kobject_tag(struct kobject *kobj)
> +{
> + struct user_struct *up;
> + up = container_of(kobj, struct user_struct, kobj);
> + return up->uid.ns;
> +}
> +
> +static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = {
> + .sb_tag = userns_sb_tag,
> + .kobject_tag = userns_kobject_tag,
> +};
> +
> static struct kobj_type uids_ktype = {
> .sysfs_ops = &kobj_sysfs_ops,
> .default_attrs = uids_attributes,
> @@ -246,19 +277,19 @@ static struct kobj_type uids_ktype = {
> static int uids_user_create(struct user_struct *up)
> {
> struct kobject *kobj = &up->kobj;
> - int error;
> + int err;
>
> memset(kobj, 0, sizeof(struct kobject));
> kobj->kset = uids_kset;
> - error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
> - if (error) {
> + err = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid.uid);
> + if (err) {
> kobject_put(kobj);
> goto done;
> }
>
> kobject_uevent(kobj, KOBJ_ADD);
> done:
> - return error;
> + return err;
> }
>
> /* create these entries in sysfs:
> @@ -271,7 +302,7 @@ int __init uids_sysfs_init(void)
> uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
> if (!uids_kset)
> return -ENOMEM;
> -
> + sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations);
> return uids_user_create(&root_user);
> }
>
> @@ -403,7 +434,8 @@ struct user_struct *alloc_uid(struct use
> if (!new)
> goto out_unlock;
>
> - new->uid = uid;
> + new->uid.uid = uid;
> + new->uid.ns = ns;
> atomic_set(&new->__count, 1);
>
> if (sched_create_user(new) < 0)
> Index: linux-mm/kernel/user_namespace.c
> ===================================================================
> --- linux-mm.orig/kernel/user_namespace.c
> +++ linux-mm/kernel/user_namespace.c
> @@ -22,7 +22,7 @@ static struct user_namespace *clone_user
> struct user_struct *new_user;
> int n;
>
> - ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
> + ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL);
> if (!ns)
> return ERR_PTR(-ENOMEM);
>
> @@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref)
> struct user_namespace *ns;
>
> ns = container_of(kref, struct user_namespace, kref);
> + sysfs_userns_exit(ns);
> release_uids(ns);
> kfree(ns);
> }
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 10/10] sysfs: user namespaces: add ns to user_struct
2008-04-29 19:51 ` Eric W. Biederman
@ 2008-04-29 23:18 ` Serge E. Hallyn
0 siblings, 0 replies; 30+ messages in thread
From: Serge E. Hallyn @ 2008-04-29 23:18 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Greg Kroah-Hartman,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
Quoting Eric W. Biederman (ebiederm@xmission.com):
> Benjamin Thery <benjamin.thery@bull.net> writes:
>
> > Add the user_namespace to user_struct.
> >
> > Add a user_namespace to the sysfs_tag struct.
> >
> > Mark the /sys/kernel/uids directory to be tagged so that processes in
> > different user namespaces can remount /sys and see their own uid
> > listings.
> >
> > Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces
> > unusable, because when you
> > clone(CLONE_NEWUSER)
> > it will auto-create the root userid and try to create
> > /sys/kernel/uids/0. Since that already exists from the parent user
> > namespace, the create fails, and the clone misleadingly ends up
> > returning -ENOMEM.
> >
> > This patch fixes the issue by allowing each user namespace to remount
> > /sys, and having /sys filter the /sys/kernel/uid/ entries by user
> > namespace.
>
> There is a lot of promising code in this patch. This patch appears to
> be building up a lot of userns infrastructure that is only indirectly
> related to sysfs, and it still appears to have some debugging code
> present.
Good point.
I left in more cruft than I thought I had. Really the k_uid_t
shouldn't be there at all. But to have any fix at all I do need the
user_namespace in the user_struct. So unless you're ok with putting
that in explicitly for now, then until we decide how to handle the
user_struct being in multiple namespaces, the fix will have to wait.
> As a proof of concept patch (which I asked for) this looks good.
>
> I do think this patch should get split into logical changes
> and cleaned up a bit before it gets merged into mainline.
Agreed. I didn't pull out enough for the simple sysfs fix.
> More comments inline below.
>
> Eric
>
> > Index: linux-mm/fs/sysfs/mount.c
> > ===================================================================
> > --- linux-mm.orig/fs/sysfs/mount.c
> > +++ linux-mm/fs/sysfs/mount.c
> > @@ -17,6 +17,7 @@
> > #include <linux/pagemap.h>
> > #include <linux/init.h>
> > #include <linux/nsproxy.h>
> > +#include <linux/user_namespace.h>
> > #include <net/net_namespace.h>
> >
> > #include "sysfs.h"
> > @@ -81,6 +82,7 @@ static int sysfs_fill_super(struct super
> > sb->s_root = root;
> > sb->s_fs_info = info;
> > info->tag.net_ns = hold_net(current->nsproxy->net_ns);
> > + info->tag.user_ns = current->nsproxy->user_ns;
> > return 0;
> >
> > out_err:
> > @@ -100,6 +102,8 @@ static int sysfs_test_super(struct super
> >
> > if (task->nsproxy->net_ns != info->tag.net_ns)
> > found = 0;
> > + if (task->nsproxy->user_ns != info->tag.user_ns)
> > + found = 0;
> >
> > return found;
> > }
> > @@ -214,6 +218,27 @@ static struct pernet_operations sysfs_ne
> > };
> > #endif
> >
> > +#ifdef CONFIG_USER_NS
> > +void sysfs_userns_exit(struct user_namespace *user_ns)
> > +{
> > + /* Allow the net namespace to go away while sysfs is still mounted. */
> > + struct super_block *sb;
> > + printk(KERN_NOTICE "sysfs: user namespace exiting\n");
> Debugging?
Yes, sorry.
> > + mutex_lock(&sysfs_rename_mutex);
> > + sysfs_grab_supers();
> > + mutex_lock(&sysfs_mutex);
> > + list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
> > + struct sysfs_super_info *info = sysfs_info(sb);
> > + if (info->tag.user_ns != user_ns)
> > + continue;
> > + info->tag.user_ns = NULL;
> > + }
> > + mutex_unlock(&sysfs_mutex);
> > + sysfs_release_supers();
> > + mutex_unlock(&sysfs_rename_mutex);
> > +}
> > +#endif
> > +
> > int __init sysfs_init(void)
> > {
> > int err = -ENOMEM;
> > Index: linux-mm/include/linux/sched.h
> > ===================================================================
> > --- linux-mm.orig/include/linux/sched.h
> > +++ linux-mm/include/linux/sched.h
> > @@ -598,7 +598,7 @@ struct user_struct {
> >
> > /* Hash table maintenance information */
> > struct hlist_node uidhash_node;
> > - uid_t uid;
> > + struct k_uid_t uid;
>
> If we are going to go this direction my inclination
> is to include an array of a single element in user_struct.
>
> Maybe that makes sense. I just know we need to talk about
> how a user maps into different user namespaces. As that
My thought had been that a task belongs to several user_structs, but
each user_struct belongs to just one user namespace. Maybe as you
suggest that's not the right way to go.
But are you ok with just sticking a user_namespace * in here for now,
and making it clear that the user_struct-user_namespace relation is yet
to be defined?
If not that's fine, we just won't be able to clone(CLONE_NEWUSER)
until we get the relationship straightened out.
> is a real concept that really occurs in real filesystems
> like nfsv4 and p9fs, and having infrastructure that can
> deal with the concept (even if it doesn't support it yet) would be
> useful.
I'll have to look at 9p, bc right now I don't know what you're talking
about. Then I'll move to the containers list to discuss what the
user_struct should look like.
Thanks for taking a look.
-serge
> > #endif /* _SYSFS_H_ */
> > Index: linux-mm/include/linux/types.h
> > ===================================================================
> > --- linux-mm.orig/include/linux/types.h
> > +++ linux-mm/include/linux/types.h
> > @@ -37,6 +37,11 @@ typedef __kernel_gid32_t gid_t;
> > typedef __kernel_uid16_t uid16_t;
> > typedef __kernel_gid16_t gid16_t;
> >
> > +struct k_uid_t {
> > + uid_t uid;
> > + struct user_namespace *ns;
> > +};
>
> Weird a _t suffix in a structure name. Usually _t indicates
> that something is a typedef.
>
> > +
> > typedef unsigned long uintptr_t;
> >
> > #ifdef CONFIG_UID16
> > Index: linux-mm/include/linux/user_namespace.h
> > ===================================================================
> > --- linux-mm.orig/include/linux/user_namespace.h
> > +++ linux-mm/include/linux/user_namespace.h
> > @@ -12,10 +12,15 @@
> > struct user_namespace {
> > struct kref kref;
> > struct hlist_head uidhash_table[UIDHASH_SZ];
> > + struct kset *kset;
> > struct user_struct *root_user;
> > };
> >
> > extern struct user_namespace init_user_ns;
> > +extern int register_user_ns_kobj(struct user_namespace *ns);
> > +extern void unregister_user_ns_kobj(struct user_namespace *ns);
> > +extern int register_user_ns_kobj(struct user_namespace *ns);
> > +extern void unregister_user_ns_kobj(struct user_namespace *ns);
> >
> > #ifdef CONFIG_USER_NS
> >
> > Index: linux-mm/kernel/user.c
> > ===================================================================
> > --- linux-mm.orig/kernel/user.c
> > +++ linux-mm/kernel/user.c
> > @@ -53,6 +53,10 @@ struct user_struct root_user = {
> > .files = ATOMIC_INIT(0),
> > .sigpending = ATOMIC_INIT(0),
> > .locked_shm = 0,
> > + .uid = {
> > + .uid = 0,
> > + .ns = &init_user_ns,
> > + },
> > #ifdef CONFIG_USER_SCHED
> > .tg = &init_task_group,
> > #endif
> > @@ -71,13 +75,23 @@ static void uid_hash_remove(struct user_
> > hlist_del_init(&up->uidhash_node);
> > }
> >
> > -static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
> > +int task_user_equiv(struct task_struct *tsk, struct user_struct *u)
> > +{
> > + if (tsk->uid != u->uid.uid)
> > + return 0;
> > + if (tsk->nsproxy->user_ns != u->uid.ns)
> > + return 0;
> > + return 1;
> > +}
> > +
> > +static struct user_struct *uid_hash_find(uid_t uid,
> > + struct hlist_head *hashent)
> > {
> > struct user_struct *user;
> > struct hlist_node *h;
> >
> > hlist_for_each_entry(user, h, hashent, uidhash_node) {
> > - if (user->uid == uid) {
> > + if (user->uid.uid == uid) {
> > atomic_inc(&user->__count);
> > return user;
> > }
> > @@ -236,6 +250,23 @@ static void uids_release(struct kobject
> > return;
> > }
> >
> > +static const void *userns_sb_tag(struct sysfs_tag_info *info)
> > +{
> > + return info->user_ns;
> > +}
> > +
> > +static const void *userns_kobject_tag(struct kobject *kobj)
> > +{
> > + struct user_struct *up;
> > + up = container_of(kobj, struct user_struct, kobj);
> > + return up->uid.ns;
> > +}
> > +
> > +static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = {
> > + .sb_tag = userns_sb_tag,
> > + .kobject_tag = userns_kobject_tag,
> > +};
> > +
> > static struct kobj_type uids_ktype = {
> > .sysfs_ops = &kobj_sysfs_ops,
> > .default_attrs = uids_attributes,
> > @@ -246,19 +277,19 @@ static struct kobj_type uids_ktype = {
> > static int uids_user_create(struct user_struct *up)
> > {
> > struct kobject *kobj = &up->kobj;
> > - int error;
> > + int err;
> >
> > memset(kobj, 0, sizeof(struct kobject));
> > kobj->kset = uids_kset;
> > - error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
> > - if (error) {
> > + err = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid.uid);
> > + if (err) {
> > kobject_put(kobj);
> > goto done;
> > }
> >
> > kobject_uevent(kobj, KOBJ_ADD);
> > done:
> > - return error;
> > + return err;
> > }
> >
> > /* create these entries in sysfs:
> > @@ -271,7 +302,7 @@ int __init uids_sysfs_init(void)
> > uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
> > if (!uids_kset)
> > return -ENOMEM;
> > -
> > + sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations);
> > return uids_user_create(&root_user);
> > }
> >
> > @@ -403,7 +434,8 @@ struct user_struct *alloc_uid(struct use
> > if (!new)
> > goto out_unlock;
> >
> > - new->uid = uid;
> > + new->uid.uid = uid;
> > + new->uid.ns = ns;
> > atomic_set(&new->__count, 1);
> >
> > if (sched_create_user(new) < 0)
> > Index: linux-mm/kernel/user_namespace.c
> > ===================================================================
> > --- linux-mm.orig/kernel/user_namespace.c
> > +++ linux-mm/kernel/user_namespace.c
> > @@ -22,7 +22,7 @@ static struct user_namespace *clone_user
> > struct user_struct *new_user;
> > int n;
> >
> > - ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
> > + ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL);
> > if (!ns)
> > return ERR_PTR(-ENOMEM);
> >
> > @@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref)
> > struct user_namespace *ns;
> >
> > ns = container_of(kref, struct user_namespace, kref);
> > + sysfs_userns_exit(ns);
> > release_uids(ns);
> > kfree(ns);
> > }
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 17:10 [PATCH 00/10] sysfs tagged directories Benjamin Thery
` (9 preceding siblings ...)
2008-04-29 17:11 ` [PATCH 10/10] sysfs: user namespaces: add ns to user_struct Benjamin Thery
@ 2008-04-29 17:36 ` Greg KH
2008-04-29 18:04 ` Serge E. Hallyn
2008-04-29 19:14 ` Eric W. Biederman
10 siblings, 2 replies; 30+ messages in thread
From: Greg KH @ 2008-04-29 17:36 UTC (permalink / raw)
To: Benjamin Thery
Cc: linux-kernel, Eric W. Biederman, Tejun Heo, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
On Tue, Apr 29, 2008 at 07:10:15PM +0200, Benjamin Thery wrote:
> Here is the announcement Eric wrote back in December to introduce his
> patchset:
<snip>
Are the objections that Al Viro made to this patchset when it was last
sent out addressed in this new series?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 17:36 ` [PATCH 00/10] sysfs tagged directories Greg KH
@ 2008-04-29 18:04 ` Serge E. Hallyn
2008-04-29 18:41 ` Greg KH
2008-04-29 19:14 ` Eric W. Biederman
1 sibling, 1 reply; 30+ messages in thread
From: Serge E. Hallyn @ 2008-04-29 18:04 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, linux-kernel, Eric W. Biederman, Tejun Heo,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
Quoting Greg KH (gregkh@suse.de):
> On Tue, Apr 29, 2008 at 07:10:15PM +0200, Benjamin Thery wrote:
> > Here is the announcement Eric wrote back in December to introduce his
> > patchset:
>
> <snip>
>
> Are the objections that Al Viro made to this patchset when it was last
> sent out addressed in this new series?
>
> thanks,
>
> greg k-h
Which objections were those? The last submission which I see by Eric
was http://lkml.org/lkml/2007/12/1/15 this past December. I see no
response from Al and get the feeling you were ok with them.
So my hunch would be that Eric had addressed those before that last
submission, but if not I'm sorry, and please do set me straight.
thanks,
-serge
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 18:04 ` Serge E. Hallyn
@ 2008-04-29 18:41 ` Greg KH
2008-04-29 19:34 ` Serge E. Hallyn
2008-04-29 19:35 ` Eric W. Biederman
0 siblings, 2 replies; 30+ messages in thread
From: Greg KH @ 2008-04-29 18:41 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Benjamin Thery, linux-kernel, Eric W. Biederman, Tejun Heo,
Daniel Lezcano, Pavel Emelyanov, netdev
On Tue, Apr 29, 2008 at 01:04:45PM -0500, Serge E. Hallyn wrote:
> Quoting Greg KH (gregkh@suse.de):
> > On Tue, Apr 29, 2008 at 07:10:15PM +0200, Benjamin Thery wrote:
> > > Here is the announcement Eric wrote back in December to introduce his
> > > patchset:
> >
> > <snip>
> >
> > Are the objections that Al Viro made to this patchset when it was last
> > sent out addressed in this new series?
> >
> > thanks,
> >
> > greg k-h
>
> Which objections were those? The last submission which I see by Eric
> was http://lkml.org/lkml/2007/12/1/15 this past December. I see no
> response from Al and get the feeling you were ok with them.
>
> So my hunch would be that Eric had addressed those before that last
> submission, but if not I'm sorry, and please do set me straight.
See the thread from Al starting with:
Date: Mon, 7 Jan 2008 10:24:17 +0000
From: Al Viro <viro@ZenIV.linux.org.uk>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: linux-kernel@vger.kernel.org, htejun@gmail.com,
linux-fsdevel@vger.kernel.org, gregkh@suse.de
Subject: [RFC] netns / sysfs interaction
Message-ID: <20080107072301.GW27894@ZenIV.linux.org.uk>
He had a lot of questions and objections to this way forward, and I
share those objections.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 18:41 ` Greg KH
@ 2008-04-29 19:34 ` Serge E. Hallyn
2008-04-29 20:10 ` Eric W. Biederman
` (2 more replies)
2008-04-29 19:35 ` Eric W. Biederman
1 sibling, 3 replies; 30+ messages in thread
From: Serge E. Hallyn @ 2008-04-29 19:34 UTC (permalink / raw)
To: Greg KH
Cc: Serge E. Hallyn, Benjamin Thery, linux-kernel, Al Viro,
Eric W. Biederman, Tejun Heo, Daniel Lezcano, Pavel Emelyanov,
netdev
Quoting Greg KH (gregkh@suse.de):
> On Tue, Apr 29, 2008 at 01:04:45PM -0500, Serge E. Hallyn wrote:
> > Quoting Greg KH (gregkh@suse.de):
> > > On Tue, Apr 29, 2008 at 07:10:15PM +0200, Benjamin Thery wrote:
> > > > Here is the announcement Eric wrote back in December to introduce his
> > > > patchset:
> > >
> > > <snip>
> > >
> > > Are the objections that Al Viro made to this patchset when it was last
> > > sent out addressed in this new series?
> > >
> > > thanks,
> > >
> > > greg k-h
> >
> > Which objections were those? The last submission which I see by Eric
> > was http://lkml.org/lkml/2007/12/1/15 this past December. I see no
> > response from Al and get the feeling you were ok with them.
> >
> > So my hunch would be that Eric had addressed those before that last
> > submission, but if not I'm sorry, and please do set me straight.
>
> See the thread from Al starting with:
> Date: Mon, 7 Jan 2008 10:24:17 +0000
> From: Al Viro <viro@ZenIV.linux.org.uk>
> To: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: linux-kernel@vger.kernel.org, htejun@gmail.com,
> linux-fsdevel@vger.kernel.org, gregkh@suse.de
> Subject: [RFC] netns / sysfs interaction
> Message-ID: <20080107072301.GW27894@ZenIV.linux.org.uk>
>
> He had a lot of questions and objections to this way forward, and I
> share those objections.
Ah I see it, thanks.
All Al's questions appear to be about how a task migration will be handled
in the face of funky userspace usage of sysfs files. But it seems clear the
first use of these will not be for migration but for vservers. The key
thing to remember is that we don't (as decided at kernel-summit 06) aim
to hide from userspace the fact that it's in a vserver, we just give it
what it needs so that it can pretend.
As we start implementing checkpoint and restart to effect migration,
*clearly* if we're trying to restart a task which has cwd or an open fd
in /sys/class/net/eth42/, but that directory doesn't exist on the target
machine, then the restart (and hence migrate) fails.
There was a concern about
/sys/devices/pci0000\:00/0000\:00\:0a.0/net:eth0. Since that's a
symlink to ../../../class/net/eth0, it will either point nowhere or
point to the virtualized eth0, if veth1 (or vethN) was renamed to eth0
in the container. (see below) If that is the wrong thing to do we
could try to address it in this patchset, but I suspect it is better
left until device namespace are implemented. Does that sounds sane?
The last question of Al's which went unanswered was
> Excuse me, _what_? Are you seriously suggesting going through all dentry
> trees, doing d_move() in each? I want to see your locking. It's promising
> to be worse than devfs had ever been. Much worse.
I think this is answered in patch 4. So yeah, it does d_move() in each
sysfs mount. It's all done under the sysfs_rename_mutex. Judging by
the phrasing of the question, is that not acceptable?
Finally, to give an idea about how the trees end up looking, here is
what I just did on my test box;
/usr/sbin/ip link add type veth
mount --bind /mnt /mnt
mkdir /mnt/sys
mount --make-shared /mnt
ns_exec -cmn /bin/sh # unshare netns and mounts ns
# At this point, I still see eth0 and friends under /sys/class/net etc
mount -t sysfs none /sys
# At this point, /sys/class/net has only lo0 and sit0, and
# /sys/devices/pci0000:00/0000:00:03.0/net:eth0 is a dead link
mount --bind /sys /mnt/sys
echo $$
3050
(back in another shell):
/usr/sbin/ip link set veth1 netns 3050
(back in container shell):
/usr/sbin/ip link set veth1 name eth0
# Now /sys/devices/pci0000:00/0000:00:03.0/net:eth0 is a live link to
# the /sys/class/net/eth0 which is really the original veth1
exit
ls /mnt/sys/class/net
# empty directory
thanks,
-serge
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 19:34 ` Serge E. Hallyn
@ 2008-04-29 20:10 ` Eric W. Biederman
2008-05-01 3:12 ` Greg KH
2008-05-01 3:13 ` Greg KH
2 siblings, 0 replies; 30+ messages in thread
From: Eric W. Biederman @ 2008-04-29 20:10 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Greg KH, Benjamin Thery, linux-kernel, Al Viro, Tejun Heo,
Daniel Lezcano, Pavel Emelyanov, netdev
"Serge E. Hallyn" <serue@us.ibm.com> writes:
> Ah I see it, thanks.
Serge thanks for your productive and detailed reply to this.
> The last question of Al's which went unanswered was
>
>> Excuse me, _what_? Are you seriously suggesting going through all dentry
>> trees, doing d_move() in each? I want to see your locking. It's promising
>> to be worse than devfs had ever been. Much worse.
>
> I think this is answered in patch 4. So yeah, it does d_move() in each
> sysfs mount. It's all done under the sysfs_rename_mutex. Judging by
> the phrasing of the question, is that not acceptable?
We also have to call sysfs_grab_supers to ensure none of the superblocks
we know about will go away during the rename. I believe that is the
only locking change from the current code.
Eric
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 19:34 ` Serge E. Hallyn
2008-04-29 20:10 ` Eric W. Biederman
@ 2008-05-01 3:12 ` Greg KH
2008-05-01 3:13 ` Greg KH
2 siblings, 0 replies; 30+ messages in thread
From: Greg KH @ 2008-05-01 3:12 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Benjamin Thery, linux-kernel, Al Viro, Eric W. Biederman,
Tejun Heo, Daniel Lezcano, Pavel Emelyanov, netdev
On Tue, Apr 29, 2008 at 02:34:17PM -0500, Serge E. Hallyn wrote:
> Quoting Greg KH (gregkh@suse.de):
> > On Tue, Apr 29, 2008 at 01:04:45PM -0500, Serge E. Hallyn wrote:
> > > Quoting Greg KH (gregkh@suse.de):
> > > > On Tue, Apr 29, 2008 at 07:10:15PM +0200, Benjamin Thery wrote:
> > > > > Here is the announcement Eric wrote back in December to introduce his
> > > > > patchset:
> > > >
> > > > <snip>
> > > >
> > > > Are the objections that Al Viro made to this patchset when it was last
> > > > sent out addressed in this new series?
> > > >
> > > > thanks,
> > > >
> > > > greg k-h
> > >
> > > Which objections were those? The last submission which I see by Eric
> > > was http://lkml.org/lkml/2007/12/1/15 this past December. I see no
> > > response from Al and get the feeling you were ok with them.
> > >
> > > So my hunch would be that Eric had addressed those before that last
> > > submission, but if not I'm sorry, and please do set me straight.
> >
> > See the thread from Al starting with:
> > Date: Mon, 7 Jan 2008 10:24:17 +0000
> > From: Al Viro <viro@ZenIV.linux.org.uk>
> > To: "Eric W. Biederman" <ebiederm@xmission.com>
> > Cc: linux-kernel@vger.kernel.org, htejun@gmail.com,
> > linux-fsdevel@vger.kernel.org, gregkh@suse.de
> > Subject: [RFC] netns / sysfs interaction
> > Message-ID: <20080107072301.GW27894@ZenIV.linux.org.uk>
> >
> > He had a lot of questions and objections to this way forward, and I
> > share those objections.
>
> Ah I see it, thanks.
>
> All Al's questions appear to be about how a task migration will be handled
> in the face of funky userspace usage of sysfs files. But it seems clear the
> first use of these will not be for migration but for vservers. The key
> thing to remember is that we don't (as decided at kernel-summit 06) aim
> to hide from userspace the fact that it's in a vserver, we just give it
> what it needs so that it can pretend.
>
> As we start implementing checkpoint and restart to effect migration,
> *clearly* if we're trying to restart a task which has cwd or an open fd
> in /sys/class/net/eth42/, but that directory doesn't exist on the target
> machine, then the restart (and hence migrate) fails.
>
> There was a concern about
> /sys/devices/pci0000\:00/0000\:00\:0a.0/net:eth0. Since that's a
> symlink to ../../../class/net/eth0, it will either point nowhere or
> point to the virtualized eth0, if veth1 (or vethN) was renamed to eth0
> in the container. (see below) If that is the wrong thing to do we
> could try to address it in this patchset, but I suspect it is better
> left until device namespace are implemented. Does that sounds sane?
I really don't think so, but I'll wait for the reworked patches to
review them and see how badly they mess the code up :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 19:34 ` Serge E. Hallyn
2008-04-29 20:10 ` Eric W. Biederman
2008-05-01 3:12 ` Greg KH
@ 2008-05-01 3:13 ` Greg KH
2008-05-01 15:10 ` Serge E. Hallyn
2 siblings, 1 reply; 30+ messages in thread
From: Greg KH @ 2008-05-01 3:13 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Benjamin Thery, linux-kernel, Al Viro, Eric W. Biederman,
Tejun Heo, Daniel Lezcano, Pavel Emelyanov, netdev
On Tue, Apr 29, 2008 at 02:34:17PM -0500, Serge E. Hallyn wrote:
> Finally, to give an idea about how the trees end up looking, here is
> what I just did on my test box;
>
> /usr/sbin/ip link add type veth
> mount --bind /mnt /mnt
> mkdir /mnt/sys
> mount --make-shared /mnt
> ns_exec -cmn /bin/sh # unshare netns and mounts ns
> # At this point, I still see eth0 and friends under /sys/class/net etc
> mount -t sysfs none /sys
> # At this point, /sys/class/net has only lo0 and sit0, and
> # /sys/devices/pci0000:00/0000:00:03.0/net:eth0 is a dead link
> mount --bind /sys /mnt/sys
> echo $$
> 3050
>
> (back in another shell):
> /usr/sbin/ip link set veth1 netns 3050
>
> (back in container shell):
> /usr/sbin/ip link set veth1 name eth0
> # Now /sys/devices/pci0000:00/0000:00:03.0/net:eth0 is a live link to
> # the /sys/class/net/eth0 which is really the original veth1
> exit
>
> ls /mnt/sys/class/net
> # empty directory
What does this all look like without CONFIG_SYSFS_DEPRECATED enabled,
which is what all sane distros do these days. That's going to change
the look of the tree for stuff like this a lot I think...
thanks,
greg k-h
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-01 3:13 ` Greg KH
@ 2008-05-01 15:10 ` Serge E. Hallyn
2008-05-01 18:34 ` Eric W. Biederman
0 siblings, 1 reply; 30+ messages in thread
From: Serge E. Hallyn @ 2008-05-01 15:10 UTC (permalink / raw)
To: Greg KH
Cc: Serge E. Hallyn, Benjamin Thery, linux-kernel, Al Viro,
Eric W. Biederman, Tejun Heo, Daniel Lezcano, Pavel Emelyanov,
netdev
Quoting Greg KH (gregkh@suse.de):
> On Tue, Apr 29, 2008 at 02:34:17PM -0500, Serge E. Hallyn wrote:
> > Finally, to give an idea about how the trees end up looking, here is
> > what I just did on my test box;
> >
> > /usr/sbin/ip link add type veth
> > mount --bind /mnt /mnt
> > mkdir /mnt/sys
> > mount --make-shared /mnt
> > ns_exec -cmn /bin/sh # unshare netns and mounts ns
> > # At this point, I still see eth0 and friends under /sys/class/net etc
> > mount -t sysfs none /sys
> > # At this point, /sys/class/net has only lo0 and sit0, and
> > # /sys/devices/pci0000:00/0000:00:03.0/net:eth0 is a dead link
> > mount --bind /sys /mnt/sys
> > echo $$
> > 3050
> >
> > (back in another shell):
> > /usr/sbin/ip link set veth1 netns 3050
> >
> > (back in container shell):
> > /usr/sbin/ip link set veth1 name eth0
> > # Now /sys/devices/pci0000:00/0000:00:03.0/net:eth0 is a live link to
> > # the /sys/class/net/eth0 which is really the original veth1
> > exit
> >
> > ls /mnt/sys/class/net
> > # empty directory
>
> What does this all look like without CONFIG_SYSFS_DEPRECATED enabled,
> which is what all sane distros do these days. That's going to change
> the look of the tree for stuff like this a lot I think...
>
> thanks,
>
> greg k-h
Now before moving veth1 to the new netns, we have in the container:
/sys/class/net:
lo sit0
/sys/devices/virtual/net:
lo sit0
and after moving veth1, we have in the container:
/sys/class/net:
lo sit0 veth1
/sys/devices/virtual/net:
lo sit0
In the parent network namespace, veth1 is removed from /sys/class/net
but remains in /sys/devices/virtual/net.
I'm not sure whether this is the renaming bug that Daniel Lezcano's
patch addresses. If not (as I suspect) then that clearly needs to be
fixed.
Benjamin can you play around with this and test it with Daniel's
patch?
thanks,
-serge
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-01 15:10 ` Serge E. Hallyn
@ 2008-05-01 18:34 ` Eric W. Biederman
2008-05-01 21:05 ` Serge E. Hallyn
0 siblings, 1 reply; 30+ messages in thread
From: Eric W. Biederman @ 2008-05-01 18:34 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Greg KH, Benjamin Thery, linux-kernel, Al Viro, Tejun Heo,
Daniel Lezcano, Pavel Emelyanov, netdev
"Serge E. Hallyn" <serue@us.ibm.com> writes:
> Quoting Greg KH (gregkh@suse.de):
> Now before moving veth1 to the new netns, we have in the container:
> /sys/class/net:
> lo sit0
>
> /sys/devices/virtual/net:
> lo sit0
>
> and after moving veth1, we have in the container:
>
> /sys/class/net:
> lo sit0 veth1
>
> /sys/devices/virtual/net:
> lo sit0
>
> In the parent network namespace, veth1 is removed from /sys/class/net
> but remains in /sys/devices/virtual/net.
The symlink is gone by the real directory remains?
> I'm not sure whether this is the renaming bug that Daniel Lezcano's
> patch addresses. If not (as I suspect) then that clearly needs to be
> fixed.
>
> Benjamin can you play around with this and test it with Daniel's
> patch?
Darn. It appears we have a regression in this patchset.
That part used to work.
I was thinking of blaming sysfs_rename_link. But it the
links are fine so it looks more likely that sysfs has morphed once
again and we have a reference counting issue or something similar.
Yuck. d_move and the other moves should have worked.
>From a purely get the good less controversial parts of this
patchset in. I suggest we look at patches 7/10 and 8/10 (without
the tag_ops). And introduce and start using sysfs_delete_link
and sysfs_rename_link. That code seems pretty stable and is
generally a code reduction all by itself by reducing a common
idiom into a single function.
Eric
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-01 18:34 ` Eric W. Biederman
@ 2008-05-01 21:05 ` Serge E. Hallyn
2008-05-01 21:58 ` Eric W. Biederman
0 siblings, 1 reply; 30+ messages in thread
From: Serge E. Hallyn @ 2008-05-01 21:05 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Serge E. Hallyn, Greg KH, Benjamin Thery, linux-kernel, Al Viro,
Tejun Heo, Daniel Lezcano, Pavel Emelyanov, netdev
Quoting Eric W. Biederman (ebiederm@xmission.com):
> "Serge E. Hallyn" <serue@us.ibm.com> writes:
>
> > Quoting Greg KH (gregkh@suse.de):
> > Now before moving veth1 to the new netns, we have in the container:
> > /sys/class/net:
> > lo sit0
> >
> > /sys/devices/virtual/net:
> > lo sit0
> >
> > and after moving veth1, we have in the container:
> >
> > /sys/class/net:
> > lo sit0 veth1
> >
> > /sys/devices/virtual/net:
> > lo sit0
> >
> > In the parent network namespace, veth1 is removed from /sys/class/net
> > but remains in /sys/devices/virtual/net.
>
> The symlink is gone by the real directory remains?
>
> > I'm not sure whether this is the renaming bug that Daniel Lezcano's
> > patch addresses. If not (as I suspect) then that clearly needs to be
> > fixed.
> >
> > Benjamin can you play around with this and test it with Daniel's
> > patch?
>
> Darn. It appears we have a regression in this patchset.
> That part used to work.
>
> I was thinking of blaming sysfs_rename_link. But it the
> links are fine so it looks more likely that sysfs has morphed once
> again and we have a reference counting issue or something similar.
> Yuck. d_move and the other moves should have worked.
>
> >From a purely get the good less controversial parts of this
> patchset in. I suggest we look at patches 7/10 and 8/10 (without
> the tag_ops). And introduce and start using sysfs_delete_link
> and sysfs_rename_link. That code seems pretty stable and is
> generally a code reduction all by itself by reducing a common
> idiom into a single function.
>
> Eric
Ok as it turns out Benjamin Thery does in fact have a patch to fix this.
Benjamin, please send your renaming patch out tomorrow if you can?
I guess this is why I didn't really see the problem Benjamin said there
was - it just works with SYSFS_DEPRECATED :)
-serge
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-01 21:05 ` Serge E. Hallyn
@ 2008-05-01 21:58 ` Eric W. Biederman
2008-05-02 17:42 ` Serge E. Hallyn
0 siblings, 1 reply; 30+ messages in thread
From: Eric W. Biederman @ 2008-05-01 21:58 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Greg KH, Benjamin Thery, linux-kernel, Al Viro, Tejun Heo,
Daniel Lezcano, Pavel Emelyanov, netdev
"Serge E. Hallyn" <serue@us.ibm.com> writes:
> Quoting Eric W. Biederman (ebiederm@xmission.com):
> Ok as it turns out Benjamin Thery does in fact have a patch to fix this.
> Benjamin, please send your renaming patch out tomorrow if you can?
>
> I guess this is why I didn't really see the problem Benjamin said there
> was - it just works with SYSFS_DEPRECATED :)
It will be nice to see that patch. I'm curious what the problem was.
Eric
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-01 21:58 ` Eric W. Biederman
@ 2008-05-02 17:42 ` Serge E. Hallyn
2008-05-04 23:13 ` Daniel Lezcano
0 siblings, 1 reply; 30+ messages in thread
From: Serge E. Hallyn @ 2008-05-02 17:42 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Serge E. Hallyn, Greg KH, Benjamin Thery, linux-kernel, Al Viro,
Tejun Heo, Daniel Lezcano, Pavel Emelyanov, netdev
Quoting Eric W. Biederman (ebiederm@xmission.com):
> "Serge E. Hallyn" <serue@us.ibm.com> writes:
>
> > Quoting Eric W. Biederman (ebiederm@xmission.com):
>
> > Ok as it turns out Benjamin Thery does in fact have a patch to fix this.
> > Benjamin, please send your renaming patch out tomorrow if you can?
> >
> > I guess this is why I didn't really see the problem Benjamin said there
> > was - it just works with SYSFS_DEPRECATED :)
>
> It will be nice to see that patch. I'm curious what the problem was.
>
> Eric
Here is the patch he had privately sent. As the comments indicate he's
still refining it so yell at me not him... but this on top of the
tagged dir patchset makes everything work for me.
(I'd *really* like to see network namespaces be generally usable sometime
in 2.6.26.)
thanks,
-serge
[This is a proposal to fix the issue with network devices movement
and sysfs not being updated correctly.
Please comment.
There is no locking in sysfs_tag_will_change(), this may be wrong.]
When a network device is moved between namespaces its sysfs entry isn't
updated correctly because kobject_rename() fails before sysfs_rename_dir()
is called. kobject_rename() prints a warning and returns when it detects
a kobject in the same kset already has the same name.
When moving between namespaces the device keeps its name but the sysfs
tag associated with its sysfs_dirent should be updated to reflect the fact
it now belongs to another namespace. This is done in sysfs_rename_dir().
* The simplest change we can make to "fix" kobject_rename() is to allow
a kobject to be renamed to the same name without complaining: we allow
dumb renaming. We only check the name is actually "owned" by it.
(lib/kobject.c: kobject_rename())
temp_kobj = kset_find_obj(kobj->kset, new_name);
if (temp_kobj) {
- printk(KERN_WARNING "kobject '%s' cannot be renamed "
- "to '%s' as '%s' is already in existence.\n",
- kobject_name(kobj), new_name, new_name);
+ if (kobj != temp_kobj) {
+ printk(KERN_WARNING "kobject '%s' cannot be renamed "
+ "to '%s' as '%s' is already in existence.\n",
+ kobject_name(kobj), new_name, new_name);
+ kobject_put(temp_kobj);
+ return -EINVAL;
+ }
kobject_put(temp_kobj);
- return -EINVAL;
}
* This patch goes a bit further, it allows a kobject to be renamed to
the same name only if the sysfs tag associated with it will change.
This patch applies on 2.6.25-mm1 on top of the sysfs tagged directories
patchset.
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 11 +++++++++++
include/linux/sysfs.h | 6 ++++++
lib/kobject.c | 16 +++++++++++-----
3 files changed, 28 insertions(+), 5 deletions(-)
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -909,6 +909,17 @@ err_out:
return error;
}
+int sysfs_tag_will_change(struct kobject * kobj)
+{
+ struct sysfs_dirent *sd = kobj->sd;
+ const void *old_tag, *tag;
+
+ old_tag = sysfs_dirent_tag(sd);
+ tag = sysfs_creation_tag(sd->s_parent, sd);
+
+ return old_tag != tag;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd = kobj->sd;
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -95,6 +95,7 @@ int sysfs_schedule_callback(struct kobje
int __must_check sysfs_create_dir(struct kobject *kobj);
void sysfs_remove_dir(struct kobject *kobj);
+int sysfs_tag_will_change(struct kobject *kobj);
int __must_check sysfs_rename_dir(struct kobject *kobj, const char *new_name);
int __must_check sysfs_move_dir(struct kobject *kobj,
struct kobject *new_parent_kobj);
@@ -152,6 +153,11 @@ static inline void sysfs_remove_dir(stru
{
}
+static inline int sysfs_tag_will_change(struct kobject *kobj)
+{
+ return 0;
+}
+
static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
return 0;
Index: linux-mm/lib/kobject.c
===================================================================
--- linux-mm.orig/lib/kobject.c
+++ linux-mm/lib/kobject.c
@@ -466,11 +466,17 @@ int kobject_rename(struct kobject *kobj,
struct kobject *temp_kobj;
temp_kobj = kset_find_obj(kobj->kset, new_name);
if (temp_kobj) {
- printk(KERN_WARNING "kobject '%s' cannot be renamed "
- "to '%s' as '%s' is already in existence.\n",
- kobject_name(kobj), new_name, new_name);
- kobject_put(temp_kobj);
- return -EINVAL;
+ if (temp_kobj == kobj && sysfs_tag_will_change(kobj)) {
+ /* OK: Same name, but the sysfs tag will change */
+ kobject_put(temp_kobj);
+ } else {
+ /* Even sysfs tag won't change */
+ printk(KERN_WARNING "kobject '%s' cannot be renamed "
+ "to '%s' as '%s' is already in existence.\n",
+ kobject_name(kobj), new_name, new_name);
+ kobject_put(temp_kobj);
+ return -EINVAL;
+ }
}
}
--
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups "lxc-dev" group.
To post to this group, send email to lxc-dev@googlegroups.com
To unsubscribe from this group, send email to lxc-dev-unsubscribe@googlegroups.com
For more options, visit this group at http://groups.google.com/group/lxc-dev?hl=en
-~----------~----~----~----~------~----~------~--~---
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 00/10] sysfs tagged directories
2008-05-02 17:42 ` Serge E. Hallyn
@ 2008-05-04 23:13 ` Daniel Lezcano
2008-05-05 16:18 ` Serge E. Hallyn
0 siblings, 1 reply; 30+ messages in thread
From: Daniel Lezcano @ 2008-05-04 23:13 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Eric W. Biederman, Greg KH, Benjamin Thery, linux-kernel, Al Viro,
Tejun Heo, Pavel Emelyanov, netdev
Serge E. Hallyn wrote:
> Quoting Eric W. Biederman (ebiederm@xmission.com):
>> "Serge E. Hallyn" <serue@us.ibm.com> writes:
>>
>>> Quoting Eric W. Biederman (ebiederm@xmission.com):
>>> Ok as it turns out Benjamin Thery does in fact have a patch to fix this.
>>> Benjamin, please send your renaming patch out tomorrow if you can?
>>>
>>> I guess this is why I didn't really see the problem Benjamin said there
>>> was - it just works with SYSFS_DEPRECATED :)
>> It will be nice to see that patch. I'm curious what the problem was.
>>
>> Eric
>
> Here is the patch he had privately sent. As the comments indicate he's
> still refining it so yell at me not him... but this on top of the
> tagged dir patchset makes everything work for me.
>
> (I'd *really* like to see network namespaces be generally usable sometime
> in 2.6.26.)
Serge,
I posted a fix to Dave Miller which was included in net-2.6 and merged
in 2.6.26-rc1.
http://marc.info/?l=linux-netdev&m=120959135229360&w=2
This one should fix your problem for sysfs.
There is another fix to do in order to avoid name conflicts with network
interfaces belonging to different namespaces, eg. allows to rename
veth1234 to eth0 without conflicting with the network device belonging
in init_net, I send this patch to container@ first to have it merged
with the sysfs patchset.
Thanks,
-- Daniel
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-04 23:13 ` Daniel Lezcano
@ 2008-05-05 16:18 ` Serge E. Hallyn
2008-05-06 16:53 ` Benjamin Thery
0 siblings, 1 reply; 30+ messages in thread
From: Serge E. Hallyn @ 2008-05-05 16:18 UTC (permalink / raw)
To: Daniel Lezcano
Cc: Serge E. Hallyn, Eric W. Biederman, Greg KH, Benjamin Thery,
linux-kernel, Al Viro, Tejun Heo, Pavel Emelyanov, netdev
Quoting Daniel Lezcano (dlezcano@fr.ibm.com):
> Serge E. Hallyn wrote:
>> Quoting Eric W. Biederman (ebiederm@xmission.com):
>>> "Serge E. Hallyn" <serue@us.ibm.com> writes:
>>>
>>>> Quoting Eric W. Biederman (ebiederm@xmission.com):
>>>> Ok as it turns out Benjamin Thery does in fact have a patch to fix this.
>>>> Benjamin, please send your renaming patch out tomorrow if you can?
>>>>
>>>> I guess this is why I didn't really see the problem Benjamin said there
>>>> was - it just works with SYSFS_DEPRECATED :)
>>> It will be nice to see that patch. I'm curious what the problem was.
>>>
>>> Eric
>> Here is the patch he had privately sent. As the comments indicate he's
>> still refining it so yell at me not him... but this on top of the
>> tagged dir patchset makes everything work for me.
>> (I'd *really* like to see network namespaces be generally usable sometime
>> in 2.6.26.)
>
> Serge,
>
> I posted a fix to Dave Miller which was included in net-2.6 and merged in
> 2.6.26-rc1.
Cool, I see it in gitweb, thanks.
> http://marc.info/?l=linux-netdev&m=120959135229360&w=2
>
> This one should fix your problem for sysfs.
>
> There is another fix to do in order to avoid name conflicts with network
> interfaces belonging to different namespaces, eg. allows to rename veth1234
> to eth0 without conflicting with the network device belonging in init_net,
> I send this patch to container@ first to have it merged with the sysfs
> patchset.
Ok.
Benjamin, what are you doing with Eric's sysfs patchset then? Are you
going to re-post it asap on top of whatever tree has these two patches,
or are you planning to wait longer?
thanks,
-serge
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-05-05 16:18 ` Serge E. Hallyn
@ 2008-05-06 16:53 ` Benjamin Thery
0 siblings, 0 replies; 30+ messages in thread
From: Benjamin Thery @ 2008-05-06 16:53 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Daniel Lezcano, Eric W. Biederman, Greg KH, linux-kernel, Al Viro,
Tejun Heo, Pavel Emelyanov, netdev
Serge E. Hallyn wrote:
> Quoting Daniel Lezcano (dlezcano@fr.ibm.com):
>> Serge E. Hallyn wrote:
>>> Quoting Eric W. Biederman (ebiederm@xmission.com):
>>>> "Serge E. Hallyn" <serue@us.ibm.com> writes:
>>>>
>>>>> Quoting Eric W. Biederman (ebiederm@xmission.com):
>>>>> Ok as it turns out Benjamin Thery does in fact have a patch to fix this.
>>>>> Benjamin, please send your renaming patch out tomorrow if you can?
>>>>>
>>>>> I guess this is why I didn't really see the problem Benjamin said there
>>>>> was - it just works with SYSFS_DEPRECATED :)
>>>> It will be nice to see that patch. I'm curious what the problem was.
>>>>
>>>> Eric
>>> Here is the patch he had privately sent. As the comments indicate he's
>>> still refining it so yell at me not him... but this on top of the
>>> tagged dir patchset makes everything work for me.
>>> (I'd *really* like to see network namespaces be generally usable sometime
>>> in 2.6.26.)
>> Serge,
>>
>> I posted a fix to Dave Miller which was included in net-2.6 and merged in
>> 2.6.26-rc1.
>
> Cool, I see it in gitweb, thanks.
>
>> http://marc.info/?l=linux-netdev&m=120959135229360&w=2
>>
>> This one should fix your problem for sysfs.
>>
>> There is another fix to do in order to avoid name conflicts with network
>> interfaces belonging to different namespaces, eg. allows to rename veth1234
>> to eth0 without conflicting with the network device belonging in init_net,
>> I send this patch to container@ first to have it merged with the sysfs
>> patchset.
>
> Ok.
>
> Benjamin, what are you doing with Eric's sysfs patchset then? Are you
> going to re-post it asap on top of whatever tree has these two patches,
> or are you planning to wait longer?
I will resend the patchset on top of 2.6.26-rc1,
with Daniel's last patch merged.
(If anyone prefer to have it merged on top of net-2.6, say so)
-benjamin
>
> thanks,
> -serge
>
>
--
B e n j a m i n T h e r y - BULL/DT/Open Software R&D
http://www.bull.com
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 18:41 ` Greg KH
2008-04-29 19:34 ` Serge E. Hallyn
@ 2008-04-29 19:35 ` Eric W. Biederman
1 sibling, 0 replies; 30+ messages in thread
From: Eric W. Biederman @ 2008-04-29 19:35 UTC (permalink / raw)
To: Greg KH
Cc: Serge E. Hallyn, Benjamin Thery, linux-kernel, Tejun Heo,
Daniel Lezcano, Pavel Emelyanov, netdev
Greg KH <gregkh@suse.de> writes:
> See the thread from Al starting with:
> Date: Mon, 7 Jan 2008 10:24:17 +0000
> From: Al Viro <viro@ZenIV.linux.org.uk>
> To: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: linux-kernel@vger.kernel.org, htejun@gmail.com,
> linux-fsdevel@vger.kernel.org, gregkh@suse.de
> Subject: [RFC] netns / sysfs interaction
> Message-ID: <20080107072301.GW27894@ZenIV.linux.org.uk>
>
> He had a lot of questions and objections to this way forward, and I
> share those objections.
If either of you will take those objections and see how they actually
apply to the patches I would be happy.
We are quite successfully using multiple mounts in proc for the
pid namespace with no large issues.
Similarly for the network namespace we have the same set of files
showing up in different places in /proc with no large coherency
issues despite the same files being in multiple places in the
dcache.
The only piece of the puzzle new in sysfs is directory rename
support. Which takes a little work but seems sane.
When I mentioned I was doing this Al said:
> Excuse me, _what_? Are you seriously suggesting going through all dentry
> trees, doing d_move() in each? I want to see your locking. It's promising
> to be worse than devfs had ever been. Much worse.
Odd I thought I sent Al a reply to that bold statement but it doesn't
appear in the archive.
At any rate I'm not afraid of more code review and testing.
At the time Al made those statements his concerns about coherency
and locking nightmares did not seem to apply to these patches and
they don't seem to apply now.
I will happily admit the VFS does not like to work with filesystems
where the state changes behind it's back. That is the position we are
in fundamentally with sysfs and proc, and the locking works today.
Multiple superblocks for sysfs does not change that in any significant
way.
So bring on the tough code review and concrete objections.
The code can take it and it can only get better for it.
Eric
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 00/10] sysfs tagged directories
2008-04-29 17:36 ` [PATCH 00/10] sysfs tagged directories Greg KH
2008-04-29 18:04 ` Serge E. Hallyn
@ 2008-04-29 19:14 ` Eric W. Biederman
1 sibling, 0 replies; 30+ messages in thread
From: Eric W. Biederman @ 2008-04-29 19:14 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
Greg KH <gregkh@suse.de> writes:
> On Tue, Apr 29, 2008 at 07:10:15PM +0200, Benjamin Thery wrote:
>> Here is the announcement Eric wrote back in December to introduce his
>> patchset:
>
> <snip>
>
> Are the objections that Al Viro made to this patchset when it was last
> sent out addressed in this new series?
I'm trying to recall. Al was nervous when the approach was described
to him but I don't remember him looking at specific patches and
objecting.
There was also an issue about races in sysfs between rename
and unlink that Al brought up, that causes real problems in
at least one piece of code that uses that functionality. I have been
busy so I don't know if anyone has addressed that issue. It is
independent but this patchset may make that issue slightly harder
to fix.
If the concern is Al nervousness with respect to locking order
(and that is complex) the only really sane way to fix that is
to dig into the VFS and change the lock ordering so that is
friendlier to distributed filesystems like sysfs.
This patchset does not introduce any new lock ordering issues
but it may make the existing convolutions we have to go through
to keep the dcache for existing file handles in sync with the
internal sysfs tree more visible. As of my last posting I am
not aware of any locking problems in the code itself.
Greg I had thought you had dropped the patchset simply because
you got busy. I know it languished for a long time because of
that.
Eric
^ permalink raw reply [flat|nested] 30+ messages in thread