Re: [rfc patch] how to show propagation state for mounts

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Serge E. Hallyn" <serue@us.ibm.com>
To: Miklos Szeredi <miklos@szeredi.hu>
Cc: viro@ZenIV.linux.org.uk, linuxram@us.ibm.com,
	linux-fsdevel@vger.kernel.org
Subject: Re: [rfc patch] how to show propagation state for mounts
Date: Wed, 5 Mar 2008 13:25:59 -0600	[thread overview]
Message-ID: <20080305192559.GA9702@sergelap.ibm.com> (raw)
In-Reply-To: <E1JSZAc-0001Kk-MK@pomaz-ex.szeredi.hu>

Quoting Miklos Szeredi (miklos@szeredi.hu):
> > > If you get down to it, the thing is about delegating control over part
> > > of namespace to somebody, without letting them control, see, etc. the
> > > rest of it.  So I'd rather be very conservative about extra information
> > > we allow to piggyback on that.  I don't know... perhaps with stable peer
> > > group IDs it would be OK to show peer group ID by (our) vfsmount + peer
> > > group ID of master + peer group ID of nearest dominating group that has
> > > intersection with our namespace.  Then we don't leak information (AFAICS),
> > > get full propagation information between our vfsmounts and cooperating
> > > tasks in different namespaces can figure the things out as much as possible
> > > without leaking 3rd-party information to either.
> > 
> 
> Here's a patch against current -mm implementing this (with some
> cleanups thrown in).  Done some testing on it as well, it wasn't
> entirey trivial to figure out a setup, where propagation goes out of
> the namespace first, then comes back in:

Looks nice, and a bit of testing/playing around showed no problem on my
end.

This definately would be a nice feature to have, and heck, it might
greatly simplify an LTP testcase for mounts propagation which is long
overdue.

thanks,
-serge

>   mount --bind /mnt1 /mnt1
>   mount --make-shared /mnt1
>   mount --bind /mnt2 /mnt2
>   mount --make-shared /mnt2
>   newns
>   mount --make-slave /mnt1
> 
> old ns:
>   mount --make-slave /mnt2
>   mount --bind /mnt1/tmp /mnt1/tmp
> 
> new ns:
>   mount --make-shared /mnt1/tmp
>   mount --bind /mnt1/tmp /mnt2/tmp
> 
> Voila.
> 
> 
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> ---
> 
> Index: linux/fs/pnode.c
> ===================================================================
> --- linux.orig/fs/pnode.c	2008-02-22 15:27:23.000000000 +0100
> +++ linux/fs/pnode.c	2008-02-22 15:27:26.000000000 +0100
> @@ -9,8 +9,12 @@
>  #include <linux/mnt_namespace.h>
>  #include <linux/mount.h>
>  #include <linux/fs.h>
> +#include <linux/idr.h>
>  #include "pnode.h"
> 
> +static DEFINE_SPINLOCK(mnt_pgid_lock);
> +static DEFINE_IDA(mnt_pgid_ida);
> +
>  /* return the next shared peer mount of @p */
>  static inline struct vfsmount *next_peer(struct vfsmount *p)
>  {
> @@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav
>  	return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
>  }
> 
> -static int __peer_group_id(struct vfsmount *mnt)
> +static void __set_mnt_shared(struct vfsmount *mnt)
>  {
> -	struct vfsmount *m;
> -	int id = mnt->mnt_id;
> +	mnt->mnt_flags &= ~MNT_PNODE_MASK;
> +	mnt->mnt_flags |= MNT_SHARED;
> +}
> +
> +void set_mnt_shared(struct vfsmount *mnt)
> +{
> +	int res;
> 
> -	for (m = next_peer(mnt); m != mnt; m = next_peer(m))
> -		id = min(id, m->mnt_id);
> + retry:
> +	spin_lock(&mnt_pgid_lock);
> +	if (IS_MNT_SHARED(mnt)) {
> +		spin_unlock(&mnt_pgid_lock);
> +		return;
> +	}
> 
> -	return id;
> +	res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid);
> +	spin_unlock(&mnt_pgid_lock);
> +	if (res == -EAGAIN) {
> +		if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL))
> +			goto retry;
> +	}
> +	__set_mnt_shared(mnt);
> +}
> +
> +void clear_mnt_shared(struct vfsmount *mnt)
> +{
> +	if (IS_MNT_SHARED(mnt)) {
> +		mnt->mnt_flags &= ~MNT_SHARED;
> +		mnt->mnt_pgid = -1;
> +	}
> +}
> +
> +void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt)
> +{
> +	mnt->mnt_pgid = old->mnt_pgid;
> +	list_add(&mnt->mnt_share, &old->mnt_share);
> +	__set_mnt_shared(mnt);
>  }
> 
> -/* return the smallest ID within the peer group */
>  int get_peer_group_id(struct vfsmount *mnt)
>  {
> +	return mnt->mnt_pgid;
> +}
> +
> +int get_master_id(struct vfsmount *mnt)
> +{
>  	int id;
> 
>  	spin_lock(&vfsmount_lock);
> -	id = __peer_group_id(mnt);
> +	id = get_peer_group_id(mnt->mnt_master);
>  	spin_unlock(&vfsmount_lock);
> 
>  	return id;
>  }
> 
> -/* return the smallest ID within the master's peer group */
> -int get_master_id(struct vfsmount *mnt)
> +static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt,
> +				       struct mnt_namespace *ns)
>  {
> -	int id;
> +	struct vfsmount *m = mnt;
> +
> +	do {
> +		if (m->mnt_ns == ns)
> +			return m;
> +		m = next_peer(m);
> +	} while (m != mnt);
> +
> +	return NULL;
> +}
> +
> +int get_dominator_id_same_ns(struct vfsmount *mnt)
> +{
> +	int id = -1;
> +	struct vfsmount *m;
> 
>  	spin_lock(&vfsmount_lock);
> -	id = __peer_group_id(mnt->mnt_master);
> +	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
> +		struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns);
> +		if (d) {
> +			id = d->mnt_pgid;
> +			break;
> +		}
> +	}
>  	spin_unlock(&vfsmount_lock);
> 
>  	return id;
> @@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount
>  		if (peer_mnt == mnt)
>  			peer_mnt = NULL;
>  	}
> -	list_del_init(&mnt->mnt_share);
> +	if (!list_empty(&mnt->mnt_share))
> +		list_del_init(&mnt->mnt_share);
> +	else if (IS_MNT_SHARED(mnt)) {
> +		spin_lock(&mnt_pgid_lock);
> +		ida_remove(&mnt_pgid_ida, mnt->mnt_pgid);
> +		spin_unlock(&mnt_pgid_lock);
> +	}
> 
>  	if (peer_mnt)
>  		master = peer_mnt;
> @@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount
>  		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
>  			slave_mnt->mnt_master = master;
>  		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
> -		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
> -		INIT_LIST_HEAD(&mnt->mnt_slave_list);
> +		list_splice_init(&mnt->mnt_slave_list,
> +				 master->mnt_slave_list.prev);
>  	} else {
> -		struct list_head *p = &mnt->mnt_slave_list;
> -		while (!list_empty(p)) {
> -                        slave_mnt = list_first_entry(p,
> +		while (!list_empty(&mnt->mnt_slave_list)) {
> +			slave_mnt = list_first_entry(&mnt->mnt_slave_list,
>  					struct vfsmount, mnt_slave);
>  			list_del_init(&slave_mnt->mnt_slave);
>  			slave_mnt->mnt_master = NULL;
>  		}
>  	}
>  	mnt->mnt_master = master;
> -	CLEAR_MNT_SHARED(mnt);
> -	INIT_LIST_HEAD(&mnt->mnt_slave_list);
> +	clear_mnt_shared(mnt);
>  	return 0;
>  }
> 
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c	2008-02-22 15:27:23.000000000 +0100
> +++ linux/fs/namespace.c	2008-02-22 15:27:26.000000000 +0100
> @@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char
>  			return NULL;
>  		}
> 
> +		mnt->mnt_pgid = -1;
>  		atomic_set(&mnt->mnt_count, 1);
>  		INIT_LIST_HEAD(&mnt->mnt_hash);
>  		INIT_LIST_HEAD(&mnt->mnt_child);
> @@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct
>  		if (flag & CL_SLAVE) {
>  			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
>  			mnt->mnt_master = old;
> -			CLEAR_MNT_SHARED(mnt);
> +			clear_mnt_shared(mnt);
>  		} else if (!(flag & CL_PRIVATE)) {
> -			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
> -				list_add(&mnt->mnt_share, &old->mnt_share);
> +			if (flag & CL_PROPAGATION)
> +				set_mnt_shared(old);
> +			if (IS_MNT_SHARED(old))
> +				make_mnt_peer(old, mnt);
>  			if (IS_MNT_SLAVE(old))
>  				list_add(&mnt->mnt_slave, &old->mnt_slave);
>  			mnt->mnt_master = old->mnt_master;
> @@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil
>  	show_sb_opts(m, sb);
>  	if (sb->s_op->show_options)
>  		err = sb->s_op->show_options(m, mnt);
> -	if (IS_MNT_SHARED(mnt)) {
> -		seq_printf(m, " shared:%i", get_peer_group_id(mnt));
> -		if (IS_MNT_SLAVE(mnt))
> -			seq_printf(m, ",slave:%i", get_master_id(mnt));
> -	} else if (IS_MNT_SLAVE(mnt)) {
> -		seq_printf(m, " slave:%i", get_master_id(mnt));
> +	seq_putc(m, ' ');
> +	if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
> +		if (IS_MNT_SHARED(mnt))
> +			seq_printf(m, "shared:%i", get_peer_group_id(mnt));
> +		if (IS_MNT_SLAVE(mnt)) {
> +			int dominator_id = get_dominator_id_same_ns(mnt);
> +
> +			if (IS_MNT_SHARED(mnt))
> +				seq_putc(m, ',');
> +
> +			seq_printf(m, "slave:%i", get_master_id(mnt));
> +			if (dominator_id != -1)
> +				seq_printf(m, ":%i", dominator_id);
> +		}
>  	} else if (IS_MNT_UNBINDABLE(mnt)) {
> -		seq_printf(m, " unbindable");
> +		seq_printf(m, "unbindable");
>  	} else {
> -		seq_printf(m, " private");
> +		seq_printf(m, "private");
>  	}
>  	seq_putc(m, '\n');
>  	return err;
> Index: linux/fs/pnode.h
> ===================================================================
> --- linux.orig/fs/pnode.h	2008-02-22 15:27:23.000000000 +0100
> +++ linux/fs/pnode.h	2008-02-22 15:27:26.000000000 +0100
> @@ -14,7 +14,6 @@
>  #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
>  #define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
>  #define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
> -#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
>  #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
> 
>  #define CL_EXPIRE    		0x01
> @@ -24,12 +23,9 @@
>  #define CL_PROPAGATION 		0x10
>  #define CL_PRIVATE 		0x20
> 
> -static inline void set_mnt_shared(struct vfsmount *mnt)
> -{
> -	mnt->mnt_flags &= ~MNT_PNODE_MASK;
> -	mnt->mnt_flags |= MNT_SHARED;
> -}
> -
> +void set_mnt_shared(struct vfsmount *);
> +void clear_mnt_shared(struct vfsmount *);
> +void make_mnt_peer(struct vfsmount *, struct vfsmount *);
>  void change_mnt_propagation(struct vfsmount *, int);
>  int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
>  		struct list_head *);
> @@ -37,4 +33,5 @@ int propagate_umount(struct list_head *)
>  int propagate_mount_busy(struct vfsmount *, int);
>  int get_peer_group_id(struct vfsmount *);
>  int get_master_id(struct vfsmount *);
> +int get_dominator_id_same_ns(struct vfsmount *);
>  #endif /* _LINUX_PNODE_H */
> Index: linux/include/linux/mount.h
> ===================================================================
> --- linux.orig/include/linux/mount.h	2008-02-22 15:27:23.000000000 +0100
> +++ linux/include/linux/mount.h	2008-02-22 15:27:26.000000000 +0100
> @@ -57,6 +57,7 @@ struct vfsmount {
>  	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
>  	struct mnt_namespace *mnt_ns;	/* containing namespace */
>  	int mnt_id;			/* mount identifier */
> +	int mnt_pgid;			/* peer group identifier */
>  	/*
>  	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
>  	 * to let these frequently modified fields in a separate cache line
> Index: linux/Documentation/filesystems/proc.txt
> ===================================================================
> --- linux.orig/Documentation/filesystems/proc.txt	2008-02-22 15:27:23.000000000 +0100
> +++ linux/Documentation/filesystems/proc.txt	2008-02-22 15:27:26.000000000 +0100
> @@ -2367,21 +2367,20 @@ MNTOPTS: per mount options
>  SBOPTS: per super block options
>  PROPAGATION: propagation type
> 
> -propagation type: <propagation_flag>[:<mntid>][,...]
> -	note: 'shared' flag is followed by the mntid of its peer mount
> -	      'slave' flag is followed by the mntid of its master mount
> +propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...]
> +	note: 'shared' flag is followed by the id of this mount's peer group
> +	      'slave' flag is followed by the peer group id of its master mount,
> +	      	      optionally followed by the id of the closest dominant(*)
> +		      peer group in the same namespace, if one exists.
>  	      'private' flag stands by itself
>  	      'unbindable' flag stands by itself
> 
> -The 'mntid' used in the propagation type is a canonical ID of the peer
> -group (currently the smallest ID within the group is used for this
> -purpose, but this should not be relied on).  Since mounts can be added
> -or removed from the peer group, this ID only guaranteed to stay the
> -same on a static propagation tree.
> +(*) A dominant peer group is an ancestor of this mount in the
> +propagation tree, in other words, this mount receives propagation from
> +the dominant peer group, but not the other way round.
> 
>  For more information see:
> 
>    Documentation/filesystems/sharedsubtree.txt
> 
> -
>  ------------------------------------------------------------------------------
> -
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

next prev parent reply	other threads:[~2008-03-05 19:26 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-20 15:39 how to show propagation state for mounts Miklos Szeredi
2008-02-20 16:04 ` Al Viro
2008-02-20 16:27   ` Miklos Szeredi
2008-02-20 19:29     ` Ram Pai
2008-02-20 21:14       ` Al Viro
2008-02-20 21:35         ` Miklos Szeredi
2008-02-22 14:46           ` [rfc patch] " Miklos Szeredi
2008-03-05 19:25             ` Serge E. Hallyn [this message]
2008-03-05 19:34               ` Miklos Szeredi
2008-03-05 20:23                 ` Ram Pai
2008-03-10  6:53             ` [RFC PATCH v2] vfs: optimization to /proc/<pid>/mountinfo patch Ram Pai
2008-03-10  7:10               ` Christoph Hellwig
2008-03-10 11:39                 ` Miklos Szeredi
2008-02-20 16:31   ` how to show propagation state for mounts Matthew Wilcox
2008-02-20 19:42     ` Ram Pai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080305192559.GA9702@sergelap.ibm.com \
    --to=serue@us.ibm.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linuxram@us.ibm.com \
    --cc=miklos@szeredi.hu \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.