public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* /dev/root: which approach ? [PATCH]
@ 2004-03-10 16:20 Miquel van Smoorenburg
  2004-03-10 20:01 ` Andrew Morton
  2004-03-10 20:17 ` Mika Penttilä
  0 siblings, 2 replies; 7+ messages in thread
From: Miquel van Smoorenburg @ 2004-03-10 16:20 UTC (permalink / raw)
  To: linux-kernel

Currently if you boot from a blockdevice with a dynamically
allocated major number (such as LVM or partitionable raid),
there is no way to check the root filesystem. The root
fs is still read-only, so you cannot create a device node
anywhere to point fsck at.

This was discussed on the linux-raid mailing list, and I proposed
(as proof of concept) a simple check in bdget() to see if the
device being opened is the /dev/root node and if so redirect
it to the current root device. This is an 8-line patch; the only
disadvantage I can think of is that for an open file, inode->i_rdev
is then different from blockdevice->bd_dev. Shouldn't be a problem.

Neil Brown also proposed 2 potential solutions; one is making
rootfs remountable so that you can  mount -t rootfs rootfs /mnt/root
and then fsck /mnt/root/dev/root (below as remount_rootfs.patch).

His second one is a patch that adds a /proc/pid/rootdev blockdevice,
below as proc_pid_rootdev.patch (partially rewritten by me).
This should probably be optimized to move the i_rdev resolving
out of init_proc_pid_rootdev_inode() and into open() / getattr().

My question to the FS hackers: which one is the preferred approach?


dev_root_alias.patch

--- linux-2.6.4-rc2-mm1.orig/fs/block_dev.c	2004-03-09 17:14:32.000000000 +0100
+++ linux-2.6.4-rc2-mm1/fs/block_dev.c	2004-03-10 16:39:30.000000000 +0100
@@ -338,6 +338,16 @@ struct block_device *bdget(dev_t dev)
 {
 	struct block_device *bdev;
 	struct inode *inode;
+	struct vfsmount *mnt;
+
+	/* See if device is the /dev/root alias. */
+	if (dev == MKDEV(4, 1)) {
+		read_lock(&current->fs->lock);
+		mnt = mntget(current->fs->rootmnt);
+		dev = mnt->mnt_sb->s_dev;
+		mntput(mnt);
+		read_unlock(&current->fs->lock);
+	}
 
 	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
 			bdev_test, bdev_set, &dev);


remount_rootfs.patch

--- linux/fs/ramfs/inode.c~current~	2004-03-01 11:20:58.000000000 +1100
+++ linux/fs/ramfs/inode.c	2004-03-01 11:21:15.000000000 +1100
@@ -207,7 +207,7 @@ static struct super_block *ramfs_get_sb(
 static struct super_block *rootfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
+	return get_sb_single(fs_type, flags, data, ramfs_fill_super);
 }
 
 static struct file_system_type ramfs_fs_type = {


proc_rootdev.patch

--- linux-2.6.3/fs/proc/base.c	2004-02-18 04:58:32.000000000 +0100
+++ linux-2.6.3-bk8-mdp/fs/proc/base.c	2004-03-01 15:20:22.000000000 +0100
@@ -50,6 +50,7 @@
 	PROC_TGID_MEM,
 	PROC_TGID_CWD,
 	PROC_TGID_ROOT,
+	PROC_TGID_ROOTDEV,
 	PROC_TGID_EXE,
 	PROC_TGID_FD,
 	PROC_TGID_ENVIRON,
@@ -73,6 +74,7 @@
 	PROC_TID_MEM,
 	PROC_TID_CWD,
 	PROC_TID_ROOT,
+	PROC_TID_ROOTDEV,
 	PROC_TID_EXE,
 	PROC_TID_FD,
 	PROC_TID_ENVIRON,
@@ -115,6 +117,7 @@
 	E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
 	E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
+	E(PROC_TGID_ROOTDEV,   "rootdev", S_IFBLK|S_IRUSR|S_IWUSR),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
 #ifdef CONFIG_SECURITY
@@ -137,6 +140,7 @@
 	E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
 	E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
+	E(PROC_TID_ROOTDEV,    "rootdev", S_IFBLK|S_IRUSR|S_IWUSR),
 	E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
 #ifdef CONFIG_SECURITY
@@ -771,6 +775,32 @@
 	.follow_link	= proc_pid_follow_link
 };
 
+static int init_proc_pid_rootdev_inode(struct inode *inode)
+{
+	struct fs_struct *fs;
+	struct vfsmount *vmnt;
+	int result = -ENOENT;
+	dev_t rootdev = 0;
+
+	task_lock(proc_task(inode));
+	fs = proc_task(inode)->fs;
+	if(fs)
+		atomic_inc(&fs->count);
+	task_unlock(proc_task(inode));
+	if (fs) {
+		read_lock(&fs->lock);
+		vmnt = mntget(fs->rootmnt);
+		rootdev = vmnt->mnt_sb->s_dev;
+		mntput(vmnt);
+		read_unlock(&fs->lock);
+		result = 0;
+		put_fs_struct(fs);
+	}
+	init_special_inode(inode, inode->i_mode, rootdev);
+
+	return result;
+}
+
 static int pid_alive(struct task_struct *p)
 {
 	BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
@@ -958,7 +988,9 @@
 	ei->type = ino;
 	inode->i_uid = 0;
 	inode->i_gid = 0;
-	if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
+	if (ino != PROC_TGID_ROOTDEV && ino != PROC_TID_ROOTDEV &&
+	    (ino == PROC_TGID_INO || ino == PROC_TID_INO ||
+	     task_dumpable(task))) {
 		inode->i_uid = task->euid;
 		inode->i_gid = task->egid;
 	}
@@ -988,7 +1020,10 @@
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task = proc_task(inode);
 	if (pid_alive(task)) {
-		if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
+		int ino = proc_type(inode);
+		if (ino != PROC_TGID_ROOTDEV && ino != PROC_TID_ROOTDEV &&
+		    (ino == PROC_TGID_INO || ino == PROC_TID_INO ||
+		     task_dumpable(task))) {
 			inode->i_uid = task->euid;
 			inode->i_gid = task->egid;
 		} else {
@@ -1319,6 +1354,10 @@
 			inode->i_op = &proc_pid_link_inode_operations;
 			ei->op.proc_get_link = proc_root_link;
 			break;
+		case PROC_TID_ROOTDEV:
+		case PROC_TGID_ROOTDEV:
+			init_proc_pid_rootdev_inode(inode);
+			break;
 		case PROC_TID_ENVIRON:
 		case PROC_TGID_ENVIRON:
 			inode->i_fop = &proc_info_file_operations;


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: /dev/root: which approach ? [PATCH]
  2004-03-10 16:20 /dev/root: which approach ? [PATCH] Miquel van Smoorenburg
@ 2004-03-10 20:01 ` Andrew Morton
  2004-03-10 20:18   ` Miquel van Smoorenburg
  2004-03-10 20:17 ` Mika Penttilä
  1 sibling, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2004-03-10 20:01 UTC (permalink / raw)
  To: Miquel van Smoorenburg; +Cc: linux-kernel

Miquel van Smoorenburg <miquels@cistron.nl> wrote:
>
> Currently if you boot from a blockdevice with a dynamically
> allocated major number (such as LVM or partitionable raid),
> there is no way to check the root filesystem. The root
> fs is still read-only, so you cannot create a device node
> anywhere to point fsck at.
> 
> This was discussed on the linux-raid mailinglist, and I proposed
> (as proof of concept) a simple check in bdget() to see if the
> device is being opened is the /dev/root node and if so redirect
> it to the current root device. This is a 8-line patch, the only
> disadvantage I can think of is that for an open file, inode->i_rdev
> is then different from blockdevice->bd_dev.

The /dev/root alias resolution looks nice to me, which probably means that
it has a fatal flaw.

Is it not possible to create a device node on ramfs or ramdisk and point
fsck at that?


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: /dev/root: which approach ? [PATCH]
  2004-03-10 16:20 /dev/root: which approach ? [PATCH] Miquel van Smoorenburg
  2004-03-10 20:01 ` Andrew Morton
@ 2004-03-10 20:17 ` Mika Penttilä
  2004-03-10 20:52   ` Miquel van Smoorenburg
  2004-03-10 21:01   ` Chris Wedgwood
  1 sibling, 2 replies; 7+ messages in thread
From: Mika Penttilä @ 2004-03-10 20:17 UTC (permalink / raw)
  To: Miquel van Smoorenburg; +Cc: linux-kernel


>My question to the FS hackers: which one is the preferred approach?
>
>
>dev_root_alias.patch
>
>--- linux-2.6.4-rc2-mm1.orig/fs/block_dev.c	2004-03-09 17:14:32.000000000 +0100
>+++ linux-2.6.4-rc2-mm1/fs/block_dev.c	2004-03-10 16:39:30.000000000 +0100
>@@ -338,6 +338,16 @@ struct block_device *bdget(dev_t dev)
> {
> 	struct block_device *bdev;
> 	struct inode *inode;
>+	struct vfsmount *mnt;
>+
>+	/* See if device is the /dev/root alias. */
>+	if (dev == MKDEV(4, 1)) {
>+		read_lock(&current->fs->lock);
>+		mnt = mntget(current->fs->rootmnt);
>+		dev = mnt->mnt_sb->s_dev;
>+		mntput(mnt);
>+		read_unlock(&current->fs->lock);
>+	}
> 
> 	inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
> 			bdev_test, bdev_set, &dev);
>  
>
what is this 4,1, a tty???

--Mika



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: /dev/root: which approach ? [PATCH]
  2004-03-10 20:01 ` Andrew Morton
@ 2004-03-10 20:18   ` Miquel van Smoorenburg
  0 siblings, 0 replies; 7+ messages in thread
From: Miquel van Smoorenburg @ 2004-03-10 20:18 UTC (permalink / raw)
  To: linux-kernel

In article <20040310120145.248ae62d.akpm@osdl.org>,
Andrew Morton  <akpm@osdl.org> wrote:
>Miquel van Smoorenburg <miquels@cistron.nl> wrote:
>>
>> Currently if you boot from a blockdevice with a dynamically
>> allocated major number (such as LVM or partitionable raid),
>> there is no way to check the root filesystem. The root
>> fs is still read-only, so you cannot create a device node
>> anywhere to point fsck at.
>> 
>> This was discussed on the linux-raid mailinglist, and I proposed
>> (as proof of concept) a simple check in bdget() to see if the
>> device is being opened is the /dev/root node and if so redirect
>> it to the current root device. This is a 8-line patch, the only
>> disadvantage I can think of is that for an open file, inode->i_rdev
>> is then different from blockdevice->bd_dev.
>
>The /dev/root alias resolution looks nice to me, which probably means that
>it has a fatal flaw.
>
>Is it not possible to create a device node on ramfs or ramdisk and point
>fsck at that?

Yes, I thought of that too. But that wouldn't be trivial for
existing installations, unless you're the maintainer of the
distribution's init package. Oh wait .. ;)

Anyway, it seemed to me to be very useful, and since /proc/mounts
already refers to /dev/root it seemed to fit in naturally hence
the proposed patches. If the definitive answer is "do it in
userspace" then that's OK too.

Mike.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: /dev/root: which approach ? [PATCH]
  2004-03-10 20:17 ` Mika Penttilä
@ 2004-03-10 20:52   ` Miquel van Smoorenburg
  2004-03-12  8:10     ` H. Peter Anvin
  2004-03-10 21:01   ` Chris Wedgwood
  1 sibling, 1 reply; 7+ messages in thread
From: Miquel van Smoorenburg @ 2004-03-10 20:52 UTC (permalink / raw)
  To: linux-kernel

In article <404F77F3.9070106@kolumbus.fi>,
Mika Penttilä  <mika.penttila@kolumbus.fi> wrote:
>
>>My question to the FS hackers: which one is the preferred approach?
>>
>>dev_root_alias.patch
>>
>>+	/* See if device is the /dev/root alias. */
>>+	if (dev == MKDEV(4, 1)) {
>
>what is this 4,1, a tty???

If it was a character device, yes. But it's a block device, and
major 4 isn't used yet. I just made it up; a major would need to
be allocated by LANANA of course.

Mike.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: /dev/root: which approach ? [PATCH]
  2004-03-10 20:17 ` Mika Penttilä
  2004-03-10 20:52   ` Miquel van Smoorenburg
@ 2004-03-10 21:01   ` Chris Wedgwood
  1 sibling, 0 replies; 7+ messages in thread
From: Chris Wedgwood @ 2004-03-10 21:01 UTC (permalink / raw)
  To: Mika Penttilä; +Cc: Miquel van Smoorenburg, linux-kernel

On Wed, Mar 10, 2004 at 10:17:55PM +0200, Mika Penttilä wrote:

> what is this 4,1, a tty???

/dev/tty1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: /dev/root: which approach ? [PATCH]
  2004-03-10 20:52   ` Miquel van Smoorenburg
@ 2004-03-12  8:10     ` H. Peter Anvin
  0 siblings, 0 replies; 7+ messages in thread
From: H. Peter Anvin @ 2004-03-12  8:10 UTC (permalink / raw)
  To: linux-kernel

Followup to:  <c2nv6c$j5$2@news.cistron.nl>
By author:    "Miquel van Smoorenburg" <miquels@cistron.nl>
In newsgroup: linux.dev.kernel
>
> In article <404F77F3.9070106@kolumbus.fi>,
> Mika Penttilä  <mika.penttila@kolumbus.fi> wrote:
> >
> >>My question to the FS hackers: which one is the preferred approach?
> >>
> >>dev_root_alias.patch
> >>
> >>+	/* See if device is the /dev/root alias. */
> >>+	if (dev == MKDEV(4, 1)) {
> >
> >what is this 4,1, a tty???
> 
> If it was a character device, yes. But it's a block device, and
> major 4 isn't used yet. I just made it up, a major would need to
> be allocated by LANANA ofcourse.
> 

Please contact John Cagle <device@lanana.org>.

	-hpa

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2004-03-12  8:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-03-10 16:20 /dev/root: which approach ? [PATCH] Miquel van Smoorenburg
2004-03-10 20:01 ` Andrew Morton
2004-03-10 20:18   ` Miquel van Smoorenburg
2004-03-10 20:17 ` Mika Penttilä
2004-03-10 20:52   ` Miquel van Smoorenburg
2004-03-12  8:10     ` H. Peter Anvin
2004-03-10 21:01   ` Chris Wedgwood

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox