public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] struct char_device
@ 2001-05-22 14:18 Alexander Viro
  0 siblings, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-22 14:18 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel

	Linus, patch below adds the missing half of kdev_t -
for block devices we already have a unique pointer (struct block_device *,
inode->i_bdev) and that adds a similar animal for character devices.
	That is, it adds a new structure (struct char_device) and a cache
indexed by dev_t. init_special_inode() sets ->i_cdev to corresponding
element of cache (creating it if needed).
	Result: ->i_cdev is shared by all inodes of given character device
(i.e. if we need per-device objects we can put them there), we can use
stuct char_device * as ID for character devices, we can (in 2.5) get
rid of i_rdev - it's covered by ->i_bdev and ->i_cdev now.
	Patch is pretty straightforward - cache handling is lifted from
fs/block_device, the rest is trivial.
	Please, consider applying.
							Al

diff -urN S5-pre4/fs/Makefile S5-pre4-cdev/fs/Makefile
--- S5-pre4/fs/Makefile	Thu May  3 17:13:26 2001
+++ S5-pre4-cdev/fs/Makefile	Tue May 22 09:12:11 2001
@@ -11,8 +11,8 @@
 mod-subdirs :=	nls
 
 obj-y :=	open.o read_write.o devices.o file_table.o buffer.o \
-		super.o  block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
-		ioctl.o readdir.o select.o fifo.o locks.o \
+		super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
+		fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
 		dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \
 		filesystems.o
 
diff -urN S5-pre4/fs/block_dev.c S5-pre4-cdev/fs/block_dev.c
--- S5-pre4/fs/block_dev.c	Sat May 19 22:46:35 2001
+++ S5-pre4-cdev/fs/block_dev.c	Tue May 22 08:34:44 2001
@@ -392,7 +392,7 @@
 	}
 }
 
-void __init bdev_init(void)
+void __init bdev_cache_init(void)
 {
 	int i;
 	struct list_head *head = bdev_hashtable;
diff -urN S5-pre4/fs/char_dev.c S5-pre4-cdev/fs/char_dev.c
--- S5-pre4/fs/char_dev.c	Wed Dec 31 19:00:00 1969
+++ S5-pre4-cdev/fs/char_dev.c	Tue May 22 10:03:10 2001
@@ -0,0 +1,114 @@
+/*
+ *  linux/fs/block_dev.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#define HASH_BITS	6
+#define HASH_SIZE	(1UL << HASH_BITS)
+#define HASH_MASK	(HASH_SIZE-1)
+static struct list_head cdev_hashtable[HASH_SIZE];
+static spinlock_t cdev_lock = SPIN_LOCK_UNLOCKED;
+static kmem_cache_t * cdev_cachep;
+
+#define alloc_cdev() \
+	 ((struct char_device *) kmem_cache_alloc(cdev_cachep, SLAB_KERNEL))
+#define destroy_cdev(cdev) kmem_cache_free(cdev_cachep, (cdev))
+
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct char_device * cdev = (struct char_device *) foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+	{
+		memset(cdev, 0, sizeof(*cdev));
+		sema_init(&cdev->sem, 1);
+	}
+}
+
+void __init cdev_cache_init(void)
+{
+	int i;
+	struct list_head *head = cdev_hashtable;
+
+	i = HASH_SIZE;
+	do {
+		INIT_LIST_HEAD(head);
+		head++;
+		i--;
+	} while (i);
+
+	cdev_cachep = kmem_cache_create("cdev_cache",
+					 sizeof(struct char_device),
+					 0, SLAB_HWCACHE_ALIGN, init_once,
+					 NULL);
+	if (!cdev_cachep)
+		panic("Cannot create cdev_cache SLAB cache");
+}
+
+/*
+ * Most likely _very_ bad one - but then it's hardly critical for small
+ * /dev and can be fixed when somebody will need really large one.
+ */
+static inline unsigned long hash(dev_t dev)
+{
+	unsigned long tmp = dev;
+	tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
+	return tmp & HASH_MASK;
+}
+
+static struct char_device *cdfind(dev_t dev, struct list_head *head)
+{
+	struct list_head *p;
+	struct char_device *cdev;
+	for (p=head->next; p!=head; p=p->next) {
+		cdev = list_entry(p, struct char_device, hash);
+		if (cdev->dev != dev)
+			continue;
+		atomic_inc(&cdev->count);
+		return cdev;
+	}
+	return NULL;
+}
+
+struct char_device *cdget(dev_t dev)
+{
+	struct list_head * head = cdev_hashtable + hash(dev);
+	struct char_device *cdev, *new_cdev;
+	spin_lock(&cdev_lock);
+	cdev = cdfind(dev, head);
+	spin_unlock(&cdev_lock);
+	if (cdev)
+		return cdev;
+	new_cdev = alloc_cdev();
+	if (!new_cdev)
+		return NULL;
+	atomic_set(&new_cdev->count,1);
+	new_cdev->dev = dev;
+	spin_lock(&cdev_lock);
+	cdev = cdfind(dev, head);
+	if (!cdev) {
+		list_add(&new_cdev->hash, head);
+		spin_unlock(&cdev_lock);
+		return new_cdev;
+	}
+	spin_unlock(&cdev_lock);
+	destroy_cdev(new_cdev);
+	return cdev;
+}
+
+void cdput(struct char_device *cdev)
+{
+	if (atomic_dec_and_test(&cdev->count)) {
+		spin_lock(&cdev_lock);
+		list_del(&cdev->hash);
+		spin_unlock(&cdev_lock);
+		destroy_cdev(cdev);
+	}
+}
+
diff -urN S5-pre4/fs/dcache.c S5-pre4-cdev/fs/dcache.c
--- S5-pre4/fs/dcache.c	Sat Apr 28 02:12:56 2001
+++ S5-pre4-cdev/fs/dcache.c	Tue May 22 09:22:43 2001
@@ -1250,6 +1250,9 @@
 kmem_cache_t *bh_cachep;
 EXPORT_SYMBOL(bh_cachep);
 
+extern void bdev_cache_init(void);
+extern void cdev_cache_init(void);
+
 void __init vfs_caches_init(unsigned long mempages)
 {
 	bh_cachep = kmem_cache_create("buffer_head",
@@ -1279,4 +1282,7 @@
 #endif
 
 	dcache_init(mempages);
+	inode_init(mempages);
+	bdev_cache_init();
+	cdev_cache_init();
 }
diff -urN S5-pre4/fs/devfs/base.c S5-pre4-cdev/fs/devfs/base.c
--- S5-pre4/fs/devfs/base.c	Sat May 19 22:46:35 2001
+++ S5-pre4-cdev/fs/devfs/base.c	Tue May 22 08:51:03 2001
@@ -2256,6 +2256,7 @@
     {
 	inode->i_rdev = MKDEV (de->u.fcb.u.device.major,
 			       de->u.fcb.u.device.minor);
+	inode->i_cdev = cdget (kdev_t_to_nr(inode->i_rdev));
     }
     else if ( S_ISBLK (de->inode.mode) )
     {
diff -urN S5-pre4/fs/devices.c S5-pre4-cdev/fs/devices.c
--- S5-pre4/fs/devices.c	Fri Feb 16 19:00:19 2001
+++ S5-pre4-cdev/fs/devices.c	Tue May 22 08:31:04 2001
@@ -203,6 +203,7 @@
 	if (S_ISCHR(mode)) {
 		inode->i_fop = &def_chr_fops;
 		inode->i_rdev = to_kdev_t(rdev);
+		inode->i_cdev = cdget(rdev);
 	} else if (S_ISBLK(mode)) {
 		inode->i_fop = &def_blk_fops;
 		inode->i_rdev = to_kdev_t(rdev);
diff -urN S5-pre4/fs/inode.c S5-pre4-cdev/fs/inode.c
--- S5-pre4/fs/inode.c	Sat May 19 22:46:35 2001
+++ S5-pre4-cdev/fs/inode.c	Mon May 21 22:49:48 2001
@@ -497,6 +497,10 @@
 		bdput(inode->i_bdev);
 		inode->i_bdev = NULL;
 	}
+	if (inode->i_cdev) {
+		cdput(inode->i_cdev);
+		inode->i_cdev = NULL;
+	}
 	inode->i_state = I_CLEAR;
 }
 
@@ -750,6 +754,7 @@
 	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
 	inode->i_pipe = NULL;
 	inode->i_bdev = NULL;
+	inode->i_cdev = NULL;
 	inode->i_data.a_ops = &empty_aops;
 	inode->i_data.host = inode;
 	inode->i_data.gfp_mask = GFP_HIGHUSER;
diff -urN S5-pre4/include/linux/fs.h S5-pre4-cdev/include/linux/fs.h
--- S5-pre4/include/linux/fs.h	Sat May 19 22:46:36 2001
+++ S5-pre4-cdev/include/linux/fs.h	Tue May 22 09:14:25 2001
@@ -384,6 +384,14 @@
 	int			gfp_mask;	/* how to allocate the pages */
 };
 
+struct char_device {
+	struct list_head	hash;
+	atomic_t		count;
+	dev_t			dev;
+	atomic_t		openers;
+	struct semaphore	sem;
+};
+
 struct block_device {
 	struct list_head	bd_hash;
 	atomic_t		bd_count;
@@ -426,8 +434,10 @@
 	struct address_space	*i_mapping;
 	struct address_space	i_data;	
 	struct dquot		*i_dquot[MAXQUOTAS];
+	/* These three should probably be a union */
 	struct pipe_inode_info	*i_pipe;
 	struct block_device	*i_bdev;
+	struct char_device	*i_cdev;
 
 	unsigned long		i_dnotify_mask; /* Directory notify events */
 	struct dnotify_struct	*i_dnotify; /* for directory notifications */
@@ -982,6 +992,8 @@
 extern int unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
 extern void bdput(struct block_device *);
+extern struct char_device *cdget(dev_t);
+extern void cdput(struct char_device *);
 extern int blkdev_open(struct inode *, struct file *);
 extern struct file_operations def_blk_fops;
 extern struct file_operations def_fifo_fops;
diff -urN S5-pre4/init/main.c S5-pre4-cdev/init/main.c
--- S5-pre4/init/main.c	Sat May 19 22:46:36 2001
+++ S5-pre4-cdev/init/main.c	Tue May 22 08:34:09 2001
@@ -93,7 +93,6 @@
 extern void ppc_init(void);
 extern void sysctl_init(void);
 extern void signals_init(void);
-extern void bdev_init(void);
 extern int init_pcmcia_ds(void);
 extern void net_notifier_init(void);
 
@@ -569,8 +568,6 @@
 	ccwcache_init();
 #endif
 	signals_init();
-	bdev_init();
-	inode_init(mempages);
 #ifdef CONFIG_PROC_FS
 	proc_root_init();
 #endif
diff -urN S5-pre4/kernel/ksyms.c S5-pre4-cdev/kernel/ksyms.c
--- S5-pre4/kernel/ksyms.c	Sat May 19 22:46:37 2001
+++ S5-pre4-cdev/kernel/ksyms.c	Tue May 22 09:06:47 2001
@@ -186,6 +186,8 @@
 EXPORT_SYMBOL(notify_change);
 EXPORT_SYMBOL(set_blocksize);
 EXPORT_SYMBOL(getblk);
+EXPORT_SYMBOL(cdget);
+EXPORT_SYMBOL(cdput);
 EXPORT_SYMBOL(bdget);
 EXPORT_SYMBOL(bdput);
 EXPORT_SYMBOL(bread);


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-22 14:40 Tommy Hallgren
  0 siblings, 0 replies; 55+ messages in thread
From: Tommy Hallgren @ 2001-05-22 14:40 UTC (permalink / raw)
  To: viro, linux-kernel

Hi Alexander!

If I'm not entirely mistaken, this:

+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 
+ SLAB_CTOR_CONSTRUCTOR) 
+ { 
+ memset(cdev, 0, sizeof(*cdev)); 
+ sema_init(&cdev->sem, 1); 
+ } 
+} 

could be replaced with this:

+ if ((flags & SLAB_CTOR_CONSTRUCTOR) == SLAB_CTOR_CONSTRUCTOR) 
+ { 
+ memset(cdev, 0, sizeof(*cdev)); 
+ sema_init(&cdev->sem, 1); 
+ } 
+} 

Regards, Tommy


__________________________________________________
Do You Yahoo!?
Yahoo! Auctions - buy the things you want at great prices
http://auctions.yahoo.com/

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
       [not found] <Pine.LNX.4.10.10105221050080.8984-100000@coffee.psychology.mcmaster.ca>
@ 2001-05-22 14:59 ` Tommy Hallgren
  0 siblings, 0 replies; 55+ messages in thread
From: Tommy Hallgren @ 2001-05-22 14:59 UTC (permalink / raw)
  To: Mark Hahn, linux-kernel

Yes, I thought about it again, and you are right. Sorry for the noise.

Regards, Tommy

--- Mark Hahn <hahn@coffee.psychology.mcmaster.ca> wrote:
> > + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 
> > + SLAB_CTOR_CONSTRUCTOR) 
> ...
> > + if ((flags & SLAB_CTOR_CONSTRUCTOR) == SLAB_CTOR_CONSTRUCTOR) 
> 
> consider whether flags contains SLAB_CTOR_VERIFY.  in the first case,
> it must be zero.  in the second case, it's ignored.
> 


__________________________________________________
Do You Yahoo!?
Yahoo! Auctions - buy the things you want at great prices
http://auctions.yahoo.com/

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
       [not found] <Pine.GSO.4.21.0105221007460.15685-100000@weyl.math.psu.edu >
@ 2001-05-22 15:26 ` Anton Altaparmakov
  2001-05-22 16:08   ` Oliver Xymoron
  0 siblings, 1 reply; 55+ messages in thread
From: Anton Altaparmakov @ 2001-05-22 15:26 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Linus Torvalds, linux-kernel

Hello,

At 15:18 22/05/01, Alexander Viro wrote:
[snip cool stuff]
>diff -urN S5-pre4/include/linux/fs.h S5-pre4-cdev/include/linux/fs.h
>--- S5-pre4/include/linux/fs.h  Sat May 19 22:46:36 2001
>+++ S5-pre4-cdev/include/linux/fs.h     Tue May 22 09:14:25 2001
>@@ -384,6 +384,14 @@
>         int                     gfp_mask;       /* how to allocate the 
> pages */
>  };
>
>+struct char_device {
>+       struct list_head        hash;
>+       atomic_t                count;
>+       dev_t                   dev;
>+       atomic_t                openers;
>+       struct semaphore        sem;

Why not name consistently with the struct block_device?
I.e.:
struct char_device {
         struct list_head        cd_hash;
         atomic_t                cd_count;
         dev_t                   cd_dev;
         atomic_t                cd_openers;
         struct semaphore        cd_sem;
};

>@@ -426,8 +434,10 @@
>         struct address_space    *i_mapping;
>         struct address_space    i_data;
>         struct dquot            *i_dquot[MAXQUOTAS];
>+       /* These three should probably be a union */
>         struct pipe_inode_info  *i_pipe;
>         struct block_device     *i_bdev;
>+       struct char_device      *i_cdev;

You could then use an (perhaps even unnamed if we require a high enough gcc 
version) union in the inode structure so you don't waste space having a 
pointer to both c_dev and b_dev, such as:

         union {
                 struct block_device     *i_bdev;
                 struct char_device      *i_cdev;
         };

It isn't possible that both i_bdev and i_cdev are used at the same time, is 
it? If it was my suggestion is obviously bogus...

Any reasons why my proposal would be a bad idea?

Just IMVHO.

Best regards,

Anton


-- 
   "Nothing succeeds like success." - Alexandre Dumas
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Linux NTFS Maintainer / WWW: http://sf.net/projects/linux-ntfs/
ICQ: 8561279 / WWW: http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 15:26 ` Anton Altaparmakov
@ 2001-05-22 16:08   ` Oliver Xymoron
  2001-05-22 16:12     ` Alexander Viro
  2001-05-22 19:22     ` Guest section DW
  0 siblings, 2 replies; 55+ messages in thread
From: Oliver Xymoron @ 2001-05-22 16:08 UTC (permalink / raw)
  To: Anton Altaparmakov; +Cc: Alexander Viro, Linus Torvalds, linux-kernel

On Tue, 22 May 2001, Anton Altaparmakov wrote:

> Hello,
>
> At 15:18 22/05/01, Alexander Viro wrote:
> [snip cool stuff]
> >+struct char_device {
> >+       struct list_head        hash;
> >+       atomic_t                count;
> >+       dev_t                   dev;
> >+       atomic_t                openers;
> >+       struct semaphore        sem;
>
> Why not name consistently with the struct block_device?
> I.e.:
> struct char_device {
>          struct list_head        cd_hash;
>          atomic_t                cd_count;
>          dev_t                   cd_dev;
>          atomic_t                cd_openers;
>          struct semaphore        cd_sem;
> };

Because foo_ is a throwback to the days when C compilers had a single
namespace for all structure elements, not a readability aid. If you need
foo_ to know what type of structure you're futzing with, you need to name
your variables better.

--
 "Love the dolphins," she advised him. "Write by W.A.S.T.E.."


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 16:08   ` Oliver Xymoron
@ 2001-05-22 16:12     ` Alexander Viro
  2001-05-22 17:30       ` Oliver Xymoron
  2001-05-22 19:22     ` Guest section DW
  1 sibling, 1 reply; 55+ messages in thread
From: Alexander Viro @ 2001-05-22 16:12 UTC (permalink / raw)
  To: Oliver Xymoron; +Cc: Anton Altaparmakov, Linus Torvalds, linux-kernel



On Tue, 22 May 2001, Oliver Xymoron wrote:

> Because foo_ is a throwback to the days when C compilers had a single
> namespace for all structure elements, not a readability aid. If you need
> foo_ to know what type of structure you're futzing with, you need to name
> your variables better.

Not always. If the thing is used all over the tree, it'd better be
greppable. I hate the foo->de stuff - it's not localized and it's
impossible to grep for.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 16:12     ` Alexander Viro
@ 2001-05-22 17:30       ` Oliver Xymoron
  2001-05-22 17:41         ` Alexander Viro
  0 siblings, 1 reply; 55+ messages in thread
From: Oliver Xymoron @ 2001-05-22 17:30 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Anton Altaparmakov, Linus Torvalds, linux-kernel

On Tue, 22 May 2001, Alexander Viro wrote:

> On Tue, 22 May 2001, Oliver Xymoron wrote:
>
> > Because foo_ is a throwback to the days when C compilers had a single
> > namespace for all structure elements, not a readability aid. If you need
> > foo_ to know what type of structure you're futzing with, you need to name
> > your variables better.
>
> Not always. If the thing is used all over the tree, it'd better be
> greppable. I hate the foo->de stuff - it's not localized and it's
> impossible to grep for.

I'd like to say the compiler will happily find it for you, but the
kernel's mostly conditionally compiled..

Have you tried something like:

 find -type f | xargs grep -A100 'struct_name' | grep -e '\belement\b'

Obviously not a fix, but possibly helpful.

--
 "Love the dolphins," she advised him. "Write by W.A.S.T.E.."




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 17:30       ` Oliver Xymoron
@ 2001-05-22 17:41         ` Alexander Viro
  0 siblings, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-22 17:41 UTC (permalink / raw)
  To: Oliver Xymoron; +Cc: Anton Altaparmakov, Linus Torvalds, linux-kernel



On Tue, 22 May 2001, Oliver Xymoron wrote:

> > Not always. If the thing is used all over the tree, it'd better be
> > greppable. I hate the foo->de stuff - it's not localized and it's
> > impossible to grep for.
> 
> I'd like to say the compiler will happily find it for you, but the
> kernel's mostly conditionally compiled..
> 
> Have you tried something like:
> 
>  find -type f | xargs grep -A100 'struct_name' | grep -e '\belement\b'
> 
> Obviously not a fix, but possibly helpful.

Too many false negatives. And you apparently missed a part of fun -
in that case there's a certain TLD...


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 16:08   ` Oliver Xymoron
  2001-05-22 16:12     ` Alexander Viro
@ 2001-05-22 19:22     ` Guest section DW
  2001-05-22 19:25       ` Alexander Viro
  2001-05-22 19:38       ` Oliver Xymoron
  1 sibling, 2 replies; 55+ messages in thread
From: Guest section DW @ 2001-05-22 19:22 UTC (permalink / raw)
  To: Oliver Xymoron, Anton Altaparmakov
  Cc: Alexander Viro, Linus Torvalds, linux-kernel

On Tue, May 22, 2001 at 11:08:16AM -0500, Oliver Xymoron wrote:

> > >+       struct list_head        hash;

> > Why not name consistently with the struct block_device?
> >          struct list_head        cd_hash;

> Because foo_ is a throwback to the days when C compilers had a single
> namespace for all structure elements, not a readability aid. If you need
> foo_ to know what type of structure you're futzing with, you need to name
> your variables better.

One often has to go through all occurrences of a variable or
field of a struct. That is much easier with cd_hash and cd_dev
than with hash and dev.

No, it is a good habit, these prefixes, even though it is no longer
necessary because of the C compiler. 

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 19:22     ` Guest section DW
@ 2001-05-22 19:25       ` Alexander Viro
  2001-05-22 19:38       ` Oliver Xymoron
  1 sibling, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-22 19:25 UTC (permalink / raw)
  To: Guest section DW
  Cc: Oliver Xymoron, Anton Altaparmakov, Linus Torvalds, linux-kernel



On Tue, 22 May 2001, Guest section DW wrote:

> One often has to go through all occurrences of a variable or
> field of a struct. That is much easier with cd_hash and cd_dev
> than with hash and dev.
> 
> No, it is a good habit, these prefixes, even though it is no longer
> necessary because of the C compiler. 

True, except for the stuff that should remain local.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 19:22     ` Guest section DW
  2001-05-22 19:25       ` Alexander Viro
@ 2001-05-22 19:38       ` Oliver Xymoron
  1 sibling, 0 replies; 55+ messages in thread
From: Oliver Xymoron @ 2001-05-22 19:38 UTC (permalink / raw)
  To: Guest section DW; +Cc: Anton Altaparmakov, Alexander Viro, linux-kernel

On Tue, 22 May 2001, Guest section DW wrote:

> On Tue, May 22, 2001 at 11:08:16AM -0500, Oliver Xymoron wrote:
>
> > > >+       struct list_head        hash;
>
> > > Why not name consistently with the struct block_device?
> > >          struct list_head        cd_hash;
>
> > Because foo_ is a throwback to the days when C compilers had a single
> > namespace for all structure elements, not a readability aid. If you need
> > foo_ to know what type of structure you're futzing with, you need to name
> > your variables better.
>
> One often has to go through all occurrences of a variable or
> field of a struct. That is much easier with cd_hash and cd_dev
> than with hash and dev.
>
> No, it is a good habit, these prefixes, even though it is no longer
> necessary because of the C compiler.

A better habit is encapsulating your data structures well enough that the
entire kernel doesn't feel the need to go digging through them. The fact
that you have to change many widely-scattered instances of something
points to bad modularity. Supporting that practice with verbose naming is
not doing yourself a favor in the long run.

If you must, use accessor functions instead. At best you'll be able to
make sweeping semantic changes in one spot. At worst, you'll be able to
grep for it.

--
 "Love the dolphins," she advised him. "Write by W.A.S.T.E.."



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-22 19:52 Andries.Brouwer
  2001-05-22 20:10 ` Alexander Viro
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-22 19:52 UTC (permalink / raw)
  To: torvalds, viro; +Cc: linux-kernel

Alexander Viro writes:

> patch below adds the missing half of kdev_t -
> for block devices we already have a unique pointer
> and that adds a similar animal for character devices.

Very good.
(Of course I did precisely the same, but am a bit slower in
submitting things during a stable series or code freeze.)

One remark, repeating what I wrote on some web page:
-----
A struct block_device provides the connection between a device number
and a struct block_device_operations. 
...
Clearly, we also want to associate a struct char_device_operations
to a character device number. When we do this, all bdev code will
have to be duplicated for cdev, so there seems no point in having
bdev code - kdev, for both bdev and cdev, seems more elegant. 
-----

And a second remark: don't forget that presently the point where
bdev is introduced is not quite right. We must only introduce it
when we really have a device, not when there only is a device
number (like on a mknod call).

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 19:52 Andries.Brouwer
@ 2001-05-22 20:10 ` Alexander Viro
  0 siblings, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-22 20:10 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: torvalds, linux-kernel



On Tue, 22 May 2001 Andries.Brouwer@cwi.nl wrote:

> One remark, repeating what I wrote on some web page:
> -----
> A struct block_device provides the connection between a device number
> and a struct block_device_operations. 
> ...
> Clearly, we also want to associate a struct char_device_operations
> to a character device number. When we do this, all bdev code will

They are entirely different. Too different sets of operations.

> have to be duplicated for cdev, so there seems no point in having
> bdev code - kdev, for both bdev and cdev, seems more elegant. 
> -----

Not really. For struct block_device you have partitioning stuff sitting
nearby. It should be handled by generic code. Nothing of that kind for
character devices.

But the main point is that for block devices ->read() and ->write() do
not belong to the natural interface. Request queue does. They are about
as far from each other as from FIFOs and sockets.

> And a second remark: don't forget that presently the point where
> bdev is introduced is not quite right. We must only introduce it
> when we really have a device, not when there only is a device
> number (like on a mknod call).

That's simply wrong. kdev_t is used for unopened objects quite often.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-22 20:54 Andries.Brouwer
  2001-05-22 21:17 ` Martin Dalecki
  2001-05-22 22:37 ` Linus Torvalds
  0 siblings, 2 replies; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-22 20:54 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel, torvalds

> They are entirely different. Too different sets of operations.

Maybe you didnt understand what I meant.
both bdev and cdev take care of the correspondence
device number <---> struct with operations.

The operations are different, but all bdev/cdev code is identical.

So the choice is between two uglies:
(i) have some not entirely trivial amount of code twice in the kernel
(ii) have a union at the point where the struct operations
is assigned.

I preferred the union.

>> And a second remark: don't forget that presently the point where
>> bdev is introduced is not quite right. We must only introduce it
>> when we really have a device, not when there only is a device
>> number (like on a mknod call).

> That's simply wrong. kdev_t is used for unopened objects quite often.

Yes, but that was my design mistake in 1995.
I think you'll find if you continue on this way,
as I found and already wrote in kdev_t.h
that it is bad to carry pointers around for unopened and unknown devices.

So, I think that the setup must be changed a tiny little bit
and distinguish meaningless numbers from devices.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 20:54 Andries.Brouwer
@ 2001-05-22 21:17 ` Martin Dalecki
  2001-05-22 22:37 ` Linus Torvalds
  1 sibling, 0 replies; 55+ messages in thread
From: Martin Dalecki @ 2001-05-22 21:17 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: viro, linux-kernel, torvalds

Andries.Brouwer@cwi.nl wrote:
> 
> > They are entirely different. Too different sets of operations.
> 
> Maybe you didnt understand what I meant.
> both bdev and cdev take care of the correspondence
> device number <---> struct with operations.
> 
> The operations are different, but all bdev/cdev code is identical.
> 
> So the choice is between two uglies:
> (i) have some not entirely trivial amount of code twice in the kernel
> (ii) have a union at the point where the struct operations
> is assigned.
> 
> I preferred the union.
> 
> >> And a second remark: don't forget that presently the point where
> >> bdev is introduced is not quite right. We must only introduce it
> >> when we really have a device, not when there only is a device
> >> number (like on a mknod call).
> 
> > That's simply wrong. kdev_t is used for unopened objects quite often.
> 
> Yes, but that was my design mistake in 1995.

I fully agree with you. Most of the places where there kernel is passing
kdev_t
would be entierly satisfied with only the knowlendge of the minor number
used to
distinguish between different device ranges, which is BTW an abuse by
itself as well
since minors where for encounters of instances of similiar devices in
linux...
The places where this is the case are namely:

1. literally: all character devices.

2. The whole scsi stuff.

3. most of the ide stuff.

4. md/lvm and similiar culprits.

I did "discover" this by splitting the i_dev field from stuct inode
into explicit i_minor and i_major fields and then actually "fixing" my
particular kernel configuration until it worked again. This was
*very* insigtfull, since it discovered all the places where kdev_t get's
used, where it shouldn't be of any need anylonger anyway.

The remaining places where kdev_t comes into sight are mostly
the places where the kernel is mounting the initial root
device.

In case you would like to have a look at the resulting bit huge
patch I can send it to you...

> I think you'll find if you continue on this way,
> as I found and already wrote in kdev_t.h
> that it is bad to carry pointers around for unopened and unknown devices.
> 
> So, I think that the setup must be changed a tiny little bit
> and distinguish meaningless numbers from devices.
> 
> Andries
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
- phone: +49 214 8656 283
- job:   eVision-Ventures AG, LEV .de (MY OPINIONS ARE MY OWN!)
- langs: de_DE.ISO8859-1, en_US, pl_PL.ISO8859-2, last ressort:
ru_RU.KOI8-R

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-22 21:35 Andries.Brouwer
  2001-05-22 22:00 ` Martin Dalecki
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-22 21:35 UTC (permalink / raw)
  To: Andries.Brouwer, dalecki; +Cc: linux-kernel, torvalds, viro

Martin Dalecki writes:

> I fully agree with you.

Good.

Unfortunately I do not fully agree with you.

> Most of the places where there kernel is passing kdev_t
> would be entierly satisfied with only the knowlendge of
> the minor number.

My kdev_t is a pointer to a structure with device data
and device operations. Among the operations a routine
that produces a name. Among the data, in the case of a
block device, the size, and the sectorsize, ..

A minor gives no name, and no data.

Linus' minor is 20-bit if I recall correctly.
My minor is 32-bit. Neither of the two can be
used to index arrays.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 21:35 Andries.Brouwer
@ 2001-05-22 22:00 ` Martin Dalecki
  0 siblings, 0 replies; 55+ messages in thread
From: Martin Dalecki @ 2001-05-22 22:00 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel, torvalds, viro

Andries.Brouwer@cwi.nl wrote:
> 
> Martin Dalecki writes:
> 
> > I fully agree with you.
> 
> Good.
> 
> Unfortunately I do not fully agree with you.
> 
> > Most of the places where there kernel is passing kdev_t
> > would be entierly satisfied with only the knowlendge of
> > the minor number.
> 
> My kdev_t is a pointer to a structure with device data
> and device operations. Among the operations a routine
> that produces a name. Among the data, in the case of a
> block device, the size, and the sectorsize, ..
> 
> A minor gives no name, and no data.
> 
> Linus' minor is 20-bit if I recall correctly.
> My minor is 32-bit. Neither of the two can be
> used to index arrays.

Erm... I wasn't talking about the DESIRED state of affairs!
I was talking about the CURRENT state of affairs. OK?
The fact still remains that most of the places which a have pointed
out just need the minor nibble of whatever bits you pass to them.

Apparently nobody on this list here blabbering about a new improved
minor/major space didn't actually take the time and looked into
all those places where the kernel is CURRENTLY replying in minor/major
array enumeration. They are TON's of them. The most ugly are RAID
drivers
an all those MD LVW and whatever stuff as well as abused minor number
spaces as replacements of differnt majors.

At least you have admitted that you where the one responsible for
the design of this MESS.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-22 22:17 Andries.Brouwer
  2001-05-22 22:34 ` Martin Dalecki
  2001-05-22 22:47 ` Martin Dalecki
  0 siblings, 2 replies; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-22 22:17 UTC (permalink / raw)
  To: Andries.Brouwer, dalecki; +Cc: linux-kernel, torvalds, viro

Martin Dalecki writes:

> Erm... I wasn't talking about the DESIRED state of affairs!
> I was talking about the CURRENT state of affairs. OK?

Oh, but in 1995 it was quite possible to compile the kernel
with kdev_t a pointer type, and I have done it several times since.

The kernel keeps growing, so each time it is more work than
the previous time.

> At least you have admitted that you where the one responsible
> for the design of this MESS.

Thank you! However, you give me too much honour.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 22:17 Andries.Brouwer
@ 2001-05-22 22:34 ` Martin Dalecki
  2001-05-22 22:47 ` Martin Dalecki
  1 sibling, 0 replies; 55+ messages in thread
From: Martin Dalecki @ 2001-05-22 22:34 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel, torvalds, viro

Andries.Brouwer@cwi.nl wrote:
> 
> Martin Dalecki writes:
> 
> > Erm... I wasn't talking about the DESIRED state of affairs!
> > I was talking about the CURRENT state of affairs. OK?
> 
> Oh, but in 1995 it was quite possible to compile the kernel
> with kdev_t a pointer type, and I have done it several times since.

Yes I remember but unfortunately some big L* did ignore
your *fine* efforts entierly in favour of developing 
/proc and /dev/random and other crap maybe?

> The kernel keeps growing, so each time it is more work than
> the previous time.
> 
> > At least you have admitted that you where the one responsible
> > for the design of this MESS.
> 
> Thank you! However, you give me too much honour.

Well ... you ask for it in the corresponding header ;-).
But it isn't yours fault indeed I admit...
I know the discussions from memmory since I'm returning REGULARLY to
this
topic in intervals of about between 6 and 24 months since about
maybe already 6 years!!! Currently they have just started to hurt
seriously. And please remember the change I have mentioned above
wasn't intended as developement but just only as an experiment...

Well let's us stop throw flames at each other.
Please have a tight look at the following *EXPERIMENT* I have
already done. It's really really only intended to mark the
places where the full mess shows it's ugly head:

http://www.dalecki.de/big-002.diff

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 20:54 Andries.Brouwer
  2001-05-22 21:17 ` Martin Dalecki
@ 2001-05-22 22:37 ` Linus Torvalds
  2001-05-22 23:51   ` Alexander Viro
  1 sibling, 1 reply; 55+ messages in thread
From: Linus Torvalds @ 2001-05-22 22:37 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: viro, linux-kernel


On Tue, 22 May 2001 Andries.Brouwer@cwi.nl wrote:
> 
> The operations are different, but all bdev/cdev code is identical.
> 
> So the choice is between two uglies:
> (i) have some not entirely trivial amount of code twice in the kernel
> (ii) have a union at the point where the struct operations
> is assigned.
> 
> I preferred the union.

I would much prefer a union of pointers over a pointer to a union.

So I'd much rather have the inode have a

	union {
		struct block_device *block;
		struct char_device *char;
	} dev;

and then have people do

	cdev = inode->dev.char;

to get the right information, than to have 

	union block_char_union {
		struct block_device block;
		struct char_device char;
	};

	.. with struct inode containing ..
	union block_char_union *dev;

Why? Because if you have a "struct inode", you also have enough
information to decide _which_ of the two types of pointers you have, so
you can do the proper dis-ambiguation of the union and properly select
either 'inode->dev.char' or 'inode->dev.block' depending on other
information in the inode.

In contrast, if you have a pointer to a union, you don't have information
of which sub-type it is, and you'd have to carry that along some other way
(for example, by having common fields at the beginning). Which I think is
broken.

So my suggestion for eventual interfaces:

 - have functions like

	struct block_dev *bdget(struct inode *);
	struct char_dev *cdget(struct inode *);

   which populate the "inode->dev" union pointer, which in turn is _only_
   a cache of the lookup. Right now we do this purely based on "dev_t",
   and I think that is bogus. We should never pass a "dev_t" around
   without an inode, I think.

   And we should not depend on the "inode->dev.xxxx" pointer being valid all
   the time, as there is absolutely zero point in initializing the pointer
   every time the inode is read just because somebody does a "ls -l /dev".
   Thus the "cache" part above.

 - NO reason to try to make "struct block_dev" and "struct char_dev" look
   similar. They will have some commonality for lookup purposes (that
   issue is similar, as Andries points out), and maybe that commonality
   can be separated out into a sub-structure or something. But apart from
   that, they have absolutely nothing to do with each other, and I'd
   rather not have them have even a _superficial_ connection.

   Block devices will have the "request queue" pointer, and the size and
   partitioning information. Character devices currently would not have
   much more than the operations pointer and name, but who knows..

But the most important thing is to be able to do this in steps. One of the
reasons Andries has had patches for a long time is that it was never very
gradual. Al's patch is gradual, and I like that.

		Linus


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 22:17 Andries.Brouwer
  2001-05-22 22:34 ` Martin Dalecki
@ 2001-05-22 22:47 ` Martin Dalecki
  2001-05-23  0:02   ` Jeff Garzik
  1 sibling, 1 reply; 55+ messages in thread
From: Martin Dalecki @ 2001-05-22 22:47 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel, torvalds, viro

And if we are at the topic... Those are the places where blk_size[]
get's
abused, since it's in fact a property of a FS in fact and not the
property of
a particular device... blksect_size is the array describing the physical
access limits of a device and blk_size get's usually checked against it.
However due to the bad naming and the fact that this information is
associated with major/minor number usage same device driver writers got
*very* confused as you can see below:

./fs/block_dev.c: Here this information should be passed entierly insice
the request.

./fs/partitions/check.c: Here it basically get's reset or ignored


Here it's serving the purpose of a sector size, which is bogous!

./mm/swapfile.c:#include <linux/blkdev.h> /* for blk_size */
./mm/swapfile.c:		if (!dev || (blk_size[MAJOR(dev)] &&
./mm/swapfile.c:		     !blk_size[MAJOR(dev)][MINOR(dev)]))
./mm/swapfile.c:		if (blk_size[MAJOR(dev)])
./mm/swapfile.c:			swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]


Here it shouldn't be needed
./drivers/block/ll_rw_blk.c: 


./drivers/block/floppy.c:	blk_size[MAJOR_NR] = floppy_sizes;
./drivers/block/nbd.c:	blk_size[MAJOR_NR] = nbd_sizes;
./drivers/block/rd.c: * and set blk_size for -ENOSPC,     Werner Fink
<werner@suse.de>, Apr '99
./drivers/block/amiflop.c:	blk_size[MAJOR_NR] = floppy_sizes;
./drivers/block/loop.c:	if (blk_size[MAJOR(lodev)])
./drivers/block/ataflop.c: *   - Set blk_size for proper size checking
./drivers/block/ataflop.c:	blk_size[MAJOR_NR] = floppy_sizes;
./drivers/block/cpqarray.c:				drv->blk_size;
./drivers/block/z2ram.c:	blk_size[ MAJOR_NR ] = z2_sizes;
./drivers/block/swim3.c:		blk_size[MAJOR_NR] = floppy_sizes;
./drivers/block/swim_iop.c:	blk_size[MAJOR_NR] = floppy_sizes;
./drivers/char/raw.c:	if (blk_size[MAJOR(dev)])
./drivers/scsi/advansys.c:    ASC_DCNT            blk_size;
./drivers/scsi/sd.c:		blk_size[SD_MAJOR(i)] = NULL;
./drivers/scsi/sr.c:	blk_size[MAJOR_NR] = sr_sizes;
./drivers/scsi/sr.c:	blk_size[MAJOR_NR] = NULL;
./drivers/sbus/char/jsflash.c:	blk_size[JSFD_MAJOR] = jsfd_sizes;
./drivers/ide/ide-cd.c:	blk_size[HWIF(drive)->major] =
HWIF(drive)->gd->sizes;
./drivers/ide/ide-floppy.c: *	Revalidate the new media. Should set
blk_size[]
./drivers/acorn/block/fd1772.c:	blk_size[MAJOR_NR] = floppy_sizes;
./drivers/i2o/i2o_block.c:	blk_size[MAJOR_NR] = i2ob_sizes;

In the following they are REALLY confusing it and then compensating for
this misunderstanding in lvm.h by redefining the corresponding default
values.

./drivers/s390/*

And then some minor confusions follow...

./drivers/mtd/mtdblock.c:	blk_size[MAJOR_NR] = NULL;
./drivers/md/md.c:	if (blk_size[MAJOR(dev)])
./arch/m68k/atari/stram.c:    blk_size[STRAM_MAJOR] = stram_sizes;

Basically one should just stop setting blk_size[][] inside *ANY* driver
and anything should still work fine unless the driver is broken...

Well that's the point for another fine kernel experiment I will do
and report whatever it works really out like this in reality 8-)

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-22 23:33 Andries.Brouwer
  2001-05-23  0:03 ` Alexander Viro
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-22 23:33 UTC (permalink / raw)
  To: Andries.Brouwer, torvalds; +Cc: linux-kernel, viro

    From torvalds@transmeta.com Wed May 23 00:39:23 2001

    On Tue, 22 May 2001 Andries.Brouwer@cwi.nl wrote:
    > 
    > The operations are different, but all bdev/cdev code is identical.
    > 
    > So the choice is between two uglies:
    > (i) have some not entirely trivial amount of code twice in the kernel
    > (ii) have a union at the point where the struct operations
    > is assigned.
    > 
    > I preferred the union.

    I would much prefer a union of pointers over a pointer to a union.

    Why? Because if you have a "struct inode", you also have enough
    information to decide _which_ of the two types of pointers you have, so
    you can do the proper dis-ambiguation of the union and properly select
    either 'inode->dev.char' or 'inode->dev.block' depending on other
    information in the inode.

I am not sure whether we agree or differ in opinion. I wouldn't mind

/* pairing for dev_t to bd_op/cd_op */
struct bc_device {
        struct list_head        bd_hash;
        atomic_t                bd_count;
        dev_t                   bd_dev;
        atomic_t                bd_openers;
        union {
		struct block_device_operations_and_data *bd_op;
		struct char_device_operations_and_data *cd_op;
	}
        struct semaphore        bd_sem;
};

typedef struct bc_device *kdev_t;

and in an inode

	kdev_t dev;
	dev_t rdev;

In reality we want the pair (dev_t, pointer to stuff), but then
there is all this administrative nonsense needed to make sure
that nobody uses the pointer after the module has been unloaded
that makes the pointer a bit thicker.

       And we should not depend on the "inode->dev.xxxx" pointer
       being valid all the time, as there is absolutely zero point
       in initializing the pointer every time somebody does a "ls -l /dev".

Yes, that is why I want to go back and have dev_t rdev, not kdev_t.
The lookup is done when the device is opened.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 22:37 ` Linus Torvalds
@ 2001-05-22 23:51   ` Alexander Viro
  2001-05-23  0:06     ` Jeff Garzik
  2001-05-23  2:35     ` Linus Torvalds
  0 siblings, 2 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-22 23:51 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andries.Brouwer, linux-kernel



On Tue, 22 May 2001, Linus Torvalds wrote:
 
> I would much prefer a union of pointers over a pointer to a union.
> 
> So I'd much rather have the inode have a
> 
> 	union {
> 		struct block_device *block;
> 		struct char_device *char;
		struct pipe_inode_info *pipe;
and possibly
		struct socket *socket;
> 	} dev;

> 	struct block_dev *bdget(struct inode *);
> 	struct char_dev *cdget(struct inode *);
> 
>    which populate the "inode->dev" union pointer, which in turn is _only_
>    a cache of the lookup. Right now we do this purely based on "dev_t",
>    and I think that is bogus. We should never pass a "dev_t" around
>    without an inode, I think.

I doubt it. First of all, then we'd better make i_rdev dev_t. Right now
we have it kdev_t and it makes absolutely no sense that way. If we
only use it for stat() (when we convert it to dev_t) and for keeping the
information between mknod/read_inode and open() - why bother converting it
to anything? Currently it's used by drivers to identify the device.
->dev.{block,char} is perfectly fine for that and gives more information
without extra lookups, etc. And that's it - nothing else cares for
->i_rdev. What you suggest is reusing it so that we had a way to get
the right ->dev.{block,char} when we open. Fine, but there's no reason
to tie it to kdev_t (or to have kdev_t, for that matter).

The real thing is inode->dev. Notice that for devfs (and per-device
filesystems) we can set it upon inode creation, since they _know_ it
from the very beginning and there is absolutely no reason to do any
hash lookups. Moreover, in these cases we may have no meaningful device
number at all.

>    And we should not depend on the "inode->dev.xxxx" pointer being valid all
>    the time, as there is absolutely zero point in initializing the pointer
>    every time the inode is read just because somebody does a "ls -l /dev".
>    Thus the "cache" part above.

OK, but see comments above.

>  - NO reason to try to make "struct block_dev" and "struct char_dev" look
>    similar. They will have some commonality for lookup purposes (that
>    issue is similar, as Andries points out), and maybe that commonality
>    can be separated out into a sub-structure or something. But apart from
>    that, they have absolutely nothing to do with each other, and I'd
>    rather not have them have even a _superficial_ connection.

Aye.

>    Block devices will have the "request queue" pointer, and the size and
>    partitioning information. Character devices currently would not have

Do we really want a separate queue for each partition? I'd rather have
disk_struct created when driver sees the disk and list of partitions
(possibly represented by struct block_device) anchored in disk_struct
and populated by grok_partitions().

Then disk_struct would keep the queue _and_ ll_rw_block could do all
remapping, so that driver itself wouldn't know anything about the
partitioning.

I have a half-baked patch for that (circa last Spring) and Ben LaHaise got
suckere^W^Whad kindly volunteered to try porting it to current tree...

								Al



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23  0:01 Andries.Brouwer
  0 siblings, 0 replies; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23  0:01 UTC (permalink / raw)
  To: torvalds, viro; +Cc: Andries.Brouwer, linux-kernel

> Do we really want a separate queue for each partition?

No.

> I have a half-baked patch

Me too. (Not half-baked but brewed.) In principle the change
is trivial, but there are a few IDE issues that are presently
solved in a very low-level way (and incorrectly).
This makes the patch larger than expected at first sight.

Andries


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 22:47 ` Martin Dalecki
@ 2001-05-23  0:02   ` Jeff Garzik
  2001-05-23  0:14     ` Jens Axboe
  2001-05-23  2:40     ` Linus Torvalds
  0 siblings, 2 replies; 55+ messages in thread
From: Jeff Garzik @ 2001-05-23  0:02 UTC (permalink / raw)
  To: Martin Dalecki; +Cc: Andries.Brouwer, linux-kernel, torvalds, viro, axboe

IMHO it would be nice to (for 2.4) create wrappers for accessing the
block arrays, so that we can more easily dispose of the arrays when 2.5
rolls around...
-- 
Jeff Garzik      | "Are you the police?"
Building 1024    | "No, ma'am.  We're musicians."
MandrakeSoft     |

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 23:33 Andries.Brouwer
@ 2001-05-23  0:03 ` Alexander Viro
  0 siblings, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-23  0:03 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: torvalds, linux-kernel



On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:

> I am not sure whether we agree or differ in opinion. I wouldn't mind
> 
> /* pairing for dev_t to bd_op/cd_op */
> struct bc_device {
>         struct list_head        bd_hash;
>         atomic_t                bd_count;
>         dev_t                   bd_dev;
>         atomic_t                bd_openers;
>         union {
> 		struct block_device_operations_and_data *bd_op;
> 		struct char_device_operations_and_data *cd_op;
> 	}
>         struct semaphore        bd_sem;
> };
> 
> typedef struct bc_device *kdev_t;

What for? What part of the kernel needs a device and doesn't know apriory
whether it's block or character one?

> and in an inode
> 
> 	kdev_t dev;
Useless. If you hope that block_device will help to solve rmmod races -
sorry, it won't. Wrong layer.

> 	dev_t rdev;
Reasonable.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 23:51   ` Alexander Viro
@ 2001-05-23  0:06     ` Jeff Garzik
  2001-05-23  0:14       ` Jens Axboe
  2001-05-23  2:37       ` Linus Torvalds
  2001-05-23  2:35     ` Linus Torvalds
  1 sibling, 2 replies; 55+ messages in thread
From: Jeff Garzik @ 2001-05-23  0:06 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Linus Torvalds, Andries.Brouwer, linux-kernel

Alexander Viro wrote:
> Do we really want a separate queue for each partition? I'd rather have
> disk_struct created when driver sees the disk and list of partitions
> (possibly represented by struct block_device) anchored in disk_struct
> and populated by grok_partitions().

Alan recently straightened me out with "EVMS/LVM is partitions done
right"

so... why not implement partitions as simply doing block remaps to the
lower level device?  That's what EVMS/LVM/md are doing already.

-- 
Jeff Garzik      | "Are you the police?"
Building 1024    | "No, ma'am.  We're musicians."
MandrakeSoft     |

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:02   ` Jeff Garzik
@ 2001-05-23  0:14     ` Jens Axboe
  2001-05-23  2:40     ` Linus Torvalds
  1 sibling, 0 replies; 55+ messages in thread
From: Jens Axboe @ 2001-05-23  0:14 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Martin Dalecki, Andries.Brouwer, linux-kernel, torvalds, viro

On Tue, May 22 2001, Jeff Garzik wrote:
> IMHO it would be nice to (for 2.4) create wrappers for accessing the
> block arrays, so that we can more easily dispose of the arrays when 2.5
> rolls around...

Agreed, in fact I have a lot of stuff that could be included in the
kcompat files for 2.4 (of course still changing :)

BTW, max_sectors/max_segments/hardsect_size already in place. Still some
to go.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:06     ` Jeff Garzik
@ 2001-05-23  0:14       ` Jens Axboe
  2001-05-23  2:37       ` Linus Torvalds
  1 sibling, 0 replies; 55+ messages in thread
From: Jens Axboe @ 2001-05-23  0:14 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Alexander Viro, Linus Torvalds, Andries.Brouwer, linux-kernel

On Tue, May 22 2001, Jeff Garzik wrote:
> so... why not implement partitions as simply doing block remaps to the
> lower level device?  That's what EVMS/LVM/md are doing already.

That is indeed the plan, having partitions simply being 'just another'
sector remap when submitting I/O

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23  0:20 Andries.Brouwer
  2001-05-23  2:43 ` Linus Torvalds
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23  0:20 UTC (permalink / raw)
  To: jgarzik, viro; +Cc: Andries.Brouwer, linux-kernel, torvalds

> why not implement partitions as simply doing block remaps

Everybody agrees.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23  0:22 Andries.Brouwer
  2001-05-23  0:29 ` Martin Dalecki
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23  0:22 UTC (permalink / raw)
  To: dalecki, jgarzik; +Cc: Andries.Brouwer, axboe, linux-kernel, torvalds, viro

> IMHO it would be nice to create wrappers for accessing the block arrays

Last year Linus didnt like that at all. Maybe this year.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23  0:28 Andries.Brouwer
  2001-05-23  0:38 ` Alexander Viro
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23  0:28 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: linux-kernel, torvalds

>> 	dev_t rdev;

> Reasonable.

Good. We all agree.
(But now you see what I meant in comments about mknod.)

>> 	kdev_t dev;

> Useless. If you hope that block_device will help to solve rmmod races

Yes. Why am I mistaken?

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:22 Andries.Brouwer
@ 2001-05-23  0:29 ` Martin Dalecki
  0 siblings, 0 replies; 55+ messages in thread
From: Martin Dalecki @ 2001-05-23  0:29 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: jgarzik, axboe, linux-kernel, torvalds, viro

[-- Attachment #1: Type: text/plain, Size: 308 bytes --]

Andries.Brouwer@cwi.nl wrote:
> 
> > IMHO it would be nice to create wrappers for accessing the block arrays
> 
> Last year Linus didnt like that at all. Maybe this year.

Well... the attached patch lines up into this effort and fixes
some abuses, removes redundant code and so on. Please have a second
look.

[-- Attachment #2: blksize_size.diff --]
[-- Type: text/plain, Size: 15802 bytes --]

diff -urN linux/drivers/block/ll_rw_blk.c new/drivers/block/ll_rw_blk.c
--- linux/drivers/block/ll_rw_blk.c	Thu Apr 12 21:15:52 2001
+++ new/drivers/block/ll_rw_blk.c	Mon Apr 30 23:16:03 2001
@@ -85,25 +85,21 @@
 int * blk_size[MAX_BLKDEV];
 
 /*
- * blksize_size contains the size of all block-devices:
+ * blksize_size contains the block size of all block-devices:
  *
  * blksize_size[MAJOR][MINOR]
  *
- * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ * Access to this array should happen through the get_blksize_size() function.
+ * If (!blksize_size[MAJOR]) then 1024 bytes is assumed.
  */
 int * blksize_size[MAX_BLKDEV];
 
 /*
  * hardsect_size contains the size of the hardware sector of a device.
  *
- * hardsect_size[MAJOR][MINOR]
- *
- * if (!hardsect_size[MAJOR])
- *		then 512 bytes is assumed.
- * else
- *		sector_size is hardsect_size[MAJOR][MINOR]
- * This is currently set by some scsi devices and read by the msdos fs driver.
- * Other uses may appear later.
+ * Access to this array should happen through the get_hardsect_size() function.
+ * The default value is assumed to be 512 unless specified differently by the
+ * corresponding low-level driver.
  */
 int * hardsect_size[MAX_BLKDEV];
 
@@ -992,22 +988,14 @@
 
 void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
 {
-	unsigned int major;
-	int correct_size;
+	ssize_t correct_size;
 	int i;
 
 	if (!nr)
 		return;
 
-	major = MAJOR(bhs[0]->b_dev);
-
 	/* Determine correct block size for this device. */
-	correct_size = BLOCK_SIZE;
-	if (blksize_size[major]) {
-		i = blksize_size[major][MINOR(bhs[0]->b_dev)];
-		if (i)
-			correct_size = i;
-	}
+	correct_size = get_blksize_size(bhs[0]->b_dev);
 
 	/* Verify requested block sizes. */
 	for (i = 0; i < nr; i++) {
diff -urN linux/drivers/block/loop.c new/drivers/block/loop.c
--- linux/drivers/block/loop.c	Thu Apr 12 04:05:14 2001
+++ new/drivers/block/loop.c	Mon Apr 30 23:30:17 2001
@@ -272,22 +272,10 @@
 	return desc.error;
 }
 
-static inline int loop_get_bs(struct loop_device *lo)
-{
-	int bs = 0;
-
-	if (blksize_size[MAJOR(lo->lo_device)])
-		bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
-	if (!bs)
-		bs = BLOCK_SIZE;	
-
-	return bs;
-}
-
 static inline unsigned long loop_get_iv(struct loop_device *lo,
 					unsigned long sector)
 {
-	int bs = loop_get_bs(lo);
+	int bs = get_blksize_size(lo->lo_device);
 	unsigned long offset, IV;
 
 	IV = sector / (bs >> 9) + lo->lo_offset / bs;
@@ -306,9 +294,9 @@
 	pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
 
 	if (rw == WRITE)
-		ret = lo_send(lo, bh, loop_get_bs(lo), pos);
+		ret = lo_send(lo, bh, get_blksize_size(lo->lo_device), pos);
 	else
-		ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
+		ret = lo_receive(lo, bh, get_blksize_size(lo->lo_device), pos);
 
 	return ret;
 }
@@ -650,12 +638,7 @@
 	lo->old_gfp_mask = inode->i_mapping->gfp_mask;
 	inode->i_mapping->gfp_mask = GFP_BUFFER;
 
-	bs = 0;
-	if (blksize_size[MAJOR(inode->i_rdev)])
-		bs = blksize_size[MAJOR(inode->i_rdev)][MINOR(inode->i_rdev)];
-	if (!bs)
-		bs = BLOCK_SIZE;
-
+	bs = get_blksize_size(inode->i_rdev);
 	set_blocksize(dev, bs);
 
 	lo->lo_bh = lo->lo_bhtail = NULL;
diff -urN linux/drivers/char/raw.c new/drivers/char/raw.c
--- linux/drivers/char/raw.c	Fri Apr 27 23:23:25 2001
+++ new/drivers/char/raw.c	Mon Apr 30 22:57:20 2001
@@ -124,22 +124,25 @@
 		return err;
 	}
 
-	
-	/* 
-	 * Don't interfere with mounted devices: we cannot safely set
-	 * the blocksize on a device which is already mounted.  
+	/*
+	 * 29.04.2001 Martin Dalecki:
+	 *
+	 * The original comment here was saying:
+	 *
+	 * "Don't interfere with mounted devices: we cannot safely set the
+	 * blocksize on a device which is already mounted."
+	 *
+	 * However the code below was setting happily the blocksize
+	 * disregarding the previous check. I have fixed this, however I'm
+	 * quite sure, that the statement above isn't right and we should be
+	 * able to remove the first arm of the branch below entierly.
 	 */
-	
-	sector_size = 512;
 	if (get_super(rdev) != NULL) {
-		if (blksize_size[MAJOR(rdev)])
-			sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)];
+		sector_size = get_blksize_size(rdev);
 	} else {
-		if (hardsect_size[MAJOR(rdev)])
-			sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)];
+		sector_size = get_hardsect_size(rdev);
+		set_blocksize(rdev, sector_size);
 	}
-
-	set_blocksize(rdev, sector_size);
 	raw_devices[minor].sector_size = sector_size;
 
 	for (sector_bits = 0; !(sector_size & 1); )
@@ -148,7 +151,7 @@
 
  out:
 	up(&raw_devices[minor].mutex);
-	
+
 	return err;
 }
 
diff -urN linux/drivers/md/lvm-snap.c new/drivers/md/lvm-snap.c
--- linux/drivers/md/lvm-snap.c	Fri Apr 27 23:23:25 2001
+++ new/drivers/md/lvm-snap.c	Mon Apr 30 23:27:40 2001
@@ -172,20 +172,6 @@
 		blocks[i] = start++;
 }
 
-inline int lvm_get_blksize(kdev_t dev)
-{
-	int correct_size = BLOCK_SIZE, i, major;
-
-	major = MAJOR(dev);
-	if (blksize_size[major])
-	{
-		i = blksize_size[major][MINOR(dev)];
-		if (i)
-			correct_size = i;
-	}
-	return correct_size;
-}
-
 #ifdef DEBUG_SNAPSHOT
 static inline void invalidate_snap_cache(unsigned long start, unsigned long nr,
 					 kdev_t dev)
@@ -218,7 +204,7 @@
 
 	if (is == 0) return;
 	is--;
-        blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
+        blksize_snap = get_blksize_size(lv_snap->lv_block_exception[is].rdev_new);
         is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
 
 	memset(lv_COW_table, 0, blksize_snap);
@@ -262,7 +248,7 @@
 	snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
 	snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
 
-	blksize_snap = lvm_get_blksize(snap_phys_dev);
+	blksize_snap = get_blksize_size(snap_phys_dev);
 
         COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
         idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
@@ -307,7 +293,7 @@
 			idx++;
 			snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
 			snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
-			blksize_snap = lvm_get_blksize(snap_phys_dev);
+			blksize_snap = get_blksize_size(snap_phys_dev);
 			iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10);
 		} else iobuf->blocks[0]++;
 
@@ -384,8 +370,8 @@
 
 	iobuf = lv_snap->lv_iobuf;
 
-	blksize_org = lvm_get_blksize(org_phys_dev);
-	blksize_snap = lvm_get_blksize(snap_phys_dev);
+	blksize_org = get_blksize_size(org_phys_dev);
+	blksize_snap = get_blksize_size(snap_phys_dev);
 	max_blksize = max(blksize_org, blksize_snap);
 	min_blksize = min(blksize_org, blksize_snap);
 	max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
diff -urN linux/drivers/md/lvm-snap.h new/drivers/md/lvm-snap.h
--- linux/drivers/md/lvm-snap.h	Mon Jan 29 01:11:20 2001
+++ new/drivers/md/lvm-snap.h	Mon Apr 30 23:26:28 2001
@@ -32,7 +32,6 @@
 #define LVM_SNAP_H
 
 /* external snapshot calls */
-extern inline int lvm_get_blksize(kdev_t);
 extern int lvm_snapshot_alloc(lv_t *);
 extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
 extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *);
diff -urN linux/drivers/md/lvm.c new/drivers/md/lvm.c
--- linux/drivers/md/lvm.c	Sat Apr 21 19:37:16 2001
+++ new/drivers/md/lvm.c	Mon Apr 30 23:31:56 2001
@@ -1077,7 +1077,7 @@
 	memset(&bh,0,sizeof bh);
 	bh.b_rsector = block;
 	bh.b_dev = bh.b_rdev = inode->i_dev;
-	bh.b_size = lvm_get_blksize(bh.b_dev);
+	bh.b_size = get_blksize_size(bh.b_dev);
 	if ((err=lvm_map(&bh, READ)) < 0)  {
 		printk("lvm map failed: %d\n", err);
 		return -EINVAL;
diff -urN linux/drivers/md/raid5.c new/drivers/md/raid5.c
--- linux/drivers/md/raid5.c	Fri Feb  9 20:30:23 2001
+++ new/drivers/md/raid5.c	Mon Apr 30 23:22:58 2001
@@ -1136,23 +1136,6 @@
 	return 0;
 }
 
-/*
- * Determine correct block size for this device.
- */
-unsigned int device_bsize (kdev_t dev)
-{
-	unsigned int i, correct_size;
-
-	correct_size = BLOCK_SIZE;
-	if (blksize_size[MAJOR(dev)]) {
-		i = blksize_size[MAJOR(dev)][MINOR(dev)];
-		if (i)
-			correct_size = i;
-	}
-
-	return correct_size;
-}
-
 static int raid5_sync_request (mddev_t *mddev, unsigned long block_nr)
 {
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
diff -urN linux/drivers/s390/char/tape34xx.c new/drivers/s390/char/tape34xx.c
--- linux/drivers/s390/char/tape34xx.c	Thu Apr 12 04:02:28 2001
+++ new/drivers/s390/char/tape34xx.c	Mon Apr 30 22:36:57 2001
@@ -1369,7 +1369,8 @@
 	ccw_req_t *cqr;
 	ccw1_t *ccw;
 	__u8 *data;
-	int s2b = blksize_size[tapeblock_major][tape->blk_minor]/hardsect_size[tapeblock_major][tape->blk_minor];
+	int s2b = blksize_size[tapeblock_major][tape->blk_minor]
+		/ get_hardsect_size(MKDEV(tapeblock_major,tape->blk_minor));
 	int realcount;
 	int size,bhct = 0;
 	struct buffer_head* bh;
@@ -1388,7 +1389,7 @@
 	}
 	data[0] = 0x01;
 	data[1] = data[2] = data[3] = 0x00;
-	realcount=req->sector/s2b;
+	realcount=req->sector / s2b;
 	if (((tape34xx_disc_data_t *) tape->discdata)->modeset_byte & 0x08)	// IDRC on
 
 		data[1] = data[1] | 0x80;
diff -urN linux/fs/block_dev.c new/fs/block_dev.c
--- linux/fs/block_dev.c	Fri Feb  9 20:29:44 2001
+++ new/fs/block_dev.c	Mon Apr 30 23:03:27 2001
@@ -14,12 +14,10 @@
 #include <linux/major.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/smp_lock.h>
+#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 
-extern int *blk_size[];
-extern int *blksize_size[];
-
 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
 #define NBUF 64
 
@@ -42,10 +40,8 @@
 		return -EPERM;
 
 	written = write_error = buffercount = 0;
-	blocksize = BLOCK_SIZE;
-	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
-		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
 
+	blocksize = get_blksize_size(dev);
 	i = blocksize;
 	blocksize_bits = 0;
 	while(i != 1) {
@@ -182,9 +178,7 @@
 	ssize_t read;
 
 	dev = inode->i_rdev;
-	blocksize = BLOCK_SIZE;
-	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
-		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
+	blocksize = get_blksize_size(dev);
 	i = blocksize;
 	blocksize_bits = 0;
 	while (i != 1) {
diff -urN linux/fs/buffer.c new/fs/buffer.c
--- linux/fs/buffer.c	Fri Apr 27 23:23:25 2001
+++ new/fs/buffer.c	Mon Apr 30 23:47:21 2001
@@ -687,6 +687,13 @@
 	if (!blksize_size[MAJOR(dev)])
 		return;
 
+	/* 29/04/2001 Marcin Dalecki <dalecki@evision.ag>:
+	 *
+	 * The assumption that the size may not by lower then 512 is
+	 * conflicting with the assumption in partitions/check.c that the block
+	 * size may assume 256 bytes.
+	 */
+
 	/* Size must be a power of two, and between 512 and PAGE_SIZE */
 	if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
 		panic("Invalid blocksize passed to set_blocksize");
diff -urN linux/fs/partitions/check.c new/fs/partitions/check.c
--- linux/fs/partitions/check.c	Sat Feb 17 01:02:37 2001
+++ new/fs/partitions/check.c	Mon Apr 30 23:37:50 2001
@@ -34,7 +34,6 @@
 #include "ultrix.h"
 
 extern void device_init(void);
-extern int *blk_size[];
 extern void rd_load(void);
 extern void initrd_load(void);
 
@@ -200,20 +199,13 @@
 	int ret = 1024;
 
 	/*
-	 * See whether the low-level driver has given us a minumum blocksize.
-	 * If so, check to see whether it is larger than the default of 1024.
-	 */
-	if (!blksize_size[MAJOR(dev)])
-		return ret;
-
-	/*
 	 * Check for certain special power of two sizes that we allow.
 	 * With anything larger than 1024, we must force the blocksize up to
 	 * the natural blocksize for the device so that we don't have to try
 	 * and read partial sectors.  Anything smaller should be just fine.
 	 */
 
-	switch (blksize_size[MAJOR(dev)][MINOR(dev)]) {
+	switch (get_blksize_size(dev)) {
 		case 2048:
 			ret = 2048;
 			break;
diff -urN linux/fs/partitions/ibm.c new/fs/partitions/ibm.c
--- linux/fs/partitions/ibm.c	Sat Apr 14 05:26:07 2001
+++ new/fs/partitions/ibm.c	Mon Apr 30 23:45:16 2001
@@ -125,12 +125,10 @@
 		return 0;
 	}
 	genhd_dasd_fillgeo(dev,&geo);
-	blocksize = hardsect_size[MAJOR(dev)][MINOR(dev)];
-	if ( blocksize <= 0 ) {
-		return 0;
-	}
 
-	set_blocksize(dev, blocksize);  /* OUCH !! */
+	blocksize = get_hardsect_size(dev);
+	set_blocksize(dev, blocksize);
+
 	if ( ( bh = bread( dev, geo.start, blocksize) ) != NULL ) {
 		strncpy ( type,bh -> b_data + 0, 4);
 		strncpy ( name,bh -> b_data + 4, 6);
diff -urN linux/fs/reiserfs/super.c new/fs/reiserfs/super.c
--- linux/fs/reiserfs/super.c	Fri Apr 27 23:18:08 2001
+++ new/fs/reiserfs/super.c	Mon Apr 30 23:53:17 2001
@@ -1,7 +1,8 @@
 /*
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  *
- * Trivial changes by Alan Cox to add the LFS fixes
+ * Trivial changes by Alan Cox to add the LFS fixes.
+ * Trivial change by Marcin Dalecki.
  *
  * Trivial Changes:
  * Rights granted to Hans Reiser to redistribute under other terms providing
@@ -21,6 +22,7 @@
 #include <linux/smp_lock.h>
 #include <linux/locks.h>
 #include <linux/init.h>
+#include <linux/blkdev.h>
 
 #else
 
@@ -681,7 +683,6 @@
     struct inode *root_inode;
     kdev_t dev = s->s_dev;
     int j;
-    extern int *blksize_size[];
     struct reiserfs_transaction_handle th ;
     int old_format = 0;
     unsigned long blocks;
@@ -696,17 +697,22 @@
     }
 
     if (blocks) {
-  	printk("reserfs: resize option for remount only\n");
+	printk("reserfs: resize option for remount only\n");
 	return NULL;
-    }	
+    }
 
-    if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)] != 0) {
-	/* as blocksize is set for partition we use it */
-	size = blksize_size[MAJOR(dev)][MINOR(dev)];
-    } else {
+    /*
+     * 29/04/2001 Marcin Dalecki <dalecki@evision.ag>:
+     *
+     * See what the current blocksize for the device is, and use that as the
+     * blocksize.  Otherwise (or if the blocksize is smaller than the default)
+     * use the default.  This is important for devices that have a hardware
+     * sectorsize that is larger than the default.
+     */
+    size = get_hardsect_size(dev);
+    if (size < BLOCK_SIZE)
 	size = BLOCK_SIZE;
-	set_blocksize (s->s_dev, BLOCK_SIZE);
-    }
+    set_blocksize (s->s_dev, BLOCK_SIZE);
 
     /* read block (64-th 1k block), which can contain reiserfs super block */
     if (read_super_block (s, size)) {
diff -urN linux/include/linux/blkdev.h new/include/linux/blkdev.h
--- linux/include/linux/blkdev.h	Sat Apr 28 00:48:49 2001
+++ new/include/linux/blkdev.h	Mon Apr 30 22:43:51 2001
@@ -198,11 +198,18 @@
 
 static inline int get_hardsect_size(kdev_t dev)
 {
-	extern int *hardsect_size[];
 	if (hardsect_size[MAJOR(dev)] != NULL)
 		return hardsect_size[MAJOR(dev)][MINOR(dev)];
 	else
 		return 512;
+}
+
+static inline int get_blksize_size(kdev_t dev)
+{
+	if (blksize_size[MAJOR(dev)] != NULL)
+		return blksize_size[MAJOR(dev)][MINOR(dev)];
+	else
+		return 1024;
 }
 
 #define blk_finished_io(nsects)				\
diff -urN linux/include/linux/lvm.h new/include/linux/lvm.h
--- linux/include/linux/lvm.h	Sat Feb 17 01:06:17 2001
+++ new/include/linux/lvm.h	Mon Apr 30 23:47:08 2001
@@ -113,6 +113,20 @@
 #error Bad include/linux/major.h - LVM MAJOR undefined
 #endif
 
+/* 29/04/2001 Marcin Dalecki <dalecki@evision.ag>:
+ *
+ * The following games on the definition of the BLOCK_SIZE are asking for
+ * trouble for two reasons:
+ *
+ * 1. This is conflicting with the unconditional definition in fs.h
+ *
+ * 2. There are many places in the kernel, where this values is implicitly
+ * assumed to be equal to 1024 bytes.
+ *
+ * Anybody responsible for AS/390 this should resolve this as soon as possible.
+ * Remove this CRAP! Contact me plase if you need some support.
+ */
+
 #ifdef	BLOCK_SIZE
 #undef	BLOCK_SIZE
 #endif

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:28 Andries.Brouwer
@ 2001-05-23  0:38 ` Alexander Viro
  0 siblings, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-23  0:38 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: linux-kernel, torvalds



On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:

> >> 	dev_t rdev;
> 
> > Reasonable.
> 
> Good. We all agree.
> (But now you see what I meant in comments about mknod.)
> 
> >> 	kdev_t dev;
> 
> > Useless. If you hope that block_device will help to solve rmmod races
> 
> Yes. Why am I mistaken?

Because the problems begin in subsystems. Solving the situation with
block_device_operations is trivial. It's stuff on the character side that
is going to bite you big way. TTY drivers, for example. They are below
the layer where your kdev_t lives.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-22 23:51   ` Alexander Viro
  2001-05-23  0:06     ` Jeff Garzik
@ 2001-05-23  2:35     ` Linus Torvalds
  1 sibling, 0 replies; 55+ messages in thread
From: Linus Torvalds @ 2001-05-23  2:35 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Andries.Brouwer, linux-kernel


On Tue, 22 May 2001, Alexander Viro wrote:

> >    which populate the "inode->dev" union pointer, which in turn is _only_
> >    a cache of the lookup. Right now we do this purely based on "dev_t",
> >    and I think that is bogus. We should never pass a "dev_t" around
> >    without an inode, I think.
> 
> I doubt it. First of all, then we'd better make i_rdev dev_t. Right now
> we have it kdev_t and it makes absolutely no sense that way.

Absolutely.

In fact, the whole "kdev_t" makes no sense if we have proper char_dev and
block_dev pointers.

We have "dev_t" which is what we export to user space. And if we split up
char dev and block dev (which I'm an avid proponent for), kdev_t will be
an anachronism.

> The real thing is inode->dev. Notice that for devfs (and per-device
> filesystems) we can set it upon inode creation, since they _know_ it
> from the very beginning and there is absolutely no reason to do any
> hash lookups. Moreover, in these cases we may have no meaningful device
> number at all.

Sure. If something like devfs _knows_ the device pointers from the very
beginning, then just do: 
 - increment reference count
 - inode->dev.char = cdev;

> >    Block devices will have the "request queue" pointer, and the size and
> >    partitioning information. Character devices currently would not have
> 
> Do we really want a separate queue for each partition?

No. 

But the pointer is not a 1:1 thing (otherwise we'd just put the whole
request queue _into_ the block device).

The block device should have a pointer to the request queue, along with
the partitioning information. Why? Because that way it becomes very simple
indeed to do request processing: none of the current "look up the proper
queue for each request and do the partition offset magic inside each
driver". Instead, the queuing function becomes:

	bh->index = bdev->index;
	bh->sector += bdev->sector_offset;
	submit_bh(bh, bdev->request_queue);

and you're done. Those three lines did both the queue lookup, the
"index" for the driver, and the partitioning offset. Whoa, nelly.

And THAT is why you want to have the queue pointer in the bdev structure.

		Linus


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:06     ` Jeff Garzik
  2001-05-23  0:14       ` Jens Axboe
@ 2001-05-23  2:37       ` Linus Torvalds
  2001-05-23  3:04         ` Jeff Garzik
  2001-05-23  9:05         ` Alan Cox
  1 sibling, 2 replies; 55+ messages in thread
From: Linus Torvalds @ 2001-05-23  2:37 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Alexander Viro, Andries.Brouwer, linux-kernel


On Tue, 22 May 2001, Jeff Garzik wrote:
> 
> Alan recently straightened me out with "EVMS/LVM is partitions done
> right"
> 
> so... why not implement partitions as simply doing block remaps to the
> lower level device?  That's what EVMS/LVM/md are doing already.

Because we still need the partitioning code for backwards
compatibility. There's no way I'm going to use initrd to do partition
setup with lvmtools etc.

Also, lvm and friends are _heavyweight_. The partitioning stuff should be
_one_ add (and perhaps a range check) at bh submit time. None of this
remapping crap. We don't need no steenking overhead for something we need
to do anyway.

		Linus


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:02   ` Jeff Garzik
  2001-05-23  0:14     ` Jens Axboe
@ 2001-05-23  2:40     ` Linus Torvalds
  2001-05-23 12:35       ` Martin Dalecki
  1 sibling, 1 reply; 55+ messages in thread
From: Linus Torvalds @ 2001-05-23  2:40 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Martin Dalecki, Andries.Brouwer, linux-kernel, viro, axboe


On Tue, 22 May 2001, Jeff Garzik wrote:
>
> IMHO it would be nice to (for 2.4) create wrappers for accessing the
> block arrays, so that we can more easily dispose of the arrays when 2.5
> rolls around...

No.

We do not create wrappers "so that we can easily change the implementation
when xxx happens".

That way lies bad implementations.

We do the _good_ implementation first, and then we create the
"kcompat-2.4" file later that makes people able to use the good
implementation.

Why this way? Because wrapping for wrappings sake just makes code
unreadable (see acpi), and often makes it less obvious what you actually
_have_ to do, and what you don't. So get the design right _first_, and
once the design is right, _then_ you worry about kcompat-2.4.

		Linus


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  0:20 Andries.Brouwer
@ 2001-05-23  2:43 ` Linus Torvalds
  0 siblings, 0 replies; 55+ messages in thread
From: Linus Torvalds @ 2001-05-23  2:43 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: jgarzik, viro, linux-kernel


On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:
>
> > why not implement partitions as simply doing block remaps
> 
> Everybody agrees.

No they don't.

Look at the cost of lvm. Function calls, buffer head remapping, the
works. You _need_ that for a generic LVM layer, but you sure as hell don't
need it for simple partitioning.

Can LVM do it? Sure.

Do we need LVM to do it? No.

Does LVM simplify anything? No.

It doesn't get much simpler than a single line that does the
equivalent of "bh->sector += offset". Most of the bulk of the code in the
partitioning stuff is for actually parsing the partitions, and we need
that to bootstrap. Maybe we can get rid of some of the more esoteric ones
(ie pretty much everything except for the native partitioning code for
each architecture) and tell people to use LVM for the rest.

In short, there are _no_ advantages to involve LVM here.

		Linus


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  2:37       ` Linus Torvalds
@ 2001-05-23  3:04         ` Jeff Garzik
  2001-05-23  3:21           ` Jeff Garzik
  2001-05-23  9:05         ` Alan Cox
  1 sibling, 1 reply; 55+ messages in thread
From: Jeff Garzik @ 2001-05-23  3:04 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alexander Viro, Andries.Brouwer, linux-kernel

Linus Torvalds wrote:
> 
> On Tue, 22 May 2001, Jeff Garzik wrote:
> >
> > Alan recently straightened me out with "EVMS/LVM is partitions done
> > right"
> >
> > so... why not implement partitions as simply doing block remaps to the
> > lower level device?  That's what EVMS/LVM/md are doing already.
> 
> Because we still need the partitioning code for backwards
> compatibility. There's no way I'm going to use initrd to do partition
> setup with lvmtools etc.
> 
> Also, lvm and friends are _heavyweight_. The partitioning stuff should be
> _one_ add (and perhaps a range check) at bh submit time. None of this
> remapping crap. We don't need no steenking overhead for something we need
> to do anyway.

no no no.  Not -that- heavyweight.

Partition support becomes a -peer- of LVM.

Imagine a tiny blkdev driver that understood MS-DOS (and other) hardware
partitions, and exported N block devices, representing the underlying
device (whatever it is).  In fact, that might be even a -unifying-
factor:  this tiny blkdev module -is- your /dev/disk.  For example,

/dev/sda <-> partition_blkdev <-> /dev/disk{0,1,2,3,4}
/dev/hda <-> partition_blkdev <-> /dev/disk{5,6,7}

A nice side effect:  modular partition support, since its a normal
blkdev just like anything yes.

YES there is overhead, but if partitions are just another remapping
blkdev, you get all this stuff for free.

I do grant you that an offset at bh submit time is faster, but IMHO
partitions -not- as a remapping blkdev are an ugly special case.

Remapping to an unchanging offset in the make_request_fn can be fast,
too...

-- 
Jeff Garzik      | "Are you the police?"
Building 1024    | "No, ma'am.  We're musicians."
MandrakeSoft     |

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  3:04         ` Jeff Garzik
@ 2001-05-23  3:21           ` Jeff Garzik
  0 siblings, 0 replies; 55+ messages in thread
From: Jeff Garzik @ 2001-05-23  3:21 UTC (permalink / raw)
  To: Linus Torvalds, Alexander Viro, Andries.Brouwer, linux-kernel

Jeff Garzik wrote:
> /dev/sda <-> partition_blkdev <-> /dev/disk{0,1,2,3,4}
> /dev/hda <-> partition_blkdev <-> /dev/disk{5,6,7}

I also point out that handling disk partitions as a -tiny- remapping
blkdev also has the advantage of making it easy to have a single request
device per hardware device (a simple remap shouldn't require its own
request queue, right?), while remapping devices flexibility to do their
own request queue management.


> I do grant you that an offset at bh submit time is faster, but IMHO
> partitions -not- as a remapping blkdev are an ugly special case.

think of the simplifications possible, when partitions are just another
block device, just like anything else...  No special partitions arrays
in the lowlevel blkdev, etc.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23  6:47 Andries.Brouwer
  0 siblings, 0 replies; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23  6:47 UTC (permalink / raw)
  To: Andries.Brouwer, torvalds; +Cc: jgarzik, linux-kernel, viro


    On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:
    >
    > > why not implement partitions as simply doing block remaps
    > 
    > Everybody agrees.

    No they don't.

We had this discussion already. We all agree.
Maybe you read in "remap" something other than a simple addition
but I don't. This is not a topic where further discussion is needed.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  2:37       ` Linus Torvalds
  2001-05-23  3:04         ` Jeff Garzik
@ 2001-05-23  9:05         ` Alan Cox
  1 sibling, 0 replies; 55+ messages in thread
From: Alan Cox @ 2001-05-23  9:05 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Jeff Garzik, Alexander Viro, Andries.Brouwer, linux-kernel

> Because we still need the partitioning code for backwards
> compatibility. There's no way I'm going to use initrd to do partition
> setup with lvmtools etc.

The current partitioning code consists of re-reading from disk. That is 
code that has to be present anyway even without an initrd since it is needed
for mounting other filesystems

> Also, lvm and friends are _heavyweight_. The partitioning stuff should be
> _one_ add (and perhaps a range check) at bh submit time. None of this
> remapping crap. We don't need no steenking overhead for something we need
> to do anyway.

Thats the kind of LVM layer I am thinking in terms of, not a large block
of LVM code. And for that matter the partition scanner and modifying code
can even be seperate dynamic loaded modules.

At the moment ponder what happens if you have I/Os in flight to /dev/hda3
when you delete /dev/hda2

Alan




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23 11:57 Andries.Brouwer
  2001-05-23 12:13 ` Alan Cox
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23 11:57 UTC (permalink / raw)
  To: alan, torvalds; +Cc: Andries.Brouwer, jgarzik, linux-kernel, viro

Alan writes:

    The current partitioning code consists of re-reading from disk. That is 
    code that has to be present anyway even without an initrd since it is
    needed for mounting other filesystems

I am not quite sure what you are saying here.
(For example, the "even" was unexpected.)

It is entirely possible to remove all partition table handling code
from the kernel. User space can figure out where the partitions
are supposed to be and tell the kernel.
For the initial boot this user space can be in an initrd,
or it could just be a boot parameter: rootdev=/dev/hda,
rootpartition:offset=N,length=L, rootfstype=ext3.

mount does not need any partition reading code.

Andries


[I conjecture that we'll want to start moving partition parsing
out into userspace fairly soon. Indeed, soon we'll see EFI everywhere,
and there is no good reason to build knowledge about that into the kernel.]

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23 11:57 Andries.Brouwer
@ 2001-05-23 12:13 ` Alan Cox
  0 siblings, 0 replies; 55+ messages in thread
From: Alan Cox @ 2001-05-23 12:13 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: alan, torvalds, jgarzik, linux-kernel, viro

> It is entirely possible to remove all partition table handling code
> from the kernel. User space can figure out where the partitions
> are supposed to be and tell the kernel.
> For the initial boot this user space can be in an initrd,
> or it could just be a boot parameter: rootdev=/dev/hda,
> rootpartition:offset=N,length=L, rootfstype=ext3.

Not if you want compatibility.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23 12:29 Andries.Brouwer
  2001-05-23 12:30 ` Alan Cox
  2001-05-23 13:26 ` Helge Hafting
  0 siblings, 2 replies; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23 12:29 UTC (permalink / raw)
  To: Andries.Brouwer, alan; +Cc: jgarzik, linux-kernel, torvalds, viro

    From alan@lxorguk.ukuu.org.uk Wed May 23 14:16:46 2001

    > It is entirely possible to remove all partition table handling code
    > from the kernel. User space can figure out where the partitions
    > are supposed to be and tell the kernel.
    > For the initial boot this user space can be in an initrd,
    > or it could just be a boot parameter: rootdev=/dev/hda,
    > rootpartition:offset=N,length=L, rootfstype=ext3.

    Not if you want compatibility.

I don't think compatibility is a problem.
It would go like this: at configure time you get the
choice of the default initrd or a custom initrd.
If you choose the custom one you construct it yourself.
If you choose the default one, then you get something
that comes together with the kernel image, just like
the piggyback stuff today. This default initrd does
the partition parsing that up to now the kernel did.
That way nobody need to notice a difference, except for
those who use initrd already now. They can solve their
problems.

Andries



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23 12:29 Andries.Brouwer
@ 2001-05-23 12:30 ` Alan Cox
  2001-05-23 13:26 ` Helge Hafting
  1 sibling, 0 replies; 55+ messages in thread
From: Alan Cox @ 2001-05-23 12:30 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: alan, jgarzik, linux-kernel, torvalds, viro

> the piggyback stuff today. This default initrd does
> the partition parsing that up to now the kernel did.
> That way nobody need to notice a difference, except for
> those who use initrd already now. They can solve their
> problems.

as a longer term path that seems reasonable. If we want large numbers of less
than guru level developers to play with 2.5 kernels for fun then its likely to
be a barrier unless its progressed stage by stage


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23  2:40     ` Linus Torvalds
@ 2001-05-23 12:35       ` Martin Dalecki
  0 siblings, 0 replies; 55+ messages in thread
From: Martin Dalecki @ 2001-05-23 12:35 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Jeff Garzik, Andries.Brouwer, linux-kernel, viro, axboe

Linus Torvalds wrote:
> 
> On Tue, 22 May 2001, Jeff Garzik wrote:
> >
> > IMHO it would be nice to (for 2.4) create wrappers for accessing the
> > block arrays, so that we can more easily dispose of the arrays when 2.5
> > rolls around...
> 
> No.
> 
> We do not create wrappers "so that we can easily change the implementation
> when xxx happens".
> 
> That way lies bad implementations.

However Linus please note that in the case of the bould arrays
used in device handling code we have code patterns like this:

	if (blah[major]) {
		size = blah[major][minor]
	} else
		size = some default;

And those have to by dragged throughout the whole places where
the arrays get used. Thus making some wrappers (many are already in
place):

1. Prevents typo kind of programming errors.

2. Possibly make the code more explicit.

and please don't forget:

3. Allows to change the underlying implementation in some soon point in
time.

However I agree that *without* the above arguments such kind of wrappers
would make the overall code as unreadable as C++ code frequently is,
which
tryies to preserve private: attributes at simple field cases..

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23 12:29 Andries.Brouwer
  2001-05-23 12:30 ` Alan Cox
@ 2001-05-23 13:26 ` Helge Hafting
  1 sibling, 0 replies; 55+ messages in thread
From: Helge Hafting @ 2001-05-23 13:26 UTC (permalink / raw)
  To: Andries.Brouwer, linux-kernel

Andries.Brouwer@cwi.nl wrote:
> 
>     From alan@lxorguk.ukuu.org.uk Wed May 23 14:16:46 2001
> 
>     > It is entirely possible to remove all partition table handling code
>     > from the kernel. User space can figure out where the partitions
>     > are supposed to be and tell the kernel.
>     > For the initial boot this user space can be in an initrd,
>     > or it could just be a boot parameter: rootdev=/dev/hda,
>     > rootpartition:offset=N,length=L, rootfstype=ext3.
> 
>     Not if you want compatibility.
> 
> I don't think compatibility is a problem.
> It would go like this: at configure time you get the
> choice of the default initrd or a custom initrd.

But I don't want an initrd.  I want to get the root fs directly from
disk the way I always have.  Initrd may be useful for install floppies
and such, not something I want for an ordinary installed system.

The kernel parameter way is better, just add to lilo.conf
and it works.  Forcing an initrd is also incompatibility,
because I don't use one now.

Helge Hafting

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23 13:34 Andries.Brouwer
  2001-05-23 17:54 ` Alexander Viro
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23 13:34 UTC (permalink / raw)
  To: Andries.Brouwer, helgehaf, linux-kernel

> But I don't want an initrd.

Don't be afraid of words. You wouldnt notice - it would do its
job and disappear just like piggyback today.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23 15:24 Wayne.Brown
  0 siblings, 0 replies; 55+ messages in thread
From: Wayne.Brown @ 2001-05-23 15:24 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: Andries.Brouwer, helgehaf, linux-kernel








Andries.Brouwer@cwi.nl on 05/23/2001 08:34:44 AM

To:   Andries.Brouwer@cwi.nl, helgehaf@idb.hist.no, linux-kernel@vger.kernel.org
cc:    (bcc: Wayne Brown/Corporate/Altec)

Subject:  Re: [PATCH] struct char_device



>> But I don't want an initrd.
>
>Don't be afraid of words. You wouldnt notice - it would do its
>job and disappear just like piggyback today.

So nothing in the boot scripts would have to change?  (Not that it would be a
big problem if it was necessary.  However, I prefer to keep things in /etc/rc.d
as close to the distribution defaults as possible, just in case I ever need to
reinstall the distribution.)

Wayne





^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23 13:34 Andries.Brouwer
@ 2001-05-23 17:54 ` Alexander Viro
  2001-05-24 10:35   ` Stephen C. Tweedie
  0 siblings, 1 reply; 55+ messages in thread
From: Alexander Viro @ 2001-05-23 17:54 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: helgehaf, linux-kernel



On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:

> > But I don't want an initrd.
> 
> Don't be afraid of words. You wouldnt notice - it would do its
> job and disappear just like piggyback today.

Andries, initrd code is _sick_. Our boot sequence is not a wonder of
elegance, but that crap is the worst part. I certainly can understand
people not wanting to use that on aesthetical reasons alone.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23 18:28 Andries.Brouwer
  2001-05-23 18:42 ` Alexander Viro
  0 siblings, 1 reply; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23 18:28 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: helgehaf, linux-kernel

> Andries, initrd code is _sick_.

Oh, but the fact that there exists a bad implementation
does not mean the idea is wrong. It is really easy to
make an elegant implementation.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23 18:28 Andries.Brouwer
@ 2001-05-23 18:42 ` Alexander Viro
  0 siblings, 0 replies; 55+ messages in thread
From: Alexander Viro @ 2001-05-23 18:42 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: helgehaf, linux-kernel



On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:

> > Andries, initrd code is _sick_.
> 
> Oh, but the fact that there exists a bad implementation
> does not mean the idea is wrong. It is really easy to
> make an elegant implementation.

Andries, I've been doing cleanups of that logics (see namespaces-patch -
they've got merged into it) and I have to say that you are sadly mistaken.

It's not just an implementation that is ugly, it's behaviour currently
implemented (and relied upon by existing boot setups) that is extremely
ill-defined and crufty. I would rather get rid of the abortion, but that
is impossible without breaking tons of existing setups.

And that _is_ an area where "we can do something vaguely similar to
current behaviour that wouldn't take pages to describe" does not work.
You don't fsck with others' boot sequences, unless you want a free
tar-and-feather ride. I don't want it.

Besides, just on general principles, we'd better have clean interface
for changing partitioning on the kernel side and rip that crap out of
$BIGNUM fdisk implmentations. It can be made modular, so problem of
teaching it new types of partitions is not hard.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
@ 2001-05-23 20:01 Andries.Brouwer
  0 siblings, 0 replies; 55+ messages in thread
From: Andries.Brouwer @ 2001-05-23 20:01 UTC (permalink / raw)
  To: Andries.Brouwer, viro; +Cc: helgehaf, linux-kernel

> Besides, just on general principles, we'd better have clean interface
> for changing partitioning

It is not quite clear to me what you are arguing for or against.
But never mind - I'll leave few hours from now.
When the time is there I'll show you an implementation,
and if you don't like it, you'll show me a better one.

Andries

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] struct char_device
  2001-05-23 17:54 ` Alexander Viro
@ 2001-05-24 10:35   ` Stephen C. Tweedie
  0 siblings, 0 replies; 55+ messages in thread
From: Stephen C. Tweedie @ 2001-05-24 10:35 UTC (permalink / raw)
  To: Alexander Viro; +Cc: Andries.Brouwer, helgehaf, linux-kernel, Stephen Tweedie

Hi,

On Wed, May 23, 2001 at 01:54:15PM -0400, Alexander Viro wrote:
> On Wed, 23 May 2001 Andries.Brouwer@cwi.nl wrote:
> 
> > > But I don't want an initrd.
> > Don't be afraid of words. You wouldnt notice - it would do its
> > job and disappear just like piggyback today.
> 
> Andries, initrd code is _sick_. Our boot sequence is not a wonder of
> elegance, but that crap is the worst part. I certainly can understand
> people not wanting to use that on aesthetical reasons alone.

It's the principle of a kernel-linked boot image, not initrd, which is
important.  Unpacking a cramfs image from an __init section is much
cleaner than initrd and has almost the same effect: the boot
filesystem just ends up readonly that way.  Either way we can hook
in that default user-space code at boot time transparently.

--Stephen

^ permalink raw reply	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2001-05-24 11:17 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2001-05-23 20:01 [PATCH] struct char_device Andries.Brouwer
  -- strict thread matches above, loose matches on Subject: below --
2001-05-23 18:28 Andries.Brouwer
2001-05-23 18:42 ` Alexander Viro
2001-05-23 15:24 Wayne.Brown
2001-05-23 13:34 Andries.Brouwer
2001-05-23 17:54 ` Alexander Viro
2001-05-24 10:35   ` Stephen C. Tweedie
2001-05-23 12:29 Andries.Brouwer
2001-05-23 12:30 ` Alan Cox
2001-05-23 13:26 ` Helge Hafting
2001-05-23 11:57 Andries.Brouwer
2001-05-23 12:13 ` Alan Cox
2001-05-23  6:47 Andries.Brouwer
2001-05-23  0:28 Andries.Brouwer
2001-05-23  0:38 ` Alexander Viro
2001-05-23  0:22 Andries.Brouwer
2001-05-23  0:29 ` Martin Dalecki
2001-05-23  0:20 Andries.Brouwer
2001-05-23  2:43 ` Linus Torvalds
2001-05-23  0:01 Andries.Brouwer
2001-05-22 23:33 Andries.Brouwer
2001-05-23  0:03 ` Alexander Viro
2001-05-22 22:17 Andries.Brouwer
2001-05-22 22:34 ` Martin Dalecki
2001-05-22 22:47 ` Martin Dalecki
2001-05-23  0:02   ` Jeff Garzik
2001-05-23  0:14     ` Jens Axboe
2001-05-23  2:40     ` Linus Torvalds
2001-05-23 12:35       ` Martin Dalecki
2001-05-22 21:35 Andries.Brouwer
2001-05-22 22:00 ` Martin Dalecki
2001-05-22 20:54 Andries.Brouwer
2001-05-22 21:17 ` Martin Dalecki
2001-05-22 22:37 ` Linus Torvalds
2001-05-22 23:51   ` Alexander Viro
2001-05-23  0:06     ` Jeff Garzik
2001-05-23  0:14       ` Jens Axboe
2001-05-23  2:37       ` Linus Torvalds
2001-05-23  3:04         ` Jeff Garzik
2001-05-23  3:21           ` Jeff Garzik
2001-05-23  9:05         ` Alan Cox
2001-05-23  2:35     ` Linus Torvalds
2001-05-22 19:52 Andries.Brouwer
2001-05-22 20:10 ` Alexander Viro
     [not found] <Pine.GSO.4.21.0105221007460.15685-100000@weyl.math.psu.edu >
2001-05-22 15:26 ` Anton Altaparmakov
2001-05-22 16:08   ` Oliver Xymoron
2001-05-22 16:12     ` Alexander Viro
2001-05-22 17:30       ` Oliver Xymoron
2001-05-22 17:41         ` Alexander Viro
2001-05-22 19:22     ` Guest section DW
2001-05-22 19:25       ` Alexander Viro
2001-05-22 19:38       ` Oliver Xymoron
     [not found] <Pine.LNX.4.10.10105221050080.8984-100000@coffee.psychology.mcmaster.ca>
2001-05-22 14:59 ` Tommy Hallgren
2001-05-22 14:40 Tommy Hallgren
2001-05-22 14:18 Alexander Viro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox