All of lore.kernel.org

All of lore.kernel.org
 help / color / mirror / Atom feed

* Re: [parisc-linux] Problem with Samba on a D370
From: Matt Taggart @ 2002-01-06  5:08 UTC (permalink / raw)
  To: Peter Lavender; +Cc: Peter Mottram, parisc-linux, taggart
In-Reply-To: <02d801c19665$87a5d520$090ba8c0@winnie>

"Peter Lavender" writes...

> What do you mean by an appropriate line in the sources.list?  Is it a link
> to the location of the samba package?

`man sources.list` and read about deb-src.

-- 
Matt Taggart        Linux Development Lab
taggart@fc.hp.com   HP Linux Systems Operation

^ permalink raw reply

* Re: [CFT] Unbork fs.h + per-fs supers (fixed)
From: Daniel Phillips @ 2002-01-06  5:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: Arnaldo Carvalho de Melo
In-Reply-To: <E16MsC5-0001H4-00@starship.berlin>

On January 5, 2002 03:53 pm, Daniel Phillips wrote:
> Don't use the above patch please, there's a stupid oversight - it seems 
> super_block has a couple of fields *after* the fs-private union, and the
> new code doesn't take that into account.

Yes, that was it all right.  This was fixed by moving the two 'don't use' 
super_block fields before the union.  (The variable-sized component of such structs 
always has to come last.)  I also cleaned up the formatting of my additions 
along the lines discussed in the recent 'janitorial' thread.

Doing 'less /proc/slabinfo' shows the ext2 inode cache objects, at 416 bytes, 
are now 64 bytes smaller than they used to be.  More space could be saved by
relaxing the slab cache alignment for these objects from 64 bytes (686
cacheline) to 4 or 8 bytes.  Has anybody checked to see if hardware cacheline
alignment for inodes is really a win?

This patch now appears stable enough for testing - I'm running it on my
server at the moment.

To Apply:

  cd /your/2.4.17/tree
  cat this/patch | patch -p0

--
Daniel

--- ../2.4.17.clean/drivers/block/rd.c	Fri Dec 21 12:41:53 2001
+++ ./drivers/block/rd.c	Sat Jan  5 10:29:35 2002
@@ -673,7 +673,7 @@
 #endif
 	ram_device = MKDEV(MAJOR_NR, unit);
 
-	if ((inode = get_empty_inode()) == NULL)
+	if ((inode = get_empty_inode(NULL)) == NULL)
 		return;
 	memset(&infile, 0, sizeof(infile));
 	memset(&in_dentry, 0, sizeof(in_dentry));
@@ -683,7 +683,7 @@
 	infile.f_op = &def_blk_fops;
 	init_special_inode(inode, S_IFBLK | S_IRUSR, kdev_t_to_nr(device));
 
-	if ((out_inode = get_empty_inode()) == NULL)
+	if ((out_inode = get_empty_inode(NULL)) == NULL)
 		goto free_inode;
 	memset(&outfile, 0, sizeof(outfile));
 	memset(&out_dentry, 0, sizeof(out_dentry));
--- ../2.4.17.clean/fs/ext2/super.c	Fri Dec 21 12:41:55 2001
+++ ./fs/ext2/super.c	Sun Jan  6 01:50:44 2002
@@ -806,16 +806,23 @@
 	return 0;
 }
 
-static DECLARE_FSTYPE_DEV(ext2_fs_type, "ext2", ext2_read_super);
-
+static struct file_system_type ext2_fs = {
+	owner:		THIS_MODULE,
+	fs_flags:	FS_REQUIRES_DEV,
+	name:		"ext2",
+	read_super:	ext2_read_super,
+	super_size:	sizeof(struct ext2_sb_info),
+	inode_size:	sizeof(struct ext2_inode_info)
+};
+ 
 static int __init init_ext2_fs(void)
 {
-        return register_filesystem(&ext2_fs_type);
+        return register_filesystem(&ext2_fs);
 }
 
 static void __exit exit_ext2_fs(void)
 {
-	unregister_filesystem(&ext2_fs_type);
+	unregister_filesystem(&ext2_fs);
 }
 
 EXPORT_NO_SYMBOLS;
--- ../2.4.17.clean/fs/inode.c	Fri Dec 21 12:41:55 2001
+++ ./fs/inode.c	Sun Jan  6 03:42:25 2002
@@ -75,29 +75,32 @@
 
 static kmem_cache_t * inode_cachep;
 
-#define alloc_inode() \
-	 ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL))
+static inline struct inode *alloc_inode(struct super_block *sb)
+{
+	kmem_cache_t *cache = sb? sb->s_type->inode_cache: NULL;
+	return (struct inode *) kmem_cache_alloc(cache? cache: inode_cachep, SLAB_KERNEL);
+}
+
 static void destroy_inode(struct inode *inode) 
 {
+	struct super_block *sb = inode->i_sb;
+	kmem_cache_t *cache = sb? sb->s_type->inode_cache: NULL;
 	if (inode_has_buffers(inode))
 		BUG();
-	kmem_cache_free(inode_cachep, (inode));
+	kmem_cache_free(cache? cache: inode_cachep, inode);
 }
 
-
 /*
  * These are initializations that only need to be done
  * once, because the fields are idempotent across use
  * of the inode, so let the slab aware of that.
  */
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *p, kmem_cache_t *cache, unsigned long flags)
 {
-	struct inode * inode = (struct inode *) foo;
-
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
 	{
-		memset(inode, 0, sizeof(*inode));
+		struct inode *inode = (struct inode *) p;
+		kmem_cache_clear(cache, inode);
 		init_waitqueue_head(&inode->i_wait);
 		INIT_LIST_HEAD(&inode->i_hash);
 		INIT_LIST_HEAD(&inode->i_data.clean_pages);
@@ -710,6 +713,7 @@
 
 int shrink_icache_memory(int priority, int gfp_mask)
 {
+	struct file_system_type *fs;
 	int count = 0;
 
 	/*
@@ -725,7 +729,15 @@
 	count = inodes_stat.nr_unused / priority;
 
 	prune_icache(count);
+#if 0
+	/* Manfred thinks this isn't necessary */
 	kmem_cache_shrink(inode_cachep);
+	write_lock(&file_systems_lock);
+	for (fs = file_systems; fs; fs = fs->next)
+		kmem_cache_shrink(fs->inode_cache);
+	write_unlock(&file_systems_lock);
+	kmem_cache_shrink(inode_cachep);
+#endif
 	return 0;
 }
 
@@ -765,12 +777,14 @@
  * i_sb, i_ino, i_count, i_state and the lists have
  * been initialized elsewhere..
  */
-static void clean_inode(struct inode *inode)
+static void init_inode(struct inode *inode, struct super_block *sb)
 {
 	static struct address_space_operations empty_aops;
 	static struct inode_operations empty_iops;
 	static struct file_operations empty_fops;
-	memset(&inode->u, 0, sizeof(inode->u));
+	unsigned given = sb? sb->s_type->inode_size: 0; // only rd.c has null sb
+	memset(&inode->u, 0, given? given: sizeof(inode->u));
+	inode->i_sb = sb;
 	inode->i_sock = 0;
 	inode->i_op = &empty_iops;
 	inode->i_fop = &empty_fops;
@@ -802,20 +816,19 @@
  * lists.
  */
  
-struct inode * get_empty_inode(void)
+struct inode *get_empty_inode(struct super_block *sb)
 {
 	static unsigned long last_ino;
-	struct inode * inode;
+	struct inode *inode;
 
 	spin_lock_prefetch(&inode_lock);
-	
-	inode = alloc_inode();
+	inode = alloc_inode(sb);
 	if (inode)
 	{
 		spin_lock(&inode_lock);
 		inodes_stat.nr_inodes++;
 		list_add(&inode->i_list, &inode_in_use);
-		inode->i_sb = NULL;
+		inode->i_sb = NULL; // need this?
 		inode->i_dev = 0;
 		inode->i_blkbits = 0;
 		inode->i_ino = ++last_ino;
@@ -823,7 +836,7 @@
 		atomic_set(&inode->i_count, 1);
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
-		clean_inode(inode);
+		init_inode(inode, sb);
 	}
 	return inode;
 }
@@ -838,7 +851,7 @@
 {
 	struct inode * inode;
 
-	inode = alloc_inode();
+	inode = alloc_inode(sb);
 	if (inode) {
 		struct inode * old;
 
@@ -849,7 +862,6 @@
 			inodes_stat.nr_inodes++;
 			list_add(&inode->i_list, &inode_in_use);
 			list_add(&inode->i_hash, head);
-			inode->i_sb = sb;
 			inode->i_dev = sb->s_dev;
 			inode->i_blkbits = sb->s_blocksize_bits;
 			inode->i_ino = ino;
@@ -857,8 +869,7 @@
 			atomic_set(&inode->i_count, 1);
 			inode->i_state = I_LOCK;
 			spin_unlock(&inode_lock);
-
-			clean_inode(inode);
+			init_inode(inode, sb);
 
 			/* reiserfs specific hack right here.  We don't
 			** want this to last, and are looking for VFS changes
@@ -897,6 +908,21 @@
 		wait_on_inode(inode);
 	}
 	return inode;
+}
+
+int create_inode_cache(struct file_system_type *fs)
+{
+	if (fs->inode_size)
+		if (!(fs->inode_cache = kmem_cache_create(fs->name, 
+		    fs->inode_size + sizeof(struct inode) - sizeof(get_empty_inode(0)->u),
+		    0, SLAB_HWCACHE_ALIGN, init_once, NULL)))
+			return -ENOSPC;
+	return 0;
+}
+
+int destroy_inode_cache(struct file_system_type *fs)
+{
+	return kmem_cache_destroy(fs->inode_cache)? -EBUSY: 0;
 }
 
 static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
--- ../2.4.17.clean/fs/super.c	Fri Dec 21 12:42:03 2001
+++ ./fs/super.c	Sun Jan  6 03:44:18 2002
@@ -67,8 +67,8 @@
  *	Once the reference is obtained we can drop the spinlock.
  */
 
-static struct file_system_type *file_systems;
-static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
+struct file_system_type *file_systems;
+rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
 
 /* WARNING: This can be used only if we _already_ own a reference */
 static void get_filesystem(struct file_system_type *fs)
@@ -105,10 +105,10 @@
  *	unregistered.
  */
  
-int register_filesystem(struct file_system_type * fs)
+int register_filesystem(struct file_system_type *fs)
 {
-	int res = 0;
-	struct file_system_type ** p;
+	struct file_system_type **p;
+	int err = 0;
 
 	if (!fs)
 		return -EINVAL;
@@ -118,11 +118,12 @@
 	write_lock(&file_systems_lock);
 	p = find_filesystem(fs->name);
 	if (*p)
-		res = -EBUSY;
+		err = -EBUSY;
 	else
-		*p = fs;
+		if (!(err = create_inode_cache(fs)))
+			*p = fs;
 	write_unlock(&file_systems_lock);
-	return res;
+	return err;
 }
 
 /**
@@ -137,23 +138,25 @@
  *	may be freed or reused.
  */
  
-int unregister_filesystem(struct file_system_type * fs)
+int unregister_filesystem(struct file_system_type *fs)
 {
-	struct file_system_type ** tmp;
-
+	struct file_system_type **p;
+	int err = -EINVAL;
 	write_lock(&file_systems_lock);
-	tmp = &file_systems;
-	while (*tmp) {
-		if (fs == *tmp) {
-			*tmp = fs->next;
+	p = &file_systems;
+	while (*p) {
+		if (*p == fs) {
+			if (fs->inode_cache && (err = destroy_inode_cache(fs)))
+				break;
+			*p = fs->next;
 			fs->next = NULL;
-			write_unlock(&file_systems_lock);
-			return 0;
+			err = 0;
+			break;
 		}
-		tmp = &(*tmp)->next;
+		p = &(*p)->next;
 	}
 	write_unlock(&file_systems_lock);
-	return -EINVAL;
+	return err;
 }
 
 static int fs_index(const char * __name)
@@ -419,11 +422,17 @@
  *	the request.
  */
  
-static struct super_block *alloc_super(void)
+static struct super_block *alloc_super(struct file_system_type *fs)
 {
-	struct super_block *s = kmalloc(sizeof(struct super_block),  GFP_USER);
-	if (s) {
-		memset(s, 0, sizeof(struct super_block));
+	struct super_block *s;
+        unsigned size = sizeof(struct super_block);
+
+	if (fs->super_size)
+		size += fs->super_size - sizeof(alloc_super(0)->u);
+
+	printk(">>> %s super size is %i\n", fs->name, size);
+	if ((s = kmalloc(size, GFP_USER))) {
+		memset(s, 0, size);
 		INIT_LIST_HEAD(&s->s_dirty);
 		INIT_LIST_HEAD(&s->s_locked_inodes);
 		INIT_LIST_HEAD(&s->s_files);
@@ -446,7 +455,7 @@
 				       void *data)
 {
 	struct super_block * s;
-	s = alloc_super();
+	s = alloc_super(type);
 	if (!s)
 		goto out;
 	s->s_dev = dev;
@@ -578,7 +587,7 @@
 		goto out1;
 
 	error = -ENOMEM;
-	s = alloc_super();
+	s = alloc_super(fs_type);
 	if (!s)
 		goto out1;
 	down_write(&s->s_umount);
@@ -664,7 +673,7 @@
 static struct super_block *get_sb_single(struct file_system_type *fs_type,
 	int flags, void *data)
 {
-	struct super_block * s = alloc_super();
+	struct super_block * s = alloc_super(fs_type);
 	if (!s)
 		return ERR_PTR(-ENOMEM);
 	down_write(&s->s_umount);
--- ../2.4.17.clean/include/linux/fs.h	Fri Dec 21 12:42:03 2001
+++ ./include/linux/fs.h	Sun Jan  6 03:26:53 2002
@@ -691,6 +691,9 @@
 #include <linux/cramfs_fs_sb.h>
 #include <linux/jffs2_fs_sb.h>
 
+extern struct file_system_type *file_systems;
+extern rwlock_t file_systems_lock;
+
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
@@ -722,6 +725,21 @@
 	struct list_head	s_instances;
 	struct quota_mount_options s_dquot;	/* Diskquota specific options */
 
+	/*
+	 * The next field is for VFS *only*. No filesystems have any business
+	 * even looking at it. You had been warned.
+	 */
+	struct semaphore s_vfs_rename_sem;	/* Kludge */
+
+	/* The next field is used by knfsd when converting a (inode number based)
+	 * file handle into a dentry. As it builds a path in the dcache tree from
+	 * the bottom up, there may for a time be a subpath of dentrys which is not
+	 * connected to the main tree.  This semaphore ensure that there is only ever
+	 * one such free path per filesystem.  Note that unconnected files (or other
+	 * non-directories) are allowed, but not unconnected diretories.
+	 */
+	struct semaphore s_nfsd_free_path_sem;
+
 	union {
 		struct minix_sb_info	minix_sb;
 		struct ext2_sb_info	ext2_sb;
@@ -750,20 +768,6 @@
 		struct cramfs_sb_info	cramfs_sb;
 		void			*generic_sbp;
 	} u;
-	/*
-	 * The next field is for VFS *only*. No filesystems have any business
-	 * even looking at it. You had been warned.
-	 */
-	struct semaphore s_vfs_rename_sem;	/* Kludge */
-
-	/* The next field is used by knfsd when converting a (inode number based)
-	 * file handle into a dentry. As it builds a path in the dcache tree from
-	 * the bottom up, there may for a time be a subpath of dentrys which is not
-	 * connected to the main tree.  This semaphore ensure that there is only ever
-	 * one such free path per filesystem.  Note that unconnected files (or other
-	 * non-directories) are allowed, but not unconnected diretories.
-	 */
-	struct semaphore s_nfsd_free_path_sem;
 };
 
 /*
@@ -951,10 +955,14 @@
 	int fs_flags;
 	struct super_block *(*read_super) (struct super_block *, void *, int);
 	struct module *owner;
-	struct file_system_type * next;
+	struct file_system_type *next;
 	struct list_head fs_supers;
+	unsigned super_size, inode_size;
+	struct kmem_cache_s *inode_cache;
 };
 
+/* Backward compatible declarations, remove when all updated */
+
 #define DECLARE_FSTYPE(var,type,read,flags) \
 struct file_system_type var = { \
 	name:		type, \
@@ -1328,20 +1336,21 @@
 }
 
 extern void clear_inode(struct inode *);
-extern struct inode * get_empty_inode(void);
+extern struct inode *get_empty_inode(struct super_block *sb);
 
-static inline struct inode * new_inode(struct super_block *sb)
+static inline struct inode *new_inode(struct super_block *sb)
 {
-	struct inode *inode = get_empty_inode();
+	struct inode *inode = get_empty_inode(sb);
 	if (inode) {
-		inode->i_sb = sb;
 		inode->i_dev = sb->s_dev;
 		inode->i_blkbits = sb->s_blocksize_bits;
 	}
 	return inode;
 }
-extern void remove_suid(struct inode *inode);
 
+extern int create_inode_cache(struct file_system_type *fs);
+extern int destroy_inode_cache(struct file_system_type *fs);
+extern void remove_suid(struct inode *inode);
 extern void insert_inode_hash(struct inode *);
 extern void remove_inode_hash(struct inode *);
 extern struct file * get_empty_filp(void);
--- ../2.4.17.clean/include/linux/slab.h	Fri Dec 21 12:42:04 2001
+++ ./include/linux/slab.h	Sun Jan  6 03:27:29 2002
@@ -57,6 +57,7 @@
 extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, int);
 extern void kmem_cache_free(kmem_cache_t *, void *);
+extern void kmem_cache_clear(kmem_cache_t *, void *);
 
 extern void *kmalloc(size_t, int);
 extern void kfree(const void *);
--- ../2.4.17.clean/mm/slab.c	Fri Dec 21 12:42:05 2001
+++ ./mm/slab.c	Sat Jan  5 10:29:35 2002
@@ -1078,6 +1078,16 @@
 	slabp->free = 0;
 }
 
+void kmem_cache_clear(kmem_cache_t *cachep, void *objp)
+{
+	unsigned size = cachep->objsize;
+#if DEBUG
+	if (cachep->flags & SLAB_RED_ZONE)
+		size -= BYTES_PER_WORD*2;
+#endif
+	memset(objp, 0, size);
+}
+
 /*
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
--- ../2.4.17.clean/net/socket.c	Fri Dec 21 12:42:06 2001
+++ ./net/socket.c	Sat Jan  5 10:29:35 2002
@@ -438,11 +438,10 @@
 	struct inode * inode;
 	struct socket * sock;
 
-	inode = get_empty_inode();
+	inode = get_empty_inode(sock_mnt->mnt_sb);
 	if (!inode)
 		return NULL;
 
-	inode->i_sb = sock_mnt->mnt_sb;
 	sock = socki_lookup(inode);
 
 	inode->i_mode = S_IFSOCK|S_IRWXUGO;

^ permalink raw reply

* Re: [PATCH] C undefined behavior fix
From: dewar @ 2002-01-06  4:26 UTC (permalink / raw)
  To: dewar, paulus; +Cc: gcc, linux-kernel, trini, velco

<<There are some C compilers that are useful for implementing a kernel,
that's true.  But when the maintainers of such a compiler say things
that imply that they feel they are constrained only by the standard
and not by the needs of their users, it is very discouraging.
>>

What is important is for these users to *clearly* and at least 
semi-formally, state their needs. Saying general things about the need
to be useful is hardly helpful!

You quote Florian:

> You cannot manipulate machine addresses in C because C is
> defined as a high-level language, without backdoors to such low-level
> concepts as machine addresses.

Unfortunately Florian is right. The ability in C to manipulate low-level
concepts such as machine addresses is NOT part of the language, but rather
comes from exploiting aspects that are deliberately left implementation
dependent. This is why it is so important to formally state the requirements
that are being depended on.

I don't think anyone seriously objects to trying to formulate solutions
to what is indeed a very important problem.

But it is hardly helpful for people to take the attitude "we wrote this
kernel, and it worked, and any change to the compiler that stops it from
working is unacceptable".

^ permalink raw reply

* Re: [PATCH] C undefined behavior fix
From: Paul Mackerras @ 2002-01-06  3:40 UTC (permalink / raw)
  To: dewar; +Cc: gcc, linux-kernel, trini, velco
In-Reply-To: <20020104224325.04B43F319D@nile.gnat.com>

dewar@gnat.com writes:

> <<Sorry, you are correct.  I should have written "One of the reasons why
> C used to be a good language for writing operating system kernels ..."
> >>
> 
> C is perfectly well suited for writing operating system kernels, but you

Actually, having seen some of the subsequent messages in this thread,
for example this gem from Florian Weimer, I stand by my statement. :)

> You cannot manipulate machine addresses in C because C is
> defined as a high-level language, without backdoors to such low-level
> concepts as machine addresses.

We keep getting told that there is nothing in the standard that says
that the compiler has to give us a way to construct a pointer (one
that we can dereference) from an address, and that if we think we have
a way at the moment we shouldn't rely on it because it might change at
any minute, and if it does and our kernel doesn't work any more it's
all our fault.

> absolutely HAVE to know what you are doing, and that includes knowing the
> C standard accurately, and clearly identifying any implementation dependent
> behavior that you are counting on.

There are some C compilers that are useful for implementing a kernel,
that's true.  But when the maintainers of such a compiler say things
that imply that they feel they are constrained only by the standard
and not by the needs of their users, it is very discouraging.

> The "used to be" is bogus. The (base + offset) memory model of C has been
> there since the earliest days of the definition of C. The only thing that
> "used to be" the case is that people ignored these rules freely and since
> compilers were fairly stupid, they got away with this rash behavior.

Oh, I was talking about the original C language as it was designed and
implemented by 2 or 3 smart people who were also using it to build a
kernel, a compiler and a lot of other programs.  Not that the original
C language didn't have flaws; it did, and the ANSI committee that
developed the standard fixed a lot of them.  But I do feel that the
language has drifted away from its roots as a systems programming
language.

Paul.

^ permalink raw reply

* Re: [PATCH] C undefined behavior fix
From: Paul Mackerras @ 2002-01-06  4:09 UTC (permalink / raw)
  To: Joseph S. Myers
  Cc: jkl, Florian Weimer, dewar, Dautrevaux, Franz.Sirl-kernel, benh,
	gcc, jtv, linux-kernel, linuxppc-dev, minyard, rth, trini, velco
In-Reply-To: <Pine.LNX.4.33.0201051929080.485-100000@kern.srcf.societies.cam.ac.uk>

Joseph S. Myers writes:

> Just because you've created a pointer P, and it compares bitwise equal to
> a valid pointer Q you can use to access an object, does not mean that P
> can be used to access that object.  Look at DR#260, discussing the

I looked at this, and it starts out with an example that includes a
statement free(p); (where p was assigned a value returned from malloc)
and then states that "After the call to free the value of p is
indeterminate."!

This seems absolutely and completely bogus to me.  Certainly, after
the free, the value of *p is indeterminate, but the value of p itself
*is* determinate; its value after the free is identical to its value
before the free.  Why do they say that the value of p itself is
indeterminate after the free?

The two examples of why a compiler might want to change the value are
also bogus; the compiler can avoid writing the value of p from a
register back to memory only if the value is dead, and it isn't in the
example given.  As for the debugging opportunity, if I want p to be
set to NULL or some other pattern for debugging I'll do it explicitly.

In general I think that when a pointer value has been obtained by a
cast to an integer or by passing the address of a pointer to a
function, the compiler should assume that the pointer can point
anywhere.  That means reduced opportunities for optimization, but so
be it.  Note that all of the examples in DR#260 involve passing &p to
some function.

Paul.

^ permalink raw reply

* Re: i686 SMP systems with more then 12 GB ram with 2.4.x kernel ?
From: Gerrit Huizenga @ 2002-01-02 21:17 UTC (permalink / raw)
  To: Alan Cox; +Cc: M. Edward Borasky, Harald Holzer, linux-kernel
In-Reply-To: <E16LTvs-00016I-00@the-village.bc.nu>

In message <E16LTvs-00016I-00@the-village.bc.nu>, Alan Cox writes:

> > 2. Isn't the boundary at 2^30 really irrelevant and the three "correct"
> > zones are (0 - 2^24-1), (2^24 - 2^32-1) and (2^32 - 2^36-1)?
> 
> Nope. The limit for directly mapped memory is 2^30.

The limit *per L1 Page Table Base Pointer*, that is.  You could
in theory have a different L1 Page Table base pointer for each
task (including each proc 0 in linux).  You can also pull a few
tricks such as instantiating a 4 GB kernel virtual address space
while in kernel mode (using a virtual windowing mechanism as is used
for high mem today to map in user space for copying in data from
user space if/when needed).  The latter takes some tricky code to
get mapping correct but it wasn't a lot of code in PTX.  Just needed
a lot of careful thought, review, testing, etc.

I don't know if there are real examples of large memory systems
exhausting the ~1 GB of kernel virtual address space on machines
with > 12-32 GB of physical memory (we had this problem in PTX which
created the need for a larger kernel virtual address space in some
contexts).

> > 3. On a system without ISA DMA devices, can DMA and low be merged into a
> > single zone?
> 
> Rarely. PCI vendors are not exactly angels when it comes to implementing
> all 32bits of a DMA transfer

Would be nice to have a config option like "CONFIG_PCI_36" to imply
that all devices on a PAE system were able to access all of memory,
globally removing the need for bounce buffering and allowing a native
PCI setup for mapping memory addresses...

gerrit

^ permalink raw reply

* Re: [RFC] [PATCH] Clean up fs.h union for ext2
From: Anton Altaparmakov @ 2002-01-06  4:04 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Legacy Fishtank, linux-kernel, ext2-devel,
	Arnaldo Carvalho de Melo, Alexander Viro, Marcelo Tosatti,
	Linus Torvalds
In-Reply-To: <5.1.0.14.2.20020105145226.03163170@pop.cus.cam.ac.uk>

At 03:32 06/01/2002, Daniel Phillips wrote:
>On January 5, 2002 03:56 pm, Anton Altaparmakov wrote:
> > At 14:47 05/01/02, Daniel Phillips wrote:
> > >On January 5, 2002 03:29 pm, Anton Altaparmakov wrote:
> > > > If anyone wants a look NTFS TNG already has gone all the way (for a
> > > > while now in fact). Both fs inode and super block are fs internal slab
> > > > caches and both use static inline NTFS_I / NTFS_SB functions and the
> > > > ntfs includes from linux/fs.h are removed altogether. Code is in
> > > > sourceforge cvs. For instructions how to download the code or to 
> browse
> > > > it online, see:
> > >
> > >Nice, did you use the generic_ip fields?
> >
> > Yes. From ntfs-driver-tng/linux/fs/ntfs/fs.h:
> >
> > [...]
> >
> >   static inline ntfs_inode *NTFS_I(struct inode *inode)
> >   {
> >            return inode->u.generic_ip;
> >   }
>
>OK, so you are doing two kmem_cache_allocs for every new_inode.  With the
>unbork.fs patch you could save 50% of the kmem_cache_allocs by
>rewriting as follows:
>
>     static inline ntfs_inode *NTFS_I(struct inode *inode)
>     {
>             /* should bug-check to be sure it's really one of ours */
>             return (ntfs_inode *) &(inode->u);
>     }
>
>And you just fill in the inode_size field of the file_system_type
>declaration.  The vfs will then handle all the details of allocating/freeing
>inodes and the inode slab cache.  (Note that Al seems to think this is the
>wrong way of doing it, but hasn't said why he thinks that yet.)

I will hold back any changes until all the details have been ironed out 
first and Al has given his seal of approval... To be honest I fail to see 
how one additional slab allocation will make any difference. Certainly for 
NTFS where we attach dynamically allocated data to the fs specific part of 
the inode (sometimes going via the slow vmalloc() at that) during the read 
inode call, I doubt very much that there will be any visible performance 
difference. But if it is decided that this is the Right Way(TM) to do it, I 
will of course go with it.

>For superblocks - are you sure you want a dedicated slab cache for those?  It
>seems to me that kmalloc is perfectly appropriate for this, and saves the
>code needed to set up, keep track of, and tear down the slab cache.

No, I was doing both at the same time and got slightly carried away. (-; I 
will convert the super block allocations to simple kmalloc()s when I get bored.

Best regards,

         Anton


-- 
   "I've not lost my mind. It's backed up on tape somewhere." - Unknown
-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Linux NTFS Maintainer / WWW: http://linux-ntfs.sf.net/
ICQ: 8561279 / WWW: http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Luc Van Oostenryck @ 2002-01-06  3:55 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar
In-Reply-To: <Pine.LNX.4.33.0201060516090.6357-100000@localhost.localdomain>

Ingo Molnar wrote:
> 
> against -pre9:
> 
>     http://redhat.com/~mingo/O(1)-scheduler/sched-O1-2.5.2-B4.patch
> 
>         Ingo

Ingo,

I have been running 2.5.2-pre9 with your -B4 patch for about an hour.
I have done a little stress testing, seems OK: no crash, no freeze.

-- 
Luc Van Oostenryck

^ permalink raw reply

* Re: [parisc-linux] Problem with Samba on a D370
From: Peter Lavender @ 2002-01-06  3:52 UTC (permalink / raw)
  To: Peter Mottram; +Cc: parisc-linux
In-Reply-To: <Pine.LNX.4.42.0201031237480.8858-100000@r6.petescaff.com>

Peter,

> This package doesn't yet appear to be built for hppa, but if you add an
> appropriate deb-src line for sid in your sources.list file then you can
> download and build the new samba packages using:
>
> apt-get source samba --build
>
> & then install the packages using 'dpkg -i'
>
> Hope I'm not teaching grandma to suck eggs here.

You may be for Herb, but for those of us new to Debian, this is a great way
to learn something without having to ask first.  :)

What do you mean by an appropriate line in the sources.list?  Is it a link
to the location of the samba package?

Thanks,

Pete

^ permalink raw reply

* [ingo scheduler patch] sorry --> 2.4.17-B4 works with no modules kernel....
From: vvikram @ 2002-01-06  3:43 UTC (permalink / raw)
  To: linux-kernel; +Cc: mingo

hi ingo,

my bad - sorry. it was some other stupid change of mine to the kernel [i.e 
not a fresh kernel] which caused the lockup. 

i am mailing this after booting from a _fresh_ kernel, no modules and 
your patch applied.

it works great:). hope this helps.

i can now start stress-testing it....

	Vikram

--
1) Linux av 2.4.17 #5 Sat Jan 5 19:11:24 PST 2002 i686 unknown
2) 
processor	: 0
vendor_id	: AuthenticAMD
cpu family	: 6
model		: 3
model name	: AMD Duron(tm) Processor
stepping	: 0
cpu MHz		: 751.354
.
.
.

^ permalink raw reply

* Re: [patch] O(1) scheduler, 2.4.17-B0, 2.5.2-pre8-B0.
From: listmail @ 2002-01-06  3:34 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel
In-Reply-To: <Pine.LNX.4.33.0201060128250.1250-100000@localhost.localdomain>

How close are you and Robert Love to getting this patch and his pre-empt
patches to co-operate... seems like that might bring huge wins.  I know, I
know I could diff, and fix the rejects myself, but this seems too deep in
the kernel for a relative newbie like myself (plus I am more a file system
guy).

Bill

On Sun, 6 Jan 2002, Ingo Molnar wrote:

>
> this is the next, bugfix release of the O(1) scheduler:
>
> 	http://redhat.com/~mingo/O(1)-scheduler/sched-O1-2.5.2-B0.patch
> 	http://redhat.com/~mingo/O(1)-scheduler/sched-O1-2.4.17-B0.patch
>
> This release could fix the lockups and crashes reported by some people.
>
> Changes:
>
>  - remove the likely/unlikely define from sched.h and include compiler.h.
>    (Adrian Bunk)
>
>  - export sys_sched_yield, reported by Pawel Kot.
>
>  - turn off 'child runs first' temporarily, to see the effect.
>
>  - export nr_context_switches() as well, needed by ReiserFS.
>
>  - define resched_task() in the correct order to avoid compiler warnings
>    on UP.
>
>  - maximize the frequency of timer-tick driven load-balancing to 100 per
>    sec.
>
>  - clear ->need_resched in the RT scheduler path as well.
>
>  - simplify yield() support, remove TASK_YIELDED and __schedule_tail().
>
> Comments, bug reports, suggestions are welcome,
>
> 	Ingo
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


^ permalink raw reply

* [linux-lvm] VG dissappeared without a trace?
From: IpSo @ 2002-01-06  3:33 UTC (permalink / raw)
  To: linux-lvm

I was trying to extend my LVM partition to another disk, but being late and my
head not screwed on straight, things went terribly wrong.

I was using diskdrake (from Mandrake) and what I did was create a LVM partition
/dev/hdb6 and added it to my "quantum" VG which contained a single partition of
/dev/hda6. I was simply doing this as a test (my first time using LVM), so as
soon as it worked, I removed /dev/hdb6 from the "quantum" VG. I then proceeded
to create a larger partition /dev/hdc that I wanted to add to "quantum" for
good. I added it to the "quantum" VG and attempted to resize the "quantum" VG to
the full size of both /dev/hda6 (17gb) and /dev/hdc (12gb). Everything "seemed"
to go fine, so I exited from diskdrake and rebooted.

During the reboot, it said it couldn't mount /dev/quantum/1 as it was not a
proper block device. So I ran the following programs to try and collect as much
data as possible:

[root@ipso root]# pvscan -v
pvscan -- reading all physical volumes (this may take a while...)
pvscan -- walking through all physical volumes found
pvscan -- inactive PV "/dev/hda6"  is associated to an unknown VG (run vgscan)
pvscan -- inactive PV "/dev/hdb6"  is associated to an unknown VG (run vgscan)
pvscan -- total: 2 [19.01 GB] / in use: 2 [19.01 GB] / in no VG: 0 [0]

[root@ipso root]# vgscan -v
vgscan -- removing "/etc/lvmtab" and "/etc/lvmtab.d"
vgscan -- reading all physical volumes (this may take a while...)
vgscan -- no volume groups found

vgcfgrestore -n quantum -l -f /etc/lvmconf/quantum.conf.1.old
--- Volume group ---
VG Name               quantum
VG Access             read/write
VG Status             NOT available/resizable
VG #                  0
MAX LV                256
Cur LV                1
Open LV               0
MAX LV Size           255.99 GB
Max PV                256
Cur PV                2
Act PV                2
VG Size               19 GB
PE Size               4 MB
Total PE              4865
Alloc PE / Size       4328 / 16.91 GB
Free  PE / Size       537 / 2.1 GB
VG UUID               54LQnu-1gQl-mNuO-KR7U-yypa-woKi-iu7PRF

[root@ipso lvmconf]# vgcfgrestore -n quantum -l -f /etc/lvmconf/quantum.conf
--- Volume group ---
VG Name               quantum
VG Access             read/write
VG Status             NOT available/resizable
VG #                  0
MAX LV                256
Cur LV                1
Open LV               0
MAX LV Size           255.99 GB
Max PV                256
Cur PV                3
Act PV                3
VG Size               30.88 GB
PE Size               4 MB
Total PE              7904
Alloc PE / Size       4328 / 16.91 GB
Free  PE / Size       3576 / 13.97 GB
VG UUID               54LQnu-1gQl-mNuO-KR7U-yypa-woKi-iu7PRF

[root@ipso lvmconf]# pvdata -a /dev/hda6

--- Physical volume ---
PV Name               /dev/hda6
VG Name               quantum
PV Size               16.91 GB / NOT usable 3.12 MB [LVM: 137 KB]
PV#                   1
PV Status             NOT available
Allocatable           yes (but full)
Cur LV                1
PE Size (KByte)       4096
Total PE              4328
Free PE               0
Allocated PE          4328
PV UUID               e42Ge3-mpoP-PNat-1xLg-TMjr-4hRU-Q2agG9

--- Volume group ---
VG Name
VG Access             read/write
VG Status             NOT available/resizable
VG #                  0
MAX LV                256
Cur LV                1
Open LV               0
MAX LV Size           255.99 GB
Max PV                256
Cur PV                2
Act PV                2
VG Size               19 GB
PE Size               4 MB
Total PE              4865
Alloc PE / Size       4328 / 16.91 GB
Free  PE / Size       537 / 2.1 GB
VG UUID               54LQnu-1gQl-mNuO-KR7U-yypa-woKi-iu7PRF

--- List of logical volumes ---

pvdata -- logical volume "/dev/quantum/1" at offset   0
pvdata -- logical volume struct at offset   1 is empty
pvdata -- logical volume struct at offset   2 is empty
pvdata -- logical volume struct at offset   3 is empty
pvdata -- logical volume struct at offset   4 is empty
pvdata -- logical volume struct at offset   5 is empty
pvdata -- logical volume struct at offset   6 is empty
pvdata -- logical volume struct at offset   7 is empty
pvdata -- logical volume struct at offset   8 is empty
pvdata -- logical volume struct at offset   9 is empty
pvdata -- logical volume struct at offset  10 is empty
...
pvdata -- logical volume struct at offset 254 is empty
pvdata -- logical volume struct at offset 255 is empty

--- List of physical extents ---

PE: 00000  LV: 001  LE: 00000
PE: 00001  LV: 001  LE: 00001
PE: 00002  LV: 001  LE: 00002
...
PE: 04326  LV: 001  LE: 04326
PE: 04327  LV: 001  LE: 04327
--- List of physical volume UUIDs ---

000: e42Ge3mpoPPNat1xLgTMjr4hRUQ2agG9
001: --- EMPTY ---

[root@ipso lvmconf]# pvdata -a /dev/hdb6

--- Physical volume ---
PV Name               /dev/hdb6
VG Name               quantum
PV Size               2.1 GB / NOT usable 1.75 MB [LVM: 123 KB]
PV#                   2
PV Status             NOT available
Allocatable           yes
Cur LV                0
PE Size (KByte)       4096
Total PE              537
Free PE               537
Allocated PE          0
PV UUID               PFyl3d-HSJD-adUL-vpzS-Sib3-jy78-tgR569

--- Volume group ---
VG Name
VG Access             read/write
VG Status             NOT available/resizable
VG #                  0
MAX LV                256
Cur LV                1
Open LV               0
MAX LV Size           255.99 GB
Max PV                256
Cur PV                3
Act PV                3
VG Size               30.88 GB
PE Size               4 MB
Total PE              7904
Alloc PE / Size       4328 / 16.91 GB
Free  PE / Size       3576 / 13.97 GB
VG UUID               54LQnu-1gQl-mNuO-KR7U-yypa-woKi-iu7PRF

--- List of logical volumes ---

pvdata -- logical volume "/dev/quantum/1" at offset   0
pvdata -- logical volume struct at offset   1 is empty
pvdata -- logical volume struct at offset   2 is empty
pvdata -- logical volume struct at offset   3 is empty
...
pvdata -- logical volume struct at offset 254 is empty
pvdata -- logical volume struct at offset 255 is empty

--- List of physical extents ---

PE: 00000  LV: ---  LE: -----
PE: 00001  LV: ---  LE: -----
PE: 00002  LV: ---  LE: -----
...
PE: 00535  LV: ---  LE: -----
PE: 00536  LV: ---  LE: -----
--- List of physical volume UUIDs ---

000: e42Ge3mpoPPNat1xLgTMjr4hRUQ2agG9
001: PFyl3dHSJDadULvpzSSib3jy78tgR569
002: TeWEsY1qLF4PPwfZAlljyT3KiE4CCajd

Unfortunately, I deleted the partition from the larger drive I was planning on
adding to the "quantum" VG, (/dev/hdc it didn't have any data on it yet). I
don't really care about /dev/hdb6 either, I would just like the data off of
/dev/hda6.

Partition table for: /dev/hda6
   Device Boot    Start       End    Blocks   Id  System
/dev/hda1   *         1       507    255496+  82  Linux swap
/dev/hda2           508     39813  19810224    5  Extended
/dev/hda5           508      4633   2079472+  83  Linux
/dev/hda6          4634     39813  17730688+  8e  Linux LVM

Any ideas how I can get /dev/hda6 mountable again? Thanks.

IpSo

--------------------------------------------------------------------
Never worry about viruses in your Email again.
Get your FREE! virus scanned Email accounts at http://snappymail.ca

^ permalink raw reply

* 2.4.17 kernel without modules...was Re:O(1) SMP and UP scheduler
From: Vikram @ 2002-01-06  3:30 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: lkml
In-Reply-To: <E16N2oW-00021c-00@the-village.bc.nu>


hi ingo,

i am running 2.4.17 on an AMD duron 256MB RAM here. i tried the 2.4.17-B4
patch on a freshly built UP kernel . it applied successfully.

as you had mentioned earlier i built it _without_ modules. it
boots up fine and all that....goes to xdm ---> hard lockup after that.

	Vikram



On Sun, 6 Jan 2002, Alan Cox wrote:

> > Ingo, you don't need that many queues, 32 are more than sufficient.
> > If you look at the distribution you'll see that it matters ( for
> > interactive feel ) only the very first ( top ) queues, while lower ones
> > can very easily tolerate a FIFO pickup w/out bad feelings.
>
> 64 queues costs a tiny amount more than 32 queues. If you can get it down
> to eight or nine queues with no actual cost (especially for non realtime queues)
> then it represents a huge win since an 8bit ffz can be done by lookup table
> and that is fast on all processors
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>



^ permalink raw reply

* Re: PDC20269 support in 2.2?
From: Alan Cox @ 2002-01-06  3:40 UTC (permalink / raw)
  To: Andre Hedrick; +Cc: Roy Sigurd Karlsbakk, linux-kernel
In-Reply-To: <Pine.LNX.4.10.10201051849540.24436-100000@master.linux-ide.org>

> The day somebody pays for it and the check clears the bank or it is
> requested by the 2.2.X kernel maintainer for inclusion.  Since none of the

I don't see it as 2.2 material unless it can be added without disturbing
the other drivers. I was under the impression the new IDE stuff does smart
things that need new drivers to take advantage of ?

^ permalink raw reply

* Re: [RFC] [PATCH] Clean up fs.h union for ext2
From: Daniel Phillips @ 2002-01-06  3:32 UTC (permalink / raw)
  To: Anton Altaparmakov
  Cc: Legacy Fishtank, linux-kernel, ext2-devel,
	Arnaldo Carvalho de Melo, Alexander Viro, Marcelo Tosatti,
	Linus Torvalds
In-Reply-To: <5.1.0.14.2.20020105145226.03163170@pop.cus.cam.ac.uk>

On January 5, 2002 03:56 pm, Anton Altaparmakov wrote:
> At 14:47 05/01/02, Daniel Phillips wrote:
> >On January 5, 2002 03:29 pm, Anton Altaparmakov wrote:
> > > If anyone wants a look NTFS TNG already has gone all the way (for a
> > > while now in fact). Both fs inode and super block are fs internal slab 
> > > caches and both use static inline NTFS_I / NTFS_SB functions and the 
> > > ntfs includes from linux/fs.h are removed altogether. Code is in 
> > > sourceforge cvs. For instructions how to download the code or to browse 
> > > it online, see:
> >
> >Nice, did you use the generic_ip fields?
> 
> Yes. From ntfs-driver-tng/linux/fs/ntfs/fs.h:
>
> [...]
>
>   static inline ntfs_inode *NTFS_I(struct inode *inode)
>   {
>            return inode->u.generic_ip;
>   }

OK, so you are doing two kmem_cache_allocs for every new_inode.  With the 
unbork.fs patch you could save 50% of the kmem_cache_allocs by 
rewriting as follows:

    static inline ntfs_inode *NTFS_I(struct inode *inode)
    {
            /* should bug-check to be sure it's really one of ours */
            return (ntfs_inode *) &(inode->u);
    }

And you just fill in the inode_size field of the file_system_type 
declaration.  The vfs will then handle all the details of allocating/freeing 
inodes and the inode slab cache.  (Note that Al seems to think this is the 
wrong way of doing it, but hasn't said why he thinks that yet.)

For superblocks - are you sure you want a dedicated slab cache for those?  It 
seems to me that kmalloc is perfectly appropriate for this, and saves the 
code needed to set up, keep track of, and tear down the slab cache.

--
Daniel

^ permalink raw reply

* Re: oops in devfs
From: Andreas Dilger @ 2002-01-06  3:27 UTC (permalink / raw)
  To: Richard Gooch; +Cc: Jason Thomas, linux-kernel, marcelo
In-Reply-To: <200201060047.g060l4p08166@vindaloo.ras.ucalgary.ca>

On Jan 05, 2002  17:47 -0700, Richard Gooch wrote:
> Ah! You're using LVM! There are known bugs in LVM which cause memory
> corruptions. I told Heinz about this on 16-DEC, but it appears the CVS
> tree hasn't been updated yet. So grab the latest CVS tree (which fixes
> some bugs) and then apply the appended patch (which fixes more
> bugs). You definitely need both. The patch should be applied in the
> drivers/md directory.

Hmm, my understanding was that the LVM CVS already had this patch
applied, but I could be wrong...  In any case, I haven't seen anything
about updating the kernel LVM to match CVS since Alan merged in his
-ac LVM code into 2.4.15 or so.

Cheers, Andreas
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/


^ permalink raw reply

* Re: 2.5.1-dj11/12 compile errors
From: Sid Boyce @ 2002-01-06  3:24 UTC (permalink / raw)
  To: linux-kernel@vger.kernel.org; +Cc: davej

	Disabling initrd still produces an error, but perhaps the fix will 
address this.
Regards

gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes 
-Wno-trigraphs -O2 -fomit-frame-pointer -fno-strict-aliasing -fno-common 
-pipe -mpreferred-stack-boundary=2 -march=i686 -malign-functions=4    -c 
-o init/do_mounts.o init/do_mounts.c
init/do_mounts.c: In function `rd_load_disk':
init/do_mounts.c:634: incompatible type for argument 2 of `create_dev'
init/do_mounts.c: In function `handle_initrd':
init/do_mounts.c:752: incompatible type for argument 1 of `kdev_t_to_nr'
init/do_mounts.c:756: incompatible type for argument 2 of `create_dev'
init/do_mounts.c:777: incompatible types in assignment
init/do_mounts.c: In function `initrd_load':
init/do_mounts.c:804: incompatible type for argument 2 of `create_dev'
init/do_mounts.c:805: incompatible type for argument 2 of `create_dev'
init/do_mounts.c: In function `prepare_namespace':
init/do_mounts.c:819: incompatible types in assignment
make: *** [init/do_mounts.o] Error 1

-- 
Sid Boyce ... hamradio G3VBV ... Cessna/Warrior Pilot
Linux only shop


^ permalink raw reply

* Re: PDC20269 support in 2.2?
From: Andre Hedrick @ 2002-01-06  2:58 UTC (permalink / raw)
  To: Roy Sigurd Karlsbakk; +Cc: linux-kernel
In-Reply-To: <Pine.LNX.4.30.0201052018520.10735-100000@mustard.heime.net>

Roy,

The day somebody pays for it and the check clears the bank or it is
requested by the 2.2.X kernel maintainer for inclusion.  Since none of the
distros are willing to fund the development of the driver, it is now
market driven development.  The companies maintain the chipset code, they
pay for updates for the new hardware, somebody who has the latest
hardware affected does a gpl good deed, and when it is convenient to do
busy work for kicks who knows.  Since I recently totalled my Porsche and
technically should have died, I have more concerns than doing busy work
for something that does not produce an income (or a replacement toy car).
I suspect this had much to do with my irrational behavior lately.  One
should note I have no excuse for the flames I blasted Jens with, nor any
explanation.

Regards,

Andre Hedrick
Linux ATA Development

On Sat, 5 Jan 2002, Roy Sigurd Karlsbakk wrote:

> hi
> 
> I saw a previous thread about this, but I can't work. It's not mentioned
> in the latest patch (ide.2.2.19.05042001.patch.gz) either.
> 
> Does anyone know if or when this will be back ported to 2.2?
> 
> roy
> 
> --
> Roy Sigurd Karlsbakk, MCSE, MCNE, CLS, LCA
> 
> Computers are like air conditioners.
> They stop working when you open Windows.
> 

^ permalink raw reply

* Re: Binutils and the Linux kernel source finder
From: Erik Andersen @ 2002-01-06  2:39 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: linux-kernel
In-Reply-To: <20020105180237.GF485@gallifrey>

On Sat Jan 05, 2002 at 06:02:37PM +0000, Dr. David Alan Gilbert wrote:
> Hi,
>   I am the author of the 'Linux kernel source finder' web page that
> lists for each architecture the place to get appropriate Linux kernel
> patches - see:
>   http://www.treblig.org/Linux_kernel_source_finder.html
> 
>   I wish to extend this to include pointers to the best/latest/most
> appropriate binutils for each architecture.  I've put links in for x86,
> Alpha and MIPS to H.J.Lu's ftp site, since he tests for those 3
> platforms prior to release.
> 
>   I'd appreciate recommendations and comments from those using binutils
> on Linux for other platforms, with links to ftp, cvs or web pages
> describing the solutions for those architectures.

Note that uClinux (not ucLinux as on your page) does not natively
run the ELF binary file format, but uses what is called the
"Flat" binary format.  It is structurally much simpler (and
therefore smaller) than ELF, but more importantly, this format
helps us avoid needing to always use PIC and/or do tons of
relocations.

We use an ELF toolchain to create binaries, except the toolchain
is modified such that ld is actually a script which first runs
the real 'ld' to produce an ELF file and then also runs
'elf2flt' to create the flat executable.  

The uClinux toolchains for ARM and m68k (which are the two most
commonly used architectures) are available from 
    http://www.uclinux.org/pub/uClinux/m68k-elf-tools/

Links to toolchains for other arches and _lots_ of help
information can be found at 
    http://home.at/uclinux/

 -Erik

--
Erik B. Andersen             http://codepoet-consulting.com/
--This message was written using 73% post-consumer electrons--

^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Alan Cox @ 2002-01-06  2:30 UTC (permalink / raw)
  To: mingo; +Cc: Alan Cox, Davide Libenzi, Linus Torvalds, lkml
In-Reply-To: <Pine.LNX.4.33.0201060501560.5193-100000@localhost.localdomain>

> we can do 32-bit ffz by doing 4x 8-bit ffz's though:

There are better algorithms than the branching one already. You can
do it a 32bit one with a multiply shift and 6 bit lookup if your multiply
is ok, or for non superscalar processors using shift and adds. 

64bit is 32bit ffz(x.low|x.high) and a single bit test

I can dig out the 32bit one if need be (its from a NetBSD mailing list)

^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Ingo Molnar @ 2002-01-06  4:19 UTC (permalink / raw)
  To: Alan Cox; +Cc: Davide Libenzi, Linus Torvalds, lkml
In-Reply-To: <E16N35L-00024p-00@the-village.bc.nu>


On Sun, 6 Jan 2002, Alan Cox wrote:

> There are better algorithms than the branching one already. You can do
> it a 32bit one with a multiply shift and 6 bit lookup if your multiply
> is ok, or for non superscalar processors using shift and adds.
>
> 64bit is 32bit ffz(x.low|x.high) and a single bit test

ok - i wasnt thinking straight. as few branches as possible should be the
way to go, no BTB will help such functions so branches must be reduced.

	Ingo


^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Ingo Molnar @ 2002-01-06  4:16 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Davide Libenzi, lkml, Alan Cox
In-Reply-To: <Pine.LNX.4.33.0201060508110.5193-100000@localhost.localdomain>


against -pre9:

    http://redhat.com/~mingo/O(1)-scheduler/sched-O1-2.5.2-B4.patch

	Ingo


^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Davide Libenzi @ 2002-01-06  2:23 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Alan Cox, Linus Torvalds, lkml
In-Reply-To: <Pine.LNX.4.33.0201060501560.5193-100000@localhost.localdomain>

On Sun, 6 Jan 2002, Ingo Molnar wrote:

>
> On Sun, 6 Jan 2002, Alan Cox wrote:
>
> > 64 queues costs a tiny amount more than 32 queues. If you can get it
> > down to eight or nine queues with no actual cost (especially for non
> > realtime queues) then it represents a huge win since an 8bit ffz can
> > be done by lookup table and that is fast on all processors
>
> i'm afraid that while 32 might work, 8 will definitely not be enough. In
> the interactivity-detection scheme i added it's important for interactive
> tasks to have some room (in terms of priority levels) to go up without
> hitting the levels of the true CPU abusers.
>
> we can do 32-bit ffz by doing 4x 8-bit ffz's though:
>
> 	if (likely(byte[0]))
> 		return ffz8[byte[0]];
> 	else if (byte[1])
> 		return ffz8[byte[1]];
> 	else if (byte[2]
> 		return ffz8[byte[2]];
> 	else if (byte[3]
> 		return ffz8[byte[3]];
> 	else
> 		return -1;
>
> and while this is still 4 branches, it's better than a loop of 32. But i
> also think that George Anzinger's idea works well too to reduce the cost
> of bitsearching. Or those platforms that decide to do so could search the
> array directly as well - if it's 32 queues then it's a cache footprint of
> 4 cachelines, which can be searched directly without any problem.

dyn_prio -> [0..15]

each time a task exhausts its ts (time slice) you decrease dyn_prio.

queue = dyn_prio >> 1

You get 16 consecutive CPU hog steps before falling in the hell of CPU
bound tasks




- Davide



^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Davide Libenzi @ 2002-01-06  2:17 UTC (permalink / raw)
  To: Alan Cox; +Cc: Ingo Molnar, Linus Torvalds, lkml
In-Reply-To: <E16N2vX-00023B-00@the-village.bc.nu>

On Sun, 6 Jan 2002, Alan Cox wrote:

> > > then it represents a huge win since an 8bit ffz can be done by lookup table
> > > and that is fast on all processors
> >
> > It's here that i want to go, but i'd like to do it gradually :)
> > unsigned char first_bit[255];
>
> Make it [256] and you can do 9 queues since the idle task will always
> be queued...

Mistyping error :)



- Davide



^ permalink raw reply

* Re: [announce] [patch] ultra-scalable O(1) SMP and UP scheduler
From: Davide Libenzi @ 2002-01-06  2:16 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linus Torvalds, lkml, Alan Cox
In-Reply-To: <Pine.LNX.4.33.0201060441540.4730-100000@localhost.localdomain>

On Sun, 6 Jan 2002, Ingo Molnar wrote:

>
> On Sat, 5 Jan 2002, Davide Libenzi wrote:
>
> > Can you send me a link, there're different things to be fixed IMHO.
>
> my latest stuff is at:
>
>    http://redhat.com/~mingo/O(1)-scheduler/sched-O1-2.5.2-B1.patch
>
> > The load estimator can easily use the current dyn_prio/time_slice by
> > simplifying things a _lot_
>
> i have experimented with a very high number of variants. I estimated sleep
> times, i estimated run times, i estimated runqueue times. Note that the
> current estimator measures time spent on the *runqueue*, not time spent on
> the CPU. This means that in an overload spike we have an automatically
> increasing penalization of tasks that want to run. While i'm not too
> emotional about eg. the RT bits, this part of the scheduler is pretty
> critical to handle high load smoothly.
>
> the integration effect of the estimator was written to be fast, and it's
> fast. Also note that in most of the time we do not even call the
> estimator:
>
>         if (p->run_timestamp == jiffies)
>                 goto enqueue;
>
> ie. in high frequency wakeup situations we'll call into the estimator only
> once every jiffy.

Like the current one ( pre8 )
You've a per-cpu swap array counter ( old recalc loop ) that you increment
each time you swap arrays.
Each task struct has its rcl_last that is updated when you inject the task
on the run queue :

        p->dyn_prio += rcl_curr(task_qid) - p->rcl_last;
        p->rcl_last = rcl_curr(task_qid);
        if (p->dyn_prio > MAX_DYNPRIO) p->dyn_prio = MAX_DYNPRIO;

Something like this, and you'll push the task on the given queue depending
on 'prio' ( _unsigned_ with the code above ).
Each time a task exhausts its time slice you decrease prio.
It works great and it's way simpler.



>
> > I would suggest a lower number of queues, 32 is way more than necessary.
>
> the reason i used more queues is the 'penalizing' effect of the per-task
> load-average estimator. We want to have some priority room these CPU-bound
> tasks can escape into, without hurting some of the interactive jobs that
> might get a few penalties here and there but still dont reach the maximum
> where all the CPU hogs live. (this is p->prio == 63 right now.)
>
> also, i wanted to map all the 39 nice values straight into the priority
> space, just to be sure. Some people *might* rely on finegrained priorities
> still.
>
> there is one additional thing i wanted to do to reduce the effect of the
> 64 queues: instead of using a straight doubly-linked list a'la list_t, we
> can do a head-pointer that cuts the queue size into half, and reduces
> cache footprint of the scheduler data structures as well. But i did not
> want to add this until all bugs are fixed, this is an invariant
> cache-footprint optimization.

Ingo, i did a lot of testing by studying the dyn_prio distribution.
You've a lot of tasks ( i/o bound ) moving between the very first ( top )
queues.



> > The rt code _must_ be better, it can be easily done by a smartest
> > wakeup. There's no need to acquire the whole lock set, at least w/out
> > a checkpoint solution ( look at BMQS ) that prevents multiple failing
> > lookups inside the RT queue.
>
> regarding SCHED_OTHER, i have intentionally avoided smart wakeups, pushed
> the balancing logic more into the load balancer.
>
> load spikes and big statistical fluctuations of runqueue lengths we should
> not care much about - they are spikes we cannot flatten anyway, they can
> be gone before the task has finished flushing over its data set to the
> other CPU.
>
> regarding RT tasks, i did not want to add something that i know is broken,
> even if rt_lock() is arguably heavyweight. I've seen the 'IPI in flight
> misses the real picture' situation a number of times and if we want to do
> RT scheduling seriously and accurately on SMP then we should give a
> perfect solution to it. Would you like me to explain the 'IPI in flight'
> problem in detail, or do you agree that it's a problem?

What's the 'IPI in flight' a new counter terrorism measure ? :)
I use a check point inside BMQS.
There's a global variable that is incremented each time an RT task is woken
up.
There's a local task struct checkpoint that is aligned to the global one
when a lookup inside the RT queue results empty :

    if (grt_chkp != rtt_chkp(cpu_number_map(this_cpu)) &&
        !list_empty(&runqueue_head(RT_QID)))
        goto rt_queue_select;

We've to work on the rt code and the balancing code/hooks




- Davide




^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.