public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sysctl: Allow /proc/sys without sys_sysctl
@ 2006-07-10 22:38 Eric W. Biederman
  2006-07-10 22:48 ` Randy.Dunlap
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-10 22:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel


Since sys_sysctl is deprecated start allow it to be compiled out.
This should catch any remaining user space code that cares,
and paves the way for further sysctl cleanups.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 arch/ia64/ia32/sys_ia32.c         |    2 -
 arch/mips/kernel/linux32.c        |    4 +
 arch/powerpc/kernel/sys_ppc32.c   |    2 -
 arch/s390/kernel/compat_linux.c   |    2 -
 arch/sparc64/kernel/sys_sparc32.c |    2 -
 arch/x86_64/ia32/sys_ia32.c       |    2 -
 fs/Kconfig                        |   19 +++++++
 init/Kconfig                      |   31 ++++++-----
 kernel/sysctl.c                   |  101 ++++++++++---------------------------
 9 files changed, 71 insertions(+), 94 deletions(-)

diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 6aa3c51..bddbd22 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1942,7 +1942,7 @@ struct sysctl32 {
 	unsigned int	__unused[4];
 };
 
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
 asmlinkage long
 sys32_sysctl (struct sysctl32 __user *args)
 {
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 814cb38..02eb78d 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -991,7 +991,7 @@ struct sysctl_args32
 	unsigned int __unused[4];
 };
 
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
 
 asmlinkage long sys32_sysctl(struct sysctl_args32 __user *args)
 {
@@ -1032,7 +1032,7 @@ asmlinkage long sys32_sysctl(struct sysc
 	return error;
 }
 
-#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_SYSCTL_SYSCALL */
 
 asmlinkage long sys32_newuname(struct new_utsname __user * name)
 {
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 2e29286..5e391fc 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -740,7 +740,7 @@ asmlinkage long compat_sys_umask(u32 mas
 	return sys_umask((int)mask);
 }
 
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
 struct __sysctl_args32 {
 	u32 name;
 	int nlen;
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index cabb4ff..4feadc4 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -703,7 +703,7 @@ asmlinkage long sys32_sendfile64(int out
 	return ret;
 }
 
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
 struct __sysctl_args32 {
 	u32 name;
 	int nlen;
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index c88ae23..69444f2 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -1016,7 +1016,7 @@ struct __sysctl_args32 {
 
 asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
 {
-#ifndef CONFIG_SYSCTL
+#ifndef CONFIG_SYSCTL_SYSCALL
 	return -ENOSYS;
 #else
 	struct __sysctl_args32 tmp;
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index b8424eb..ebd7873 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -645,7 +645,7 @@ sys32_pause(void)
 }
 
 
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
 struct sysctl_ia32 {
 	unsigned int	name;
 	int		nlen;
diff --git a/fs/Kconfig b/fs/Kconfig
index 7532f00..b75cc54 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -829,6 +829,25 @@ config PROC_VMCORE
         help
         Exports the dump image of crashed kernel in ELF format.
 
+config PROC_SYSCTL
+	bool "Sysctl support (/proc/sys)" if EMBEDDED
+	depends on PROC_FS 
+	select SYSCTL
+	default y
+	---help---
+	  The sysctl interface provides a means of dynamically changing
+	  certain kernel parameters and variables on the fly without requiring
+	  a recompile of the kernel or reboot of the system.  The primary
+	  interface is through /proc/sys.  If you say Y here a tree of
+	  modifiable sysctl entries will be generated beneath the
+          /proc/sys directory. They are explained in the files 
+	  in <file:Documentation/sysctl/>.  Note that enabling this 
+	  option will enlarge the kernel by at least 8 KB.
+
+	  As it is generally a good thing, you should say Y here unless
+	  building a kernel for install/rescue disks or your system is very
+	  limited in memory.
+
 config SYSFS
 	bool "sysfs file system support" if EMBEDDED
 	default y
diff --git a/init/Kconfig b/init/Kconfig
index a1d7cc5..5949efb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -213,21 +213,24 @@ config TASK_DELAY_ACCT
 	  Say N if unsure.
 
 config SYSCTL
-	bool "Sysctl support" if EMBEDDED
-	default y
+	bool 
+
+config SYSCTL_SYSCALL
+	bool "Sysctl syscall support"
+	default n
+	select SYSCTL
 	---help---
-	  The sysctl interface provides a means of dynamically changing
-	  certain kernel parameters and variables on the fly without requiring
-	  a recompile of the kernel or reboot of the system.  The primary
-	  interface consists of a system call, but if you say Y to "/proc
-	  file system support", a tree of modifiable sysctl entries will be
-	  generated beneath the /proc/sys directory. They are explained in the
-	  files in <file:Documentation/sysctl/>.  Note that enabling this
-	  option will enlarge the kernel by at least 8 KB.
-
-	  As it is generally a good thing, you should say Y here unless
-	  building a kernel for install/rescue disks or your system is very
-	  limited in memory.
+	  Enable the deprecated sysctl system call.  sys_sysctl uses
+	  binary paths that have been found to be a major pain to maintain
+	  and use.  The interface in /proc/sys is now the primary and what
+	  everyone uses.
+
+	  Nothing has been using the binary sysctl interface for some time
+	  time now so nothing should break if you disable sysctl syscall
+	  support, and you kernel will get marginally smaller.
+
+	  Unless you have an application that uses the sys_syscall interface
+ 	  you should probably say N here.  
 
 config UTS_NS
 	bool "UTS Namespaces"
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5c4d19d..42610e6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -140,8 +140,11 @@ #ifdef CONFIG_RT_MUTEXES
 extern int max_lock_depth;
 #endif
 
+#ifdef CONFIG_SYSCTL_SYSCALL
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
+#endif
+
 #ifndef CONFIG_UTS_NS
 static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -173,7 +176,7 @@ #endif
 
 /* /proc declarations: */
 
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_SYSCTL
 
 static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
 static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
@@ -1252,12 +1255,13 @@ static void start_unregistering(struct c
 
 void __init sysctl_init(void)
 {
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_SYSCTL
 	register_proc_table(root_table, proc_sys_root, &root_table_header);
 	init_irq_proc();
 #endif
 }
 
+#ifdef CONFIG_SYSCTL_SYSCALL
 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
 	       void __user *newval, size_t newlen)
 {
@@ -1311,6 +1315,7 @@ asmlinkage long sys_sysctl(struct __sysc
 	unlock_kernel();
 	return error;
 }
+#endif /* CONFIG_SYSCTL_SYSCALL */
 
 /*
  * ctl_perm does NOT grant the superuser all rights automatically, because
@@ -1337,6 +1342,7 @@ static inline int ctl_perm(ctl_table *ta
 	return test_perm(table->mode, op);
 }
 
+#ifdef CONFIG_SYSCTL_SYSCALL
 static int parse_table(int __user *name, int nlen,
 		       void __user *oldval, size_t __user *oldlenp,
 		       void __user *newval, size_t newlen,
@@ -1426,6 +1432,7 @@ int do_sysctl_strategy (ctl_table *table
 	}
 	return 0;
 }
+#endif /* CONFIG_SYSCTL_SYSCALL */
 
 /**
  * register_sysctl_table - register a sysctl hierarchy
@@ -1513,7 +1520,7 @@ struct ctl_table_header *register_sysctl
 	else
 		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
 	spin_unlock(&sysctl_lock);
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_SYSCTL
 	register_proc_table(table, proc_sys_root, tmp);
 #endif
 	return tmp;
@@ -1531,18 +1538,31 @@ void unregister_sysctl_table(struct ctl_
 	might_sleep();
 	spin_lock(&sysctl_lock);
 	start_unregistering(header);
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_SYSCTL
 	unregister_proc_table(header->ctl_table, proc_sys_root);
 #endif
 	spin_unlock(&sysctl_lock);
 	kfree(header);
 }
 
+#else /* !CONFIG_SYSCTL */
+struct ctl_table_header * register_sysctl_table(ctl_table * table, 
+						int insert_at_head)
+{
+	return NULL;
+}
+
+void unregister_sysctl_table(struct ctl_table_header * table)
+{
+}
+
+#endif /* CONFIG_SYSCTL */
+
 /*
  * /proc/sys support
  */
 
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_PROC_SYSCTL
 
 /* Scan the sysctl entries in table and add them all into /proc */
 static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
@@ -2520,6 +2540,7 @@ int proc_doulongvec_ms_jiffies_minmax(ct
 #endif /* CONFIG_PROC_FS */
 
 
+#ifdef CONFIG_SYSCTL_SYSCALL
 /*
  * General sysctl support routines 
  */
@@ -2662,7 +2683,7 @@ int sysctl_ms_jiffies(ctl_table *table, 
 	return 1;
 }
 
-#else /* CONFIG_SYSCTL */
+#else /* CONFIG_SYSCTL_SYSCALL */
 
 
 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
@@ -2698,73 +2719,7 @@ int sysctl_ms_jiffies(ctl_table *table, 
 	return -ENOSYS;
 }
 
-int proc_dostring(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_dointvec(ctl_table *table, int write, struct file *filp,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
-int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
-				      struct file *filp,
-				      void __user *buffer,
-				      size_t *lenp, loff_t *ppos)
-{
-    return -ENOSYS;
-}
-
-struct ctl_table_header * register_sysctl_table(ctl_table * table, 
-						int insert_at_head)
-{
-	return NULL;
-}
-
-void unregister_sysctl_table(struct ctl_table_header * table)
-{
-}
-
-#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_SYSCTL_SYSCALL */
 
 /*
  * No sense putting this after each symbol definition, twice,
-- 
1.4.1.gac83a


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-10 22:38 [PATCH] sysctl: Allow /proc/sys without sys_sysctl Eric W. Biederman
@ 2006-07-10 22:48 ` Randy.Dunlap
  2006-07-11  4:00   ` Eric W. Biederman
  2006-07-11  4:19 ` Andrew Morton
  2006-07-11 22:26 ` [PATCH] sysctl: Allow /proc/sys without sys_sysctl Andi Kleen
  2 siblings, 1 reply; 21+ messages in thread
From: Randy.Dunlap @ 2006-07-10 22:48 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: akpm, linux-kernel

On Mon, 10 Jul 2006 16:38:59 -0600 Eric W. Biederman wrote:

> 
> Since sys_sysctl is deprecated start allow it to be compiled out.
> This should catch any remaining user space code that cares,
> and paves the way for further sysctl cleanups.

Where is it documented and users notified that sys_sysctl is
deprecated?  Sounds like it should be added to
Documentation/feature-removal-schedule.txt.


> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  arch/ia64/ia32/sys_ia32.c         |    2 -
>  arch/mips/kernel/linux32.c        |    4 +
>  arch/powerpc/kernel/sys_ppc32.c   |    2 -
>  arch/s390/kernel/compat_linux.c   |    2 -
>  arch/sparc64/kernel/sys_sparc32.c |    2 -
>  arch/x86_64/ia32/sys_ia32.c       |    2 -
>  fs/Kconfig                        |   19 +++++++
>  init/Kconfig                      |   31 ++++++-----
>  kernel/sysctl.c                   |  101 ++++++++++---------------------------
>  9 files changed, 71 insertions(+), 94 deletions(-)
> 
> diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
> index 6aa3c51..bddbd22 100644
> --- a/arch/ia64/ia32/sys_ia32.c
> +++ b/arch/ia64/ia32/sys_ia32.c
> @@ -1942,7 +1942,7 @@ struct sysctl32 {
>  	unsigned int	__unused[4];
>  };
>  
> -#ifdef CONFIG_SYSCTL
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  asmlinkage long
>  sys32_sysctl (struct sysctl32 __user *args)
>  {
> diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
> index 814cb38..02eb78d 100644
> --- a/arch/mips/kernel/linux32.c
> +++ b/arch/mips/kernel/linux32.c
> @@ -991,7 +991,7 @@ struct sysctl_args32
>  	unsigned int __unused[4];
>  };
>  
> -#ifdef CONFIG_SYSCTL
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  
>  asmlinkage long sys32_sysctl(struct sysctl_args32 __user *args)
>  {
> @@ -1032,7 +1032,7 @@ asmlinkage long sys32_sysctl(struct sysc
>  	return error;
>  }
>  
> -#endif /* CONFIG_SYSCTL */
> +#endif /* CONFIG_SYSCTL_SYSCALL */
>  
>  asmlinkage long sys32_newuname(struct new_utsname __user * name)
>  {
> diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
> index 2e29286..5e391fc 100644
> --- a/arch/powerpc/kernel/sys_ppc32.c
> +++ b/arch/powerpc/kernel/sys_ppc32.c
> @@ -740,7 +740,7 @@ asmlinkage long compat_sys_umask(u32 mas
>  	return sys_umask((int)mask);
>  }
>  
> -#ifdef CONFIG_SYSCTL
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  struct __sysctl_args32 {
>  	u32 name;
>  	int nlen;
> diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
> index cabb4ff..4feadc4 100644
> --- a/arch/s390/kernel/compat_linux.c
> +++ b/arch/s390/kernel/compat_linux.c
> @@ -703,7 +703,7 @@ asmlinkage long sys32_sendfile64(int out
>  	return ret;
>  }
>  
> -#ifdef CONFIG_SYSCTL
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  struct __sysctl_args32 {
>  	u32 name;
>  	int nlen;
> diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
> index c88ae23..69444f2 100644
> --- a/arch/sparc64/kernel/sys_sparc32.c
> +++ b/arch/sparc64/kernel/sys_sparc32.c
> @@ -1016,7 +1016,7 @@ struct __sysctl_args32 {
>  
>  asmlinkage long sys32_sysctl(struct __sysctl_args32 __user *args)
>  {
> -#ifndef CONFIG_SYSCTL
> +#ifndef CONFIG_SYSCTL_SYSCALL
>  	return -ENOSYS;
>  #else
>  	struct __sysctl_args32 tmp;
> diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
> index b8424eb..ebd7873 100644
> --- a/arch/x86_64/ia32/sys_ia32.c
> +++ b/arch/x86_64/ia32/sys_ia32.c
> @@ -645,7 +645,7 @@ sys32_pause(void)
>  }
>  
>  
> -#ifdef CONFIG_SYSCTL
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  struct sysctl_ia32 {
>  	unsigned int	name;
>  	int		nlen;
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 7532f00..b75cc54 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -829,6 +829,25 @@ config PROC_VMCORE
>          help
>          Exports the dump image of crashed kernel in ELF format.
>  
> +config PROC_SYSCTL
> +	bool "Sysctl support (/proc/sys)" if EMBEDDED
> +	depends on PROC_FS 
> +	select SYSCTL
> +	default y
> +	---help---
> +	  The sysctl interface provides a means of dynamically changing
> +	  certain kernel parameters and variables on the fly without requiring
> +	  a recompile of the kernel or reboot of the system.  The primary
> +	  interface is through /proc/sys.  If you say Y here a tree of
> +	  modifiable sysctl entries will be generated beneath the
> +          /proc/sys directory. They are explained in the files 
> +	  in <file:Documentation/sysctl/>.  Note that enabling this 
> +	  option will enlarge the kernel by at least 8 KB.
> +
> +	  As it is generally a good thing, you should say Y here unless
> +	  building a kernel for install/rescue disks or your system is very
> +	  limited in memory.
> +
>  config SYSFS
>  	bool "sysfs file system support" if EMBEDDED
>  	default y
> diff --git a/init/Kconfig b/init/Kconfig
> index a1d7cc5..5949efb 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -213,21 +213,24 @@ config TASK_DELAY_ACCT
>  	  Say N if unsure.
>  
>  config SYSCTL
> -	bool "Sysctl support" if EMBEDDED
> -	default y
> +	bool 
> +
> +config SYSCTL_SYSCALL
> +	bool "Sysctl syscall support"
> +	default n
> +	select SYSCTL
>  	---help---
> -	  The sysctl interface provides a means of dynamically changing
> -	  certain kernel parameters and variables on the fly without requiring
> -	  a recompile of the kernel or reboot of the system.  The primary
> -	  interface consists of a system call, but if you say Y to "/proc
> -	  file system support", a tree of modifiable sysctl entries will be
> -	  generated beneath the /proc/sys directory. They are explained in the
> -	  files in <file:Documentation/sysctl/>.  Note that enabling this
> -	  option will enlarge the kernel by at least 8 KB.
> -
> -	  As it is generally a good thing, you should say Y here unless
> -	  building a kernel for install/rescue disks or your system is very
> -	  limited in memory.
> +	  Enable the deprecated sysctl system call.  sys_sysctl uses
> +	  binary paths that have been found to be a major pain to maintain
> +	  and use.  The interface in /proc/sys is now the primary and what
> +	  everyone uses.
> +
> +	  Nothing has been using the binary sysctl interface for some time
> +	  time now so nothing should break if you disable sysctl syscall
> +	  support, and you kernel will get marginally smaller.
> +
> +	  Unless you have an application that uses the sys_syscall interface
> + 	  you should probably say N here.  
>  
>  config UTS_NS
>  	bool "UTS Namespaces"
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 5c4d19d..42610e6 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -140,8 +140,11 @@ #ifdef CONFIG_RT_MUTEXES
>  extern int max_lock_depth;
>  #endif
>  
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
>  		       ctl_table *, void **);
> +#endif
> +
>  #ifndef CONFIG_UTS_NS
>  static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
>  		  void __user *buffer, size_t *lenp, loff_t *ppos);
> @@ -173,7 +176,7 @@ #endif
>  
>  /* /proc declarations: */
>  
> -#ifdef CONFIG_PROC_FS
> +#ifdef CONFIG_PROC_SYSCTL
>  
>  static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
>  static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
> @@ -1252,12 +1255,13 @@ static void start_unregistering(struct c
>  
>  void __init sysctl_init(void)
>  {
> -#ifdef CONFIG_PROC_FS
> +#ifdef CONFIG_PROC_SYSCTL
>  	register_proc_table(root_table, proc_sys_root, &root_table_header);
>  	init_irq_proc();
>  #endif
>  }
>  
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
>  	       void __user *newval, size_t newlen)
>  {
> @@ -1311,6 +1315,7 @@ asmlinkage long sys_sysctl(struct __sysc
>  	unlock_kernel();
>  	return error;
>  }
> +#endif /* CONFIG_SYSCTL_SYSCALL */
>  
>  /*
>   * ctl_perm does NOT grant the superuser all rights automatically, because
> @@ -1337,6 +1342,7 @@ static inline int ctl_perm(ctl_table *ta
>  	return test_perm(table->mode, op);
>  }
>  
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  static int parse_table(int __user *name, int nlen,
>  		       void __user *oldval, size_t __user *oldlenp,
>  		       void __user *newval, size_t newlen,
> @@ -1426,6 +1432,7 @@ int do_sysctl_strategy (ctl_table *table
>  	}
>  	return 0;
>  }
> +#endif /* CONFIG_SYSCTL_SYSCALL */
>  
>  /**
>   * register_sysctl_table - register a sysctl hierarchy
> @@ -1513,7 +1520,7 @@ struct ctl_table_header *register_sysctl
>  	else
>  		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
>  	spin_unlock(&sysctl_lock);
> -#ifdef CONFIG_PROC_FS
> +#ifdef CONFIG_PROC_SYSCTL
>  	register_proc_table(table, proc_sys_root, tmp);
>  #endif
>  	return tmp;
> @@ -1531,18 +1538,31 @@ void unregister_sysctl_table(struct ctl_
>  	might_sleep();
>  	spin_lock(&sysctl_lock);
>  	start_unregistering(header);
> -#ifdef CONFIG_PROC_FS
> +#ifdef CONFIG_PROC_SYSCTL
>  	unregister_proc_table(header->ctl_table, proc_sys_root);
>  #endif
>  	spin_unlock(&sysctl_lock);
>  	kfree(header);
>  }
>  
> +#else /* !CONFIG_SYSCTL */
> +struct ctl_table_header * register_sysctl_table(ctl_table * table, 
> +						int insert_at_head)
> +{
> +	return NULL;
> +}
> +
> +void unregister_sysctl_table(struct ctl_table_header * table)
> +{
> +}
> +
> +#endif /* CONFIG_SYSCTL */
> +
>  /*
>   * /proc/sys support
>   */
>  
> -#ifdef CONFIG_PROC_FS
> +#ifdef CONFIG_PROC_SYSCTL
>  
>  /* Scan the sysctl entries in table and add them all into /proc */
>  static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
> @@ -2520,6 +2540,7 @@ int proc_doulongvec_ms_jiffies_minmax(ct
>  #endif /* CONFIG_PROC_FS */
>  
>  
> +#ifdef CONFIG_SYSCTL_SYSCALL
>  /*
>   * General sysctl support routines 
>   */
> @@ -2662,7 +2683,7 @@ int sysctl_ms_jiffies(ctl_table *table, 
>  	return 1;
>  }
>  
> -#else /* CONFIG_SYSCTL */
> +#else /* CONFIG_SYSCTL_SYSCALL */
>  
>  
>  asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
> @@ -2698,73 +2719,7 @@ int sysctl_ms_jiffies(ctl_table *table, 
>  	return -ENOSYS;
>  }
>  
> -int proc_dostring(ctl_table *table, int write, struct file *filp,
> -		  void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_dointvec(ctl_table *table, int write, struct file *filp,
> -		  void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
> -			void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
> -		    void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
> -			  void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
> -			  void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
> -			     void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
> -		    void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> -
> -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
> -				      struct file *filp,
> -				      void __user *buffer,
> -				      size_t *lenp, loff_t *ppos)
> -{
> -    return -ENOSYS;
> -}
> -
> -struct ctl_table_header * register_sysctl_table(ctl_table * table, 
> -						int insert_at_head)
> -{
> -	return NULL;
> -}
> -
> -void unregister_sysctl_table(struct ctl_table_header * table)
> -{
> -}
> -
> -#endif /* CONFIG_SYSCTL */
> +#endif /* CONFIG_SYSCTL_SYSCALL */
>  
>  /*
>   * No sense putting this after each symbol definition, twice,
> -- 

---
~Randy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-10 22:48 ` Randy.Dunlap
@ 2006-07-11  4:00   ` Eric W. Biederman
  0 siblings, 0 replies; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-11  4:00 UTC (permalink / raw)
  To: Randy.Dunlap; +Cc: akpm, linux-kernel

"Randy.Dunlap" <rdunlap@xenotime.net> writes:

> On Mon, 10 Jul 2006 16:38:59 -0600 Eric W. Biederman wrote:
>
>> 
>> Since sys_sysctl is deprecated start allow it to be compiled out.
>> This should catch any remaining user space code that cares,
>> and paves the way for further sysctl cleanups.
>
> Where is it documented and users notified that sys_sysctl is
> deprecated?  Sounds like it should be added to
> Documentation/feature-removal-schedule.txt.

The deprecation I believe actually predates feature-remove-schedule.txt

>From include/linux/sysctl.h
 
>  ****************************************************************
>  ****************************************************************
>  **
>  **  The values in this file are exported to user space via 
>  **  the sysctl() binary interface.  However this interface
>  **  is unstable and deprecated and will be removed in the future. 
>  **  For a stable interface use /proc/sys.
>  **
>  ****************************************************************
>  ****************************************************************

And I can't actually find any user space applications that continue
to use sys_sysctl.

So the combination of the two patches and makes the deprecation official,
and makes it a compile time option so we can disable sys_sysctl now,
but still have recourse if some parts of user space need the code.

Currently the we dup the internal helpers which makes maintenance
of the code a nightmare.

Eric




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-10 22:38 [PATCH] sysctl: Allow /proc/sys without sys_sysctl Eric W. Biederman
  2006-07-10 22:48 ` Randy.Dunlap
@ 2006-07-11  4:19 ` Andrew Morton
  2006-07-11  6:57   ` Eric W. Biederman
  2006-07-11  7:23   ` [PATCH] sysctl: Scream if someone uses sys_sysctl Eric W. Biederman
  2006-07-11 22:26 ` [PATCH] sysctl: Allow /proc/sys without sys_sysctl Andi Kleen
  2 siblings, 2 replies; 21+ messages in thread
From: Andrew Morton @ 2006-07-11  4:19 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel

On Mon, 10 Jul 2006 16:38:59 -0600
ebiederm@xmission.com (Eric W. Biederman) wrote:

> Since sys_sysctl is deprecated start allow it to be compiled out.

This could be a tough one to get rid of (looks at sys_bdflush() again).

I'd suggest we put a sys_bdflush()-style warning in there, see what that
flushes out.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-11  4:19 ` Andrew Morton
@ 2006-07-11  6:57   ` Eric W. Biederman
  2006-07-11  7:04     ` Andrew Morton
  2006-07-11  7:23   ` [PATCH] sysctl: Scream if someone uses sys_sysctl Eric W. Biederman
  1 sibling, 1 reply; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-11  6:57 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton <akpm@osdl.org> writes:

> On Mon, 10 Jul 2006 16:38:59 -0600
> ebiederm@xmission.com (Eric W. Biederman) wrote:
>
>> Since sys_sysctl is deprecated start allow it to be compiled out.
>
> This could be a tough one to get rid of (looks at sys_bdflush() again).
>
> I'd suggest we put a sys_bdflush()-style warning in there, see what that
> flushes out.

Sounds sane.  I know I have booted several kernels with it compiled out
but just because you can do without it doesn't mean that something
isn't using it.

Hmm.  The question is where do I want the put the warning message?

When the code is compiled out?
When the code is compiled in?

Probably both places at this point, and using the rate limited printk
I think instead of just the 5 printks that sys_bdflush uses...

I will cook up an incremental patch once I get some sleep.

Eric


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-11  6:57   ` Eric W. Biederman
@ 2006-07-11  7:04     ` Andrew Morton
  2006-07-11  7:52       ` Eric W. Biederman
  0 siblings, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2006-07-11  7:04 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel

On Tue, 11 Jul 2006 00:57:35 -0600
ebiederm@xmission.com (Eric W. Biederman) wrote:

> Andrew Morton <akpm@osdl.org> writes:
> 
> > On Mon, 10 Jul 2006 16:38:59 -0600
> > ebiederm@xmission.com (Eric W. Biederman) wrote:
> >
> >> Since sys_sysctl is deprecated start allow it to be compiled out.
> >
> > This could be a tough one to get rid of (looks at sys_bdflush() again).
> >
> > I'd suggest we put a sys_bdflush()-style warning in there, see what that
> > flushes out.
> 
> Sounds sane.  I know I have booted several kernels with it compiled out
> but just because you can do without it doesn't mean that something
> isn't using it.
> 
> Hmm.  The question is where do I want the put the warning message?
> 
> When the code is compiled out?
> When the code is compiled in?

Both.  We want to find out who is using it.

> Probably both places at this point, and using the rate limited printk
> I think instead of just the 5 printks that sys_bdflush uses...

No, I think five is enough.  If something's using sys_sysctl() then it
might be using it a lot - there's no point in irritating people over it.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] sysctl:  Scream if someone uses sys_sysctl
  2006-07-11  4:19 ` Andrew Morton
  2006-07-11  6:57   ` Eric W. Biederman
@ 2006-07-11  7:23   ` Eric W. Biederman
  1 sibling, 0 replies; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-11  7:23 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel


As far as I can tell we never use sys_sysctl so I never expect to see
these messages.  But if we do see these it means that there are user
space applications that need to be fixed before we can safely
remove sys_sysctl.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---

Since this patch was trivial I just wipped up this incremental
version.  The code compiles is all I know.

 kernel/sysctl.c |    9 +++++++++
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 42610e6..6e7f13a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1306,6 +1306,11 @@ asmlinkage long sys_sysctl(struct __sysc
 	struct __sysctl_args tmp;
 	int error;
 
+	if (printk_ratelimit())
+		printk(KERN_INFO
+			"warning: process `%s' used the obsolete sysctl "
+			"system call\n", current->comm);
+
 	if (copy_from_user(&tmp, args, sizeof(tmp)))
 		return -EFAULT;
 
@@ -2688,6 +2693,10 @@ #else /* CONFIG_SYSCTL_SYSCALL */
 
 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
 {
+	if (printk_ratelimit())
+		printk(KERN_INFO
+			"warning: process `%s' used the removed sysctl "
+			"system call\n", current->comm);
 	return -ENOSYS;
 }
 
-- 
1.4.1.gac83a


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-11  7:04     ` Andrew Morton
@ 2006-07-11  7:52       ` Eric W. Biederman
  2006-07-11  8:05         ` Andrew Morton
  0 siblings, 1 reply; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-11  7:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton <akpm@osdl.org> writes:

> On Tue, 11 Jul 2006 00:57:35 -0600
> ebiederm@xmission.com (Eric W. Biederman) wrote:
>
>> Andrew Morton <akpm@osdl.org> writes:
>> 
>> > On Mon, 10 Jul 2006 16:38:59 -0600
>> > ebiederm@xmission.com (Eric W. Biederman) wrote:
>> >
>> >> Since sys_sysctl is deprecated start allow it to be compiled out.
>> >
>> > This could be a tough one to get rid of (looks at sys_bdflush() again).
>> >
>> > I'd suggest we put a sys_bdflush()-style warning in there, see what that
>> > flushes out.
>> 
>> Sounds sane.  I know I have booted several kernels with it compiled out
>> but just because you can do without it doesn't mean that something
>> isn't using it.
>> 
>> Hmm.  The question is where do I want the put the warning message?
>> 
>> When the code is compiled out?
>> When the code is compiled in?
>
> Both.  We want to find out who is using it.
>
>> Probably both places at this point, and using the rate limited printk
>> I think instead of just the 5 printks that sys_bdflush uses...
>
> No, I think five is enough.  If something's using sys_sysctl() then it
> might be using it a lot - there's no point in irritating people over it.

You are right, especially if that user is glibc.  Here is my updated patch:

From: Eric W. Biederman <ebiederm@xmission.com>
Subject: [PATCH] sysctl:  Scream if someone uses sys_sysctl

As far as I can tell we never use sys_sysctl so I never expect to see
these messages.  But if we do see these it means that there are user
space applications that need to be fixed before we can safely
remove sys_sysctl.

Limited to only 5 messages in case something like glibc is using sys_sysctl.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/sysctl.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 42610e6..b7f7dcb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1303,9 +1303,15 @@ int do_sysctl(int __user *name, int nlen
 
 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
 {
+	static int msg_count;
 	struct __sysctl_args tmp;
 	int error;
 
+	if (msg_count++ < 5)
+		printk(KERN_INFO
+			"warning: process `%s' used the obsolete sysctl "
+			"system call\n", current->comm);
+
 	if (copy_from_user(&tmp, args, sizeof(tmp)))
 		return -EFAULT;
 
@@ -2688,6 +2694,12 @@ #else /* CONFIG_SYSCTL_SYSCALL */
 
 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
 {
+	static int msg_count;
+
+	if (msg_coutn++ < 5)
+		printk(KERN_INFO
+			"warning: process `%s' used the removed sysctl "
+			"system call\n", current->comm);
 	return -ENOSYS;
 }
 
-- 
1.4.1.gac83a


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-11  7:52       ` Eric W. Biederman
@ 2006-07-11  8:05         ` Andrew Morton
  0 siblings, 0 replies; 21+ messages in thread
From: Andrew Morton @ 2006-07-11  8:05 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel

On Tue, 11 Jul 2006 01:52:57 -0600
ebiederm@xmission.com (Eric W. Biederman) wrote:

> As far as I can tell we never use sys_sysctl so I never expect to see
> these messages.  But if we do see these it means that there are user
> space applications that need to be fixed before we can safely
> remove sys_sysctl.
> 
> Limited to only 5 messages in case something like glibc is using sys_sysctl.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  kernel/sysctl.c |   12 ++++++++++++
>  1 files changed, 12 insertions(+), 0 deletions(-)
> 
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 42610e6..b7f7dcb 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1303,9 +1303,15 @@ int do_sysctl(int __user *name, int nlen
>  
>  asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
>  {
> +	static int msg_count;
>  	struct __sysctl_args tmp;
>  	int error;
>  
> +	if (msg_count++ < 5)
> +		printk(KERN_INFO
> +			"warning: process `%s' used the obsolete sysctl "
> +			"system call\n", current->comm);
> +
>  	if (copy_from_user(&tmp, args, sizeof(tmp)))
>  		return -EFAULT;

That'll print it five times per 4 billion calls ;)

> @@ -2688,6 +2694,12 @@ #else /* CONFIG_SYSCTL_SYSCALL */
>  
>  asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
>  {
> +	static int msg_count;
> +
> +	if (msg_coutn++ < 5)

That'll have trouble compiling.

Go to bed, man.  I'll fix it up.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-10 22:38 [PATCH] sysctl: Allow /proc/sys without sys_sysctl Eric W. Biederman
  2006-07-10 22:48 ` Randy.Dunlap
  2006-07-11  4:19 ` Andrew Morton
@ 2006-07-11 22:26 ` Andi Kleen
  2006-07-12  3:13   ` Eric W. Biederman
  2 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-07-11 22:26 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel

ebiederm@xmission.com (Eric W. Biederman) writes:

> Since sys_sysctl is deprecated start allow it to be compiled out.
> This should catch any remaining user space code that cares,

I tried this long ago, but found that glibc uses sysctl in each
program to get the kernel version. It probably handles ENOSYS,
but there might be slowdowns or subtle problems from it not knowing
the kernel version.

So I think it's ok to remove the big sysctl, but at a very minimal
replacement that just handles (CTL_KERN, KERN_VERSION) is needed.

Also it's useful to printk for the rest at least for some time 
so we know what uses it.

-Andi

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-11 22:26 ` [PATCH] sysctl: Allow /proc/sys without sys_sysctl Andi Kleen
@ 2006-07-12  3:13   ` Eric W. Biederman
  2006-07-12 13:32     ` Andi Kleen
  0 siblings, 1 reply; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-12  3:13 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen <ak@suse.de> writes:

> ebiederm@xmission.com (Eric W. Biederman) writes:
>
>> Since sys_sysctl is deprecated start allow it to be compiled out.
>> This should catch any remaining user space code that cares,
>
> I tried this long ago, but found that glibc uses sysctl in each
> program to get the kernel version. It probably handles ENOSYS,
> but there might be slowdowns or subtle problems from it not knowing
> the kernel version.
>
> So I think it's ok to remove the big sysctl, but at a very minimal
> replacement that just handles (CTL_KERN, KERN_VERSION) is needed.

If glibc is looking at kernel.osrelease it might make sense.
If glibc is looking at kernel.version which is just the build number
and date I can't imagine a correct usage.

If this usage is still common in glibc we can decide what to do
when the warnings pop up.

> Also it's useful to printk for the rest at least for some time 
> so we know what uses it.

Yes.  Andrew and I discussed that and when he picked up a patch.
A printk was added to warn of people using the interface.

Eric

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12  3:13   ` Eric W. Biederman
@ 2006-07-12 13:32     ` Andi Kleen
  2006-07-12 14:47       ` Eric W. Biederman
  0 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-07-12 13:32 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel

On Wednesday 12 July 2006 05:13, Eric W. Biederman wrote:
> Andi Kleen <ak@suse.de> writes:
> > ebiederm@xmission.com (Eric W. Biederman) writes:
> >> Since sys_sysctl is deprecated start allow it to be compiled out.
> >> This should catch any remaining user space code that cares,
> >
> > I tried this long ago, but found that glibc uses sysctl in each
> > program to get the kernel version. It probably handles ENOSYS,
> > but there might be slowdowns or subtle problems from it not knowing
> > the kernel version.
> >
> > So I think it's ok to remove the big sysctl, but at a very minimal
> > replacement that just handles (CTL_KERN, KERN_VERSION) is needed.
>
> If glibc is looking at kernel.osrelease it might make sense.
> If glibc is looking at kernel.version which is just the build number
> and date I can't imagine a correct usage.

It's KERN_VERSION 

>From my /bin/ls:

_sysctl({{CTL_KERN, KERN_VERSION}, 2, 0xbfc8e1e0, 30, (nil), 0}) = 0

> If this usage is still common in glibc we can decide what to do
> when the warnings pop up.

printk for everything would annoy basically everybody. Not a good idea.

-Andi

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 13:32     ` Andi Kleen
@ 2006-07-12 14:47       ` Eric W. Biederman
  2006-07-12 14:52         ` Andi Kleen
  0 siblings, 1 reply; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-12 14:47 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen <ak@suse.de> writes:

> On Wednesday 12 July 2006 05:13, Eric W. Biederman wrote:
>> Andi Kleen <ak@suse.de> writes:
>> > ebiederm@xmission.com (Eric W. Biederman) writes:
>> >> Since sys_sysctl is deprecated start allow it to be compiled out.
>> >> This should catch any remaining user space code that cares,
>> >
>> > I tried this long ago, but found that glibc uses sysctl in each
>> > program to get the kernel version. It probably handles ENOSYS,
>> > but there might be slowdowns or subtle problems from it not knowing
>> > the kernel version.
>> >
>> > So I think it's ok to remove the big sysctl, but at a very minimal
>> > replacement that just handles (CTL_KERN, KERN_VERSION) is needed.
>>
>> If glibc is looking at kernel.osrelease it might make sense.
>> If glibc is looking at kernel.version which is just the build number
>> and date I can't imagine a correct usage.
>
> It's KERN_VERSION 
>
>>From my /bin/ls:
>
> _sysctl({{CTL_KERN, KERN_VERSION}, 2, 0xbfc8e1e0, 30, (nil), 0}) = 0

Yep. I found it.

In glibc (2.3.6, 2.4, and CVS) nptl/sysdeps/unix/sysv/linux/smp.h
>
> /* Test whether the machine has more than one processor.  This is not the
>    best test but good enough.  More complicated tests would require `malloc'
>    which is not available at that time.  */
> static inline int
> is_smp_system (void)
> {
>   static const int sysctl_args[] = { CTL_KERN, KERN_VERSION };
>   char buf[512];
>   size_t reslen = sizeof (buf);
> 
>   /* Try reading the number using `sysctl' first.  */
>   if (__sysctl ((int *) sysctl_args,
> 		sizeof (sysctl_args) / sizeof (sysctl_args[0]),
> 		buf, &reslen, NULL, 0) < 0)
>     {
>       /* This was not successful.  Now try reading the /proc filesystem.  */
>       int fd = open_not_cancel_2 ("/proc/sys/kernel/version", O_RDONLY);
>       if (__builtin_expect (fd, 0) == -1
> 	  || (reslen = read_not_cancel (fd, buf, sizeof (buf))) <= 0)
> 	/* This also didn't work.  We give up and say it's a UP machine.  */
> 	buf[0] = '\0';
> 
>       close_not_cancel_no_status (fd);
>     }
> 
>   return strstr (buf, "SMP") != NULL;
> }

So it will correctly handle that sysctl being compiled out, and
the fallback to using /proc.  The code seems to have been
doing that since it was added to glibc in 2000.

It only uses it to see if it should busy wait when taking a mutex.

If the kernel has SMP support is lousy predictor if it makes sense
for glibc to busy wait.  Darn optimizations for the contended case.

>> If this usage is still common in glibc we can decide what to do
>> when the warnings pop up.
>
> printk for everything would annoy basically everybody. Not a good idea.

Well everything isn't using pthreads.  Why ls uses pthreads
is beyond me.

Anyway it looks like it's time to send of a patch.

Eric


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 14:47       ` Eric W. Biederman
@ 2006-07-12 14:52         ` Andi Kleen
  2006-07-12 15:32           ` Eric W. Biederman
  0 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-07-12 14:52 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel


> So it will correctly handle that sysctl being compiled out, and
> the fallback to using /proc.  The code seems to have been
> doing that since it was added to glibc in 2000.

Using /proc is extremly slow for this. You added significant 
cost to each program startup.

I still think it's a good idea to simulate that sysctl and printk
the others.
 
-Andi

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 14:52         ` Andi Kleen
@ 2006-07-12 15:32           ` Eric W. Biederman
  2006-07-12 16:08             ` Andi Kleen
  0 siblings, 1 reply; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-12 15:32 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen <ak@suse.de> writes:

>> So it will correctly handle that sysctl being compiled out, and
>> the fallback to using /proc.  The code seems to have been
>> doing that since it was added to glibc in 2000.
>
> Using /proc is extremly slow for this.

How so it is the same code in the kernel.  Is open much slower than
sys_sysctl?

> You added significant cost to each program startup.

Not each program only the ones that use pthreads.

> I still think it's a good idea to simulate that sysctl and printk
> the others.

To reduce the noise something like that makes sense.  I'm going to
see if I can get glibc to use uname which should have the same effect.

Eric

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 15:32           ` Eric W. Biederman
@ 2006-07-12 16:08             ` Andi Kleen
  2006-07-12 16:36               ` Eric W. Biederman
  0 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2006-07-12 16:08 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: linux-kernel

On Wednesday 12 July 2006 17:32, Eric W. Biederman wrote:
> Andi Kleen <ak@suse.de> writes:
> 
> >> So it will correctly handle that sysctl being compiled out, and
> >> the fallback to using /proc.  The code seems to have been
> >> doing that since it was added to glibc in 2000.
> >
> > Using /proc is extremly slow for this.
> 
> How so it is the same code in the kernel.  Is open much slower than
> sys_sysctl?

Yes, the VFS adds quite a lot of overhead with its zillions of
locks and other complicated things.

I have also people complaining about /proc/cpuinfo overhead.
> 
> > You added significant cost to each program startup.
> 
> Not each program only the ones that use pthreads.

In modern glibc it's basically everything

> > I still think it's a good idea to simulate that sysctl and printk
> > the others.
> 
> To reduce the noise something like that makes sense.  I'm going to
> see if I can get glibc to use uname which should have the same effect.

And still printk for all old binaries?  Not a good idea.

You have to check for  this case in the printk stub anyways and 
if you check for it you can as well emulate it
(with a big fat comment that this won't be done for any other sysctl)

-Andi

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 16:08             ` Andi Kleen
@ 2006-07-12 16:36               ` Eric W. Biederman
  2006-07-12 18:24                 ` Stephen Hemminger
  0 siblings, 1 reply; 21+ messages in thread
From: Eric W. Biederman @ 2006-07-12 16:36 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel

Andi Kleen <ak@suse.de> writes:

> On Wednesday 12 July 2006 17:32, Eric W. Biederman wrote:
>> Andi Kleen <ak@suse.de> writes:
>> 
>> >> So it will correctly handle that sysctl being compiled out, and
>> >> the fallback to using /proc.  The code seems to have been
>> >> doing that since it was added to glibc in 2000.
>> >
>> > Using /proc is extremly slow for this.
>> 
>> How so it is the same code in the kernel.  Is open much slower than
>> sys_sysctl?
>
> Yes, the VFS adds quite a lot of overhead with its zillions of
> locks and other complicated things.
>
> I have also people complaining about /proc/cpuinfo overhead.

Yes. I just confirmed that /proc/sys access is about an order
of magnitude slower.  I goofed and forgot to add you to the
cc list but I just sent a patch to Ulrich to switch glibc to using
uname for this case.  uname is even faster than sysctl.

I am very curious to understand where the overhead is coming
from.  It may just be the VFS or it may be stupidities in
the /proc implementation.  If things are really as bad as
they appear it puts a serious damper on the plan9 style everything
is a filesystem approach.

>> > You added significant cost to each program startup.
>> 
>> Not each program only the ones that use pthreads.
>
> In modern glibc it's basically everything

It shouldn't be.   You can support be thread safe without pulling
in glibc.  But yes it is common.

>> > I still think it's a good idea to simulate that sysctl and printk
>> > the others.
>> 
>> To reduce the noise something like that makes sense.  I'm going to
>> see if I can get glibc to use uname which should have the same effect.
>
> And still printk for all old binaries?  Not a good idea.
>
> You have to check for  this case in the printk stub anyways and 
> if you check for it you can as well emulate it
> (with a big fat comment that this won't be done for any other sysctl)

I will see how it goes.

Eric

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 16:36               ` Eric W. Biederman
@ 2006-07-12 18:24                 ` Stephen Hemminger
  2006-07-12 19:58                   ` Arjan van de Ven
  0 siblings, 1 reply; 21+ messages in thread
From: Stephen Hemminger @ 2006-07-12 18:24 UTC (permalink / raw)
  To: linux-kernel

What is the motivation behind killing the sys_sysctl call anyway?
Sure its more ugly esthetically but it works.

Is it because of the desire to virtualize all namespaces?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 18:24                 ` Stephen Hemminger
@ 2006-07-12 19:58                   ` Arjan van de Ven
  2006-07-13  0:52                     ` Theodore Tso
  0 siblings, 1 reply; 21+ messages in thread
From: Arjan van de Ven @ 2006-07-12 19:58 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: linux-kernel

On Wed, 2006-07-12 at 11:24 -0700, Stephen Hemminger wrote:
> What is the motivation behind killing the sys_sysctl call anyway?
> Sure its more ugly esthetically but it works.

it "works" but the thing is that the number space is NOT stable, and as
such it's a really bad ABI


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-12 19:58                   ` Arjan van de Ven
@ 2006-07-13  0:52                     ` Theodore Tso
  2006-07-14 20:09                       ` H. Peter Anvin
  0 siblings, 1 reply; 21+ messages in thread
From: Theodore Tso @ 2006-07-13  0:52 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Stephen Hemminger, linux-kernel

On Wed, Jul 12, 2006 at 09:58:29PM +0200, Arjan van de Ven wrote:
> On Wed, 2006-07-12 at 11:24 -0700, Stephen Hemminger wrote:
> > What is the motivation behind killing the sys_sysctl call anyway?
> > Sure its more ugly esthetically but it works.
> 
> it "works" but the thing is that the number space is NOT stable, and as
> such it's a really bad ABI

To be fair, the older, "base" numbers are actually stable, such as
what glibc is depending on, have in practice been quite stable.  It's
only the newer fields that tend to be unstable.

But that means we can afford to do an orderly migration away from it;
it's not something that has to be urgently done within a few weeks or
even a few months.

						- Ted

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] sysctl: Allow /proc/sys without sys_sysctl
  2006-07-13  0:52                     ` Theodore Tso
@ 2006-07-14 20:09                       ` H. Peter Anvin
  0 siblings, 0 replies; 21+ messages in thread
From: H. Peter Anvin @ 2006-07-14 20:09 UTC (permalink / raw)
  To: Theodore Tso, Arjan van de Ven, Stephen Hemminger, linux-kernel

Theodore Tso wrote:
> On Wed, Jul 12, 2006 at 09:58:29PM +0200, Arjan van de Ven wrote:
>> On Wed, 2006-07-12 at 11:24 -0700, Stephen Hemminger wrote:
>>> What is the motivation behind killing the sys_sysctl call anyway?
>>> Sure its more ugly esthetically but it works.
>> it "works" but the thing is that the number space is NOT stable, and as
>> such it's a really bad ABI
> 
> To be fair, the older, "base" numbers are actually stable, such as
> what glibc is depending on, have in practice been quite stable.  It's
> only the newer fields that tend to be unstable.
> 
> But that means we can afford to do an orderly migration away from it;
> it's not something that has to be urgently done within a few weeks or
> even a few months.
> 

Another alternative would be to publish a limited set of sysctl numbers 
that will be maintained forever.

	-hpa

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2006-07-14 20:10 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-07-10 22:38 [PATCH] sysctl: Allow /proc/sys without sys_sysctl Eric W. Biederman
2006-07-10 22:48 ` Randy.Dunlap
2006-07-11  4:00   ` Eric W. Biederman
2006-07-11  4:19 ` Andrew Morton
2006-07-11  6:57   ` Eric W. Biederman
2006-07-11  7:04     ` Andrew Morton
2006-07-11  7:52       ` Eric W. Biederman
2006-07-11  8:05         ` Andrew Morton
2006-07-11  7:23   ` [PATCH] sysctl: Scream if someone uses sys_sysctl Eric W. Biederman
2006-07-11 22:26 ` [PATCH] sysctl: Allow /proc/sys without sys_sysctl Andi Kleen
2006-07-12  3:13   ` Eric W. Biederman
2006-07-12 13:32     ` Andi Kleen
2006-07-12 14:47       ` Eric W. Biederman
2006-07-12 14:52         ` Andi Kleen
2006-07-12 15:32           ` Eric W. Biederman
2006-07-12 16:08             ` Andi Kleen
2006-07-12 16:36               ` Eric W. Biederman
2006-07-12 18:24                 ` Stephen Hemminger
2006-07-12 19:58                   ` Arjan van de Ven
2006-07-13  0:52                     ` Theodore Tso
2006-07-14 20:09                       ` H. Peter Anvin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox