* (patch 1/3) lsm: add control over /proc/<pid> visibility
@ 2004-10-06 20:21 Serge Hallyn
2004-10-06 20:24 ` [patch 1/3] lsm: add bsdjail module Serge Hallyn
2004-10-06 20:25 ` [patch 3/3] lsm: add bsdjail documentation Serge Hallyn
0 siblings, 2 replies; 37+ messages in thread
From: Serge Hallyn @ 2004-10-06 20:21 UTC (permalink / raw)
To: akpm, Chris Wright, linux-kernel; +Cc: serue
Attached is a patch which introduces a new LSM hook,
security_task_lookup. This hook allows an LSM to mediate visibility of
/proc/<pid> on a per-pid level. The bsdjail LSM, which will be sent
next, is a user of this hook.
Please apply.
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
diff -Nrup linux-2.6.9-rc3-bk6/fs/proc/base.c linux-2.6.9-rc3-bk6-jail/fs/proc/base.c
--- linux-2.6.9-rc3-bk6/fs/proc/base.c 2004-10-06 10:07:55.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/fs/proc/base.c 2004-10-06 10:51:04.000000000 -0500
@@ -1683,6 +1683,8 @@ static int get_tgid_list(int index, unsi
int tgid = p->pid;
if (!pid_alive(p))
continue;
+ if (security_task_lookup(p))
+ continue;
if (--index >= 0)
continue;
tgids[nr_tgids] = tgid;
diff -Nrup linux-2.6.9-rc3-bk6/include/linux/security.h linux-2.6.9-rc3-bk6-jail/include/linux/security.h
--- linux-2.6.9-rc3-bk6/include/linux/security.h 2004-08-14 00:37:30.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/include/linux/security.h 2004-10-06 10:51:04.000000000 -0500
@@ -627,6 +627,11 @@ struct swap_info_struct;
* Set the security attributes in @p->security for a kernel thread that
* is being reparented to the init task.
* @p contains the task_struct for the kernel thread.
+ * @task_lookup:
+ * Check permission to see the /proc/<pid> entry for process @p.
+ * @p contains the task_struct for task <pid> which is being looked
+ * up under /proc.
+ * Return 0 if permission is granted.
* @task_to_inode:
* Set the security attributes for an inode based on an associated task's
* security attributes, e.g. for /proc/pid inodes.
@@ -1152,6 +1157,7 @@ struct security_operations {
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
void (*task_reparent_to_init) (struct task_struct * p);
+ int (*task_lookup)(struct task_struct *p);
void (*task_to_inode)(struct task_struct *p, struct inode *inode);
int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag);
@@ -1751,6 +1757,11 @@ static inline void security_task_reparen
security_ops->task_reparent_to_init (p);
}
+static inline int security_task_lookup(struct task_struct *p)
+{
+ return security_ops->task_lookup(p);
+}
+
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{
security_ops->task_to_inode(p, inode);
@@ -2386,6 +2397,11 @@ static inline void security_task_reparen
cap_task_reparent_to_init (p);
}
+static inline int security_task_lookup(struct task_struct *p)
+{
+ return 0;
+}
+
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{ }
diff -Nrup linux-2.6.9-rc3-bk6/security/dummy.c linux-2.6.9-rc3-bk6-jail/security/dummy.c
--- linux-2.6.9-rc3-bk6/security/dummy.c 2004-10-06 10:11:29.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/dummy.c 2004-10-06 10:51:04.000000000 -0500
@@ -616,6 +616,11 @@ static void dummy_task_reparent_to_init
return;
}
+static int dummy_task_lookup(struct task_struct *p)
+{
+ return 0;
+}
+
static void dummy_task_to_inode(struct task_struct *p, struct inode *inode)
{ }
@@ -978,6 +983,7 @@ void security_fixup_ops (struct security
set_to_dummy_if_null(ops, task_kill);
set_to_dummy_if_null(ops, task_prctl);
set_to_dummy_if_null(ops, task_reparent_to_init);
+ set_to_dummy_if_null(ops, task_lookup);
set_to_dummy_if_null(ops, task_to_inode);
set_to_dummy_if_null(ops, ipc_permission);
set_to_dummy_if_null(ops, msg_msg_alloc_security);
^ permalink raw reply [flat|nested] 37+ messages in thread* Re: [patch 1/3] lsm: add bsdjail module 2004-10-06 20:21 (patch 1/3) lsm: add control over /proc/<pid> visibility Serge Hallyn @ 2004-10-06 20:24 ` Serge Hallyn 2004-10-06 23:26 ` Andrew Morton 2004-10-06 20:25 ` [patch 3/3] lsm: add bsdjail documentation Serge Hallyn 1 sibling, 1 reply; 37+ messages in thread From: Serge Hallyn @ 2004-10-06 20:24 UTC (permalink / raw) To: akpm; +Cc: Chris Wright, linux-kernel, serue Attached is a patch against the security Kconfig and Makefile to support bsdjail, as well as the bsdjail.c file itself. bsdjail offers functionality similar to (but more limited than) the vserver patch. A process in a jail lives under a chroot which is not vulnerable to the well-known chdir(...)(etc)chroot(.) attack against normal chroots, and may be locked to one ip address. For additional features, please see Documentation/bsdjail.txt, which is included in the next patch. Changelog: Sep 10, 2004: original version Sep 12, 2004: add ipv6 support Sep 13, 2004: support simultaneous ipv4+ipv6 Oct 6, 2004: move kref release function to kref_put from kref_init Please apply. Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> diff -Nrup linux-2.6.9-rc3-bk6/security/bsdjail.c linux-2.6.9-rc3-bk6-jail/security/bsdjail.c --- linux-2.6.9-rc3-bk6/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-rc3-bk6-jail/security/bsdjail.c 2004-10-06 12:20:54.000000000 -0500 @@ -0,0 +1,1525 @@ +/* + * File: linux/security/bsdjail.c + * Author: Serge Hallyn (serue@us.ibm.com) + * Date: Sep 12, 2004 + * + * (See Documentation/bsdjail.txt for more information) + * + * Copyright (C) 2004 International Business Machines <serue@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/security.h> +#include <linux/namei.h> +#include <linux/namespace.h> +#include <linux/proc_fs.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/pagemap.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <linux/mount.h> +#include <asm/uaccess.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/seq_file.h> +#include <linux/un.h> +#include <linux/smp_lock.h> +#include <linux/kref.h> + +static int jail_debug = 0; +MODULE_PARM(jail_debug, "i"); +MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n"); + +#define DBG 0 +#define WARN 1 +#define bsdj_debug(how, fmt, arg... ) \ + do { \ + if ( how || jail_debug ) \ + printk(KERN_NOTICE "%s: %s: " fmt, \ + MY_NAME, __FUNCTION__, \ + ## arg ); \ + } while ( 0 ) + +#define MY_NAME "bsdjail" + +/* flag to keep track of how we were registered */ +static int secondary = 0; + +/* + * The task structure holding jail information. + * Taskp->security points to one of these (or is null). + * There is exactly one jail_struct for each jail. If >1 process + * are in the same jail, they share the same jail_struct. + */ +struct jail_struct { + struct kref kref; + + /* these are set on writes to /proc/<pid>/attr/exec */ + char *root_pathname; /* char * containing path to use as jail / */ + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */ + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */ + + /* these are set when a jail becomes active */ + __u32 addr4; /* internal form of ip4_addr_name */ + struct in6_addr addr6; /* internal form of ip6_addr_name */ + + struct dentry *dentry; /* dentry of fs root */ + struct vfsmount *mnt; /* vfsmnt of fs root */ + + /* Resource limits. 0 = no limit */ + int max_nrtask; /* maximum number of tasks within this jail. 
*/ + int cur_nrtask; /* current number of tasks within this jail. */ + long maxtimeslice; /* max timeslice in ms for procs in this jail */ + long nice; /* nice level for processes in this jail */ + long max_data, max_memlock; /* equivalent to RLIMIT_{DATA,MEMLOCK} */ +/* values for the jail_flags field */ +#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */ +#define GOT_IPV4 2 +#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */ + char jail_flags; +}; + +#define in_use(x) (x->jail_flags & IN_USE) +#define set_in_use(x) (x->jail_flags |= IN_USE) + +#define got_network(x) (x->jail_flags & (GOT_IPV4 | GOT_IPV6)) +#define got_ipv4(x) (x->jail_flags & (GOT_IPV4)) +#define got_ipv6(x) (x->jail_flags & (GOT_IPV6)) +#define set_ipv4(x) (x->jail_flags |= GOT_IPV4) +#define set_ipv6(x) (x->jail_flags |= GOT_IPV6) +#define unset_got_ipv4(x) (x->jail_flags &= ~GOT_IPV4) +#define unset_got_ipv6(x) (x->jail_flags &= ~GOT_IPV6) + +/* + * structs, defines, and functions to cope with stacking + */ + +#define get_task_security(task) (task->security) +#define get_inode_security(inode) (inode->i_security) +#define get_sock_security(sock) (sock->sk_security) +#define get_file_security(file) (file->f_security) +#define get_ipc_security(ipc) (ipc->security) + +#define jail_of(proc) (get_task_security(proc)) + +/* + * disable_jail: A jail which was in use, but has no references + * left, is disabled - we free up the mountpoint and dentry, and + * give up our reference on the module. + * + * don't need to put namespace, it will be done automatically + * when the last process in jail is put. 
+ * DO need to put the dentry and vfsmount + */ +static void +disable_jail(struct jail_struct *tsec) +{ + dput(tsec->dentry); + mntput(tsec->mnt); + module_put(THIS_MODULE); +} + + +static void free_jail(struct jail_struct *tsec) +{ + if (!tsec) + return; + + if (tsec->root_pathname) + kfree(tsec->root_pathname); + if (tsec->ip4_addr_name) + kfree(tsec->ip4_addr_name); + if (tsec->ip6_addr_name) + kfree(tsec->ip6_addr_name); + kfree(tsec); +} + +/* release_jail: + * Callback for kref_put to use for releasing a jail when its + * last user exits. + */ +static void release_jail(struct kref *kref) +{ + struct jail_struct *tsec; + + tsec = container_of(kref,struct jail_struct,kref); + disable_jail(tsec); + free_jail(tsec); +} + +#define set_task_security(task,data) task->security = data +#define set_inode_security(inode,data) inode->i_security = data +#define set_sock_security(sock,data) sock->sk_security = data +#define set_file_security(file,data) file->f_security = data +#define set_ipc_security(ipc,data) ipc.security = data + +/* + * jail_task_free_security: this is the callback hooked into LSM. + * If there was no task->security field for bsdjail, do nothing. + * If there was, but it was never put into use, free the jail. + * If there was, and the jail is in use, then decrement the usage + * count, and disable and free the jail if the usage count hits 0. + */ +static void jail_task_free_security(struct task_struct *task) +{ + struct jail_struct *tsec; + + tsec = get_task_security(task); + + if (!tsec) + return; + + if (!in_use(tsec)) { + /* + * someone did 'echo -n x > /proc/<pid>/attr/exec' but + * then forked before execing. Nuke the old info. 
+ */ + free_jail(tsec); + set_task_security(task,NULL); + return; + } + tsec->cur_nrtask--; + /* If this was the last process in the jail, delete the jail */ + kref_put(&tsec->kref, release_jail); +} + +static struct jail_struct * +alloc_task_security(struct task_struct *tsk) +{ + struct jail_struct *tsec; + tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL); + if (!tsec) + return ERR_PTR(-ENOMEM); + memset(tsec, 0, sizeof(struct jail_struct)); + set_task_security(tsk, tsec); + return tsec; +} + +static inline int +in_jail(struct task_struct *t) +{ + struct jail_struct *tsec = jail_of(t); + + if (tsec && in_use(tsec)) + return 1; + + return 0; +} + +/* + * If a network address was passed into /proc/<pid>/attr/exec, + * then process in its jail will only be allowed to bind/listen + * to that address. + */ +static void +setup_netaddress(struct jail_struct *tsec) +{ + unsigned int a,b,c,d, i; + unsigned int x[8]; + + unset_got_ipv4(tsec); + tsec->addr4 = 0; + unset_got_ipv6(tsec); + ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0); + + if (tsec->ip4_addr_name) { + if (sscanf(tsec->ip4_addr_name,"%u.%u.%u.%u",&a,&b,&c,&d)!=4) + return; + if (a>255 || b>255 || c>255 || d>255) + return; + tsec->addr4 = htonl((a<<24)|(b<<16)|(c<<8)|d); + set_ipv4(tsec); + bsdj_debug(DBG, "Network (ipv4) set up (%s)\n", + tsec->ip4_addr_name); + } + + if (tsec->ip6_addr_name) { + if (sscanf(tsec->ip6_addr_name,"%x:%x:%x:%x:%x:%x:%x:%x", + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], + &x[7]) != 8) { + printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__, + tsec->ip6_addr_name); + return; + } + for (i=0; i<8; i++) { + if (x[i] > 65535) { + printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i); + return; + } + tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]); + } + set_ipv6(tsec); + bsdj_debug(DBG, "Network (ipv6) set up (%s)\n", + tsec->ip6_addr_name); + } +} + +/* + * enable_jail: + * Called when a process is placed into a new jail to handle the + * actual creation of the jail. 
+ * Creates namespace + * Sets process root+pwd + * Stores the requested ip address + * Registers a unique pseudo-proc filesystem for this jail + */ +static int enable_jail(struct task_struct *tsk) +{ + struct nameidata nd; + struct jail_struct *tsec; + int retval = -EFAULT; + + tsec = jail_of(tsk); + if (!tsec || !tsec->root_pathname) + goto out; + + /* + * USE_JAIL_NAMESPACE: could be useful, so that future mounts outside + * the jail don't affect the jail. But it's not necessary, and + * requires exporting copy_namespace from fs/namespace.c + * + * Actually, it would also be useful for truly hiding + * information about mounts which do not exist in this jail. +#define USE_JAIL_NAMESPACE + */ +#ifdef USE_JAIL_NAMESPACE + bsdj_debug(DBG, "bsdjail: copying namespace.\n"); + retval = -EPERM; + if (copy_namespace(CLONE_NEWNS, tsk)) + goto out; + bsdj_debug(DBG, "bsdjail: copied namespace.\n"); +#endif + + /* find our new root directory */ + bsdj_debug(DBG, "bsdjail: looking up %s\n", tsec->root_pathname); + retval = path_lookup(tsec->root_pathname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); + if (retval) + goto out; + + bsdj_debug(DBG, "bsdjail: got %s, setting root to it\n", tsec->root_pathname); + + /* and set the fsroot to it */ + set_fs_root(tsk->fs, nd.mnt, nd.dentry); + set_fs_pwd(tsk->fs, nd.mnt, nd.dentry); + + bsdj_debug(DBG, "bsdjail: root has been set. 
Have fun.\n"); + + /* set up networking */ + if (tsec->ip4_addr_name || tsec->ip6_addr_name) + setup_netaddress(tsec); + + tsec->cur_nrtask = 1; + if (tsec->nice) + set_user_nice(current, tsec->nice); + if (tsec->max_data) { + current->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data; + current->rlim[RLIMIT_DATA].rlim_max = tsec->max_data; + } + if (tsec->max_memlock) { + current->rlim[RLIMIT_MEMLOCK].rlim_cur = tsec->max_memlock; + current->rlim[RLIMIT_MEMLOCK].rlim_max = tsec->max_memlock; + } + if (tsec->maxtimeslice) { + current->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; + current->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; + } + /* success and end */ + tsec->mnt = mntget(nd.mnt); + tsec->dentry = dget(nd.dentry); + path_release(&nd); + kref_init(&tsec->kref); + set_in_use(tsec); + + /* won't let ourselves be removed until this jail goes away */ + try_module_get(THIS_MODULE); + + return 0; + +out: + return retval; +} + +/* + * LSM /proc/<pid>/attr hooks. + * You may write into /proc/<pid>/attr/exec: + * root /some/path + * ip 2.2.2.2 + * These values will be used on the next exec() to set up your jail + * (assuming you're not already in a jail) + */ +static int +jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size) +{ + struct jail_struct *tsec = jail_of(current); + long val; + int start, len; + + if (tsec && in_use(tsec)) + return -EINVAL; /* let them guess why */ + + if (p != current || strcmp(name, "exec")) + return -EPERM; + + if (strncmp(value, "root ", 5)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + if (tsec->root_pathname) + kfree(tsec->root_pathname); + start = 5; + len = size-start; + tsec->root_pathname = kmalloc(len+1, GFP_KERNEL); + if (!tsec->root_pathname) + return -ENOMEM; + strlcpy(tsec->root_pathname, value+start, len+1); + } else if (strncmp(value, "ip ", 3)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + if 
(tsec->ip4_addr_name) + kfree(tsec->ip4_addr_name); + start = 3; + len = size-start; + tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL); + if (!tsec->ip4_addr_name) + return -ENOMEM; + strlcpy(tsec->ip4_addr_name, value+start, len+1); + } else if (strncmp(value, "ip6 ", 4) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + if (tsec->ip6_addr_name) + kfree(tsec->ip6_addr_name); + start = 4; + len = size-start; + tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL); + if (!tsec->ip6_addr_name) + return -ENOMEM; + strlcpy(tsec->ip6_addr_name, value+start, len+1); + + /* the next two are equivalent */ + } else if (strncmp(value, "slice ", 6)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+6, NULL, 0); + tsec->maxtimeslice = val; + } else if (strncmp(value, "timeslice ", 10)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+10, NULL, 0); + tsec->maxtimeslice = val; + } else if (strncmp(value, "nrtask ", 7)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = (int) simple_strtol(value+7, NULL, 0); + if (val < 1) + return -EINVAL; + tsec->max_nrtask = val; + } else if (strncmp(value, "memlock ", 8)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+8, NULL, 0); + tsec->max_memlock = val; + } else if (strncmp(value, "data ", 5)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+5, NULL, 0); + tsec->max_data = val; + } else if (strncmp(value, "nice ", 5)==0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+5, NULL, 0); + tsec->nice = val; + } else + return -EINVAL; + + return size; +} + +static int 
print_jail_net_info(struct jail_struct *j, char *buf, int maxcnt) +{ + int len = 0; + + if (j->ip4_addr_name) + len += snprintf(buf, maxcnt, "%s\n", j->ip4_addr_name); + if (j->ip6_addr_name) + len += snprintf(buf, maxcnt-len, "%s\n", j->ip6_addr_name); + + return snprintf(buf, maxcnt, "No network information\n"); +} + +/* + * LSM /proc/<pid>/attr read hook. + * + * /proc/$$/attr/current output: + * If the reading process, say process 1001, is in a jail, then + * cat /proc/999/attr/current + * will print networking information. + * If the reading process, say process 1001, is not in a jail, then + * cat /proc/999/attr/current + * will return + * root: (root of jail) + * ip: (ip address of jail) + * if 999 is in a jail, or + * -EINVAL + * if 999 is not in a jail. + * + * /proc/$$/attr/exec output: + * A process in a jail gets -EINVAL for /proc/$$/attr/exec. + * A process not in a jail gets hints on starting a jail. + */ +static int +jail_getprocattr(struct task_struct *p, char *name, void *value, size_t size) +{ + struct jail_struct *tsec; + int err = 0; + + if (in_jail(current)) { + if (strcmp(name, "current")==0) { + /* provide network info */ + err = print_jail_net_info(jail_of(current), value, + size); + return err; + } + return -EINVAL; /* let them guess why */ + } + + if (strcmp(name, "exec") == 0) { + /* Print usage some help */ + err = snprintf(value, size, + "Valid keywords:\n" + "root <pathname>\n" + "ip <ip4-addr>\n" + "ip6 <ip6-addr>\n" + "nrtask <max number of tasks in this jail>\n" + "nice <nice level for processes in this jail>\n" + "slice <max timeslice per process in msecs>\n" + "data <max data size per process in bytes>\n" + "memlock <max lockable memory per process in bytes>\n"); + return err; + } + + if (strcmp(name, "current")) + return -EPERM; + + tsec = jail_of(p); + if (!tsec || !in_use(tsec)) { + err = snprintf(value, size, "Not Jailed\n"); + } else { + err = snprintf(value, size, + "Root: %s\nIPv4: %s\nIPv6: %s\n" + "max_nrtask %d current 
nrtask %d max_timeslice %lu " + "nice %lu\n" + "max_memlock %lu max_data %lu\n", + tsec->root_pathname, + tsec->ip4_addr_name ? tsec->ip4_addr_name : "(none)", + tsec->ip6_addr_name ? tsec->ip6_addr_name : "(none)", + tsec->max_nrtask, tsec->cur_nrtask, tsec->maxtimeslice, + tsec->nice, tsec->max_data, tsec->max_memlock); + } + + return err; +} + +/* + * Forbid a process in a jail from sending a signal to a process in another + * (or no) jail through file sigio. + * + * We consider the process which set the fowner to be the one sending the + * signal, rather than the one writing to the file. Therefore we store the + * jail of a process during jail_file_set_fowner, then check that against + * the jail of the process receiving the signal. + */ +static int +jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, + int fd, int reason) +{ + struct file *file; + struct jail_struct *tsec, *fsec; + + if (!in_jail(current)) + return 0; + + file = (struct file *)((long)fown - offsetof(struct file,f_owner)); + tsec = jail_of(tsk); + fsec = get_file_security(file); + + if (fsec != tsec) + return -EPERM; + + return 0; +} + +static int +jail_file_set_fowner(struct file *file) +{ + struct jail_struct *tsec; + + tsec = jail_of(current); + set_file_security(file, tsec); + if (tsec) + kref_get(&tsec->kref); + + return 0; +} + +static void free_ipc_security(struct kern_ipc_perm *ipc) +{ + struct jail_struct *tsec; + + tsec = get_ipc_security(ipc); + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + set_ipc_security((*ipc), NULL); +} + +static void free_file_security(struct file *file) +{ + struct jail_struct *tsec; + + tsec = get_file_security(file); + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + set_file_security(file, NULL); +} + +static void free_inode_security(struct inode *inode) +{ + struct jail_struct *tsec; + + tsec = get_inode_security(inode); + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + 
set_inode_security(inode, NULL); +} + +/* + * LSM ptrace hook: + * process in jail may not ptrace process not in the same jail + */ +static int +jail_ptrace (struct task_struct *tracer, struct task_struct *tracee) +{ + struct jail_struct *tsec = jail_of(tracer); + + if (tsec && in_use(tsec)) { + if (tsec == jail_of(tracee)) + return 0; + return -EPERM; + } + return 0; +} + +/* + * process in jail may only use one (aliased) ip address. If they try to + * attach to 127.0.0.1, that is remapped to their own address. If some + * other address (and not their own), deny permission + */ +static int jail_socket_unix_bind(struct socket *sock, struct sockaddr *address, + int addrlen); + +#define loopbackaddr htonl((127 << 24) | 1) + +static inline int jail_inet4_bind(struct socket *sock, struct sockaddr *address, + int addrlen, struct jail_struct *tsec) +{ + struct sockaddr_in *inaddr; + __u32 sin_addr, jailaddr; + + if (!got_ipv4(tsec)) + return -EPERM; + + inaddr = (struct sockaddr_in *)address; + sin_addr = inaddr->sin_addr.s_addr; + jailaddr = tsec->addr4; + + if (sin_addr == jailaddr) + return 0; + + if (sin_addr == loopbackaddr || !sin_addr) { + bsdj_debug(DBG, "Got a loopback or 0 address\n"); + sin_addr = jailaddr; + bsdj_debug(DBG, "Converted to: %u.%u.%u.%u\n", + NIPQUAD(sin_addr)); + return 0; + } + + return -EPERM; +} + +static inline int +jail_inet6_bind(struct socket *sock, struct sockaddr *address, int addrlen, + struct jail_struct *tsec) +{ + struct sockaddr_in6 *inaddr6; + struct in6_addr *sin6_addr, *jailaddr; + + if (!got_ipv6(tsec)) + return -EPERM; + + inaddr6 = (struct sockaddr_in6 *)address; + sin6_addr = &inaddr6->sin6_addr; + jailaddr = &tsec->addr6; + + if (ipv6_addr_cmp(jailaddr, sin6_addr)==0) + return 0; + + if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback)==0) { + ipv6_addr_copy(sin6_addr, jailaddr); + return 0; + } + + printk(KERN_NOTICE "%s: DENYING\n", __FUNCTION__); + printk(KERN_NOTICE "%s: a %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " + "j 
%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + __FUNCTION__, + NIP6(*sin6_addr), + NIP6(*jailaddr)); + + return -EPERM; +} + +static int +jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec)) + return 0; + + if (sock->sk->sk_family == AF_UNIX) + return jail_socket_unix_bind(sock, address, addrlen); + + if (!got_network(tsec)) + /* If we want to be strict, we could just + * deny net access when lacking a pseudo ip. + * For now we just allow it. */ + return 0; + + switch(address->sa_family) { + case AF_INET: + return jail_inet4_bind(sock, address, addrlen, tsec); + + case AF_INET6: + return jail_inet6_bind(sock, address, addrlen, tsec); + + default: + return 0; + } +} + +/* + * If locked in an ipv6 jail, don't let them use ipv4, and vice versa + */ +static int +jail_socket_create(int family, int type, int protocol, int kern) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec) || kern || !got_network(tsec)) + return 0; + + switch(family) { + case AF_INET: + if (got_ipv4(tsec)) + return 0; + return -EPERM; + case AF_INET6: + if (got_ipv6(tsec)) + return 0; + return -EPERM; + default: + return 0; + }; + + return 0; +} + +static void +jail_socket_post_create(struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct inet_opt *inet; + struct ipv6_pinfo *inet6; + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec) || kern || !got_network(tsec)) + return; + + switch(family) { + case AF_INET: + inet = inet_sk(sock->sk); + inet->saddr = tsec->addr4; + break; + case AF_INET6: + inet6 = inet6_sk(sock->sk); + ipv6_addr_copy(&inet6->saddr, &tsec->addr6); + break; + default: + break; + }; + + return; +} + +static int +jail_socket_listen(struct socket *sock, int backlog) +{ + struct inet_opt *inet; + struct ipv6_pinfo *inet6; + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec) || 
!got_network(tsec)) + return 0; + + switch (sock->sk->sk_family) { + case AF_INET: + inet = inet_sk(sock->sk); + if (inet->saddr == tsec->addr4) + return 0; + return -EPERM; + + case AF_INET6: + inet6 = inet6_sk(sock->sk); + if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6)==0) + return 0; + return -EPERM; + + default: + return 0; + + } +} + +static void free_sock_security(struct sock *sk) +{ + struct jail_struct *tsec; + + tsec = get_sock_security(sk); + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + set_sock_security(sk, NULL); +} + +/* + * The next three (socket) hooks prevent a process in a jail from sending + * data to a abstract unix domain socket which was bound outside the jail. + */ +static int +jail_socket_unix_bind(struct socket *sock, struct sockaddr *address, + int addrlen) +{ + struct sockaddr_un *sunaddr; + struct jail_struct *tsec; + + if (sock->sk->sk_family != AF_UNIX) + return 0; + + sunaddr = (struct sockaddr_un *)address; + if (sunaddr->sun_path[0] != 0) + return 0; + + tsec = jail_of(current); + set_sock_security(sock->sk, tsec); + if (tsec) + kref_get(&tsec->kref); + return 0; +} + +/* + * Note - we deny sends both from unjailed to jailed, and from jailed + * to unjailed. As well as, of course between different jails. 
+ */ +static int +jail_socket_unix_may_send(struct socket *sock, struct socket *other) +{ + struct jail_struct *tsec, *ssec; + + tsec = jail_of(current); /* jail of sending process */ + ssec = get_sock_security(other->sk); /* jail of receiver */ + + /* both NULL (no jail on either side) compare equal and pass */ + if (tsec != ssec) + return -EPERM; + + return 0; +} + +/* + * Same rule as jail_socket_unix_may_send, applied when connecting a unix + * stream socket: the connecting task's jail must be the one stored on + * other->sk. newsk is not examined. + */ +static int +jail_socket_unix_stream_connect(struct socket *sock, + struct socket *other, struct sock *newsk) +{ + struct jail_struct *tsec, *ssec; + + tsec = jail_of(current); /* jail of sending process */ + ssec = get_sock_security(other->sk); /* jail of receiver */ + + if (tsec != ssec) + return -EPERM; + + return 0; +} + +/* process in jail may not mount; the arguments are not examined */ +static int +jail_mount(char * dev_name, struct nameidata *nd, char * type, + unsigned long flags, void * data) +{ + if (in_jail(current)) + return -EPERM; + + return 0; +} + +/* process in jail may not umount; the arguments are not examined */ +static int +jail_umount(struct vfsmount *mnt, int flags) +{ + if (in_jail(current)) + return -EPERM; + + return 0; +} + +/* + * process in jail may not: + * use nice (CAP_SYS_NICE) + * change network config (CAP_NET_ADMIN) + * load/unload modules (CAP_SYS_MODULE) + * do raw I/O (CAP_SYS_RAWIO) + * Any other capability, and every capability for an unjailed task, is + * granted only when the task's fsuid (for filesystem capabilities, per + * cap_is_fs_cap) or euid (for all others) is 0. + */ +static int +jail_capable (struct task_struct *tsk, int cap) +{ + if (in_jail(tsk)) { + if (cap == CAP_SYS_NICE) + return -EPERM; + if (cap == CAP_NET_ADMIN) + return -EPERM; + if (cap == CAP_SYS_MODULE) + return -EPERM; + if (cap == CAP_SYS_RAWIO) + return -EPERM; + } + + if (cap_is_fs_cap (cap) ? tsk->fsuid == 0 : tsk->euid == 0) + return 0; + return -EPERM; +} + +/* + * jail_security_task_create: + * + * If the current process is in a jail, and that jail is about to exceed its + * maximum number of processes, then refuse to fork. If the maximum number + * of tasks (max_nrtask) is 0, then there is no limit for this jail, and we + * allow all forks. 
+ */ +static inline int +jail_security_task_create (unsigned long clone_flags) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec)) + return 0; + + if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask) + return -EPERM; + return 0; +} + +/* + * The child of a process in a jail belongs in the same jail + */ +static int +jail_task_alloc_security(struct task_struct *tsk) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec)) + return 0; + + set_task_security(tsk, tsec); + kref_get(&tsec->kref); + tsec->cur_nrtask++; + if (tsec->maxtimeslice) { + tsk->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; + tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; + } + if (tsec->max_data) { + tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_data; + tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_data; + } + if (tsec->max_memlock) { + tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_memlock; + tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_memlock; + } + if (tsec->nice) + set_user_nice(current, tsec->nice); + + return 0; +} + +static int +jail_bprm_alloc_security(struct linux_binprm *bprm) +{ + struct jail_struct *tsec; + int ret; + + tsec = jail_of(current); + if (!tsec) + return 0; + + if (in_use(tsec)) + return 0; + + if (tsec->root_pathname) { + ret = enable_jail(current); + if (ret) { + /* if we failed, nix out the root/ip requests */ + jail_task_free_security(current); + return ret; + } + } + return 0; +} + +/* + * Process in jail may not create devices + * Thanks to Brad Spender for pointing out fifos should be allowed. + */ +/* TODO: We may want to allow /dev/log, at least... 
*/ +static int +jail_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + if (!in_jail(current)) + return 0; + + if (S_ISFIFO(mode)) + return 0; + + return -EPERM; +} + +/* yanked from fs/proc/base.c */ +static unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + +/* + * jail_proc_inode_permission: + * called only when current is in a jail, and is trying to reach + * /proc/<pid>. We check whether <pid> is in the same jail as + * current. If not, permission is denied. + * + * NOTE: On the one hand, the task_to_inode(inode)->i_security + * approach seems cleaner, but on the other, this prevents us + * from unloading bsdjail for awhile... + */ +static int +jail_proc_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct jail_struct *tsec = jail_of(current); + struct dentry *dentry = nd->dentry; + unsigned pid; + + pid = name_to_int(dentry); + if (pid == ~0U) { + struct qstr *dname = &dentry->d_name; + if (strcmp(dname->name, "scsi")==0 || + strcmp(dname->name, "sys")==0 || + strcmp(dname->name, "ide")==0) + return -EPERM; + return 0; + } + + if (dentry->d_parent != dentry->d_sb->s_root) + return 0; + if (get_inode_security(inode) != tsec) + return -ENOENT; + + return 0; +} + +/* + * Here is our attempt to prevent chroot escapes. + */ +static int +is_jailroot_parent(struct dentry *candidate, struct dentry *root, + struct vfsmount *rootmnt) +{ + if (candidate == root) + return 0; + + /* simple case: fs->root/.. == candidate */ + if (root->d_parent == candidate) + return 1; + + /* + * now more complicated: if fs->root is a mounted directory, + * then chdir(..) 
out of fs->root, at follow_dotdot, will follow + * the fs->root mount point. So we must check the parent dir of + * the fs->root mount point. + */ + if (rootmnt->mnt_root == root && rootmnt->mnt_mountpoint!=root) { + root = rootmnt->mnt_mountpoint; + rootmnt = rootmnt->mnt_parent; + return is_jailroot_parent(candidate, root, rootmnt); + } + + return 0; +} + +/* + * A process in a jail may not see that /proc/<pid> exists for + * process not in its jail + * Unfortunately we can't pretend that pid for the starting process + * is 1, as vserver does. + */ +static int jail_task_lookup(struct task_struct *p) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec) + return 0; + if (tsec == jail_of(p)) + return 0; + return -EPERM; +} +/* + * security_task_to_inode: + * Set inode->security = task's jail. + */ +static void jail_task_to_inode(struct task_struct *p, struct inode *inode) +{ + struct jail_struct *tsec = jail_of(p); + + if (!tsec || !in_use(tsec)) + return; + if (get_inode_security(inode)) + return; + kref_get(&tsec->kref); + set_inode_security(inode, tsec); +} + +/* + * inode_permission: + * If we are trying to look into certain /proc files from in a jail, we + * may deny permission. + * If we are trying to cd(..), but the cwd is the root of our jail, then + * permission is denied. + */ +static int +jail_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec)) + return 0; + + if (!nd) + return 0; + + if (nd->dentry && + strcmp(nd->dentry->d_sb->s_type->name, "proc")==0) { + return jail_proc_inode_permission(inode, mask, nd); + + } + + if (!(mask&MAY_EXEC)) + return 0; + if (!inode || !S_ISDIR(inode->i_mode)) + return 0; + + if (is_jailroot_parent(nd->dentry, tsec->dentry, tsec->mnt)) { + bsdj_debug(WARN,"Attempt to chdir(..) 
out of jail!\n" + "(%s is a subdir of %s)\n", + tsec->dentry->d_name.name, + nd->dentry->d_name.name); + return -EPERM; + } + + return 0; +} + +/* + * A function which returns -ENOENT if dentry is the dentry for + * a /proc/<pid> directory. It returns 0 otherwise. + */ +static inline int +generic_procpid_check(struct dentry *dentry) +{ + struct jail_struct *jail = jail_of(current); + unsigned pid = name_to_int(dentry); + + if (!jail || !in_use(jail)) + return 0; + if (pid == ~0U) + return 0; + if (strcmp(dentry->d_sb->s_type->name, "proc")!=0) + return 0; + if (dentry->d_parent != dentry->d_sb->s_root) + return 0; + if (get_inode_security(dentry->d_inode) != jail) + return -ENOENT; + return 0; +} + +/* + * We want getattr to fail on /proc/<pid> to prevent leakage through, for + * instance, ls -d. + */ +static int +jail_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +{ + return generic_procpid_check(dentry); +} + +/* This probably is not necessary - /proc does not support xattrs? */ +static int +jail_inode_getxattr(struct dentry *dentry, char *name) +{ + return generic_procpid_check(dentry); +} + +/* process in jail may not send signal to process not in the same jail */ +static int +jail_task_kill(struct task_struct *p, struct siginfo *info, int sig) +{ + struct jail_struct *tsec = jail_of(current); + + if (!tsec || !in_use(tsec)) + return 0; + + if (tsec == jail_of(p)) + return 0; + + if (sig==SIGCHLD) + return 0; + + return -EPERM; +} + +/* + * LSM hooks to limit jailed process' abilities to muck with resource + * limits + */ +static int jail_task_setrlimit (unsigned int resource, struct rlimit *new_rlim) +{ + if (!in_jail(current)) + return 0; + + return -EPERM; +} + +static int jail_task_setscheduler (struct task_struct *p, int policy, + struct sched_param *lp) +{ + if (!in_jail(current)) + return 0; + + return -EPERM; +} + +/* + * LSM hooks to limit IPC access. 
+ */
+/*
+ * Common check shared by the IPC hooks.  Returns 0 when @target has no
+ * jail or its jail is not in use.  Otherwise the security pointer of the
+ * IPC object @p must be @target's jail; if it is not, -EPERM is returned.
+ */
+static inline int
+basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target)
+{
+	struct jail_struct *tsec = jail_of(target);
+
+	if (!tsec || !in_use(tsec))
+		return 0;
+
+	if (get_ipc_security(p) != tsec)
+		return -EPERM;
+
+	return 0;
+}
+/* ipc_permission hook: run the common jail check on behalf of current. */
+static int
+jail_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
+{
+	return basic_ipc_security_check(ipcp, current);
+}
+/*
+ * shm_alloc_security hook: when current is in an active jail, hand that
+ * jail to set_ipc_security() for the segment's shm_perm and take a
+ * reference on the jail.  Always returns 0.
+ */
+static int
+jail_shm_alloc_security (struct shmid_kernel *shp)
+{
+	struct jail_struct *tsec = jail_of(current);
+
+	if (!tsec || !in_use(tsec))
+		return 0;
+	set_ipc_security(shp->shm_perm, tsec);
+	kref_get(&tsec->kref);
+	return 0;
+}
+/* shm_free_security hook: pass the segment's shm_perm to free_ipc_security(). */
+static void
+jail_shm_free_security (struct shmid_kernel *shp)
+{
+	free_ipc_security(&shp->shm_perm);
+}
+/* shm_associate hook: common jail check for current. */
+static int
+jail_shm_associate (struct shmid_kernel *shp, int shmflg)
+{
+	return basic_ipc_security_check(&shp->shm_perm, current);
+}
+/*
+ * shm_shmctl hook: IPC_INFO and SHM_INFO are always allowed; every other
+ * command goes through the common jail check for current.
+ */
+static int
+jail_shm_shmctl(struct shmid_kernel *shp, int cmd)
+{
+	if (cmd == IPC_INFO || cmd == SHM_INFO)
+		return 0;
+
+	return basic_ipc_security_check(&shp->shm_perm, current);
+}
+/* shm_shmat hook: common jail check for current. */
+static int
+jail_shm_shmat(struct shmid_kernel *shp, char *shmaddr, int shmflg)
+{
+	return basic_ipc_security_check(&shp->shm_perm, current);
+}
+/*
+ * msg_queue_alloc_security hook: when current is in an active jail, hand
+ * that jail to set_ipc_security() for the queue's q_perm and take a
+ * reference on the jail.  Always returns 0.
+ */
+static int
+jail_msg_queue_alloc(struct msg_queue *msq)
+{
+	struct jail_struct *tsec = jail_of(current);
+
+	if (!tsec || !in_use(tsec))
+		return 0;
+	set_ipc_security(msq->q_perm, tsec);
+	kref_get(&tsec->kref);
+	return 0;
+}
+/* msg_queue_free_security hook: pass the queue's q_perm to free_ipc_security(). */
+static void
+jail_msg_queue_free(struct msg_queue *msq)
+{
+	free_ipc_security(&msq->q_perm);
+}
+/* msg_queue_associate hook: common jail check for current. */
+static int jail_msg_queue_associate(struct msg_queue *msq, int flag)
+{
+	return basic_ipc_security_check(&msq->q_perm, current);
+}
+/*
+ * msg_queue_msgctl hook: IPC_INFO and MSG_INFO are always allowed; every
+ * other command goes through the common jail check for current.
+ */
+static int
+jail_msg_queue_msgctl(struct msg_queue *msq, int cmd)
+{
+	if (cmd == IPC_INFO || cmd == MSG_INFO)
+		return 0;
+
+	return basic_ipc_security_check(&msq->q_perm, current);
+}
+/* msg_queue_msgsnd hook: common jail check for current. */
+static int
+jail_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg)
+{
+	return basic_ipc_security_check(&msq->q_perm, current);
+}
+/*
+ * msg_queue_msgrcv hook: unlike the other hooks, the common jail check is
+ * made against @target rather than current.
+ */
+static int
+jail_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
+		struct task_struct *target, long type, int mode)
+
+{
+	return basic_ipc_security_check(&msq->q_perm, target);
+}
+/*
+ * sem_alloc_security hook: when current is in an active jail, hand that
+ * jail to set_ipc_security() for the array's sem_perm and take a
+ * reference on the jail.  Always returns 0.
+ */
+static int
+jail_sem_alloc_security(struct sem_array *sma)
+{
+	struct jail_struct *tsec = jail_of(current);
+
+	if (!tsec || !in_use(tsec))
+		return 0;
+	set_ipc_security(sma->sem_perm, tsec);
+	kref_get(&tsec->kref);
+	return 0;
+}
+/* sem_free_security hook: pass the array's sem_perm to free_ipc_security(). */
+static void
+jail_sem_free_security(struct sem_array *sma)
+{
+	free_ipc_security(&sma->sem_perm);
+}
+/* sem_associate hook: common jail check for current. */
+static int
+jail_sem_associate(struct sem_array *sma, int semflg)
+{
+	return basic_ipc_security_check(&sma->sem_perm, current);
+}
+/*
+ * sem_semctl hook: IPC_INFO and SEM_INFO are always allowed; every other
+ * command goes through the common jail check for current.
+ */
+static int
+jail_sem_semctl(struct sem_array *sma, int cmd)
+{
+	if (cmd == IPC_INFO || cmd == SEM_INFO)
+		return 0;
+	return basic_ipc_security_check(&sma->sem_perm, current);
+}
+/* sem_semop hook: common jail check for current. */
+static int
+jail_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops,
+		int alter)
+{
+	return basic_ipc_security_check(&sma->sem_perm, current);
+}
+/*
+ * The hook table handed to register_security()/mod_reg_security() in
+ * bsdjail_init().  Hooks not listed here are left unset by this module.
+ */
+static struct security_operations bsdjail_security_ops = {
+	.ptrace  =			jail_ptrace,
+	.capable =			jail_capable,
+
+	.task_kill =			jail_task_kill,
+	.task_alloc_security =		jail_task_alloc_security,
+	.task_free_security =		jail_task_free_security,
+	.bprm_alloc_security =		jail_bprm_alloc_security,
+	.task_create =			jail_security_task_create,
+	.task_to_inode =		jail_task_to_inode,
+	.task_lookup =			jail_task_lookup,
+
+	.task_setrlimit =		jail_task_setrlimit,
+	.task_setscheduler =		jail_task_setscheduler,
+
+	.setprocattr =                  jail_setprocattr,
+	.getprocattr =                  jail_getprocattr,
+
+	.file_set_fowner =		jail_file_set_fowner,
+	.file_send_sigiotask =		jail_file_send_sigiotask,
+	.file_free_security =		free_file_security,
+
+	.socket_bind =			jail_socket_bind,
+	.socket_listen =		jail_socket_listen,
+	.socket_create =		jail_socket_create,
+	.socket_post_create =		jail_socket_post_create,
+	.unix_stream_connect =		jail_socket_unix_stream_connect,
+	.unix_may_send =		jail_socket_unix_may_send,
+	.sk_free_security =		free_sock_security,
+
+	.inode_mknod =			jail_inode_mknod,
+	.inode_permission =		jail_inode_permission,
+	.inode_free_security =		free_inode_security,
+	.inode_getattr =		jail_inode_getattr,
+	.inode_getxattr =		jail_inode_getxattr,
+	.sb_mount =			jail_mount,
+	.sb_umount =			jail_umount,
+
+	.ipc_permission =		jail_ipc_permission,
+	.shm_alloc_security = 		jail_shm_alloc_security,
+	.shm_free_security = 		jail_shm_free_security,
+	.shm_associate =		jail_shm_associate,
+	.shm_shmctl =			jail_shm_shmctl,
+	.shm_shmat =			jail_shm_shmat,
+
+	.msg_queue_alloc_security =	jail_msg_queue_alloc,
+	.msg_queue_free_security =	jail_msg_queue_free,
+	.msg_queue_associate =		jail_msg_queue_associate,
+	.msg_queue_msgctl =		jail_msg_queue_msgctl,
+	.msg_queue_msgsnd =		jail_msg_queue_msgsnd,
+	.msg_queue_msgrcv =		jail_msg_queue_msgrcv,
+
+	.sem_alloc_security = 		jail_sem_alloc_security,
+	.sem_free_security =		jail_sem_free_security,
+	.sem_associate =		jail_sem_associate,
+	.sem_semctl =			jail_sem_semctl,
+	.sem_semop =			jail_sem_semop,
+};
+/*
+ * Module init: try register_security() first.  If that fails, log it and
+ * fall back to mod_reg_security() under MY_NAME; a failure there makes
+ * init return -EINVAL, success sets the "secondary" flag.  Returns 0 once
+ * either registration has succeeded.
+ */
+static int __init bsdjail_init (void)
+{
+	int rc = 0;
+
+	if (register_security (&bsdjail_security_ops)) {
+		printk (KERN_INFO
+			"Failure registering BSD Jail module with the kernel\n");
+
+		rc = mod_reg_security(MY_NAME, &bsdjail_security_ops);
+		if (rc < 0) {
+			printk (KERN_INFO "Failure registering BSD Jail "
+				" module with primary security module.\n");
+			return -EINVAL;
+		}
+		secondary = 1;	/* tells bsdjail_exit() which unregister call to use */
+	}
+	printk (KERN_INFO "BSD Jail module initialized.\n");
+
+	return 0;
+}
+/*
+ * Module exit: undo whichever registration bsdjail_init() performed, as
+ * recorded in "secondary".  Failures to unregister are only logged.
+ */
+static void __exit bsdjail_exit (void)
+{
+	if (secondary) {
+		if (mod_unreg_security (MY_NAME, &bsdjail_security_ops))
+			printk (KERN_INFO "Failure unregistering BSD Jail "
+				" module with primary module.\n");
+	} else {
+		if (unregister_security (&bsdjail_security_ops)) {
+			printk (KERN_INFO "Failure unregistering BSD Jail "
+				"module with the kernel\n");
+		}
+	}
+
+	printk (KERN_INFO "BSD Jail module removed\n");
+}
+
+security_initcall (bsdjail_init);
+module_exit (bsdjail_exit);
+
+MODULE_DESCRIPTION("BSD Jail LSM.");
+MODULE_LICENSE("GPL");
diff -Nrup linux-2.6.9-rc3-bk6/security/Kconfig linux-2.6.9-rc3-bk6-jail/security/Kconfig
--- linux-2.6.9-rc3-bk6/security/Kconfig 2004-10-06 10:08:02.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/Kconfig 2004-10-06 10:52:13.000000000 -0500
@@ -46,5 +46,16 @@ config SECURITY_ROOTPLUG
 source security/selinux/Kconfig
+config SECURITY_BSDJAIL
+	tristate "BSD Jail LSM"
+	depends on SECURITY
+	select SECURITY_NETWORK
+	help
+	  Provides BSD Jail compartmentalization functionality.
+	  See Documentation/bsdjail.txt for more information and
+	  usage instructions.
+
+	  If you are unsure how to answer this question, answer N.
+
 endmenu
diff -Nrup linux-2.6.9-rc3-bk6/security/Makefile linux-2.6.9-rc3-bk6-jail/security/Makefile
--- linux-2.6.9-rc3-bk6/security/Makefile 2004-08-14 00:37:26.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/Makefile 2004-10-06 10:52:13.000000000 -0500
@@ -15,3 +15,4 @@ obj-$(CONFIG_SECURITY) += security.o d
 obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
 obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o
 obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o
+obj-$(CONFIG_SECURITY_BSDJAIL) += bsdjail.o
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-06 20:24 ` [patch 1/3] lsm: add bsdjail module Serge Hallyn @ 2004-10-06 23:26 ` Andrew Morton 2004-10-07 4:08 ` Serge E. Hallyn ` (2 more replies) 0 siblings, 3 replies; 37+ messages in thread From: Andrew Morton @ 2004-10-06 23:26 UTC (permalink / raw) To: Serge Hallyn; +Cc: chrisw, linux-kernel, serue Serge Hallyn <serue@us.ibm.com> wrote: > > > Attached is a patch against the security Kconfig and Makefile to support > bsdjail, as well as the bsdjail.c file itself. bsdjail offers > functionality similar to (but more limited than) the vserver patch. I don't recall anyone requesting this feature. Tell me why we should add it to Linux? > + > +#define in_use(x) (x->jail_flags & IN_USE) > +#define set_in_use(x) (x->jail_flags |= IN_USE) > + > +#define got_network(x) (x->jail_flags & (GOT_IPV4 | GOT_IPV6)) > +#define got_ipv4(x) (x->jail_flags & (GOT_IPV4)) > +#define got_ipv6(x) (x->jail_flags & (GOT_IPV6)) > +#define set_ipv4(x) (x->jail_flags |= GOT_IPV4) > +#define set_ipv6(x) (x->jail_flags |= GOT_IPV6) > +#define unset_got_ipv4(x) (x->jail_flags &= ~GOT_IPV4) > +#define unset_got_ipv6(x) (x->jail_flags &= ~GOT_IPV6) > +#define get_task_security(task) (task->security) > +#define get_inode_security(inode) (inode->i_security) > +#define get_sock_security(sock) (sock->sk_security) > +#define get_file_security(file) (file->f_security) > +#define get_ipc_security(ipc) (ipc->security) > +#define jail_of(proc) (get_task_security(proc)) > + The above tricks may make the code easier to type, but I find they make the code harder for others to read, and that's more important. We prefer to open-code such things. > + if (tsec->root_pathname) > + kfree(tsec->root_pathname); > + if (tsec->ip4_addr_name) > + kfree(tsec->ip4_addr_name); > + if (tsec->ip6_addr_name) > + kfree(tsec->ip6_addr_name); kfree(0) is permitted. Some people like to do the double test anyway but I don't think it adds much here. 
> + set_task_security(task,NULL); whitespace nit: In some places you have spaces after the commas and in others you do not. > + kref_put(&tsec->kref, release_jail); This is the preferred style. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-06 23:26 ` Andrew Morton @ 2004-10-07 4:08 ` Serge E. Hallyn 2004-10-07 6:18 ` James Morris 2004-10-07 12:06 ` Alan Cox 2004-10-07 19:01 ` [patch 2/3] " Serge E. Hallyn 2 siblings, 1 reply; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-07 4:08 UTC (permalink / raw) To: Andrew Morton; +Cc: chrisw, linux-kernel, serue Thank you for the feedback. I have implemented these changes, but want to run a few tests tomorrow before I send them out to make sure I didn't break anything... Quoting Andrew Morton (akpm@osdl.org): > Serge Hallyn <serue@us.ibm.com> wrote: > > > > > > Attached is a patch against the security Kconfig and Makefile to support > > bsdjail, as well as the bsdjail.c file itself. bsdjail offers > > functionality similar to (but more limited than) the vserver patch. > > I don't recall anyone requesting this feature. Tell me why we should add > it to Linux? Because it gives Linux a functionality like FreeBSD's jail and Solaris' zones in an unobtrusive manner, without impacting users who don't wish to use it (except for the extra security_task_lookup function calls). It allows me (for instance) to compartmentalize apache and sendmail by running them in different jails. Or offer family members, customers, or whoever, ssh accounts into seemingly distinct boxes, which are simply sshd's under different jails at different network aliases. Each would see their own private filesystems and network, have their own usage limits, and (mostly) not see processes outside their respective jails. They can't {un,}load modules, ptrace unjailed processes or send signals to them, create devices, mount, or umount. It is functionality which otherwise would have to be achieved by running vmware or uml, but far more lightweight, since no new OS needs to be run. (Once read-only bind mounts are implemented, it will become even more lightweight, as large pieces of filesystem trees will be shareable readonly between jails.) 
thanks, -serge ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 4:08 ` Serge E. Hallyn @ 2004-10-07 6:18 ` James Morris 2004-10-07 6:22 ` Andrew Morton 0 siblings, 1 reply; 37+ messages in thread From: James Morris @ 2004-10-07 6:18 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Andrew Morton, chrisw, linux-kernel On Thu, 7 Oct 2004, Serge E. Hallyn wrote: > Because it gives Linux a functionality like FreeBSD's jail and Solaris' > zones in an unobtrusive manner, without impacting users who don't wish > to use it (except for the extra security_task_lookup function calls). Yes, as an LSM module, it can be configured out. I think it's a good use of the LSM framework, and may be useful for people migrating to Linux from legacy Solaris and FreeBSD. - James -- James Morris <jmorris@redhat.com> ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 6:18 ` James Morris @ 2004-10-07 6:22 ` Andrew Morton 2004-10-07 16:06 ` Chris Wright 0 siblings, 1 reply; 37+ messages in thread From: Andrew Morton @ 2004-10-07 6:22 UTC (permalink / raw) To: James Morris; +Cc: serue, chrisw, linux-kernel James Morris <jmorris@redhat.com> wrote: > > On Thu, 7 Oct 2004, Serge E. Hallyn wrote: > > > Because it gives Linux a functionality like FreeBSD's jail and Solaris' > > zones in an unobtrusive manner, without impacting users who don't wish > > to use it (except for the extra security_task_lookup function calls). > > Yes, as an LSM module, it can be configured out. I think it's a good use > of the LSM framework, and may be useful for people migrating to Linux from > legacy Solaris and FreeBSD. Sure, but that's a bit speculative for adding a feature to the mainline kernel. Is there vendor pull for this feature? Do IBM have customers requiring it? "someone might like this" is not a sufficient basis for adding stuff to the kernel, sorry. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 6:22 ` Andrew Morton @ 2004-10-07 16:06 ` Chris Wright 2004-10-07 18:40 ` Andrew Morton 0 siblings, 1 reply; 37+ messages in thread From: Chris Wright @ 2004-10-07 16:06 UTC (permalink / raw) To: Andrew Morton; +Cc: James Morris, serue, chrisw, linux-kernel * Andrew Morton (akpm@osdl.org) wrote: > James Morris <jmorris@redhat.com> wrote: > > On Thu, 7 Oct 2004, Serge E. Hallyn wrote: > > > > > Because it gives Linux a functionality like FreeBSD's jail and Solaris' > > > zones in an unobtrusive manner, without impacting users who don't wish > > > to use it (except for the extra security_task_lookup function calls). > > > > Yes, as an LSM module, it can be configured out. I think it's a good use > > of the LSM framework, and may be useful for people migrating to Linux from > > legacy Solaris and FreeBSD. > > Sure, but that's a bit speculative for adding a feature to the mainline > kernel. Which feature are you concerned over, the additional hook or the new module? The module is a no-op for anybody who doesn't want it. I can't vouch for the number of users of this module although I've seen some positive feedback from users. One nice bit is that it goes a way towards helping vserver which does have quite a few users. This module really demonstrates one of the points of LSM...to support multiple security models. thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 16:06 ` Chris Wright @ 2004-10-07 18:40 ` Andrew Morton 2004-10-07 18:52 ` Chris Wright 0 siblings, 1 reply; 37+ messages in thread From: Andrew Morton @ 2004-10-07 18:40 UTC (permalink / raw) To: Chris Wright; +Cc: jmorris, serue, chrisw, linux-kernel Chris Wright <chrisw@osdl.org> wrote: > > * Andrew Morton (akpm@osdl.org) wrote: > > James Morris <jmorris@redhat.com> wrote: > > > On Thu, 7 Oct 2004, Serge E. Hallyn wrote: > > > > > > > Because it gives Linux a functionality like FreeBSD's jail and Solaris' > > > > zones in an unobtrusive manner, without impacting users who don't wish > > > > to use it (except for the extra security_task_lookup function calls). > > > > > > Yes, as an LSM module, it can be configured out. I think it's a good use > > > of the LSM framework, and may be useful for people migrating to Linux from > > > legacy Solaris and FreeBSD. > > > > Sure, but that's a bit speculative for adding a feature to the mainline > > kernel. > > Which feature are you concerned over, the additional hook or the > new module? I am concerned about the presence of new code - simple as that. We need to be able to demonstrate that the new code is sufficiently useful to a sufficiently large number of people as to warrant the cost of maintaining it in the tree for the rest of eternity. > The module is a no-op for anybody who doesn't want it. It still needs to be maintained. > I can't vouch for the number of users of this module although I've seen > some positive feedback from users. One nice bit is that it goes a way > towards helping vserver which does have quite a few users. Tell us more. > This module > really demonstrates one of the points of LSM...to support multiple > security models. Sure. But that doesn't mean that those modules have to live at kernel.org rather than, say, at bsdjail.sourceforge.net. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 18:40 ` Andrew Morton @ 2004-10-07 18:52 ` Chris Wright 2004-10-07 20:56 ` Serge E. Hallyn 2004-10-10 6:24 ` Herbert Poetzl 0 siblings, 2 replies; 37+ messages in thread From: Chris Wright @ 2004-10-07 18:52 UTC (permalink / raw) To: Andrew Morton; +Cc: Chris Wright, jmorris, serue, linux-kernel * Andrew Morton (akpm@osdl.org) wrote: > Chris Wright <chrisw@osdl.org> wrote: > > * Andrew Morton (akpm@osdl.org) wrote: > > Which feature are you concerned over, the additional hook or the > > new module? > > I am concerned about the presence of new code - simple as that. Understood. > We need to be able to demonstrate that the new code is sufficiently useful > to a sufficiently large number of people as to warrant the cost of > maintaining it in the tree for the rest of eternity. That's fine. Serge, can you enlighten us with an idea of the users of this code? > > The module is a no-op for anybody who doesn't want it. > > It still needs to be maintained. Absolutely. > > I can't vouch for the number of users of this module although I've seen > > some positive feedback from users. One nice bit is that it goes a way > > towards helping vserver which does have quite a few users. > > Tell us more. One portion of the vserver project (that which has to do with security and isolation) could be largely covered by this work. And vserver is an active project with many users AFAICT. The vserver maintainer has expressed some interest in this as well. The other portion of the project, which does the resource limiting has a decent chance of working well with something like CKRM or similar. > > This module > > really demonstrates one of the points of LSM...to support multiple > > security models. > > Sure. But that doesn't mean that those modules have to live at kernel.org > rather than, say, at bsdjail.sourceforge.net. I agree, some userbase does wonders to justify mainlining the code. 
thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 18:52 ` Chris Wright @ 2004-10-07 20:56 ` Serge E. Hallyn 2004-10-10 6:24 ` Herbert Poetzl 1 sibling, 0 replies; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-07 20:56 UTC (permalink / raw) To: Chris Wright; +Cc: Andrew Morton, jmorris, serue, linux-kernel > * Andrew Morton (akpm@osdl.org) wrote: > > Chris Wright <chrisw@osdl.org> wrote: > > > * Andrew Morton (akpm@osdl.org) wrote: > > > Which feature are you concerned over, the additional hook or the > > > new module? > > > > I am concerned about the presence of new code - simple as that. > > Understood. We do have time allotted for maintenance of LSMs, so not only am I interested in maintaining bsdjail on my own, but I don't even have to do it in my free time :) > > We need to be able to demonstrate that the new code is sufficiently useful > > to a sufficiently large number of people as to warrant the cost of > > maintaining it in the tree for the rest of eternity. > > That's fine. Serge, can you enlighten us with an idea of the users of > this code? I am "just a developer", and don't have ready access to any marketers. There was no customer demand which we were addressing. We just saw it as a very useful feature easy to implement. Some people have privately expressed interest in the patch over the last few months as I've been sending out patches. And as Chris has mentioned, the vserver community appears to be thriving, and should be partially (though by no means fully!) served by this module. If nothing else it should reduce the size of the patch they need to maintain. I wish I had a better answer... -serge ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-07 18:52 ` Chris Wright 2004-10-07 20:56 ` Serge E. Hallyn @ 2004-10-10 6:24 ` Herbert Poetzl 1 sibling, 0 replies; 37+ messages in thread From: Herbert Poetzl @ 2004-10-10 6:24 UTC (permalink / raw) To: Chris Wright; +Cc: Andrew Morton, jmorris, serue, linux-kernel On Thu, Oct 07, 2004 at 11:52:40AM -0700, Chris Wright wrote: > * Andrew Morton (akpm@osdl.org) wrote: > > Chris Wright <chrisw@osdl.org> wrote: > > > * Andrew Morton (akpm@osdl.org) wrote: > > > Which feature are you concerned over, the additional hook or the > > > new module? > > > > I am concerned about the presence of new code - simple as that. > > Understood. > > > We need to be able to demonstrate that the new code is sufficiently useful > > to a sufficiently large number of people as to warrant the cost of > > maintaining it in the tree for the rest of eternity. > > That's fine. Serge, can you enlighten us with an idea of the users of > this code? > > > > The module is a no-op for anybody who doesn't want it. > > > > It still needs to be maintained. > > Absolutely. > > > > I can't vouch for the number of users of this module although I've seen > > > some positive feedback from users. One nice bit is that it goes a way > > > towards helping vserver which does have quite a few users. > > > > Tell us more. > > One portion of the vserver project (that which has to do with security > and isolation) could be largely covered by this work. And vserver > is an active project with many users AFAICT. The vserver maintainer > has expressed some interest in this as well. The other portion of the > project, which does the resource limiting has a decent chance of working > well with something like CKRM or similar. well, as 'the vserver project' probably means the 'linux-vserver project', I would like to point out why and where the bsdjail LSM, in it's current form is flawed from the linux-vserver point of view ... 
Serge, don't get me wrong, this is neither against you nor against the bsdjail LSM, which I consider an interesting approach, and I'm still confident that we find some way of cooperation ... (copied the jail struct here to comment it) | struct jail_struct { | struct kref kref; | | /* these are set on writes to /proc/<pid>/attr/exec */ | char *root_pathname; /* char * containing path to use as jail / */ linux-vserver uses namespaces to create the vservers, only the legacy method uses a simple chroot() to setup the vserver environment ... | char *ip4_addr_name; /* char * containing ip4 addr to use for jail */ | char *ip6_addr_name; /* char * containing ip6 addr to use for jail */ linux-vserver is slowly moving from chbind (which restricts a process and it's children to a set of IPs to an iptable (marking) based approach, which is much more flexible | /* these are set when a jail becomes active */ | __u32 addr4; /* internal form of ip4_addr_name */ | struct in6_addr addr6; /* internal form of ip6_addr_name */ up to 16 addresses are currently allowed in this set in the future the limit will go away (network code is actually the oldest piece) by using 'markings' (network is virtualized to allow binding to 0.0.0.0) | struct dentry *dentry; /* dentry of fs root */ | struct vfsmount *mnt; /* vfsmnt of fs root */ | /* Resource limits. 0 = no limit */ | int max_nrtask; /* maximum number of tasks within this jail. */ | int cur_nrtask; /* current number of tasks within this jail. */ linux-vserver already has a nice and usable resource management system for most resources, supporting much more limits than those ... 
| long maxtimeslice; /* max timeslice in ms for procs in this jail */ | long nice; /* nice level for processes in this jail */ | long max_data, max_memlock; /* equivalent to RLIMIT_{DATA,MEMLOCK} */ the resource limitations should not be part of a security module, and the scheduler slice would be a step in the wrong redirection, as linux-vserver already uses token buckets to control the scheduler | char jail_flags; |}; also many distributions (and distribution hosting is _the_ main application area for linux-vserver) require the 'jail' to be as similar as possible to a real host (like a separate init process, or the ability to renice services) so some of the 'features' of that LSM are contra productive here not to mention that linux-vserver's security is mainly based on linux capabilities which are not handled by this LSM at all ... aside from that, without the notion of a security context, which can be controlled and entered from outside (the host) the 'jail' can not be used for typical hosting purposes > > > This module > > > really demonstrates one of the points of LSM...to support multiple > > > security models. > > > > Sure. But that doesn't mean that those modules have to live at kernel.org > > rather than, say, at bsdjail.sourceforge.net. > > I agree, some userbase does wonders to justify mainlining the code. I'm pretty confident the fact that a big company seems interested in this LSM will help with the integration into mainline, but I can not say that 'linux-vserver' users or developers will have any immediate benefit from it's inclusion ... why? 
- the linux-vserver user would have to apply an additional patch anyway (and install special tools to control the vservers) - the LSM does not provide what linux-vserver requires and would need heavy modification (missing context, namespaces, network, most virtualization, resource isolation) - once CKRM will be able to replace the resource management currently present in linux-vserver, it (CKRM) will collide with the resource stuff done in this LSM so while I'm fine with the idea to move a part of linux-vserver to the LSM framework (once the LSM stackering issues are resolved), this part would not be usable without a decent part of kernel modifications to do the virtualization and the resource isolation of course if there _is_ interest to include linux-vserver like features into mainline, then there should be some commitment to do it properly and this includes not limiting it to a security module, where it requires much more to be useful to anyone ... finally I have no problem to maintain the 'vserver' patches outside the kernel tree, as they are probably only of limited interest for the typical linux desktop user ... best, Herbert (linux-vserver maintainer) PS: for details see the linux-vserver paper at http://www.linux-vserver.org/index.php?page=Linux-VServer-Paper > thanks, > -chris > -- > Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 1/3] lsm: add bsdjail module 2004-10-06 23:26 ` Andrew Morton 2004-10-07 4:08 ` Serge E. Hallyn @ 2004-10-07 12:06 ` Alan Cox 2004-10-07 19:01 ` [patch 2/3] " Serge E. Hallyn 2 siblings, 0 replies; 37+ messages in thread From: Alan Cox @ 2004-10-07 12:06 UTC (permalink / raw) To: Andrew Morton; +Cc: Serge Hallyn, chrisw, Linux Kernel Mailing List On Iau, 2004-10-07 at 00:26, Andrew Morton wrote: > I don't recall anyone requesting this feature. Tell me why we should add > it to Linux? Subject to the code cleanups and stuff you've noted I'd actually like to see BSD jail stuff in our security modules because it has the virtue of simplicity. If it can be extended to do all of vserver even better. J Random Admin has a good chance at configuring BSD jail setups. J Random Admin needs some serious tools that don't exist to set up SELinux the same way. In the security world simplicity is often a virtue, both in code and concepts. Alan ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-06 23:26 ` Andrew Morton 2004-10-07 4:08 ` Serge E. Hallyn 2004-10-07 12:06 ` Alan Cox @ 2004-10-07 19:01 ` Serge E. Hallyn 2004-10-07 19:42 ` Chris Wright 2004-10-10 10:41 ` Christoph Hellwig 2 siblings, 2 replies; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-07 19:01 UTC (permalink / raw) To: Andrew Morton; +Cc: chrisw, linux-kernel, serue Attached is a new version of the bsdjail patch with the requested code cleanups applied. Changelog: Sep 10, 2004: original version Sep 12, 2004: add ipv6 support Sep 13, 2004: support simultaneous ipv4+ipv6 Oct 6, 2004: move kref release function to kref_put from kref_init Oct 7, 2004: requested code cleanups (mainly nix #defines) thanks, -serge Signed-Off-By: Serge E. Hallyn <serue@us.ibm.com> diff -Nrup linux-2.6.9-rc3-bk6/security/bsdjail.c linux-2.6.9-rc3-bk6-jail/security/bsdjail.c --- linux-2.6.9-rc3-bk6/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-rc3-bk6-jail/security/bsdjail.c 2004-10-07 11:30:21.000000000 -0500 @@ -0,0 +1,1495 @@ +/* + * File: linux/security/bsdjail.c + * Author: Serge Hallyn (serue@us.ibm.com) + * Date: Sep 12, 2004 + * + * (See Documentation/bsdjail.txt for more information) + * + * Copyright (C) 2004 International Business Machines <serue@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/security.h> +#include <linux/namei.h> +#include <linux/namespace.h> +#include <linux/proc_fs.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/pagemap.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <linux/mount.h> +#include <asm/uaccess.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/seq_file.h> +#include <linux/un.h> +#include <linux/smp_lock.h> +#include <linux/kref.h> + +static int jail_debug = 0; +MODULE_PARM(jail_debug, "i"); +MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n"); + +#define DBG 0 +#define WARN 1 +#define bsdj_debug(how, fmt, arg... ) \ + do { \ + if ( how || jail_debug ) \ + printk(KERN_NOTICE "%s: %s: " fmt, \ + MY_NAME, __FUNCTION__, \ + ## arg ); \ + } while ( 0 ) + +#define MY_NAME "bsdjail" + +/* flag to keep track of how we were registered */ +static int secondary = 0; + +/* + * The task structure holding jail information. + * Taskp->security points to one of these (or is null). + * There is exactly one jail_struct for each jail. If >1 process + * are in the same jail, they share the same jail_struct. + */ +struct jail_struct { + struct kref kref; + + /* these are set on writes to /proc/<pid>/attr/exec */ + char *root_pathname; /* char * containing path to use as jail / */ + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */ + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */ + + /* these are set when a jail becomes active */ + __u32 addr4; /* internal form of ip4_addr_name */ + struct in6_addr addr6; /* internal form of ip6_addr_name */ + + struct dentry *dentry; /* dentry of fs root */ + struct vfsmount *mnt; /* vfsmnt of fs root */ + + /* Resource limits. 0 = no limit */ + int max_nrtask; /* maximum number of tasks within this jail. 
*/ + int cur_nrtask; /* current number of tasks within this jail. */ + long maxtimeslice; /* max timeslice in ms for procs in this jail */ + long nice; /* nice level for processes in this jail */ + long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */ +/* values for the jail_flags field */ +#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */ +#define GOT_IPV4 2 +#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */ + char jail_flags; +}; + +/* + * disable_jail: A jail which was in use, but has no references + * left, is disabled - we free up the mountpoint and dentry, and + * give up our reference on the module. + * + * don't need to put namespace, it will be done automatically + * when the last process in jail is put. + * DO need to put the dentry and vfsmount + */ +static void +disable_jail(struct jail_struct *tsec) +{ + dput(tsec->dentry); + mntput(tsec->mnt); + module_put(THIS_MODULE); +} + + +static void free_jail(struct jail_struct *tsec) +{ + if (!tsec) + return; + + kfree(tsec->root_pathname); + kfree(tsec->ip4_addr_name); + kfree(tsec->ip6_addr_name); + kfree(tsec); +} + +/* release_jail: + * Callback for kref_put to use for releasing a jail when its + * last user exits. + */ +static void release_jail(struct kref *kref) +{ + struct jail_struct *tsec; + + tsec = container_of(kref, struct jail_struct, kref); + disable_jail(tsec); + free_jail(tsec); +} + +/* + * jail_task_free_security: this is the callback hooked into LSM. + * If there was no task->security field for bsdjail, do nothing. + * If there was, but it was never put into use, free the jail. + * If there was, and the jail is in use, then decrement the usage + * count, and disable and free the jail if the usage count hits 0. 
+ */
+static void jail_task_free_security(struct task_struct *task)
+{
+	struct jail_struct *jail = task->security;
+
+	if (!jail)
+		return;
+
+	if (jail->jail_flags & IN_USE) {
+		/*
+		 * One task fewer in an active jail.  If this was the last
+		 * reference, kref_put() hands the jail to release_jail().
+		 */
+		jail->cur_nrtask--;
+		kref_put(&jail->kref, release_jail);
+		return;
+	}
+
+	/*
+	 * someone did 'echo -n x > /proc/<pid>/attr/exec' but
+	 * the jail was never put into use.  Nuke the old info.
+	 */
+	free_jail(jail);
+	task->security = NULL;
+}
+
+/*
+ * alloc_task_security: allocate a zeroed jail_struct and hang it off
+ * tsk->security.  Returns the new structure, or ERR_PTR(-ENOMEM) if the
+ * allocation fails, in which case tsk->security is left untouched.
+ */
+static struct jail_struct *
+alloc_task_security(struct task_struct *tsk)
+{
+	struct jail_struct *jail = kmalloc(sizeof(*jail), GFP_KERNEL);
+
+	if (jail == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memset(jail, 0, sizeof(*jail));
+	tsk->security = jail;
+
+	return jail;
+}
+
+/*
+ * in_jail: returns 1 if t carries a jail_struct whose IN_USE flag is set,
+ * 0 if it has no jail_struct or the jail is still being set up.
+ */
+static inline int
+in_jail(struct task_struct *t)
+{
+	struct jail_struct *jail = t->security;
+
+	return jail && (jail->jail_flags & IN_USE);
+}
+
+/*
+ * If a network address was passed into /proc/<pid>/attr/exec,
+ * then process in its jail will only be allowed to bind/listen
+ * to that address.
+ */ +static void +setup_netaddress(struct jail_struct *tsec) +{ + unsigned int a, b, c, d, i; + unsigned int x[8]; + + tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6); + tsec->addr4 = 0; + ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0); + + if (tsec->ip4_addr_name) { + if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u", + &a, &b, &c, &d) != 4) + return; + if (a>255 || b>255 || c>255 || d>255) + return; + tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d); + tsec->jail_flags |= GOT_IPV4; + bsdj_debug(DBG, "Network (ipv4) set up (%s)\n", + tsec->ip4_addr_name); + } + + if (tsec->ip6_addr_name) { + if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x", + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], + &x[7]) != 8) { + printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__, + tsec->ip6_addr_name); + return; + } + for (i=0; i<8; i++) { + if (x[i] > 65535) { + printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i); + return; + } + tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]); + } + tsec->jail_flags |= GOT_IPV6; + bsdj_debug(DBG, "Network (ipv6) set up (%s)\n", + tsec->ip6_addr_name); + } +} + +/* + * enable_jail: + * Called when a process is placed into a new jail to handle the + * actual creation of the jail. + * Creates namespace + * Sets process root+pwd + * Stores the requested ip address + * Registers a unique pseudo-proc filesystem for this jail + */ +static int enable_jail(struct task_struct *tsk) +{ + struct nameidata nd; + struct jail_struct *tsec = tsk->security;; + int retval = -EFAULT; + + if (!tsec || !tsec->root_pathname) + goto out; + + /* + * USE_JAIL_NAMESPACE: could be useful, so that future mounts outside + * the jail don't affect the jail. But it's not necessary, and + * requires exporting copy_namespace from fs/namespace.c + * + * Actually, it would also be useful for truly hiding + * information about mounts which do not exist in this jail. 
+#define USE_JAIL_NAMESPACE + */ +#ifdef USE_JAIL_NAMESPACE + bsdj_debug(DBG, "bsdjail: copying namespace.\n"); + retval = -EPERM; + if (copy_namespace(CLONE_NEWNS, tsk)) + goto out; + bsdj_debug(DBG, "bsdjail: copied namespace.\n"); +#endif + + /* find our new root directory */ + bsdj_debug(DBG, "bsdjail: looking up %s\n", tsec->root_pathname); + retval = path_lookup(tsec->root_pathname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); + if (retval) + goto out; + + bsdj_debug(DBG, "bsdjail: got %s, setting root to it\n", tsec->root_pathname); + + /* and set the fsroot to it */ + set_fs_root(tsk->fs, nd.mnt, nd.dentry); + set_fs_pwd(tsk->fs, nd.mnt, nd.dentry); + + bsdj_debug(DBG, "bsdjail: root has been set. Have fun.\n"); + + /* set up networking */ + if (tsec->ip4_addr_name || tsec->ip6_addr_name) + setup_netaddress(tsec); + + tsec->cur_nrtask = 1; + if (tsec->nice) + set_user_nice(current, tsec->nice); + if (tsec->max_data) { + current->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data; + current->rlim[RLIMIT_DATA].rlim_max = tsec->max_data; + } + if (tsec->max_memlock) { + current->rlim[RLIMIT_MEMLOCK].rlim_cur = tsec->max_memlock; + current->rlim[RLIMIT_MEMLOCK].rlim_max = tsec->max_memlock; + } + if (tsec->maxtimeslice) { + current->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; + current->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; + } + /* success and end */ + tsec->mnt = mntget(nd.mnt); + tsec->dentry = dget(nd.dentry); + path_release(&nd); + kref_init(&tsec->kref); + tsec->jail_flags |= IN_USE; + + /* won't let ourselves be removed until this jail goes away */ + try_module_get(THIS_MODULE); + + return 0; + +out: + return retval; +} + +/* + * LSM /proc/<pid>/attr hooks. 
+ * You may write into /proc/<pid>/attr/exec: + * root /some/path + * ip 2.2.2.2 + * These values will be used on the next exec() to set up your jail + * (assuming you're not already in a jail) + */ +static int +jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size) +{ + struct jail_struct *tsec = current->security; + long val; + int start, len; + + if (tsec && (tsec->jail_flags & IN_USE)) + return -EINVAL; /* let them guess why */ + + if (p != current || strcmp(name, "exec")) + return -EPERM; + + if (strncmp(value, "root ", 5) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + if (tsec->root_pathname) + kfree(tsec->root_pathname); + start = 5; + len = size-start; + tsec->root_pathname = kmalloc(len+1, GFP_KERNEL); + if (!tsec->root_pathname) + return -ENOMEM; + strlcpy(tsec->root_pathname, value+start, len+1); + } else if (strncmp(value, "ip ", 3) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + if (tsec->ip4_addr_name) + kfree(tsec->ip4_addr_name); + start = 3; + len = size-start; + tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL); + if (!tsec->ip4_addr_name) + return -ENOMEM; + strlcpy(tsec->ip4_addr_name, value+start, len+1); + } else if (strncmp(value, "ip6 ", 4) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + if (tsec->ip6_addr_name) + kfree(tsec->ip6_addr_name); + start = 4; + len = size-start; + tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL); + if (!tsec->ip6_addr_name) + return -ENOMEM; + strlcpy(tsec->ip6_addr_name, value+start, len+1); + + /* the next two are equivalent */ + } else if (strncmp(value, "slice ", 6) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+6, NULL, 0); + tsec->maxtimeslice = val; + } else if (strncmp(value, "timeslice ", 10) == 0) { + if (!tsec) + tsec = 
alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+10, NULL, 0); + tsec->maxtimeslice = val; + } else if (strncmp(value, "nrtask ", 7) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = (int) simple_strtol(value+7, NULL, 0); + if (val < 1) + return -EINVAL; + tsec->max_nrtask = val; + } else if (strncmp(value, "memlock ", 8) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+8, NULL, 0); + tsec->max_memlock = val; + } else if (strncmp(value, "data ", 5) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+5, NULL, 0); + tsec->max_data = val; + } else if (strncmp(value, "nice ", 5) == 0) { + if (!tsec) + tsec = alloc_task_security(current); + if (IS_ERR(tsec)) + return -ENOMEM; + + val = simple_strtoul(value+5, NULL, 0); + tsec->nice = val; + } else + return -EINVAL; + + return size; +} + +static int print_jail_net_info(struct jail_struct *j, char *buf, int maxcnt) +{ + int len = 0; + + if (j->ip4_addr_name) + len += snprintf(buf, maxcnt, "%s\n", j->ip4_addr_name); + if (j->ip6_addr_name) + len += snprintf(buf, maxcnt-len, "%s\n", j->ip6_addr_name); + + return snprintf(buf, maxcnt, "No network information\n"); +} + +/* + * LSM /proc/<pid>/attr read hook. + * + * /proc/$$/attr/current output: + * If the reading process, say process 1001, is in a jail, then + * cat /proc/999/attr/current + * will print networking information. + * If the reading process, say process 1001, is not in a jail, then + * cat /proc/999/attr/current + * will return + * root: (root of jail) + * ip: (ip address of jail) + * if 999 is in a jail, or + * -EINVAL + * if 999 is not in a jail. + * + * /proc/$$/attr/exec output: + * A process in a jail gets -EINVAL for /proc/$$/attr/exec. + * A process not in a jail gets hints on starting a jail. 
+ */ +static int +jail_getprocattr(struct task_struct *p, char *name, void *value, size_t size) +{ + struct jail_struct *tsec; + int err = 0; + + if (in_jail(current)) { + if (strcmp(name, "current") == 0) { + /* provide network info */ + err = print_jail_net_info(current->security, value, + size); + return err; + } + return -EINVAL; /* let them guess why */ + } + + if (strcmp(name, "exec") == 0) { + /* Print usage some help */ + err = snprintf(value, size, + "Valid keywords:\n" + "root <pathname>\n" + "ip <ip4-addr>\n" + "ip6 <ip6-addr>\n" + "nrtask <max number of tasks in this jail>\n" + "nice <nice level for processes in this jail>\n" + "slice <max timeslice per process in msecs>\n" + "data <max data size per process in bytes>\n" + "memlock <max lockable memory per process in bytes>\n"); + return err; + } + + if (strcmp(name, "current")) + return -EPERM; + + tsec = p->security; + if (!tsec || !(tsec->jail_flags & IN_USE)) { + err = snprintf(value, size, "Not Jailed\n"); + } else { + err = snprintf(value, size, + "Root: %s\nIPv4: %s\nIPv6: %s\n" + "max_nrtask %d current nrtask %d max_timeslice %lu " + "nice %lu\n" + "max_memlock %lu max_data %lu\n", + tsec->root_pathname, + tsec->ip4_addr_name ? tsec->ip4_addr_name : "(none)", + tsec->ip6_addr_name ? tsec->ip6_addr_name : "(none)", + tsec->max_nrtask, tsec->cur_nrtask, tsec->maxtimeslice, + tsec->nice, tsec->max_data, tsec->max_memlock); + } + + return err; +} + +/* + * Forbid a process in a jail from sending a signal to a process in another + * (or no) jail through file sigio. + * + * We consider the process which set the fowner to be the one sending the + * signal, rather than the one writing to the file. Therefore we store the + * jail of a process during jail_file_set_fowner, then check that against + * the jail of the process receiving the signal. 
+ */ +static int +jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, + int fd, int reason) +{ + struct file *file; + struct jail_struct *tsec, *fsec; + + if (!in_jail(current)) + return 0; + + file = (struct file *) ((long)fown - offsetof(struct file, f_owner)); + tsec = tsk->security; + fsec = file->f_security; + + if (fsec != tsec) + return -EPERM; + + return 0; +} + +static int +jail_file_set_fowner(struct file *file) +{ + struct jail_struct *tsec; + + tsec = current->security; + file->f_security = tsec; + if (tsec) + kref_get(&tsec->kref); + + return 0; +} + +static void free_ipc_security(struct kern_ipc_perm *ipc) +{ + struct jail_struct *tsec; + + tsec = ipc->security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + ipc->security = NULL; +} + +static void free_file_security(struct file *file) +{ + struct jail_struct *tsec; + + tsec = file->f_security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + file->f_security = NULL; +} + +static void free_inode_security(struct inode *inode) +{ + struct jail_struct *tsec; + + tsec = inode->i_security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + inode->i_security = NULL; +} + +/* + * LSM ptrace hook: + * process in jail may not ptrace process not in the same jail + */ +static int +jail_ptrace (struct task_struct *tracer, struct task_struct *tracee) +{ + struct jail_struct *tsec = tracer->security; + + if (tsec && (tsec->jail_flags & IN_USE)) { + if (tsec == tracee->security) + return 0; + return -EPERM; + } + return 0; +} + +/* + * process in jail may only use one (aliased) ip address. If they try to + * attach to 127.0.0.1, that is remapped to their own address. 
If some + * other address (and not their own), deny permission + */ +static int jail_socket_unix_bind(struct socket *sock, struct sockaddr *address, + int addrlen); + +#define loopbackaddr htonl((127 << 24) | 1) + +static inline int jail_inet4_bind(struct socket *sock, struct sockaddr *address, + int addrlen, struct jail_struct *tsec) +{ + struct sockaddr_in *inaddr; + __u32 sin_addr, jailaddr; + + if (!(tsec->jail_flags & GOT_IPV4)) + return -EPERM; + + inaddr = (struct sockaddr_in *) address; + sin_addr = inaddr->sin_addr.s_addr; + jailaddr = tsec->addr4; + + if (sin_addr == jailaddr) + return 0; + + if (sin_addr == loopbackaddr || !sin_addr) { + bsdj_debug(DBG, "Got a loopback or 0 address\n"); + sin_addr = jailaddr; + bsdj_debug(DBG, "Converted to: %u.%u.%u.%u\n", + NIPQUAD(sin_addr)); + return 0; + } + + return -EPERM; +} + +static inline int +jail_inet6_bind(struct socket *sock, struct sockaddr *address, int addrlen, + struct jail_struct *tsec) +{ + struct sockaddr_in6 *inaddr6; + struct in6_addr *sin6_addr, *jailaddr; + + if (!(tsec->jail_flags & GOT_IPV6)) + return -EPERM; + + inaddr6 = (struct sockaddr_in6 *) address; + sin6_addr = &inaddr6->sin6_addr; + jailaddr = &tsec->addr6; + + if (ipv6_addr_cmp(jailaddr, sin6_addr) == 0) + return 0; + + if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback) == 0) { + ipv6_addr_copy(sin6_addr, jailaddr); + return 0; + } + + printk(KERN_NOTICE "%s: DENYING\n", __FUNCTION__); + printk(KERN_NOTICE "%s: a %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " + "j %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + __FUNCTION__, + NIP6(*sin6_addr), + NIP6(*jailaddr)); + + return -EPERM; +} + +static int +jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (sock->sk->sk_family == AF_UNIX) + return jail_socket_unix_bind(sock, address, addrlen); + + if (!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) + /* If we 
want to be strict, we could just + * deny net access when lacking a pseudo ip. + * For now we just allow it. */ + return 0; + + switch(address->sa_family) { + case AF_INET: + return jail_inet4_bind(sock, address, addrlen, tsec); + + case AF_INET6: + return jail_inet6_bind(sock, address, addrlen, tsec); + + default: + return 0; + } +} + +/* + * If locked in an ipv6 jail, don't let them use ipv4, and vice versa + */ +static int +jail_socket_create(int family, int type, int protocol, int kern) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || kern || !(tsec->jail_flags & IN_USE) || + !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) + return 0; + + switch(family) { + case AF_INET: + if (tsec->jail_flags & GOT_IPV4) + return 0; + return -EPERM; + case AF_INET6: + if (tsec->jail_flags & GOT_IPV6) + return 0; + return -EPERM; + default: + return 0; + }; + + return 0; +} + +static void +jail_socket_post_create(struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct inet_opt *inet; + struct ipv6_pinfo *inet6; + struct jail_struct *tsec = current->security; + + if (!tsec || kern || !(tsec->jail_flags & IN_USE) || + !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) + return; + + switch(family) { + case AF_INET: + inet = inet_sk(sock->sk); + inet->saddr = tsec->addr4; + break; + case AF_INET6: + inet6 = inet6_sk(sock->sk); + ipv6_addr_copy(&inet6->saddr, &tsec->addr6); + break; + default: + break; + }; + + return; +} + +static int +jail_socket_listen(struct socket *sock, int backlog) +{ + struct inet_opt *inet; + struct ipv6_pinfo *inet6; + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE) || + !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) + return 0; + + switch (sock->sk->sk_family) { + case AF_INET: + inet = inet_sk(sock->sk); + if (inet->saddr == tsec->addr4) + return 0; + return -EPERM; + + case AF_INET6: + inet6 = inet6_sk(sock->sk); + if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6) == 0) + return 
0; + return -EPERM; + + default: + return 0; + + } +} + +static void free_sock_security(struct sock *sk) +{ + struct jail_struct *tsec; + + tsec = sk->sk_security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + sk->sk_security = NULL; +} + +/* + * The next three (socket) hooks prevent a process in a jail from sending + * data to a abstract unix domain socket which was bound outside the jail. + */ +static int +jail_socket_unix_bind(struct socket *sock, struct sockaddr *address, + int addrlen) +{ + struct sockaddr_un *sunaddr; + struct jail_struct *tsec; + + if (sock->sk->sk_family != AF_UNIX) + return 0; + + sunaddr = (struct sockaddr_un *) address; + if (sunaddr->sun_path[0] != 0) + return 0; + + tsec = current->security; + sock->sk->sk_security = tsec; + if (tsec) + kref_get(&tsec->kref); + return 0; +} + +/* + * Note - we deny sends both from unjailed to jailed, and from jailed + * to unjailed. As well as, of course between different jails. + */ +static int +jail_socket_unix_may_send(struct socket *sock, struct socket *other) +{ + struct jail_struct *tsec, *ssec; + + tsec = current->security; /* jail of sending process */ + ssec = other->sk->sk_security; /* jail of receiver */ + + if (tsec != ssec) + return -EPERM; + + return 0; +} + +static int +jail_socket_unix_stream_connect(struct socket *sock, + struct socket *other, struct sock *newsk) +{ + struct jail_struct *tsec, *ssec; + + tsec = current->security; /* jail of sending process */ + ssec = other->sk->sk_security; /* jail of receiver */ + + if (tsec != ssec) + return -EPERM; + + return 0; +} + +static int +jail_mount(char * dev_name, struct nameidata *nd, char * type, + unsigned long flags, void * data) +{ + if (in_jail(current)) + return -EPERM; + + return 0; +} + +static int +jail_umount(struct vfsmount *mnt, int flags) +{ + if (in_jail(current)) + return -EPERM; + + return 0; +} + +/* + * process in jail may not: + * use nice + * change network config + * load/unload modules + */ +static 
int +jail_capable (struct task_struct *tsk, int cap) +{ + if (in_jail(tsk)) { + if (cap == CAP_SYS_NICE) + return -EPERM; + if (cap == CAP_NET_ADMIN) + return -EPERM; + if (cap == CAP_SYS_MODULE) + return -EPERM; + if (cap == CAP_SYS_RAWIO) + return -EPERM; + } + + if (cap_is_fs_cap (cap) ? tsk->fsuid == 0 : tsk->euid == 0) + return 0; + return -EPERM; +} + +/* + * jail_security_task_create: + * + * If the current process is ina a jail, and that jail is about to exceed a + * maximum number of processes, then refuse to fork. If the maximum number + * of jails is listed as 0, then there is no limit for this jail, and we allow + * all forks. + */ +static inline int +jail_security_task_create (unsigned long clone_flags) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask) + return -EPERM; + return 0; +} + +/* + * The child of a process in a jail belongs in the same jail + */ +static int +jail_task_alloc_security(struct task_struct *tsk) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + tsk->security = tsec; + kref_get(&tsec->kref); + tsec->cur_nrtask++; + if (tsec->maxtimeslice) { + tsk->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; + tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; + } + if (tsec->max_data) { + tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_data; + tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_data; + } + if (tsec->max_memlock) { + tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_memlock; + tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_memlock; + } + if (tsec->nice) + set_user_nice(current, tsec->nice); + + return 0; +} + +static int +jail_bprm_alloc_security(struct linux_binprm *bprm) +{ + struct jail_struct *tsec; + int ret; + + tsec = current->security; + if (!tsec) + return 0; + + if (tsec->jail_flags & IN_USE) + return 0; + + if (tsec->root_pathname) { + ret = 
enable_jail(current); + if (ret) { + /* if we failed, nix out the root/ip requests */ + jail_task_free_security(current); + return ret; + } + } + return 0; +} + +/* + * Process in jail may not create devices + * Thanks to Brad Spender for pointing out fifos should be allowed. + */ +/* TODO: We may want to allow /dev/log, at least... */ +static int +jail_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + if (!in_jail(current)) + return 0; + + if (S_ISFIFO(mode)) + return 0; + + return -EPERM; +} + +/* yanked from fs/proc/base.c */ +static unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + +/* + * jail_proc_inode_permission: + * called only when current is in a jail, and is trying to reach + * /proc/<pid>. We check whether <pid> is in the same jail as + * current. If not, permission is denied. + * + * NOTE: On the one hand, the task_to_inode(inode)->i_security + * approach seems cleaner, but on the other, this prevents us + * from unloading bsdjail for awhile... + */ +static int +jail_proc_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct jail_struct *tsec = current->security; + struct dentry *dentry = nd->dentry; + unsigned pid; + + pid = name_to_int(dentry); + if (pid == ~0U) { + struct qstr *dname = &dentry->d_name; + if (strcmp(dname->name, "scsi") == 0 || + strcmp(dname->name, "sys") == 0 || + strcmp(dname->name, "ide") == 0) + return -EPERM; + return 0; + } + + if (dentry->d_parent != dentry->d_sb->s_root) + return 0; + if (inode->i_security != tsec) + return -ENOENT; + + return 0; +} + +/* + * Here is our attempt to prevent chroot escapes. 
+ */ +static int +is_jailroot_parent(struct dentry *candidate, struct dentry *root, + struct vfsmount *rootmnt) +{ + if (candidate == root) + return 0; + + /* simple case: fs->root/.. == candidate */ + if (root->d_parent == candidate) + return 1; + + /* + * now more complicated: if fs->root is a mounted directory, + * then chdir(..) out of fs->root, at follow_dotdot, will follow + * the fs->root mount point. So we must check the parent dir of + * the fs->root mount point. + */ + if (rootmnt->mnt_root == root && rootmnt->mnt_mountpoint!=root) { + root = rootmnt->mnt_mountpoint; + rootmnt = rootmnt->mnt_parent; + return is_jailroot_parent(candidate, root, rootmnt); + } + + return 0; +} + +/* + * A process in a jail may not see that /proc/<pid> exists for + * process not in its jail + * Unfortunately we can't pretend that pid for the starting process + * is 1, as vserver does. + */ +static int jail_task_lookup(struct task_struct *p) +{ + struct jail_struct *tsec = current->security; + + if (!tsec) + return 0; + if (tsec == p->security) + return 0; + return -EPERM; +} +/* + * security_task_to_inode: + * Set inode->security = task's jail. + */ +static void jail_task_to_inode(struct task_struct *p, struct inode *inode) +{ + struct jail_struct *tsec = p->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return; + if (inode->i_security) + return; + kref_get(&tsec->kref); + inode->i_security = tsec; +} + +/* + * inode_permission: + * If we are trying to look into certain /proc files from in a jail, we + * may deny permission. + * If we are trying to cd(..), but the cwd is the root of our jail, then + * permission is denied. 
+ */ +static int +jail_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (!nd) + return 0; + + if (nd->dentry && + strcmp(nd->dentry->d_sb->s_type->name, "proc") == 0) { + return jail_proc_inode_permission(inode, mask, nd); + + } + + if (!(mask&MAY_EXEC)) + return 0; + if (!inode || !S_ISDIR(inode->i_mode)) + return 0; + + if (is_jailroot_parent(nd->dentry, tsec->dentry, tsec->mnt)) { + bsdj_debug(WARN, "Attempt to chdir(..) out of jail!\n" + "(%s is a subdir of %s)\n", + tsec->dentry->d_name.name, + nd->dentry->d_name.name); + return -EPERM; + } + + return 0; +} + +/* + * A function which returns -ENOENT if dentry is the dentry for + * a /proc/<pid> directory. It returns 0 otherwise. + */ +static inline int +generic_procpid_check(struct dentry *dentry) +{ + struct jail_struct *jail = current->security; + unsigned pid = name_to_int(dentry); + + if (!jail || !(jail->jail_flags & IN_USE)) + return 0; + if (pid == ~0U) + return 0; + if (strcmp(dentry->d_sb->s_type->name, "proc") != 0) + return 0; + if (dentry->d_parent != dentry->d_sb->s_root) + return 0; + if (dentry->d_inode->i_security != jail) + return -ENOENT; + return 0; +} + +/* + * We want getattr to fail on /proc/<pid> to prevent leakage through, for + * instance, ls -d. + */ +static int +jail_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +{ + return generic_procpid_check(dentry); +} + +/* This probably is not necessary - /proc does not support xattrs? 
*/ +static int +jail_inode_getxattr(struct dentry *dentry, char *name) +{ + return generic_procpid_check(dentry); +} + +/* process in jail may not send signal to process not in the same jail */ +static int +jail_task_kill(struct task_struct *p, struct siginfo *info, int sig) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (tsec == p->security) + return 0; + + if (sig==SIGCHLD) + return 0; + + return -EPERM; +} + +/* + * LSM hooks to limit jailed process' abilities to muck with resource + * limits + */ +static int jail_task_setrlimit (unsigned int resource, struct rlimit *new_rlim) +{ + if (!in_jail(current)) + return 0; + + return -EPERM; +} + +static int jail_task_setscheduler (struct task_struct *p, int policy, + struct sched_param *lp) +{ + if (!in_jail(current)) + return 0; + + return -EPERM; +} + +/* + * LSM hooks to limit IPC access. + */ + +static inline int +basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target) +{ + struct jail_struct *tsec = target->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (p->security != tsec) + return -EPERM; + + return 0; +} + +static int +jail_ipc_permission(struct kern_ipc_perm *ipcp, short flag) +{ + return basic_ipc_security_check(ipcp, current); +} + +static int +jail_shm_alloc_security (struct shmid_kernel *shp) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + shp->shm_perm.security = tsec; + kref_get(&tsec->kref); + return 0; +} + +static void +jail_shm_free_security (struct shmid_kernel *shp) +{ + free_ipc_security(&shp->shm_perm); +} + +static int +jail_shm_associate (struct shmid_kernel *shp, int shmflg) +{ + return basic_ipc_security_check(&shp->shm_perm, current); +} + +static int +jail_shm_shmctl(struct shmid_kernel *shp, int cmd) +{ + if (cmd == IPC_INFO || cmd == SHM_INFO) + return 0; + + return 
basic_ipc_security_check(&shp->shm_perm, current); +} + +static int +jail_shm_shmat(struct shmid_kernel *shp, char *shmaddr, int shmflg) +{ + return basic_ipc_security_check(&shp->shm_perm, current); +} + +static int +jail_msg_queue_alloc(struct msg_queue *msq) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + msq->q_perm.security = tsec; + kref_get(&tsec->kref); + return 0; +} + +static void +jail_msg_queue_free(struct msg_queue *msq) +{ + free_ipc_security(&msq->q_perm); +} + +static int jail_msg_queue_associate(struct msg_queue *msq, int flag) +{ + return basic_ipc_security_check(&msq->q_perm, current); +} + +static int +jail_msg_queue_msgctl(struct msg_queue *msq, int cmd) +{ + if (cmd == IPC_INFO || cmd == MSG_INFO) + return 0; + + return basic_ipc_security_check(&msq->q_perm, current); +} + +static int +jail_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) +{ + return basic_ipc_security_check(&msq->q_perm, current); +} + +static int +jail_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, + struct task_struct *target, long type, int mode) + +{ + return basic_ipc_security_check(&msq->q_perm, target); +} + +static int +jail_sem_alloc_security(struct sem_array *sma) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + sma->sem_perm.security = tsec; + kref_get(&tsec->kref); + return 0; +} + +static void +jail_sem_free_security(struct sem_array *sma) +{ + free_ipc_security(&sma->sem_perm); +} + +static int +jail_sem_associate(struct sem_array *sma, int semflg) +{ + return basic_ipc_security_check(&sma->sem_perm, current); +} + +static int +jail_sem_semctl(struct sem_array *sma, int cmd) +{ + if (cmd == IPC_INFO || cmd == SEM_INFO) + return 0; + return basic_ipc_security_check(&sma->sem_perm, current); +} + +static int +jail_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops, + int alter) +{ + 
return basic_ipc_security_check(&sma->sem_perm, current); +} + +static struct security_operations bsdjail_security_ops = { + .ptrace = jail_ptrace, + .capable = jail_capable, + + .task_kill = jail_task_kill, + .task_alloc_security = jail_task_alloc_security, + .task_free_security = jail_task_free_security, + .bprm_alloc_security = jail_bprm_alloc_security, + .task_create = jail_security_task_create, + .task_to_inode = jail_task_to_inode, + .task_lookup = jail_task_lookup, + + .task_setrlimit = jail_task_setrlimit, + .task_setscheduler = jail_task_setscheduler, + + .setprocattr = jail_setprocattr, + .getprocattr = jail_getprocattr, + + .file_set_fowner = jail_file_set_fowner, + .file_send_sigiotask = jail_file_send_sigiotask, + .file_free_security = free_file_security, + + .socket_bind = jail_socket_bind, + .socket_listen = jail_socket_listen, + .socket_create = jail_socket_create, + .socket_post_create = jail_socket_post_create, + .unix_stream_connect = jail_socket_unix_stream_connect, + .unix_may_send = jail_socket_unix_may_send, + .sk_free_security = free_sock_security, + + .inode_mknod = jail_inode_mknod, + .inode_permission = jail_inode_permission, + .inode_free_security = free_inode_security, + .inode_getattr = jail_inode_getattr, + .inode_getxattr = jail_inode_getxattr, + .sb_mount = jail_mount, + .sb_umount = jail_umount, + + .ipc_permission = jail_ipc_permission, + .shm_alloc_security = jail_shm_alloc_security, + .shm_free_security = jail_shm_free_security, + .shm_associate = jail_shm_associate, + .shm_shmctl = jail_shm_shmctl, + .shm_shmat = jail_shm_shmat, + + .msg_queue_alloc_security = jail_msg_queue_alloc, + .msg_queue_free_security = jail_msg_queue_free, + .msg_queue_associate = jail_msg_queue_associate, + .msg_queue_msgctl = jail_msg_queue_msgctl, + .msg_queue_msgsnd = jail_msg_queue_msgsnd, + .msg_queue_msgrcv = jail_msg_queue_msgrcv, + + .sem_alloc_security = jail_sem_alloc_security, + .sem_free_security = jail_sem_free_security, + .sem_associate = 
jail_sem_associate, + .sem_semctl = jail_sem_semctl, + .sem_semop = jail_sem_semop, +}; + +static int __init bsdjail_init (void) +{ + int rc = 0; + + if (register_security (&bsdjail_security_ops)) { + printk (KERN_INFO + "Failure registering BSD Jail module with the kernel\n"); + + rc = mod_reg_security(MY_NAME, &bsdjail_security_ops); + if (rc < 0) { + printk (KERN_INFO "Failure registering BSD Jail " + " module with primary security module.\n"); + return -EINVAL; + } + secondary = 1; + } + printk (KERN_INFO "BSD Jail module initialized.\n"); + + return 0; +} + +static void __exit bsdjail_exit (void) +{ + if (secondary) { + if (mod_unreg_security (MY_NAME, &bsdjail_security_ops)) + printk (KERN_INFO "Failure unregistering BSD Jail " + " module with primary module.\n"); + } else { + if (unregister_security (&bsdjail_security_ops)) { + printk (KERN_INFO "Failure unregistering BSD Jail " + "module with the kernel\n"); + } + } + + printk (KERN_INFO "BSD Jail module removed\n"); +} + +security_initcall (bsdjail_init); +module_exit (bsdjail_exit); + +MODULE_DESCRIPTION("BSD Jail LSM."); +MODULE_LICENSE("GPL"); diff -Nrup linux-2.6.9-rc3-bk6/security/Kconfig linux-2.6.9-rc3-bk6-jail/security/Kconfig --- linux-2.6.9-rc3-bk6/security/Kconfig 2004-10-06 10:08:02.000000000 -0500 +++ linux-2.6.9-rc3-bk6-jail/security/Kconfig 2004-10-06 10:52:13.000000000 -0500 @@ -46,5 +46,16 @@ config SECURITY_ROOTPLUG source security/selinux/Kconfig +config SECURITY_BSDJAIL + tristate "BSD Jail LSM" + depends on SECURITY + select SECURITY_NETWORK + help + Provides BSD Jail compartmentalization functionality. + See Documentation/bsdjail.txt for more information and + usage instructions. + + If you are unsure how to answer this question, answer N. 
+ endmenu diff -Nrup linux-2.6.9-rc3-bk6/security/Makefile linux-2.6.9-rc3-bk6-jail/security/Makefile --- linux-2.6.9-rc3-bk6/security/Makefile 2004-08-14 00:37:26.000000000 -0500 +++ linux-2.6.9-rc3-bk6-jail/security/Makefile 2004-10-06 10:52:13.000000000 -0500 @@ -15,3 +15,4 @@ obj-$(CONFIG_SECURITY) += security.o d obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o +obj-$(CONFIG_SECURITY_BSDJAIL) += bsdjail.o ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-07 19:01 ` [patch 2/3] " Serge E. Hallyn @ 2004-10-07 19:42 ` Chris Wright 2004-10-07 20:05 ` Andrew Morton 2004-10-08 18:05 ` Serge E. Hallyn 2004-10-10 10:41 ` Christoph Hellwig 1 sibling, 2 replies; 37+ messages in thread From: Chris Wright @ 2004-10-07 19:42 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Andrew Morton, chrisw, linux-kernel * Serge E. Hallyn (serue@us.ibm.com) wrote: > Attached is a new version of the bsdjail patch with the requested code > cleanups applied. I noticed Andrew picked this up in -mm3, but that he had to do some diff cleanups (see the thread/rlim changes in his tree). If you'd like Andrew to pick this up, it would be courteous to get the diff clean and building against his tree. > --- linux-2.6.9-rc3-bk6/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600 > +++ linux-2.6.9-rc3-bk6-jail/security/bsdjail.c 2004-10-07 11:30:21.000000000 -0500 > @@ -0,0 +1,1495 @@ > +/* > + * File: linux/security/bsdjail.c > + * Author: Serge Hallyn (serue@us.ibm.com) > + * Date: Sep 12, 2004 > + * > + * (See Documentation/bsdjail.txt for more information) > + * > + * Copyright (C) 2004 International Business Machines <serue@us.ibm.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. 
> + */ > + > +#include <linux/config.h> > +#include <linux/module.h> > +#include <linux/kernel.h> > +#include <linux/init.h> > +#include <linux/security.h> > +#include <linux/namei.h> > +#include <linux/namespace.h> > +#include <linux/proc_fs.h> > +#include <linux/in.h> > +#include <linux/in6.h> > +#include <linux/pagemap.h> > +#include <linux/ip.h> > +#include <net/ipv6.h> > +#include <linux/mount.h> > +#include <asm/uaccess.h> > +#include <linux/netdevice.h> > +#include <linux/inetdevice.h> > +#include <linux/seq_file.h> > +#include <linux/un.h> > +#include <linux/smp_lock.h> > +#include <linux/kref.h> asm/ includes after linux/ > + > +static int jail_debug = 0; unnecessary assignment to 0. > +MODULE_PARM(jail_debug, "i"); use module_param > +MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n"); > + > +#define DBG 0 > +#define WARN 1 > +#define bsdj_debug(how, fmt, arg... ) \ > + do { \ > + if ( how || jail_debug ) \ > + printk(KERN_NOTICE "%s: %s: " fmt, \ > + MY_NAME, __FUNCTION__, \ Andrew has cleanup here (__FUNCTION__ ,). I just use __func__, anyway. > + ## arg ); \ > + } while ( 0 ) > + > +#define MY_NAME "bsdjail" > + > +/* flag to keep track of how we were registered */ > +static int secondary = 0; unnecessary assignment to 0 > +/* > + * The task structure holding jail information. > + * Taskp->security points to one of these (or is null). > + * There is exactly one jail_struct for each jail. If >1 process > + * are in the same jail, they share the same jail_struct. 
> + */ > +struct jail_struct { > + struct kref kref; > + > + /* these are set on writes to /proc/<pid>/attr/exec */ > + char *root_pathname; /* char * containing path to use as jail / */ > + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */ > + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */ > + > + /* these are set when a jail becomes active */ > + __u32 addr4; /* internal form of ip4_addr_name */ > + struct in6_addr addr6; /* internal form of ip6_addr_name */ > + > + struct dentry *dentry; /* dentry of fs root */ > + struct vfsmount *mnt; /* vfsmnt of fs root */ > + > + /* Resource limits. 0 = no limit */ > + int max_nrtask; /* maximum number of tasks within this jail. */ > + int cur_nrtask; /* current number of tasks within this jail. */ > + long maxtimeslice; /* max timeslice in ms for procs in this jail */ > + long nice; /* nice level for processes in this jail */ > + long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */ > +/* values for the jail_flags field */ > +#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */ > +#define GOT_IPV4 2 > +#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */ > + char jail_flags; > +}; Could go into header. Perhaps not needed if it's all there is, and it's not shared anywhere though. > +/* > + * disable_jail: A jail which was in use, but has no references > + * left, is disabled - we free up the mountpoint and dentry, and > + * give up our reference on the module. > + * > + * don't need to put namespace, it will be done automatically > + * when the last process in jail is put. 
> + * DO need to put the dentry and vfsmount > + */ > +static void > +disable_jail(struct jail_struct *tsec) > +{ > + dput(tsec->dentry); > + mntput(tsec->mnt); > + module_put(THIS_MODULE); > +} > + > + > +static void free_jail(struct jail_struct *tsec) > +{ > + if (!tsec) > + return; > + > + kfree(tsec->root_pathname); > + kfree(tsec->ip4_addr_name); > + kfree(tsec->ip6_addr_name); > + kfree(tsec); > +} > + > +/* release_jail: > + * Callback for kref_put to use for releasing a jail when its > + * last user exits. > + */ > +static void release_jail(struct kref *kref) > +{ > + struct jail_struct *tsec; > + > + tsec = container_of(kref, struct jail_struct, kref); > + disable_jail(tsec); > + free_jail(tsec); > +} > + > +/* > + * jail_task_free_security: this is the callback hooked into LSM. > + * If there was no task->security field for bsdjail, do nothing. > + * If there was, but it was never put into use, free the jail. > + * If there was, and the jail is in use, then decrement the usage > + * count, and disable and free the jail if the usage count hits 0. > + */ > +static void jail_task_free_security(struct task_struct *task) > +{ > + struct jail_struct *tsec; > + > + tsec = task->security; > + > + if (!tsec) > + return; > + > + if (!(tsec->jail_flags & IN_USE)) { > + /* > + * someone did 'echo -n x > /proc/<pid>/attr/exec' but > + * then forked before execing. Nuke the old info. > + */ > + free_jail(tsec); > + task->security = NULL; > + return; > + } > + tsec->cur_nrtask--; > + /* If this was the last process in the jail, delete the jail */ > + kref_put(&tsec->kref, release_jail); > +} > + > +static struct jail_struct * > +alloc_task_security(struct task_struct *tsk) > +{ > + struct jail_struct *tsec; > + tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL); > + if (!tsec) > + return ERR_PTR(-ENOMEM); Just return NULL, that's expected norm, plus you're not using the error anyway. 
> + memset(tsec, 0, sizeof(struct jail_struct)); > + tsk->security = tsec; > + return tsec; > +} > + > +static inline int > +in_jail(struct task_struct *t) > +{ > + struct jail_struct *tsec = t->security; > + > + if (tsec && (tsec->jail_flags & IN_USE)) > + return 1; > + > + return 0; > +} > + > +/* > + * If a network address was passed into /proc/<pid>/attr/exec, > + * then process in its jail will only be allowed to bind/listen > + * to that address. > + */ > +static void > +setup_netaddress(struct jail_struct *tsec) > +{ > + unsigned int a, b, c, d, i; > + unsigned int x[8]; > + > + tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6); > + tsec->addr4 = 0; > + ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0); > + > + if (tsec->ip4_addr_name) { > + if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u", > + &a, &b, &c, &d) != 4) > + return; > + if (a>255 || b>255 || c>255 || d>255) > + return; > + tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d); > + tsec->jail_flags |= GOT_IPV4; > + bsdj_debug(DBG, "Network (ipv4) set up (%s)\n", > + tsec->ip4_addr_name); > + } > + > + if (tsec->ip6_addr_name) { > + if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x", > + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], > + &x[7]) != 8) { > + printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__, > + tsec->ip6_addr_name); > + return; > + } > + for (i=0; i<8; i++) { > + if (x[i] > 65535) { > + printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i); > + return; > + } > + tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]); > + } > + tsec->jail_flags |= GOT_IPV6; > + bsdj_debug(DBG, "Network (ipv6) set up (%s)\n", > + tsec->ip6_addr_name); > + } > +} > + > +/* > + * enable_jail: > + * Called when a process is placed into a new jail to handle the > + * actual creation of the jail. 
> + * Creates namespace > + * Sets process root+pwd > + * Stores the requested ip address > + * Registers a unique pseudo-proc filesystem for this jail > + */ > +static int enable_jail(struct task_struct *tsk) > +{ > + struct nameidata nd; > + struct jail_struct *tsec = tsk->security;; ^^ generates compile error, kill the extra semi-colon > + int retval = -EFAULT; > + > + if (!tsec || !tsec->root_pathname) > + goto out; > + > + /* > + * USE_JAIL_NAMESPACE: could be useful, so that future mounts outside > + * the jail don't affect the jail. But it's not necessary, and > + * requires exporting copy_namespace from fs/namespace.c > + * > + * Actually, it would also be useful for truly hiding > + * information about mounts which do not exist in this jail. > +#define USE_JAIL_NAMESPACE > + */ > +#ifdef USE_JAIL_NAMESPACE > + bsdj_debug(DBG, "bsdjail: copying namespace.\n"); > + retval = -EPERM; > + if (copy_namespace(CLONE_NEWNS, tsk)) > + goto out; > + bsdj_debug(DBG, "bsdjail: copied namespace.\n"); > +#endif > + > + /* find our new root directory */ > + bsdj_debug(DBG, "bsdjail: looking up %s\n", tsec->root_pathname); > + retval = path_lookup(tsec->root_pathname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); > + if (retval) > + goto out; > + > + bsdj_debug(DBG, "bsdjail: got %s, setting root to it\n", tsec->root_pathname); > + > + /* and set the fsroot to it */ > + set_fs_root(tsk->fs, nd.mnt, nd.dentry); > + set_fs_pwd(tsk->fs, nd.mnt, nd.dentry); > + > + bsdj_debug(DBG, "bsdjail: root has been set. 
Have fun.\n"); > + > + /* set up networking */ > + if (tsec->ip4_addr_name || tsec->ip6_addr_name) > + setup_netaddress(tsec); > + > + tsec->cur_nrtask = 1; > + if (tsec->nice) > + set_user_nice(current, tsec->nice); > + if (tsec->max_data) { > + current->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data; > + current->rlim[RLIMIT_DATA].rlim_max = tsec->max_data; > + } > + if (tsec->max_memlock) { > + current->rlim[RLIMIT_MEMLOCK].rlim_cur = tsec->max_memlock; > + current->rlim[RLIMIT_MEMLOCK].rlim_max = tsec->max_memlock; > + } > + if (tsec->maxtimeslice) { > + current->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; > + current->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; > + } > + /* success and end */ > + tsec->mnt = mntget(nd.mnt); > + tsec->dentry = dget(nd.dentry); > + path_release(&nd); > + kref_init(&tsec->kref); > + tsec->jail_flags |= IN_USE; > + > + /* won't let ourselves be removed until this jail goes away */ > + try_module_get(THIS_MODULE); > + > + return 0; > + > +out: > + return retval; > +} > + > +/* > + * LSM /proc/<pid>/attr hooks. > + * You may write into /proc/<pid>/attr/exec: > + * root /some/path > + * ip 2.2.2.2 > + * These values will be used on the next exec() to set up your jail > + * (assuming you're not already in a jail) > + */ > +static int > +jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size) > +{ > + struct jail_struct *tsec = current->security; > + long val; > + int start, len; > + > + if (tsec && (tsec->jail_flags & IN_USE)) > + return -EINVAL; /* let them guess why */ > + > + if (p != current || strcmp(name, "exec")) > + return -EPERM; > + > + if (strncmp(value, "root ", 5) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; I think encoding error, testing error, then returning hardcoded error is wasteful. I'd change alloc_task_security api to return NULL on ENOMEM. 
> + > + if (tsec->root_pathname) > + kfree(tsec->root_pathname); > + start = 5; > + len = size-start; > + tsec->root_pathname = kmalloc(len+1, GFP_KERNEL); > + if (!tsec->root_pathname) > + return -ENOMEM; > + strlcpy(tsec->root_pathname, value+start, len+1); > + } else if (strncmp(value, "ip ", 3) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + if (tsec->ip4_addr_name) > + kfree(tsec->ip4_addr_name); > + start = 3; > + len = size-start; > + tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL); > + if (!tsec->ip4_addr_name) > + return -ENOMEM; > + strlcpy(tsec->ip4_addr_name, value+start, len+1); > + } else if (strncmp(value, "ip6 ", 4) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + if (tsec->ip6_addr_name) > + kfree(tsec->ip6_addr_name); > + start = 4; > + len = size-start; > + tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL); > + if (!tsec->ip6_addr_name) > + return -ENOMEM; > + strlcpy(tsec->ip6_addr_name, value+start, len+1); > + > + /* the next two are equivalent */ > + } else if (strncmp(value, "slice ", 6) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + val = simple_strtoul(value+6, NULL, 0); > + tsec->maxtimeslice = val; > + } else if (strncmp(value, "timeslice ", 10) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + val = simple_strtoul(value+10, NULL, 0); > + tsec->maxtimeslice = val; > + } else if (strncmp(value, "nrtask ", 7) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + val = (int) simple_strtol(value+7, NULL, 0); > + if (val < 1) > + return -EINVAL; > + tsec->max_nrtask = val; > + } else if (strncmp(value, "memlock ", 8) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > 
+ val = simple_strtoul(value+8, NULL, 0); > + tsec->max_memlock = val; > + } else if (strncmp(value, "data ", 5) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + val = simple_strtoul(value+5, NULL, 0); > + tsec->max_data = val; > + } else if (strncmp(value, "nice ", 5) == 0) { > + if (!tsec) > + tsec = alloc_task_security(current); > + if (IS_ERR(tsec)) > + return -ENOMEM; > + > + val = simple_strtoul(value+5, NULL, 0); > + tsec->nice = val; > + } else > + return -EINVAL; Do you need all those alloc_task_security's in there? Why not just one at the top? And are you convinced there's no leak on the other kmalloc failures? more after lunch. -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-07 19:42 ` Chris Wright @ 2004-10-07 20:05 ` Andrew Morton 2004-10-08 18:05 ` Serge E. Hallyn 1 sibling, 0 replies; 37+ messages in thread From: Andrew Morton @ 2004-10-07 20:05 UTC (permalink / raw) To: Chris Wright; +Cc: serue, chrisw, linux-kernel Chris Wright <chrisw@osdl.org> wrote: > > * Serge E. Hallyn (serue@us.ibm.com) wrote: > > Attached is a new version of the bsdjail patch with the requested code > > cleanups applied. > > I noticed Andrew picked this up in -mm3, but that he had to do some diff > cleanups (see the thread/rlim changes in his tree). If you'd like Andrew > to pick this up, it would be courteous to get the diff clean and > building against his tree. Nah, that's OK. I can drop the old patch and pick up the new. It's only when code is settling down into a final state that I get upset about wholesale replacements. Even then I'll just feed it through interdiff. > Andrew has cleanup here (__FUNCTION__ ,). I just use __func__, anyway. That's a workaround for the gcc-2.95 pasting bug. __FUNCTION__ is preferred, actually. Just for consistency, and so the compiler will spit it out if someone tries to do compile-time string concatenation with it. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-07 19:42 ` Chris Wright 2004-10-07 20:05 ` Andrew Morton @ 2004-10-08 18:05 ` Serge E. Hallyn 1 sibling, 0 replies; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-08 18:05 UTC (permalink / raw) To: Chris Wright; +Cc: Andrew Morton, linux-kernel The attached patch is against -mm3, and includes the suggestions Chris last sent out. > I think encoding error, testing error, then returning hardcoded error is > wasteful. I'd change alloc_task_security api to return NULL on ENOMEM. ... > Do you need all those alloc_task_security's in there? Why not just one > at the top? Good points - cleaned these up. > And are you convinced there's no leak on the other kmalloc > failures? Yes, they each get freed if this function is called again on the same item, and they get freed when the task closes. Unless I'm missing something... thanks, -serge Changelog: Sep 10, 2004: original version Sep 12, 2004: add ipv6 support Sep 13, 2004: support simultaneous ipv4+ipv6 Oct 6, 2004: move kref release function to kref_put from kref_init Oct 7, 2004: requested code cleanups (mainly nix #defines) Oct 8, 2004: more cleanups. Signed-Off-By: Serge E. 
Hallyn <serue@us.ibm.com> diff -Nrup linux-2.6.9-rc3-mm3/security/bsdjail.c linux-2.6.9-rc3-mm3-jail/security/bsdjail.c --- linux-2.6.9-rc3-mm3/security/bsdjail.c 2004-10-08 13:56:38.851128096 -0500 +++ linux-2.6.9-rc3-mm3-jail/security/bsdjail.c 2004-10-08 12:59:41.000000000 -0500 @@ -27,16 +27,16 @@ #include <linux/ip.h> #include <net/ipv6.h> #include <linux/mount.h> -#include <asm/uaccess.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> #include <linux/seq_file.h> #include <linux/un.h> #include <linux/smp_lock.h> #include <linux/kref.h> +#include <asm/uaccess.h> -static int jail_debug = 0; -MODULE_PARM(jail_debug, "i"); +static int jail_debug; +module_param(jail_debug, int, 0); MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n"); #define DBG 0 @@ -52,7 +52,7 @@ MODULE_PARM_DESC(jail_debug, "Print bsd #define MY_NAME "bsdjail" /* flag to keep track of how we were registered */ -static int secondary = 0; +static int secondary; /* * The task structure holding jail information. @@ -80,7 +80,7 @@ struct jail_struct { int cur_nrtask; /* current number of tasks within this jail. 
*/ long maxtimeslice; /* max timeslice in ms for procs in this jail */ long nice; /* nice level for processes in this jail */ - long max_data, max_memlock; /* equivalent to RLIMIT_{DATA,MEMLOCK} */ + long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */ /* values for the jail_flags field */ #define IN_USE 1 /* if 0, task is setting up jail, not yet in it */ #define GOT_IPV4 2 @@ -88,29 +88,6 @@ struct jail_struct { char jail_flags; }; -#define in_use(x) (x->jail_flags & IN_USE) -#define set_in_use(x) (x->jail_flags |= IN_USE) - -#define got_network(x) (x->jail_flags & (GOT_IPV4 | GOT_IPV6)) -#define got_ipv4(x) (x->jail_flags & (GOT_IPV4)) -#define got_ipv6(x) (x->jail_flags & (GOT_IPV6)) -#define set_ipv4(x) (x->jail_flags |= GOT_IPV4) -#define set_ipv6(x) (x->jail_flags |= GOT_IPV6) -#define unset_got_ipv4(x) (x->jail_flags &= ~GOT_IPV4) -#define unset_got_ipv6(x) (x->jail_flags &= ~GOT_IPV6) - -/* - * structs, defines, and functions to cope with stacking - */ - -#define get_task_security(task) (task->security) -#define get_inode_security(inode) (inode->i_security) -#define get_sock_security(sock) (sock->sk_security) -#define get_file_security(file) (file->f_security) -#define get_ipc_security(ipc) (ipc->security) - -#define jail_of(proc) (get_task_security(proc)) - /* * disable_jail: A jail which was in use, but has no references * left, is disabled - we free up the mountpoint and dentry, and @@ -134,12 +111,9 @@ static void free_jail(struct jail_struct if (!tsec) return; - if (tsec->root_pathname) - kfree(tsec->root_pathname); - if (tsec->ip4_addr_name) - kfree(tsec->ip4_addr_name); - if (tsec->ip6_addr_name) - kfree(tsec->ip6_addr_name); + kfree(tsec->root_pathname); + kfree(tsec->ip4_addr_name); + kfree(tsec->ip6_addr_name); kfree(tsec); } @@ -151,17 +125,11 @@ static void release_jail(struct kref *kr { struct jail_struct *tsec; - tsec = container_of(kref,struct jail_struct,kref); + tsec = container_of(kref, struct jail_struct, kref); 
disable_jail(tsec); free_jail(tsec); } -#define set_task_security(task,data) task->security = data -#define set_inode_security(inode,data) inode->i_security = data -#define set_sock_security(sock,data) sock->sk_security = data -#define set_file_security(file,data) file->f_security = data -#define set_ipc_security(ipc,data) ipc.security = data - /* * jail_task_free_security: this is the callback hooked into LSM. * If there was no task->security field for bsdjail, do nothing. @@ -171,20 +139,18 @@ static void release_jail(struct kref *kr */ static void jail_task_free_security(struct task_struct *task) { - struct jail_struct *tsec; - - tsec = get_task_security(task); + struct jail_struct *tsec = task->security; if (!tsec) return; - if (!in_use(tsec)) { + if (!(tsec->jail_flags & IN_USE)) { /* * someone did 'echo -n x > /proc/<pid>/attr/exec' but * then forked before execing. Nuke the old info. */ free_jail(tsec); - set_task_security(task,NULL); + task->security = NULL; return; } tsec->cur_nrtask--; @@ -196,20 +162,21 @@ static struct jail_struct * alloc_task_security(struct task_struct *tsk) { struct jail_struct *tsec; + tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL); - if (!tsec) - return ERR_PTR(-ENOMEM); - memset(tsec, 0, sizeof(struct jail_struct)); - set_task_security(tsk, tsec); + if (tsec) { + memset(tsec, 0, sizeof(struct jail_struct)); + tsk->security = tsec; + } return tsec; } static inline int in_jail(struct task_struct *t) { - struct jail_struct *tsec = jail_of(t); + struct jail_struct *tsec = t->security; - if (tsec && in_use(tsec)) + if (tsec && (tsec->jail_flags & IN_USE)) return 1; return 0; @@ -223,27 +190,27 @@ in_jail(struct task_struct *t) static void setup_netaddress(struct jail_struct *tsec) { - unsigned int a,b,c,d, i; + unsigned int a, b, c, d, i; unsigned int x[8]; - unset_got_ipv4(tsec); + tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6); tsec->addr4 = 0; - unset_got_ipv6(tsec); ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0); if 
(tsec->ip4_addr_name) { - if (sscanf(tsec->ip4_addr_name,"%u.%u.%u.%u",&a,&b,&c,&d)!=4) + if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u", + &a, &b, &c, &d) != 4) return; if (a>255 || b>255 || c>255 || d>255) return; - tsec->addr4 = htonl((a<<24)|(b<<16)|(c<<8)|d); - set_ipv4(tsec); + tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d); + tsec->jail_flags |= GOT_IPV4; bsdj_debug(DBG, "Network (ipv4) set up (%s)\n", tsec->ip4_addr_name); } if (tsec->ip6_addr_name) { - if (sscanf(tsec->ip6_addr_name,"%x:%x:%x:%x:%x:%x:%x:%x", + if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x", &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7]) != 8) { printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__, @@ -257,7 +224,7 @@ setup_netaddress(struct jail_struct *tse } tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]); } - set_ipv6(tsec); + tsec->jail_flags |= GOT_IPV6; bsdj_debug(DBG, "Network (ipv6) set up (%s)\n", tsec->ip6_addr_name); } @@ -275,10 +242,9 @@ setup_netaddress(struct jail_struct *tse static int enable_jail(struct task_struct *tsk) { struct nameidata nd; - struct jail_struct *tsec; + struct jail_struct *tsec = tsk->security; int retval = -EFAULT; - tsec = jail_of(tsk); if (!tsec || !tsec->root_pathname) goto out; @@ -339,7 +305,7 @@ static int enable_jail(struct task_struc tsec->dentry = dget(nd.dentry); path_release(&nd); kref_init(&tsec->kref); - set_in_use(tsec); + tsec->jail_flags |= IN_USE; /* won't let ourselves be removed until this jail goes away */ try_module_get(THIS_MODULE); @@ -361,108 +327,66 @@ out: static int jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; long val; int start, len; - if (tsec && in_use(tsec)) + if (tsec && (tsec->jail_flags & IN_USE)) return -EINVAL; /* let them guess why */ if (p != current || strcmp(name, "exec")) return -EPERM; - if (strncmp(value, "root ", 5)==0) { + if (!tsec) { + tsec = 
alloc_task_security(current); if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) return -ENOMEM; + } - if (tsec->root_pathname) - kfree(tsec->root_pathname); + if (strncmp(value, "root ", 5) == 0) { + kfree(tsec->root_pathname); start = 5; - len = size-start; - tsec->root_pathname = kmalloc(len+1, GFP_KERNEL); + len = size - start + 1; + tsec->root_pathname = kmalloc(len, GFP_KERNEL); if (!tsec->root_pathname) return -ENOMEM; - strlcpy(tsec->root_pathname, value+start, len+1); - } else if (strncmp(value, "ip ", 3)==0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - - if (tsec->ip4_addr_name) - kfree(tsec->ip4_addr_name); + strlcpy(tsec->root_pathname, value+start, len); + } else if (strncmp(value, "ip ", 3) == 0) { + kfree(tsec->ip4_addr_name); start = 3; - len = size-start; - tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL); + len = size - start + 1; + tsec->ip4_addr_name = kmalloc(len, GFP_KERNEL); if (!tsec->ip4_addr_name) return -ENOMEM; - strlcpy(tsec->ip4_addr_name, value+start, len+1); + strlcpy(tsec->ip4_addr_name, value+start, len); } else if (strncmp(value, "ip6 ", 4) == 0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - - if (tsec->ip6_addr_name) - kfree(tsec->ip6_addr_name); + kfree(tsec->ip6_addr_name); start = 4; - len = size-start; - tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL); + len = size - start + 1; + tsec->ip6_addr_name = kmalloc(len, GFP_KERNEL); if (!tsec->ip6_addr_name) return -ENOMEM; - strlcpy(tsec->ip6_addr_name, value+start, len+1); + strlcpy(tsec->ip6_addr_name, value+start, len); /* the next two are equivalent */ - } else if (strncmp(value, "slice ", 6)==0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - + } else if (strncmp(value, "slice ", 6) == 0) { val = simple_strtoul(value+6, NULL, 0); tsec->maxtimeslice = val; - } else if (strncmp(value, "timeslice ", 10)==0) { - if 
(!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - + } else if (strncmp(value, "timeslice ", 10) == 0) { val = simple_strtoul(value+10, NULL, 0); tsec->maxtimeslice = val; - } else if (strncmp(value, "nrtask ", 7)==0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - + } else if (strncmp(value, "nrtask ", 7) == 0) { val = (int) simple_strtol(value+7, NULL, 0); if (val < 1) return -EINVAL; tsec->max_nrtask = val; - } else if (strncmp(value, "memlock ", 8)==0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - + } else if (strncmp(value, "memlock ", 8) == 0) { val = simple_strtoul(value+8, NULL, 0); tsec->max_memlock = val; - } else if (strncmp(value, "data ", 5)==0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - + } else if (strncmp(value, "data ", 5) == 0) { val = simple_strtoul(value+5, NULL, 0); tsec->max_data = val; - } else if (strncmp(value, "nice ", 5)==0) { - if (!tsec) - tsec = alloc_task_security(current); - if (IS_ERR(tsec)) - return -ENOMEM; - + } else if (strncmp(value, "nice ", 5) == 0) { val = simple_strtoul(value+5, NULL, 0); tsec->nice = val; } else @@ -510,9 +434,9 @@ jail_getprocattr(struct task_struct *p, int err = 0; if (in_jail(current)) { - if (strcmp(name, "current")==0) { + if (strcmp(name, "current") == 0) { /* provide network info */ - err = print_jail_net_info(jail_of(current), value, + err = print_jail_net_info(current->security, value, size); return err; } @@ -537,8 +461,8 @@ jail_getprocattr(struct task_struct *p, if (strcmp(name, "current")) return -EPERM; - tsec = jail_of(p); - if (!tsec || !in_use(tsec)) { + tsec = p->security; + if (!tsec || !(tsec->jail_flags & IN_USE)) { err = snprintf(value, size, "Not Jailed\n"); } else { err = snprintf(value, size, @@ -570,16 +494,12 @@ jail_file_send_sigiotask(struct task_str int fd, int reason) { struct file *file; - struct 
jail_struct *tsec, *fsec; if (!in_jail(current)) return 0; - file = (struct file *)((long)fown - offsetof(struct file,f_owner)); - tsec = jail_of(tsk); - fsec = get_file_security(file); - - if (fsec != tsec) + file = (struct file *) ((long)fown - offsetof(struct file, f_owner)); + if (file->f_security != tsk->security) return -EPERM; return 0; @@ -590,8 +510,8 @@ jail_file_set_fowner(struct file *file) { struct jail_struct *tsec; - tsec = jail_of(current); - set_file_security(file, tsec); + tsec = current->security; + file->f_security = tsec; if (tsec) kref_get(&tsec->kref); @@ -602,33 +522,33 @@ static void free_ipc_security(struct ker { struct jail_struct *tsec; - tsec = get_ipc_security(ipc); + tsec = ipc->security; if (!tsec) return; kref_put(&tsec->kref, release_jail); - set_ipc_security((*ipc), NULL); + ipc->security = NULL; } static void free_file_security(struct file *file) { struct jail_struct *tsec; - tsec = get_file_security(file); + tsec = file->f_security; if (!tsec) return; kref_put(&tsec->kref, release_jail); - set_file_security(file, NULL); + file->f_security = NULL; } static void free_inode_security(struct inode *inode) { struct jail_struct *tsec; - tsec = get_inode_security(inode); + tsec = inode->i_security; if (!tsec) return; kref_put(&tsec->kref, release_jail); - set_inode_security(inode, NULL); + inode->i_security = NULL; } /* @@ -638,10 +558,10 @@ static void free_inode_security(struct i static int jail_ptrace (struct task_struct *tracer, struct task_struct *tracee) { - struct jail_struct *tsec = jail_of(tracer); + struct jail_struct *tsec = tracer->security; - if (tsec && in_use(tsec)) { - if (tsec == jail_of(tracee)) + if (tsec && (tsec->jail_flags & IN_USE)) { + if (tsec == tracee->security) return 0; return -EPERM; } @@ -664,10 +584,10 @@ static inline int jail_inet4_bind(struct struct sockaddr_in *inaddr; __u32 sin_addr, jailaddr; - if (!got_ipv4(tsec)) + if (!(tsec->jail_flags & GOT_IPV4)) return -EPERM; - inaddr = (struct sockaddr_in 
*)address; + inaddr = (struct sockaddr_in *) address; sin_addr = inaddr->sin_addr.s_addr; jailaddr = tsec->addr4; @@ -692,17 +612,17 @@ jail_inet6_bind(struct socket *sock, str struct sockaddr_in6 *inaddr6; struct in6_addr *sin6_addr, *jailaddr; - if (!got_ipv6(tsec)) + if (!(tsec->jail_flags & GOT_IPV6)) return -EPERM; - inaddr6 = (struct sockaddr_in6 *)address; + inaddr6 = (struct sockaddr_in6 *) address; sin6_addr = &inaddr6->sin6_addr; jailaddr = &tsec->addr6; - if (ipv6_addr_cmp(jailaddr, sin6_addr)==0) + if (ipv6_addr_cmp(jailaddr, sin6_addr) == 0) return 0; - if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback)==0) { + if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback) == 0) { ipv6_addr_copy(sin6_addr, jailaddr); return 0; } @@ -720,15 +640,15 @@ jail_inet6_bind(struct socket *sock, str static int jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; if (sock->sk->sk_family == AF_UNIX) return jail_socket_unix_bind(sock, address, addrlen); - if (!got_network(tsec)) + if (!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) /* If we want to be strict, we could just * deny net access when lacking a pseudo ip. * For now we just allow it. 
*/ @@ -752,18 +672,19 @@ jail_socket_bind(struct socket *sock, st static int jail_socket_create(int family, int type, int protocol, int kern) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec) || kern || !got_network(tsec)) + if (!tsec || kern || !(tsec->jail_flags & IN_USE) || + !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) return 0; switch(family) { case AF_INET: - if (got_ipv4(tsec)) + if (tsec->jail_flags & GOT_IPV4) return 0; return -EPERM; case AF_INET6: - if (got_ipv6(tsec)) + if (tsec->jail_flags & GOT_IPV6) return 0; return -EPERM; default: @@ -779,9 +700,10 @@ jail_socket_post_create(struct socket *s { struct inet_opt *inet; struct ipv6_pinfo *inet6; - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec) || kern || !got_network(tsec)) + if (!tsec || kern || !(tsec->jail_flags & IN_USE) || + !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) return; switch(family) { @@ -805,9 +727,10 @@ jail_socket_listen(struct socket *sock, { struct inet_opt *inet; struct ipv6_pinfo *inet6; - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec) || !got_network(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE) || + !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6))) return 0; switch (sock->sk->sk_family) { @@ -819,7 +742,7 @@ jail_socket_listen(struct socket *sock, case AF_INET6: inet6 = inet6_sk(sock->sk); - if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6)==0) + if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6) == 0) return 0; return -EPERM; @@ -833,11 +756,11 @@ static void free_sock_security(struct so { struct jail_struct *tsec; - tsec = get_sock_security(sk); + tsec = sk->sk_security; if (!tsec) return; kref_put(&tsec->kref, release_jail); - set_sock_security(sk, NULL); + sk->sk_security = NULL; } /* @@ -854,12 +777,12 @@ jail_socket_unix_bind(struct socket *soc if 
(sock->sk->sk_family != AF_UNIX) return 0; - sunaddr = (struct sockaddr_un *)address; + sunaddr = (struct sockaddr_un *) address; if (sunaddr->sun_path[0] != 0) return 0; - tsec = jail_of(current); - set_sock_security(sock->sk, tsec); + tsec = current->security; + sock->sk->sk_security = tsec; if (tsec) kref_get(&tsec->kref); return 0; @@ -874,8 +797,8 @@ jail_socket_unix_may_send(struct socket { struct jail_struct *tsec, *ssec; - tsec = jail_of(current); /* jail of sending process */ - ssec = get_sock_security(other->sk); /* jail of receiver */ + tsec = current->security; /* jail of sending process */ + ssec = other->sk->sk_security; /* jail of receiver */ if (tsec != ssec) return -EPERM; @@ -889,8 +812,8 @@ jail_socket_unix_stream_connect(struct s { struct jail_struct *tsec, *ssec; - tsec = jail_of(current); /* jail of sending process */ - ssec = get_sock_security(other->sk); /* jail of receiver */ + tsec = current->security; /* jail of sending process */ + ssec = other->sk->sk_security; /* jail of receiver */ if (tsec != ssec) return -EPERM; @@ -953,9 +876,9 @@ jail_capable (struct task_struct *tsk, i static inline int jail_security_task_create (unsigned long clone_flags) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask) @@ -969,12 +892,12 @@ jail_security_task_create (unsigned long static int jail_task_alloc_security(struct task_struct *tsk) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; - set_task_security(tsk, tsec); + tsk->security = tsec; kref_get(&tsec->kref); tsec->cur_nrtask++; if (tsec->maxtimeslice) { @@ -998,14 +921,13 @@ jail_task_alloc_security(struct task_str static int jail_bprm_alloc_security(struct linux_binprm *bprm) { - 
struct jail_struct *tsec; + struct jail_struct *tsec = current->security; int ret; - tsec = jail_of(current); if (!tsec) return 0; - if (in_use(tsec)) + if (tsec->jail_flags & IN_USE) return 0; if (tsec->root_pathname) { @@ -1073,23 +995,23 @@ static int jail_proc_inode_permission(struct inode *inode, int mask, struct nameidata *nd) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; struct dentry *dentry = nd->dentry; unsigned pid; pid = name_to_int(dentry); if (pid == ~0U) { struct qstr *dname = &dentry->d_name; - if (strcmp(dname->name, "scsi")==0 || - strcmp(dname->name, "sys")==0 || - strcmp(dname->name, "ide")==0) + if (strcmp(dname->name, "scsi") == 0 || + strcmp(dname->name, "sys") == 0 || + strcmp(dname->name, "ide") == 0) return -EPERM; return 0; } if (dentry->d_parent != dentry->d_sb->s_root) return 0; - if (get_inode_security(inode) != tsec) + if (inode->i_security != tsec) return -ENOENT; return 0; @@ -1132,11 +1054,11 @@ is_jailroot_parent(struct dentry *candid */ static int jail_task_lookup(struct task_struct *p) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; if (!tsec) return 0; - if (tsec == jail_of(p)) + if (tsec == p->security) return 0; return -EPERM; } @@ -1146,14 +1068,14 @@ static int jail_task_lookup(struct task_ */ static void jail_task_to_inode(struct task_struct *p, struct inode *inode) { - struct jail_struct *tsec = jail_of(p); + struct jail_struct *tsec = p->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return; - if (get_inode_security(inode)) + if (inode->i_security) return; kref_get(&tsec->kref); - set_inode_security(inode, tsec); + inode->i_security = tsec; } /* @@ -1167,16 +1089,16 @@ static int jail_inode_permission(struct inode *inode, int mask, struct nameidata *nd) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if 
(!tsec || !(tsec->jail_flags & IN_USE)) return 0; if (!nd) return 0; if (nd->dentry && - strcmp(nd->dentry->d_sb->s_type->name, "proc")==0) { + strcmp(nd->dentry->d_sb->s_type->name, "proc") == 0) { return jail_proc_inode_permission(inode, mask, nd); } @@ -1187,10 +1109,10 @@ jail_inode_permission(struct inode *inod return 0; if (is_jailroot_parent(nd->dentry, tsec->dentry, tsec->mnt)) { - bsdj_debug(WARN,"Attempt to chdir(..) out of jail!\n" - "(%s is a subdir of %s)\n", - tsec->dentry->d_name.name, - nd->dentry->d_name.name); + bsdj_debug(WARN, "Attempt to chdir(..) out of jail!\n" + "(%s is a subdir of %s)\n", + tsec->dentry->d_name.name, + nd->dentry->d_name.name); return -EPERM; } @@ -1204,18 +1126,18 @@ jail_inode_permission(struct inode *inod static inline int generic_procpid_check(struct dentry *dentry) { - struct jail_struct *jail = jail_of(current); + struct jail_struct *jail = current->security; unsigned pid = name_to_int(dentry); - if (!jail || !in_use(jail)) + if (!jail || !(jail->jail_flags & IN_USE)) return 0; if (pid == ~0U) return 0; - if (strcmp(dentry->d_sb->s_type->name, "proc")!=0) + if (strcmp(dentry->d_sb->s_type->name, "proc") != 0) return 0; if (dentry->d_parent != dentry->d_sb->s_root) return 0; - if (get_inode_security(dentry->d_inode) != jail) + if (dentry->d_inode->i_security != jail) return -ENOENT; return 0; } @@ -1241,12 +1163,12 @@ jail_inode_getxattr(struct dentry *dentr static int jail_task_kill(struct task_struct *p, struct siginfo *info, int sig) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; - if (tsec == jail_of(p)) + if (tsec == p->security) return 0; if (sig==SIGCHLD) @@ -1283,12 +1205,12 @@ static int jail_task_setscheduler (struc static inline int basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target) { - struct jail_struct *tsec = jail_of(target); + struct 
jail_struct *tsec = target->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; - if (get_ipc_security(p) != tsec) + if (p->security != tsec) return -EPERM; return 0; @@ -1303,11 +1225,11 @@ jail_ipc_permission(struct kern_ipc_perm static int jail_shm_alloc_security (struct shmid_kernel *shp) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; - set_ipc_security(shp->shm_perm, tsec); + shp->shm_perm.security = tsec; kref_get(&tsec->kref); return 0; } @@ -1342,11 +1264,11 @@ jail_shm_shmat(struct shmid_kernel *shp, static int jail_msg_queue_alloc(struct msg_queue *msq) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; - set_ipc_security(msq->q_perm, tsec); + msq->q_perm.security = tsec; kref_get(&tsec->kref); return 0; } @@ -1388,11 +1310,11 @@ jail_msg_queue_msgrcv(struct msg_queue * static int jail_sem_alloc_security(struct sem_array *sma) { - struct jail_struct *tsec = jail_of(current); + struct jail_struct *tsec = current->security; - if (!tsec || !in_use(tsec)) + if (!tsec || !(tsec->jail_flags & IN_USE)) return 0; - set_ipc_security(sma->sem_perm, tsec); + sma->sem_perm.security = tsec; kref_get(&tsec->kref); return 0; } ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-07 19:01 ` [patch 2/3] " Serge E. Hallyn 2004-10-07 19:42 ` Chris Wright @ 2004-10-10 10:41 ` Christoph Hellwig 2004-10-10 11:31 ` Serge E. Hallyn 2004-10-11 13:47 ` Alan Cox 1 sibling, 2 replies; 37+ messages in thread From: Christoph Hellwig @ 2004-10-10 10:41 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Andrew Morton, chrisw, linux-kernel Your filesystem handling code is completely superfluous (and buggy). Please remove all the code dealing with chroot-lookalikes. In your userland script you simply have to clone(.., CLONE_NEWNS) to detach your namespace from your parent, then you can lazily unmount all filesystems and set up your new namespace before starting the jail. The added advantage is that you don't need any kludges to keep the user from exiting the chroot. > +#include <linux/ip.h> > +#include <net/ipv6.h> > +#include <linux/mount.h> > +#include <asm/uaccess.h> Please always include <asm/*.h> headers after <linux/*.h> > +#include <linux/smp_lock.h> I don't see you using the BKL anywhere. > > > > > +#include <linux/kref.h> Why that many blank lines? > +static int jail_debug = 0; no need to initialize to 0 > +MODULE_PARM(jail_debug, "i"); please use module_param > +static int secondary = 0; again no need to initialize. > + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */ > + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */ How do you handle non-ip networking? This really needs to be handled more generally. > + /* won't let ourselves be removed until this jail goes away */ > + try_module_get(THIS_MODULE); must be __module_get > +/* > + * LSM /proc/<pid>/attr hooks. > + * You may write into /proc/<pid>/attr/exec: > + * root /some/path > + * ip 2.2.2.2 > + * These values will be used on the next exec() to set up your jail > + * (assuming you're not already in a jail) That's a really awkward interface. 
> +jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, > + int fd, int reason) > +{ > + struct file *file; > + struct jail_struct *tsec, *fsec; > + > + if (!in_jail(current)) > + return 0; > + > + file = (struct file *) ((long)fown - offsetof(struct file, f_owner)); bah. Please use container_of or better get lsm folks to just pass you a struct file * > +jail_proc_inode_permission(struct inode *inode, int mask, > + struct nameidata *nd) > +{ > + struct jail_struct *tsec = current->security; > + struct dentry *dentry = nd->dentry; > + unsigned pid; > + > + pid = name_to_int(dentry); > + if (pid == ~0U) { > + struct qstr *dname = &dentry->d_name; > + if (strcmp(dname->name, "scsi") == 0 || > + strcmp(dname->name, "sys") == 0 || > + strcmp(dname->name, "ide") == 0) > + return -EPERM; > + return 0; oh, please. Don't submit such crap. if you want to disable sysctl access do it on the sysctl, not procfs level. And disabling access to /proc/ide and /proc/scsi as two very special cases (what about /proc/md, /proc/cciss or /proc/cpqarray?) is totally bollocks, if they allow hardware interaction without checking for capabilities fix them in the driver code. This half-aided security by obscurity crap _is_ going to bite later on. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-10 10:41 ` Christoph Hellwig @ 2004-10-10 11:31 ` Serge E. Hallyn 2004-10-10 11:34 ` Christoph Hellwig 2004-10-11 13:47 ` Alan Cox 1 sibling, 1 reply; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-10 11:31 UTC (permalink / raw) To: Christoph Hellwig, Serge E. Hallyn, Andrew Morton, chrisw, linux-kernel > Your filesystem handling code is completely superfluous (and buggy). Please > remove all the code dealing with chroot-lookalikes. In your userland script > you simply have to clone(.., CLONE_NEWNS) to detach your namespace from your > parent, then you can lazily unmount all filesystems and set up your new namespace > before starting the jail. The added advantage is that you don't need any > kludges to keep the user from exiting the chroot. I definitely would prefer to use namespaces. I had originally wanted to do a copy_namespace() in the module. That function is not exported, though. Is doing that in user-space really the right way to do it? thanks, -serge ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-10 11:31 ` Serge E. Hallyn @ 2004-10-10 11:34 ` Christoph Hellwig 0 siblings, 0 replies; 37+ messages in thread From: Christoph Hellwig @ 2004-10-10 11:34 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Serge E. Hallyn, Andrew Morton, chrisw, linux-kernel On Sun, Oct 10, 2004 at 07:31:52AM -0400, Serge E. Hallyn wrote: > > Your filesystem handling code is completely superfluous (and buggy). Please > > remove all the code dealing with chroot-lookalikes. In your userland script > > you simply have to clone(.., CLONE_NEWNS) to detach your namespace from your > > parent, then you can lazily unmount all filesystems and set up your new namespace > > before starting the jail. The added advantage is that you don't need any > > kludges to keep the user from exiting the chroot. > > I definitely would prefer to use namespaces. I had originally wanted to > do a copy_namespace() in the module. That function is not exported, > though. Is doing that in user-space really the right way to do it? If something can be done in userspace nicely that's preferable over doing it in kernelspace, yes. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-10 10:41 ` Christoph Hellwig 2004-10-10 11:31 ` Serge E. Hallyn @ 2004-10-11 13:47 ` Alan Cox 2004-10-12 7:00 ` Herbert Poetzl 2004-10-12 13:11 ` Serge E. Hallyn 1 sibling, 2 replies; 37+ messages in thread From: Alan Cox @ 2004-10-11 13:47 UTC (permalink / raw) To: Christoph Hellwig Cc: Serge E. Hallyn, Andrew Morton, chrisw, Linux Kernel Mailing List On Sul, 2004-10-10 at 11:41, Christoph Hellwig wrote: > Your filesystem handling code is completely superfluous (and buggy). Please > remove all the code dealing with chroot-lookalikes. In your userland script > you simply have to clone(.., CLONE_NEWNS) to detach your namespace from your > parent, then you can lazily unmount all filesystems and set up your new namespace > before starting the jail. The added advantage is that you don't need any > kludges to keep the user from exiting the chroot. AF_UNIX socket and fchdir(). That however requires a co-operator outside the chroot so doesn't seem to be a problem. I like the CLONE approach, it's a lot cleaner. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-11 13:47 ` Alan Cox @ 2004-10-12 7:00 ` Herbert Poetzl 2004-10-12 9:00 ` Christoph Hellwig 2004-10-12 13:11 ` Serge E. Hallyn 1 sibling, 1 reply; 37+ messages in thread From: Herbert Poetzl @ 2004-10-12 7:00 UTC (permalink / raw) To: Alan Cox Cc: Christoph Hellwig, Serge E. Hallyn, Andrew Morton, chrisw, Linux Kernel Mailing List On Mon, Oct 11, 2004 at 02:47:29PM +0100, Alan Cox wrote: > On Sul, 2004-10-10 at 11:41, Christoph Hellwig wrote: > > Your filesystem handling code is completely superfluous (and buggy). Please > > remove all the code dealing with chroot-lookalikes. In your userland script > > you simply have to clone(.., CLONE_NEWNS) to detach your namespace from your > > parent, then you can lazily unmount all filesystems and set up your new namespace > > before starting the jail. The added advantage is that you don't need any > > kludges to keep the user from exiting the chroot. > > AF_UNIX socket and fchdir(). > > That however requires a co-operator outside the chroot so doesn't seem > to be a problem. I like the CLONE approach, it's a lot cleaner. and it works well, because we use it for almost a year now on linux-vserver ;) best, Herbert > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 7:00 ` Herbert Poetzl @ 2004-10-12 9:00 ` Christoph Hellwig 2004-10-12 12:27 ` Herbert Poetzl 0 siblings, 1 reply; 37+ messages in thread From: Christoph Hellwig @ 2004-10-12 9:00 UTC (permalink / raw) To: Alan Cox, Serge E. Hallyn, Andrew Morton, chrisw, Linux Kernel Mailing List On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote: > and it works well, because we use it for almost > a year now on linux-vserver ;) Btw, could anyone explain the exact differences between linux-vserver and this jail module? ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 9:00 ` Christoph Hellwig @ 2004-10-12 12:27 ` Herbert Poetzl 2004-10-20 15:36 ` Christoph Hellwig 0 siblings, 1 reply; 37+ messages in thread From: Herbert Poetzl @ 2004-10-12 12:27 UTC (permalink / raw) To: Christoph Hellwig, Alan Cox, Serge E. Hallyn, Andrew Morton, chrisw, Linux Kernel Mailing List On Tue, Oct 12, 2004 at 10:00:57AM +0100, Christoph Hellwig wrote: > On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote: > > and it works well, because we use it for almost > > a year now on linux-vserver ;) > > Btw, could anyone explain the exact differences between linux-vserver > and this jail module? hmm, okay I'll try ... linux-vserver is a combination of kernel patch and userspace tools to create 'virtual servers' similar to UML, but sharing the resources (and kernel). to do this, it uses process isolation, network isolation and disk space separation (tagging). in addition it does resource management (accounting and limits) for various aspects (CPU, memory, processes, sockets, filehandles, ...) the jail module is recreating a limited subset of the isolation aspect via LSM (similar to the BSD jail) which allows to confine a process (and it's children) to a chroot() environment under certain limitations (resources) best, Herbert > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 12:27 ` Herbert Poetzl @ 2004-10-20 15:36 ` Christoph Hellwig 2004-10-20 19:18 ` Herbert Poetzl 0 siblings, 1 reply; 37+ messages in thread From: Christoph Hellwig @ 2004-10-20 15:36 UTC (permalink / raw) To: Christoph Hellwig, Alan Cox, Serge E. Hallyn, Andrew Morton, chrisw, Linux Kernel Mailing List On Tue, Oct 12, 2004 at 02:27:33PM +0200, Herbert Poetzl wrote: > On Tue, Oct 12, 2004 at 10:00:57AM +0100, Christoph Hellwig wrote: > > On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote: > > > and it works well, because we use it for almost > > > a year now on linux-vserver ;) > > > > Btw, could anyone explain the exact differences between linux-vserver > > and this jail module? > > hmm, okay I'll try ... > > linux-vserver is a combination of kernel patch and > userspace tools to create 'virtual servers' similar > to UML, but sharing the resources (and kernel). > > to do this, it uses process isolation, network > isolation and disk space separation (tagging). > in addition it does resource management (accounting > and limits) for various aspects (CPU, memory, > processes, sockets, filehandles, ...) > > the jail module is recreating a limited subset of > the isolation aspect via LSM (similar to the BSD > jail) which allows to confine a process (and it's > children) to a chroot() environment under certain > limitations (resources) So why a) can't linux-vserver use LSM hooks where applicable b) can't the two projects share code so we don't only have a crippled version in mainline ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-20 15:36 ` Christoph Hellwig @ 2004-10-20 19:18 ` Herbert Poetzl 0 siblings, 0 replies; 37+ messages in thread From: Herbert Poetzl @ 2004-10-20 19:18 UTC (permalink / raw) To: Christoph Hellwig, Alan Cox, Serge E. Hallyn, Andrew Morton, chrisw, Linux Kernel Mailing List On Wed, Oct 20, 2004 at 04:36:21PM +0100, Christoph Hellwig wrote: > On Tue, Oct 12, 2004 at 02:27:33PM +0200, Herbert Poetzl wrote: > > On Tue, Oct 12, 2004 at 10:00:57AM +0100, Christoph Hellwig wrote: > > > On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote: > > > > and it works well, because we use it for almost > > > > a year now on linux-vserver ;) > > > > > > Btw, could anyone explain the exact differences between linux-vserver > > > and this jail module? > > > > hmm, okay I'll try ... > > > > linux-vserver is a combination of kernel patch and > > userspace tools to create 'virtual servers' similar > > to UML, but sharing the resources (and kernel). > > > > to do this, it uses process isolation, network > > isolation and disk space separation (tagging). > > in addition it does resource management (accounting > > and limits) for various aspects (CPU, memory, > > processes, sockets, filehandles, ...) 
> > > > the jail module is recreating a limited subset of > > the isolation aspect via LSM (similar to the BSD > > jail) which allows to confine a process (and it's > > children) to a chroot() environment under certain > > limitations (resources) > > So why > > a) can't linux-vserver use LSM hooks where applicable well, it could, and probably in future it will do so, but currently there are three reasons which keep me from doing that: 1) some folks want to use LSM for other things, and proper stackering of LSM was broken/missing last time I looked at the code 2) performance: I'm not convinced that the LSM hooks are a good choice, where a single check of a flag (in current) is more than sufficient 3) why move 20% of linux vserver to LSM, where those 20% can not do anything useful without the remaining 80% (or at least some part of it) which can not be done with LSM for various reasons. > b) can't the two projects share code so we don't only have a crippled > version in mainline I'm sure the projects can share code, and IMHO the best solution would be to create a 'cripled' version of linux-vserver and to include it in mainline (if that is what kernel folks want) and to slowly extend this version where possible, moving existing code from linux-vserver into mainline ... once CKRM is working and included, and LSM provides the 'security' features, linux-vserver might become a simple compile time option ... best, Herbert > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-11 13:47 ` Alan Cox 2004-10-12 7:00 ` Herbert Poetzl @ 2004-10-12 13:11 ` Serge E. Hallyn 2004-10-12 14:15 ` Christoph Hellwig 2004-10-12 22:35 ` Ulrich Drepper 1 sibling, 2 replies; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-12 13:11 UTC (permalink / raw) To: Alan Cox; +Cc: linux-kernel > That however requires a co-operator outside the chroot so doesn't seem > to be a problem. I like the CLONE approach, its a lot cleaner. The attached patch (against -rc4-mm1) moves the responsibility for filesystem containment entirely to userspace. The Documentation/bsdjail.txt file reflects the new usage. It also incorporates Christoph's cleanups. I still need to see about generalizing the networking confinement. I certainly like the concept (as I understand it at least) behind the new vserver networking, but am not sure it can be done without patching. -serge diff -Nrup linux-2.6.9-rc4-mm1/Documentation/bsdjail.txt linux-2.6.9-rc4-mm1-jail/Documentation/bsdjail.txt --- linux-2.6.9-rc4-mm1/Documentation/bsdjail.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-rc4-mm1-jail/Documentation/bsdjail.txt 2004-10-11 16:22:12.845891208 -0500 @@ -0,0 +1,135 @@ +BSD Jail Linux Security Module +Serge E. Hallyn <serue@us.ibm.com> + +Description: + +Used in conjunction with per-process namespaces, this implements +a subset of the BSD Jail functionality as a Linux LSM. What is +currently implemented: + + If a process is in a jail, it: + + 1. Cannot mount or umount + 2. Cannot send signals outside of jail + 3. Cannot ptrace processes outside of jail + 4. Cannot create devices + 5. Cannot renice processes + 6. Cannot load or unload modules + 7. Cannot change network settings + 8. May be assigned a specific ip address which will be used + for all its socket binds. + 9. Cannot see contents of /proc/<pid> entries of processes not in the + same jail. 
(We hide their existence for convenience's sake, but + their existence can still be detected using, for instance, statfs) + 10. Has no CAP_SYS_RAWIO capability (no ioperm/iopl) + 11. May not share IPC resources with processes outside its own jail. + 12. May find its valid network address (if restricted) under + /proc/$$/attr/current. + + If properly locked into its own namespace, processes will not be able + to escape to parts of the system's filesystem which were made + unavailable (without outside help). + +WARNINGS: +The security of this module is very much dependent on the security +of the rest of the system. You must carefully think through your +use of the system. + +Some examples: + 1. If you leave /dev/hda1 in the jail, processes in the + jail can access that filesystem (e.g. using /sbin/debugfs). + 2. If you provide root access within a jail, this can of + course be used to setuid binaries in the jail. Combined + with an unjailed regular user account, this gives jailed + users unjailed root access. (thanks to Brad Spender for + pointing this out). + +How to use: + 1. Load the bsdjail module if not already loaded or compiled in: + + modprobe bsdjail + + 2. (Optional) Set up an ipv4 alias for the jail + + # /sbin/ifconfig eth0:0 192.168.1.101 + # /sbin/route add -host 192.168.1.101 dev eth0:0 + + 3. Execute a shell under a new namespace: + + exec clone_ns + + (see http://www.win.tue.nl/~aeb/linux/lk/lk-6.html#6.3) + + 4. If not already done, set up the filesystem for the jail. In our + example, we will set it up under /opt. + + mount /dev/hdc5 /opt + mount -t proc proc /opt/proc + + 5. Make sure there is an empty directory to put the old root in. We + will just use /opt/mnt + + mkdir /opt/mnt + + 6. Pivot the old and new roots: + + cd /opt + /sbin/pivot_root . mnt + /usr/sbin/chroot . /bin/sh + + 7. Unmount the old root + + umount -l /mnt + + 8. Give the desired arguments for the jail. 
If no arguments are + necessary, just say: + + echo lock > /proc/$$/attr/exec + + To lock the process into an ip alias, say: + + echo "ip 192.168.1.101" > /proc/$$/attr/exec + + 9. Execute a new shell. The shell will be under the new jail, and in + the private namespace you've been setting up. + + exec /bin/sh + + 10. To allow friends/customers/whoever to use this system, you might + start some services. + + sshd + + 11. Ssh is now running under the jail, so you no longer need the original + shell: + + exit + +The new shell runs in a private jail on the filesystem on /dev/hdc5. If proc +has been mounted under /dev/hdc5, then a "ps -auxw" under the jailed shell +will show only entries for processes started under that jail. + +If a private IP was specified for the jail, then + cat /proc/$$/attr/current +will show the address for the private network device. Other network +devices will be visible through /sbin/ifconfig -a, but not usable. + +If the reading process is not in a jail, then + cat /proc/$$/attr/current +returns information about the root and ip * for the target process, +or "Not Jailed" if the target process is not jailed. + +Cat /proc/$$/attr/exec gives a list of the valid keywords to cat into +/proc/$$/attr/exec when starting a jail. + +Current valid keywords for creating a jail are: + + lock: specifies the next exec should land us in a jail. (only needed + if you don't want to give any other keywords) + ip: IPV4 addr for this jail + ip6: IPV6 addr for this jail + nrtask: Number of tasks in this jail + nice: The nice level for this jail. (maybe should be min/max?) 
+ slice: Max timeslice per process + data: Max size of DATA segment per process + memlock: Max size of memory which can be locked per process diff -Nrup linux-2.6.9-rc4-mm1/fs/proc/base.c linux-2.6.9-rc4-mm1-jail/fs/proc/base.c --- linux-2.6.9-rc4-mm1/fs/proc/base.c 2004-10-11 17:02:19.612007144 -0500 +++ linux-2.6.9-rc4-mm1-jail/fs/proc/base.c 2004-10-11 10:00:36.000000000 -0500 @@ -1706,6 +1706,8 @@ static int get_tgid_list(int index, unsi int tgid = p->pid; if (!pid_alive(p)) continue; + if (security_task_lookup(p)) + continue; if (--index >= 0) continue; tgids[nr_tgids] = tgid; diff -Nrup linux-2.6.9-rc4-mm1/include/linux/security.h linux-2.6.9-rc4-mm1-jail/include/linux/security.h --- linux-2.6.9-rc4-mm1/include/linux/security.h 2004-10-11 17:02:21.888661040 -0500 +++ linux-2.6.9-rc4-mm1-jail/include/linux/security.h 2004-10-11 10:00:36.000000000 -0500 @@ -630,6 +630,11 @@ struct swap_info_struct; * Set the security attributes in @p->security for a kernel thread that * is being reparented to the init task. * @p contains the task_struct for the kernel thread. + * @task_lookup: + * Check permission to see the /proc/<pid> entry for process @p. + * @p contains the task_struct for task <pid> which is being looked + * up under /proc + * return 0 if permission is granted. * @task_to_inode: * Set the security attributes for an inode based on an associated task's * security attributes, e.g. for /proc/pid inodes. 
@@ -1162,6 +1167,7 @@ struct security_operations { unsigned long arg3, unsigned long arg4, unsigned long arg5); void (*task_reparent_to_init) (struct task_struct * p); + int (*task_lookup)(struct task_struct *p); void (*task_to_inode)(struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag); @@ -1767,6 +1773,11 @@ static inline void security_task_reparen security_ops->task_reparent_to_init (p); } +static inline int security_task_lookup(struct task_struct *p) +{ + return security_ops->task_lookup(p); +} + static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) { security_ops->task_to_inode(p, inode); @@ -2407,6 +2418,11 @@ static inline void security_task_reparen cap_task_reparent_to_init (p); } +static inline int security_task_lookup(struct task_struct *p) +{ + return 0; +} + static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) { } diff -Nrup linux-2.6.9-rc4-mm1/security/bsdjail.c linux-2.6.9-rc4-mm1-jail/security/bsdjail.c --- linux-2.6.9-rc4-mm1/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-rc4-mm1-jail/security/bsdjail.c 2004-10-11 16:55:33.967674456 -0500 @@ -0,0 +1,1365 @@ +/* + * File: linux/security/bsdjail.c + * Author: Serge Hallyn (serue@us.ibm.com) + * Date: Sep 12, 2004 + * + * (See Documentation/bsdjail.txt for more information) + * + * Copyright (C) 2004 International Business Machines <serue@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/security.h> +#include <linux/namei.h> +#include <linux/namespace.h> +#include <linux/proc_fs.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/pagemap.h> +#include <linux/ip.h> +#include <net/ipv6.h> +#include <linux/mount.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/seq_file.h> +#include <linux/un.h> +#include <linux/smp_lock.h> +#include <linux/kref.h> +#include <asm/uaccess.h> + +static int jail_debug; +module_param(jail_debug, int, 0); +MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n"); + +#define DBG 0 +#define WARN 1 +#define bsdj_debug(how, fmt, arg... ) \ + do { \ + if ( how || jail_debug ) \ + printk(KERN_NOTICE "%s: %s: " fmt, \ + MY_NAME, __FUNCTION__ , \ + ## arg ); \ + } while ( 0 ) + +#define MY_NAME "bsdjail" + +/* flag to keep track of how we were registered */ +static int secondary; + +/* + * The task structure holding jail information. + * Taskp->security points to one of these (or is null). + * There is exactly one jail_struct for each jail. If >1 process + * are in the same jail, they share the same jail_struct. + */ +struct jail_struct { + struct kref kref; + + /* these are set on writes to /proc/<pid>/attr/exec */ + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */ + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */ + + /* these are set when a jail becomes active */ + __u32 addr4; /* internal form of ip4_addr_name */ + struct in6_addr addr6; /* internal form of ip6_addr_name */ + + /* Resource limits. 0 = no limit */ + int max_nrtask; /* maximum number of tasks within this jail. */ + int cur_nrtask; /* current number of tasks within this jail. 
*/ + long maxtimeslice; /* max timeslice in ms for procs in this jail */ + long nice; /* nice level for processes in this jail */ + long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */ +/* values for the jail_flags field */ +#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */ +#define GOT_IPV4 2 +#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */ + char jail_flags; +}; + +/* + * disable_jail: A jail which was in use, but has no references + * left, is disabled - we free up the mountpoint and dentry, and + * give up our reference on the module. + * + * don't need to put namespace, it will be done automatically + * when the last process in jail is put. + * DO need to put the dentry and vfsmount + */ +static void +disable_jail(struct jail_struct *tsec) +{ + module_put(THIS_MODULE); +} + + +static void free_jail(struct jail_struct *tsec) +{ + if (!tsec) + return; + + kfree(tsec->ip4_addr_name); + kfree(tsec->ip6_addr_name); + kfree(tsec); +} + +/* release_jail: + * Callback for kref_put to use for releasing a jail when its + * last user exits. + */ +static void release_jail(struct kref *kref) +{ + struct jail_struct *tsec; + + tsec = container_of(kref, struct jail_struct, kref); + disable_jail(tsec); + free_jail(tsec); +} + +/* + * jail_task_free_security: this is the callback hooked into LSM. + * If there was no task->security field for bsdjail, do nothing. + * If there was, but it was never put into use, free the jail. + * If there was, and the jail is in use, then decrement the usage + * count, and disable and free the jail if the usage count hits 0. + */ +static void jail_task_free_security(struct task_struct *task) +{ + struct jail_struct *tsec = task->security; + + if (!tsec) + return; + + if (!(tsec->jail_flags & IN_USE)) { + /* + * someone did 'echo -n x > /proc/<pid>/attr/exec' but + * then forked before execing. Nuke the old info. 
+ */ + free_jail(tsec); + task->security = NULL; + return; + } + tsec->cur_nrtask--; + /* If this was the last process in the jail, delete the jail */ + kref_put(&tsec->kref, release_jail); +} + +static struct jail_struct * +alloc_task_security(struct task_struct *tsk) +{ + struct jail_struct *tsec; + + tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL); + if (tsec) { + memset(tsec, 0, sizeof(struct jail_struct)); + tsk->security = tsec; + } + return tsec; +} + +static inline int +in_jail(struct task_struct *t) +{ + struct jail_struct *tsec = t->security; + + if (tsec && (tsec->jail_flags & IN_USE)) + return 1; + + return 0; +} + +/* + * If a network address was passed into /proc/<pid>/attr/exec, + * then process in its jail will only be allowed to bind/listen + * to that address. + */ +static void +setup_netaddress(struct jail_struct *tsec) +{ + unsigned int a, b, c, d, i; + unsigned int x[8]; + + tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6); + tsec->addr4 = 0; + ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0); + + if (tsec->ip4_addr_name) { + if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u", + &a, &b, &c, &d) != 4) + return; + if (a>255 || b>255 || c>255 || d>255) + return; + tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d); + tsec->jail_flags |= GOT_IPV4; + bsdj_debug(DBG, "Network (ipv4) set up (%s)\n", + tsec->ip4_addr_name); + } + + if (tsec->ip6_addr_name) { + if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x", + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], + &x[7]) != 8) { + printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__, + tsec->ip6_addr_name); + return; + } + for (i=0; i<8; i++) { + if (x[i] > 65535) { + printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i); + return; + } + tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]); + } + tsec->jail_flags |= GOT_IPV6; + bsdj_debug(DBG, "Network (ipv6) set up (%s)\n", + tsec->ip6_addr_name); + } +} + +/* + * enable_jail: + * Called when a process is placed into a new jail to handle the + * actual 
creation of the jail. + * Creates namespace + * Stores the requested ip address + * Registers a unique pseudo-proc filesystem for this jail + */ +static int enable_jail(struct task_struct *tsk) +{ + struct jail_struct *tsec = tsk->security; + int retval = -EFAULT; + + if (!tsec) + goto out; + + /* set up networking */ + if (tsec->ip4_addr_name || tsec->ip6_addr_name) + setup_netaddress(tsec); + + tsec->cur_nrtask = 1; + if (tsec->nice) + set_user_nice(current, tsec->nice); + if (tsec->max_data) { + current->signal->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data; + current->signal->rlim[RLIMIT_DATA].rlim_max = tsec->max_data; + } + if (tsec->max_memlock) { + current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur = + tsec->max_memlock; + current->signal->rlim[RLIMIT_MEMLOCK].rlim_max = + tsec->max_memlock; + } + if (tsec->maxtimeslice) { + current->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; + current->signal->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; + } + /* success and end */ + kref_init(&tsec->kref); + tsec->jail_flags |= IN_USE; + + /* won't let ourselves be removed until this jail goes away */ + try_module_get(THIS_MODULE); + + return 0; + +out: + return retval; +} + +/* + * LSM /proc/<pid>/attr hooks. + * You may write into /proc/<pid>/attr/exec: + * lock (no value, just to specify a jail) + * ip 2.2.2.2 + etc... 
+ * These values will be used on the next exec() to set up your jail + * (assuming you're not already in a jail) + */ +static int +jail_setprocattr(struct task_struct *p, char *name, void *value, size_t rsize) +{ + struct jail_struct *tsec = current->security; + long val; + char *v = value; + int start, len; + size_t size = rsize; + + if (tsec && (tsec->jail_flags & IN_USE)) + return -EINVAL; /* let them guess why */ + + if (p != current || strcmp(name, "exec")) + return -EPERM; + + if (!tsec) { + tsec = alloc_task_security(current); + if (!tsec) + return -ENOMEM; + } + + if (v[size-1] == '\n') + size--; + + if (strncmp(value, "ip ", 3) == 0) { + kfree(tsec->ip4_addr_name); + start = 3; + len = size - start + 1; + tsec->ip4_addr_name = kmalloc(len, GFP_KERNEL); + if (!tsec->ip4_addr_name) + return -ENOMEM; + strlcpy(tsec->ip4_addr_name, value+start, len); + } else if (strncmp(value, "ip6 ", 4) == 0) { + kfree(tsec->ip6_addr_name); + start = 4; + len = size - start + 1; + tsec->ip6_addr_name = kmalloc(len, GFP_KERNEL); + if (!tsec->ip6_addr_name) + return -ENOMEM; + strlcpy(tsec->ip6_addr_name, value+start, len); + + /* the next two are equivalent */ + } else if (strncmp(value, "slice ", 6) == 0) { + val = simple_strtoul(value+6, NULL, 0); + tsec->maxtimeslice = val; + } else if (strncmp(value, "timeslice ", 10) == 0) { + val = simple_strtoul(value+10, NULL, 0); + tsec->maxtimeslice = val; + } else if (strncmp(value, "nrtask ", 7) == 0) { + val = (int) simple_strtol(value+7, NULL, 0); + if (val < 1) + return -EINVAL; + tsec->max_nrtask = val; + } else if (strncmp(value, "memlock ", 8) == 0) { + val = simple_strtoul(value+8, NULL, 0); + tsec->max_memlock = val; + } else if (strncmp(value, "data ", 5) == 0) { + val = simple_strtoul(value+5, NULL, 0); + tsec->max_data = val; + } else if (strncmp(value, "nice ", 5) == 0) { + val = simple_strtoul(value+5, NULL, 0); + tsec->nice = val; + } else if (strncmp(value, "lock", 4) != 0) + return -EINVAL; + + return rsize; +} + 
+static int print_jail_net_info(struct jail_struct *j, char *buf, int maxcnt) +{ + int len = 0; + + if (j->ip4_addr_name) + len += snprintf(buf, maxcnt, "%s\n", j->ip4_addr_name); + if (j->ip6_addr_name) + len += snprintf(buf, maxcnt-len, "%s\n", j->ip6_addr_name); + + return snprintf(buf, maxcnt, "No network information\n"); +} + +/* + * LSM /proc/<pid>/attr read hook. + * + * /proc/$$/attr/current output: + * If the reading process, say process 1001, is in a jail, then + * cat /proc/999/attr/current + * will print networking information. + * If the reading process, say process 1001, is not in a jail, then + * cat /proc/999/attr/current + * will return + * ip: (ip address of jail) + * if 999 is in a jail, or + * -EINVAL + * if 999 is not in a jail. + * + * /proc/$$/attr/exec output: + * A process in a jail gets -EINVAL for /proc/$$/attr/exec. + * A process not in a jail gets hints on starting a jail. + */ +static int +jail_getprocattr(struct task_struct *p, char *name, void *value, size_t size) +{ + struct jail_struct *tsec; + int err = 0; + + if (in_jail(current)) { + if (strcmp(name, "current") == 0) { + /* provide network info */ + err = print_jail_net_info(current->security, value, + size); + return err; + } + return -EINVAL; /* let them guess why */ + } + + if (strcmp(name, "exec") == 0) { + /* Print usage some help */ + err = snprintf(value, size, + "Valid keywords:\n" + "lock\n" + "ip <ip4-addr>\n" + "ip6 <ip6-addr>\n" + "nrtask <max number of tasks in this jail>\n" + "nice <nice level for processes in this jail>\n" + "slice <max timeslice per process in msecs>\n" + "data <max data size per process in bytes>\n" + "memlock <max lockable memory per process in bytes>\n"); + return err; + } + + if (strcmp(name, "current")) + return -EPERM; + + tsec = p->security; + if (!tsec || !(tsec->jail_flags & IN_USE)) { + err = snprintf(value, size, "Not Jailed\n"); + } else { + err = snprintf(value, size, + "IPv4: %s\nIPv6: %s\n" + "max_nrtask %d current nrtask %d 
max_timeslice %lu " + "nice %lu\n" + "max_memlock %lu max_data %lu\n", + tsec->ip4_addr_name ? tsec->ip4_addr_name : "(none)", + tsec->ip6_addr_name ? tsec->ip6_addr_name : "(none)", + tsec->max_nrtask, tsec->cur_nrtask, tsec->maxtimeslice, + tsec->nice, tsec->max_data, tsec->max_memlock); + } + + return err; +} + +/* + * Forbid a process in a jail from sending a signal to a process in another + * (or no) jail through file sigio. + * + * We consider the process which set the fowner to be the one sending the + * signal, rather than the one writing to the file. Therefore we store the + * jail of a process during jail_file_set_fowner, then check that against + * the jail of the process receiving the signal. + */ +static int +jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, + int fd, int reason) +{ + struct file *file; + + if (!in_jail(current)) + return 0; + + file = container_of(fown, struct file, f_owner); + if (file->f_security != tsk->security) + return -EPERM; + + return 0; +} + +static int +jail_file_set_fowner(struct file *file) +{ + struct jail_struct *tsec; + + tsec = current->security; + file->f_security = tsec; + if (tsec) + kref_get(&tsec->kref); + + return 0; +} + +static void free_ipc_security(struct kern_ipc_perm *ipc) +{ + struct jail_struct *tsec; + + tsec = ipc->security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + ipc->security = NULL; +} + +static void free_file_security(struct file *file) +{ + struct jail_struct *tsec; + + tsec = file->f_security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + file->f_security = NULL; +} + +static void free_inode_security(struct inode *inode) +{ + struct jail_struct *tsec; + + tsec = inode->i_security; + if (!tsec) + return; + kref_put(&tsec->kref, release_jail); + inode->i_security = NULL; +} + +/* + * LSM ptrace hook: + * process in jail may not ptrace process not in the same jail + */ +static int +jail_ptrace (struct task_struct *tracer, struct 
task_struct *tracee) +{ + struct jail_struct *tsec = tracer->security; + + if (tsec && (tsec->jail_flags & IN_USE)) { + if (tsec == tracee->security) + return 0; + return -EPERM; + } + return 0; +} + +/* + * process in jail may only use one (aliased) ip address. If they try to + * attach to 127.0.0.1, that is remapped to their own address. If some + * other address (and not their own), deny permission + */ +static int jail_socket_unix_bind(struct socket *sock, struct sockaddr *address, + int addrlen); + +#define loopbackaddr htonl((127 << 24) | 1) + +static inline int jail_inet4_bind(struct socket *sock, struct sockaddr *address, + int addrlen, struct jail_struct *tsec) +{ + struct sockaddr_in *inaddr; + __u32 sin_addr, jailaddr; + + if (!(tsec->jail_flags & GOT_IPV4)) + return -EPERM; + + inaddr = (struct sockaddr_in *) address; + sin_addr = inaddr->sin_addr.s_addr; + jailaddr = tsec->addr4; + + if (sin_addr == jailaddr) + return 0; + + if (sin_addr == loopbackaddr || !sin_addr) { + bsdj_debug(DBG, "Got a loopback or 0 address\n"); + sin_addr = jailaddr; + bsdj_debug(DBG, "Converted to: %u.%u.%u.%u\n", + NIPQUAD(sin_addr)); + return 0; + } + + return -EPERM; +} + +static inline int +jail_inet6_bind(struct socket *sock, struct sockaddr *address, int addrlen, + struct jail_struct *tsec) +{ + struct sockaddr_in6 *inaddr6; + struct in6_addr *sin6_addr, *jailaddr; + + if (!(tsec->jail_flags & GOT_IPV6)) + return -EPERM; + + inaddr6 = (struct sockaddr_in6 *) address; + sin6_addr = &inaddr6->sin6_addr; + jailaddr = &tsec->addr6; + + if (ipv6_addr_cmp(jailaddr, sin6_addr) == 0) + return 0; + + if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback) == 0) { + ipv6_addr_copy(sin6_addr, jailaddr); + return 0; + } + + printk(KERN_NOTICE "%s: DENYING\n", __FUNCTION__); + printk(KERN_NOTICE "%s: a %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " + "j %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + __FUNCTION__, + NIP6(*sin6_addr), + NIP6(*jailaddr)); + + return -EPERM; +} + +static int 
+jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+{
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE))
+		return 0;
+
+	if (sock->sk->sk_family == AF_UNIX)
+		return jail_socket_unix_bind(sock, address, addrlen);
+
+	if (!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+		/* If we want to be strict, we could just
+		 * deny net access when lacking a pseudo ip.
+		 * For now we just allow it. */
+		return 0;
+
+	switch(address->sa_family) {
+	case AF_INET:
+		return jail_inet4_bind(sock, address, addrlen, tsec);
+
+	case AF_INET6:
+		return jail_inet6_bind(sock, address, addrlen, tsec);
+
+	default:
+		return 0;
+	}
+}
+
+/*
+ * If locked in an ipv6 jail, don't let them use ipv4, and vice versa
+ *
+ * Returns 0 (allow) for kernel-internal sockets (kern != 0), for tasks
+ * that are not in an active jail, and for jails that have neither
+ * GOT_IPV4 nor GOT_IPV6 set.  Otherwise AF_INET requires GOT_IPV4 and
+ * AF_INET6 requires GOT_IPV6 (-EPERM when the flag is missing); every
+ * other address family is allowed.
+ */
+static int
+jail_socket_create(int family, int type, int protocol, int kern)
+{
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+			!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+		return 0;
+
+	switch(family) {
+	case AF_INET:
+		if (tsec->jail_flags & GOT_IPV4)
+			return 0;
+		return -EPERM;
+	case AF_INET6:
+		if (tsec->jail_flags & GOT_IPV6)
+			return 0;
+		return -EPERM;
+	default:
+		return 0;
+	};
+
+	return 0;	/* not reached: every switch arm returns */
+}
+
+/*
+ * Give a newly created socket the jail's address as its source address:
+ * tsec->addr4 for AF_INET, tsec->addr6 for AF_INET6.  Nothing is done
+ * for kernel-internal sockets, for tasks outside an active jail, for
+ * jails without an assigned address, or for other address families.
+ */
+static void
+jail_socket_post_create(struct socket *sock, int family, int type,
+		int protocol, int kern)
+{
+	struct inet_opt *inet;
+	struct ipv6_pinfo *inet6;
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+			!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+		return;
+
+	switch(family) {
+	case AF_INET:
+		inet = inet_sk(sock->sk);
+		inet->saddr = tsec->addr4;
+		break;
+	case AF_INET6:
+		inet6 = inet6_sk(sock->sk);
+		ipv6_addr_copy(&inet6->saddr, &tsec->addr6);
+		break;
+	default:
+		break;
+	};
+
+	return;
+}
+
+/*
+ * A task in a jail with an assigned address may only listen on an
+ * AF_INET / AF_INET6 socket whose source address equals the jail's
+ * address; otherwise -EPERM is returned.  Other address families, and
+ * tasks that are not in an active jail with an address, are not
+ * restricted here.
+ */
+static int
+jail_socket_listen(struct socket *sock, int backlog)
+{
+	struct inet_opt *inet;
+	struct ipv6_pinfo *inet6;
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE) ||
+			!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+		return 0;
+
+	switch (sock->sk->sk_family) {
+	case AF_INET:
+		inet = inet_sk(sock->sk);
+		if (inet->saddr == tsec->addr4)
+			return 0;
+		return -EPERM;
+
+	case AF_INET6:
+		inet6 = inet6_sk(sock->sk);
+		if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6) == 0)
+			return 0;
+		return -EPERM;
+
+	default:
+		return 0;
+
+	}
+}
+
+/*
+ * Drop the jail reference held through sk->sk_security (the one taken
+ * in jail_socket_unix_bind) and clear the pointer.  release_jail is the
+ * kref release callback.
+ */
+static void free_sock_security(struct sock *sk)
+{
+	struct jail_struct *tsec;
+
+	tsec = sk->sk_security;
+	if (!tsec)
+		return;
+	kref_put(&tsec->kref, release_jail);
+	sk->sk_security = NULL;
+}
+
+/*
+ * The next three (socket) hooks prevent a process in a jail from sending
+ * data to an abstract unix domain socket which was bound outside the jail.
+ *
+ * jail_socket_unix_bind only acts on AF_UNIX sockets bound to an
+ * abstract address (sun_path[0] == 0): it records the binding task's
+ * jail (possibly NULL) in sk->sk_security and takes a reference on it.
+ * It always returns 0.
+ */
+static int
+jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+		int addrlen)
+{
+	struct sockaddr_un *sunaddr;
+	struct jail_struct *tsec;
+
+	if (sock->sk->sk_family != AF_UNIX)
+		return 0;
+
+	sunaddr = (struct sockaddr_un *) address;
+	if (sunaddr->sun_path[0] != 0)
+		return 0;
+
+	tsec = current->security;
+	sock->sk->sk_security = tsec;
+	if (tsec)
+		kref_get(&tsec->kref);
+	return 0;
+}
+
+/*
+ * Note - we deny sends both from unjailed to jailed, and from jailed
+ * to unjailed. As well as, of course between different jails.
+ */ +static int +jail_socket_unix_may_send(struct socket *sock, struct socket *other) +{ + struct jail_struct *tsec, *ssec; + + tsec = current->security; /* jail of sending process */ + ssec = other->sk->sk_security; /* jail of receiver */ + + if (tsec != ssec) + return -EPERM; + + return 0; +} + +static int +jail_socket_unix_stream_connect(struct socket *sock, + struct socket *other, struct sock *newsk) +{ + struct jail_struct *tsec, *ssec; + + tsec = current->security; /* jail of sending process */ + ssec = other->sk->sk_security; /* jail of receiver */ + + if (tsec != ssec) + return -EPERM; + + return 0; +} + +static int +jail_mount(char * dev_name, struct nameidata *nd, char * type, + unsigned long flags, void * data) +{ + if (in_jail(current)) + return -EPERM; + + return 0; +} + +static int +jail_umount(struct vfsmount *mnt, int flags) +{ + if (in_jail(current)) + return -EPERM; + + return 0; +} + +/* + * process in jail may not: + * use nice + * change network config + * load/unload modules + */ +static int +jail_capable (struct task_struct *tsk, int cap) +{ + if (in_jail(tsk)) { + if (cap == CAP_SYS_NICE) + return -EPERM; + if (cap == CAP_NET_ADMIN) + return -EPERM; + if (cap == CAP_SYS_MODULE) + return -EPERM; + if (cap == CAP_SYS_RAWIO) + return -EPERM; + } + + if (cap_is_fs_cap (cap) ? tsk->fsuid == 0 : tsk->euid == 0) + return 0; + return -EPERM; +} + +/* + * jail_security_task_create: + * + * If the current process is ina a jail, and that jail is about to exceed a + * maximum number of processes, then refuse to fork. If the maximum number + * of jails is listed as 0, then there is no limit for this jail, and we allow + * all forks. 
+ */ +static inline int +jail_security_task_create (unsigned long clone_flags) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask) + return -EPERM; + return 0; +} + +/* + * The child of a process in a jail belongs in the same jail + */ +static int +jail_task_alloc_security(struct task_struct *tsk) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + tsk->security = tsec; + kref_get(&tsec->kref); + tsec->cur_nrtask++; + if (tsec->maxtimeslice) { + tsk->signal->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice; + tsk->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice; + } + if (tsec->max_data) { + tsk->signal->rlim[RLIMIT_CPU].rlim_max = tsec->max_data; + tsk->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->max_data; + } + if (tsec->max_memlock) { + tsk->signal->rlim[RLIMIT_CPU].rlim_max = tsec->max_memlock; + tsk->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->max_memlock; + } + if (tsec->nice) + set_user_nice(current, tsec->nice); + + return 0; +} + +static int +jail_bprm_alloc_security(struct linux_binprm *bprm) +{ + struct jail_struct *tsec = current->security; + int ret; + + if (!tsec) + return 0; + + if (tsec->jail_flags & IN_USE) + return 0; + + ret = enable_jail(current); + if (ret) { + /* if we failed, nix out the ip requests */ + jail_task_free_security(current); + return ret; + } + return 0; +} + +/* + * Process in jail may not create devices + * Thanks to Brad Spender for pointing out fifos should be allowed. + */ +/* TODO: We may want to allow /dev/log, at least... 
*/ +static int +jail_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + if (!in_jail(current)) + return 0; + + if (S_ISFIFO(mode)) + return 0; + + return -EPERM; +} + +/* yanked from fs/proc/base.c */ +static unsigned name_to_int(struct dentry *dentry) +{ + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + while (len-- > 0) { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } + return n; +out: + return ~0U; +} + +/* + * jail_proc_inode_permission: + * called only when current is in a jail, and is trying to reach + * /proc/<pid>. We check whether <pid> is in the same jail as + * current. If not, permission is denied. + * + * NOTE: On the one hand, the task_to_inode(inode)->i_security + * approach seems cleaner, but on the other, this prevents us + * from unloading bsdjail for awhile... + */ +static int +jail_proc_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct jail_struct *tsec = current->security; + struct dentry *dentry = nd->dentry; + unsigned pid; + + pid = name_to_int(dentry); + if (pid == ~0U) { + return 0; + } + + if (dentry->d_parent != dentry->d_sb->s_root) + return 0; + if (inode->i_security != tsec) + return -ENOENT; + + return 0; +} + +/* + * A process in a jail may not see that /proc/<pid> exists for + * process not in its jail + * Unfortunately we can't pretend that pid for the starting process + * is 1, as vserver does. + */ +static int jail_task_lookup(struct task_struct *p) +{ + struct jail_struct *tsec = current->security; + + if (!tsec) + return 0; + if (tsec == p->security) + return 0; + return -EPERM; +} +/* + * security_task_to_inode: + * Set inode->security = task's jail. 
+ */ +static void jail_task_to_inode(struct task_struct *p, struct inode *inode) +{ + struct jail_struct *tsec = p->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return; + if (inode->i_security) + return; + kref_get(&tsec->kref); + inode->i_security = tsec; +} + +/* + * inode_permission: + * If we are trying to look into certain /proc files from in a jail, we + * may deny permission. + */ +static int +jail_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + struct jail_struct *tsec = current->security; + + if (!tsec || !(tsec->jail_flags & IN_USE)) + return 0; + + if (!nd) + return 0; + + if (nd->dentry && + strcmp(nd->dentry->d_sb->s_type->name, "proc") == 0) { + return jail_proc_inode_permission(inode, mask, nd); + + } + + return 0; +} + +/* + * A function which returns -ENOENT if dentry is the dentry for + * a /proc/<pid> directory. It returns 0 otherwise. + */ +static inline int +generic_procpid_check(struct dentry *dentry) +{ + struct jail_struct *jail = current->security; + unsigned pid = name_to_int(dentry); + + if (!jail || !(jail->jail_flags & IN_USE)) + return 0; + if (pid == ~0U) + return 0; + if (strcmp(dentry->d_sb->s_type->name, "proc") != 0) + return 0; + if (dentry->d_parent != dentry->d_sb->s_root) + return 0; + if (dentry->d_inode->i_security != jail) + return -ENOENT; + return 0; +} + +/* + * We want getattr to fail on /proc/<pid> to prevent leakage through, for + * instance, ls -d. + */ +static int +jail_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +{ + return generic_procpid_check(dentry); +} + +/* This probably is not necessary - /proc does not support xattrs? 
*/
+static int
+jail_inode_getxattr(struct dentry *dentry, char *name)
+{
+	return generic_procpid_check(dentry);
+}
+
+/*
+ * process in jail may not send signal to process not in the same jail
+ * (SIGCHLD is the one exception and is always let through)
+ */
+static int
+jail_task_kill(struct task_struct *p, struct siginfo *info, int sig)
+{
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE))
+		return 0;
+
+	if (tsec == p->security)
+		return 0;
+
+	if (sig==SIGCHLD)
+		return 0;
+
+	return -EPERM;
+}
+
+/*
+ * LSM hooks to limit jailed process' abilities to muck with resource
+ * limits
+ *
+ * Both hooks refuse every request made by a task in a jail, whatever
+ * the resource, target task or policy.
+ */
+static int jail_task_setrlimit (unsigned int resource, struct rlimit *new_rlim)
+{
+	if (!in_jail(current))
+		return 0;
+
+	return -EPERM;
+}
+
+static int jail_task_setscheduler (struct task_struct *p, int policy,
+		struct sched_param *lp)
+{
+	if (!in_jail(current))
+		return 0;
+
+	return -EPERM;
+}
+
+/*
+ * LSM hooks to limit IPC access.
+ *
+ * basic_ipc_security_check is the common test: a target task in an
+ * active jail may only use IPC objects tagged with that same jail
+ * (-EPERM otherwise).  A target outside any active jail is never
+ * restricted by this check, including on objects owned by a jail.
+ */
+
+static inline int
+basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target)
+{
+	struct jail_struct *tsec = target->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE))
+		return 0;
+
+	if (p->security != tsec)
+		return -EPERM;
+
+	return 0;
+}
+
+static int
+jail_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
+{
+	return basic_ipc_security_check(ipcp, current);
+}
+
+/* Tag a new shm segment with the creator's jail, holding a reference. */
+static int
+jail_shm_alloc_security (struct shmid_kernel *shp)
+{
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE))
+		return 0;
+	shp->shm_perm.security = tsec;
+	kref_get(&tsec->kref);
+	return 0;
+}
+
+static void
+jail_shm_free_security (struct shmid_kernel *shp)
+{
+	free_ipc_security(&shp->shm_perm);
+}
+
+static int
+jail_shm_associate (struct shmid_kernel *shp, int shmflg)
+{
+	return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmctl(struct shmid_kernel *shp, int cmd)
+{
+	/* the two *_INFO commands are let through without a jail check */
+	if (cmd == IPC_INFO || cmd == SHM_INFO)
+		return 0;
+
+	return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmat(struct shmid_kernel *shp, char *shmaddr, int shmflg)
+{
+	return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+/* Tag a new message queue with the creator's jail, holding a reference. */
+static int
+jail_msg_queue_alloc(struct msg_queue *msq)
+{
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE))
+		return 0;
+	msq->q_perm.security = tsec;
+	kref_get(&tsec->kref);
+	return 0;
+}
+
+static void
+jail_msg_queue_free(struct msg_queue *msq)
+{
+	free_ipc_security(&msq->q_perm);
+}
+
+static int jail_msg_queue_associate(struct msg_queue *msq, int flag)
+{
+	return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgctl(struct msg_queue *msq, int cmd)
+{
+	/* the two *_INFO commands are let through without a jail check */
+	if (cmd == IPC_INFO || cmd == MSG_INFO)
+		return 0;
+
+	return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg)
+{
+	return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+/* Unlike the other msg hooks, the check is made against target, not current. */
+static int
+jail_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
+		struct task_struct *target, long type, int mode)
+
+{
+	return basic_ipc_security_check(&msq->q_perm, target);
+}
+
+/* Tag a new semaphore array with the creator's jail, holding a reference. */
+static int
+jail_sem_alloc_security(struct sem_array *sma)
+{
+	struct jail_struct *tsec = current->security;
+
+	if (!tsec || !(tsec->jail_flags & IN_USE))
+		return 0;
+	sma->sem_perm.security = tsec;
+	kref_get(&tsec->kref);
+	return 0;
+}
+
+static void
+jail_sem_free_security(struct sem_array *sma)
+{
+	free_ipc_security(&sma->sem_perm);
+}
+
+static int
+jail_sem_associate(struct sem_array *sma, int semflg)
+{
+	return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semctl(struct sem_array *sma, int cmd)
+{
+	/* the two *_INFO commands are let through without a jail check */
+	if (cmd == IPC_INFO || cmd == SEM_INFO)
+		return 0;
+	return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops,
+		int alter)
+{
+	return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+/*
+ * A task in a jail is refused any sysctl operation that has bit 002
+ * set in op; other operations are allowed.
+ * NOTE(review): 002 is presumably the write-permission bit - confirm.
+ */
+static int
+jail_sysctl(struct ctl_table *table, int op)
+{
+	if (!in_jail(current))
+		return 0;
+
+	if (op & 002)
+		return -EPERM;
+
+	return 0;
+}
+
+/* Hook table handed to the LSM framework by bsdjail_init(). */
+static struct security_operations bsdjail_security_ops = {
+	.ptrace = jail_ptrace,
+	.capable = jail_capable,
+
+	.task_kill = jail_task_kill,
+	.task_alloc_security = jail_task_alloc_security,
+	.task_free_security = jail_task_free_security,
+	.bprm_alloc_security = jail_bprm_alloc_security,
+	.task_create = jail_security_task_create,
+	.task_to_inode = jail_task_to_inode,
+	.task_lookup = jail_task_lookup,
+
+	.task_setrlimit = jail_task_setrlimit,
+	.task_setscheduler = jail_task_setscheduler,
+
+	.setprocattr = jail_setprocattr,
+	.getprocattr = jail_getprocattr,
+
+	.file_set_fowner = jail_file_set_fowner,
+	.file_send_sigiotask = jail_file_send_sigiotask,
+	.file_free_security = free_file_security,
+
+	.socket_bind = jail_socket_bind,
+	.socket_listen = jail_socket_listen,
+	.socket_create = jail_socket_create,
+	.socket_post_create = jail_socket_post_create,
+	.unix_stream_connect = jail_socket_unix_stream_connect,
+	.unix_may_send = jail_socket_unix_may_send,
+	.sk_free_security = free_sock_security,
+
+	.inode_mknod = jail_inode_mknod,
+	.inode_permission = jail_inode_permission,
+	.inode_free_security = free_inode_security,
+	.inode_getattr = jail_inode_getattr,
+	.inode_getxattr = jail_inode_getxattr,
+	.sb_mount = jail_mount,
+	.sb_umount = jail_umount,
+
+	.ipc_permission = jail_ipc_permission,
+	.shm_alloc_security = jail_shm_alloc_security,
+	.shm_free_security = jail_shm_free_security,
+	.shm_associate = jail_shm_associate,
+	.shm_shmctl = jail_shm_shmctl,
+	.shm_shmat = jail_shm_shmat,
+
+	.msg_queue_alloc_security = jail_msg_queue_alloc,
+	.msg_queue_free_security = jail_msg_queue_free,
+	.msg_queue_associate = jail_msg_queue_associate,
+	.msg_queue_msgctl = jail_msg_queue_msgctl,
+	.msg_queue_msgsnd = jail_msg_queue_msgsnd,
+	.msg_queue_msgrcv = jail_msg_queue_msgrcv,
+
+	.sem_alloc_security = jail_sem_alloc_security,
+	.sem_free_security = jail_sem_free_security,
+	.sem_associate = jail_sem_associate,
+	.sem_semctl = jail_sem_semctl,
+	.sem_semop = jail_sem_semop,
+
+	.sysctl = jail_sysctl,
+};
+
+/*
+ * Register the hook table.  register_security() is tried first; if it
+ * fails, the module registers itself through mod_reg_security()
+ * instead and remembers that in 'secondary' so that bsdjail_exit() can
+ * undo the right registration.  Returns -EINVAL when both attempts
+ * fail, 0 otherwise.
+ */
+static int __init bsdjail_init (void)
+{
+	int rc = 0;
+
+	if (register_security (&bsdjail_security_ops)) {
+		printk (KERN_INFO
+			"Failure registering BSD Jail module with the kernel\n");
+
+		rc = mod_reg_security(MY_NAME, &bsdjail_security_ops);
+		if (rc < 0) {
+			printk (KERN_INFO "Failure registering BSD Jail "
+				" module with primary security module.\n");
+			return -EINVAL;
+		}
+		secondary = 1;
+	}
+	printk (KERN_INFO "BSD Jail module initialized.\n");
+
+	return 0;
+}
+
+/* Undo whichever registration bsdjail_init() ended up making. */
+static void __exit bsdjail_exit (void)
+{
+	if (secondary) {
+		if (mod_unreg_security (MY_NAME, &bsdjail_security_ops))
+			printk (KERN_INFO "Failure unregistering BSD Jail "
+				" module with primary module.\n");
+	} else {
+		if (unregister_security (&bsdjail_security_ops)) {
+			printk (KERN_INFO "Failure unregistering BSD Jail "
+				"module with the kernel\n");
+		}
+	}
+
+	printk (KERN_INFO "BSD Jail module removed\n");
+}
+
+security_initcall (bsdjail_init);
+module_exit (bsdjail_exit);
+
+MODULE_DESCRIPTION("BSD Jail LSM.");
+MODULE_LICENSE("GPL");
diff -Nrup linux-2.6.9-rc4-mm1/security/dummy.c linux-2.6.9-rc4-mm1-jail/security/dummy.c
--- linux-2.6.9-rc4-mm1/security/dummy.c 2004-10-11 17:02:22.265603736 -0500
+++ linux-2.6.9-rc4-mm1-jail/security/dummy.c 2004-10-11 10:00:36.000000000 -0500
@@ -623,6 +623,11 @@ static void dummy_task_reparent_to_init
 	return;
 }
 
+static int dummy_task_lookup(struct task_struct *p)
+{
+	return 0;
+}
+
 static void dummy_task_to_inode(struct task_struct *p, struct inode *inode)
 {
 }
@@ -986,6 +991,7 @@ void security_fixup_ops (struct security
 	set_to_dummy_if_null(ops, task_kill);
 	set_to_dummy_if_null(ops, task_prctl);
 	set_to_dummy_if_null(ops, task_reparent_to_init);
+	set_to_dummy_if_null(ops, task_lookup);
 	set_to_dummy_if_null(ops, task_to_inode);
 	set_to_dummy_if_null(ops, ipc_permission);
 	set_to_dummy_if_null(ops, msg_msg_alloc_security);
diff -Nrup linux-2.6.9-rc4-mm1/security/Kconfig linux-2.6.9-rc4-mm1-jail/security/Kconfig
--- linux-2.6.9-rc4-mm1/security/Kconfig 2004-10-11 17:02:22.265603736 -0500
+++ linux-2.6.9-rc4-mm1-jail/security/Kconfig 2004-10-11 10:00:51.000000000 -0500
@@ -86,5 +86,16 @@ config SECURITY_SECLVL
 
 source security/selinux/Kconfig
 
+config SECURITY_BSDJAIL
+	tristate "BSD Jail LSM"
+	depends on SECURITY
+	select SECURITY_NETWORK
+	help
+	  Provides BSD Jail compartmentalization functionality.
+	  See Documentation/bsdjail.txt for more information and
+	  usage instructions.
+
+	  If you are unsure how to answer this question, answer N.
+
 endmenu
 
diff -Nrup linux-2.6.9-rc4-mm1/security/Makefile linux-2.6.9-rc4-mm1-jail/security/Makefile
--- linux-2.6.9-rc4-mm1/security/Makefile 2004-10-11 17:02:22.287600392 -0500
+++ linux-2.6.9-rc4-mm1-jail/security/Makefile 2004-10-11 10:00:51.000000000 -0500
@@ -17,3 +17,4 @@ obj-$(CONFIG_SECURITY_SELINUX) += selin
 obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o
 obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o
 obj-$(CONFIG_SECURITY_SECLVL) += seclvl.o
+obj-$(CONFIG_SECURITY_BSDJAIL) += bsdjail.o
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 13:11 ` Serge E. Hallyn @ 2004-10-12 14:15 ` Christoph Hellwig 2004-10-12 22:35 ` Ulrich Drepper 1 sibling, 0 replies; 37+ messages in thread From: Christoph Hellwig @ 2004-10-12 14:15 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Alan Cox, linux-kernel On Tue, Oct 12, 2004 at 08:11:24AM -0500, Serge E. Hallyn wrote: > > That however requires a co-operator outside the chroot so doesn't seem > > to be a problem. I like the CLONE approach, its a lot cleaner. > > The attached patch (against -rc4-mm1) moves the responsibility for > filesystem containment entirely to userspace. The Documentation/bsdjail.txt > file reflects the new usage. It also incorporates Christoph's cleanups. > > I still need to see about generalizing the networking confinement. I > certainly like the concept (as I understand it at least) behind the new > vserver networking, but am not sure it can be done without patching. Please remember that linux kernel work is not about "not needing patching". If a concept makes sense changing code is a good thing. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 13:11 ` Serge E. Hallyn 2004-10-12 14:15 ` Christoph Hellwig @ 2004-10-12 22:35 ` Ulrich Drepper 2004-10-13 0:58 ` Serge E. Hallyn ` (2 more replies) 1 sibling, 3 replies; 37+ messages in thread From: Ulrich Drepper @ 2004-10-12 22:35 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Alan Cox, linux-kernel -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Serge E. Hallyn wrote: > +If a private IP was specified for the jail, then > + cat /proc/$$/attr/current How is this going to interact with SELinux? Currently SELinux uses /proc/*/attr/current to report the current security context of the process. libselinux expects the file to contain one string (not even a newline) which is the textual representation of the context. Now with your changes you want to change this. libselinux as-is would break miserably. I don't know the history of the file and who is hijacking the file. Fact is that the file content is currently unstructured and libselinux couldn't possibly determine what part is of interest to itself. So, either you use another file, SELinux uses another file, or the file gets tagged lines like selinux: user_u:user_r:user_t I guess you couldn't even start the userlevel code in FC3 in such a jail in the moment since the libselinux startup tests would fail. - -- ➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖ -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.2.6 (GNU/Linux) iD8DBQFBbFwm2ijCOnn/RHQRAvimAJ9W3bIil5Yi1Ex/CX1FpUjzxyheIQCeNKRu RHv5SGG0iQSEsmbIWfHmwAA= =HZM3 -----END PGP SIGNATURE----- ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 22:35 ` Ulrich Drepper @ 2004-10-13 0:58 ` Serge E. Hallyn 2004-10-13 1:09 ` Ulrich Drepper 2004-10-13 1:11 ` Chris Wright 2004-10-13 14:25 ` Stephen Smalley 2 siblings, 1 reply; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-13 0:58 UTC (permalink / raw) To: Ulrich Drepper; +Cc: Alan Cox, linux-kernel > > +If a private IP was specified for the jail, then > > + cat /proc/$$/attr/current > > How is this going to interact with SELinux? Currently SELinux uses The first problem is that to use jail with selinux you'll need to use a stacking infrastructure (which is still being developed) anyway, in order to get around the multiplexing of task->security, file->f_security, and sk->sk_security. But you're right, this is a problem I've had to address with the stacker: > /proc/*/attr/current to report the current security context of the > process. libselinux expects the file to contain one string (not even a ... > selinux: user_u:user_r:user_t This is exactly what my current stacker does, to the byte :-) -serge ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-13 0:58 ` Serge E. Hallyn @ 2004-10-13 1:09 ` Ulrich Drepper 2004-10-13 1:22 ` Serge E. Hallyn 0 siblings, 1 reply; 37+ messages in thread From: Ulrich Drepper @ 2004-10-13 1:09 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: linux-kernel -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Serge E. Hallyn wrote: >> selinux: user_u:user_r:user_t > > > This is exactly what my current stacker does, to the byte :-) This is all nice and good, but you have to bring this up with the SELinux people _now_ since, as I said before, the current SELinux-enabled userland code might not even start with this change of the format even if SELinux is not enabled. If it is decided that /proc/*/attr/current does not belong to SELinux alone, then the guys should be told about it now so that all the relevant code (libselinux, kernel without your "stacker" stuff, ...) can be changed before the current use spreads too far. - -- ➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖ -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.2.6 (GNU/Linux) iD8DBQFBbIBI2ijCOnn/RHQRAqXMAJ96lsdsTsZf3jI+8UXLAziK1iKC2QCfZyZT zewSIJsYVpIFK2lG0lFcrgY= =SGiv -----END PGP SIGNATURE----- ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-13 1:09 ` Ulrich Drepper @ 2004-10-13 1:22 ` Serge E. Hallyn 2004-10-13 15:26 ` Stephen Smalley 0 siblings, 1 reply; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-13 1:22 UTC (permalink / raw) To: Ulrich Drepper; +Cc: linux-kernel On Tue, Oct 12, 2004 at 06:09:28PM -0700, Ulrich Drepper wrote: > -----BEGIN PGP SIGNED MESSAGE----- > Hash: SHA1 > > Serge E. Hallyn wrote: > > >> selinux: user_u:user_r:user_t > > > > > > This is exactly what my current stacker does, to the byte :-) > > This is all nice and good, but you have to bring this up with the > SELinux people _now_ since, as I said before, the current > SELinux-enabled userland code might not even start with this change of > the format even if SELinux is not enabled. If it is decided that > /proc/*/attr/current does not belong to SELinux alone, then the guys > should be told about it now so that all the relevant code (libselinux, > kernel without your "stacker" stuff, ...) can be changed before the > current use spreads too far. Then they would have to check for an optional "selinux: " at the front of each security_setprocattr entry read in the kernel, in order to handle an lsm infrastructure change which might never be accepted into the kernel anyway. I suppose it's pretty trivial anyway, but then why would they bother... -serge ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-13 1:22 ` Serge E. Hallyn @ 2004-10-13 15:26 ` Stephen Smalley 0 siblings, 0 replies; 37+ messages in thread From: Stephen Smalley @ 2004-10-13 15:26 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Ulrich Drepper, lkml On Tue, 2004-10-12 at 21:22, Serge E. Hallyn wrote: > Then they would have to check for an optional "selinux: " at the front > of each security_setprocattr entry read in the kernel, in order to handle > an lsm infrastructure change which might never be accepted into the kernel > anyway. I suppose it's pretty trivial anyway, but then why would they > bother... The changes to libselinux and procps and any scripts that directly access /proc/pid/attr to deal with multi-entry values would be more important; changing the kernel to prepend "selinux: " on getprocattr and to strip it on setprocattr would indeed be trivial (but one wonders whether we can be confident that userspace will never try to pass one of these multi-entry values read from /proc/pid/attr to another interface that expects a single context, e.g. selinuxfs or setxattr("security.selinux")). -- Stephen Smalley <sds@epoch.ncsc.mil> National Security Agency ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 22:35 ` Ulrich Drepper 2004-10-13 0:58 ` Serge E. Hallyn @ 2004-10-13 1:11 ` Chris Wright 2004-10-13 14:25 ` Stephen Smalley 2 siblings, 0 replies; 37+ messages in thread From: Chris Wright @ 2004-10-13 1:11 UTC (permalink / raw) To: Ulrich Drepper; +Cc: Serge E. Hallyn, Alan Cox, linux-kernel * Ulrich Drepper (drepper@redhat.com) wrote: > Serge E. Hallyn wrote: > > +If a private IP was specified for the jail, then > > + cat /proc/$$/attr/current > > How is this going to interact with SELinux? Poorly. It's not expected to work with SELinux. There's no good stacking yet. > Currently SELinux uses > /proc/*/attr/current to report the current security context of the > process. libselinux expects the file to contain one string (not even a > newline) which is the textual representation of the context. Now with > your changes you want to change this. libselinux as-is would break > miserably. Maybe libselinux should not look around in there unless SELinux is enabled in kernel. > I don't know the history of the file and who is hijacking the file. > Fact is that the file content is currently unstructured and libselinux > couldn't possibly determine what part is of interest to itself. > > So, either you use another file, SELinux uses another file, or the file > gets tagged lines like > > selinux: user_u:user_r:user_t Yeah, that's workable. Other options would probably look like putting stuff in module specific locations, which is more painful. > I guess you couldn't even start the userlevel code in FC3 in such a jail > in the moment since the libselinux startup tests would fail. Userspace won't start in a jail, and once it's up, jailing works (on rawhide for example). Admittedly, the label looks a bit funny. # in jail $ ps -eM LABEL PID TTY TIME CMD No 16933 ? 00:00:00 bash No 17010 ? 
00:00:00 ps # unconfined $ ps -eM <snip> Not 5714 pts/5 00:00:00 ssh Not 12027 pts/6 00:00:00 bash Not 12046 pts/6 00:00:00 vim Not 16823 pts/4 00:00:00 vim Not 16911 pts/8 00:00:00 bash Root: 16933 pts/7 00:00:00 bash Not 17016 pts/8 00:00:00 ps thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 2/3] lsm: add bsdjail module 2004-10-12 22:35 ` Ulrich Drepper 2004-10-13 0:58 ` Serge E. Hallyn 2004-10-13 1:11 ` Chris Wright @ 2004-10-13 14:25 ` Stephen Smalley 2 siblings, 0 replies; 37+ messages in thread From: Stephen Smalley @ 2004-10-13 14:25 UTC (permalink / raw) To: Ulrich Drepper Cc: Serge E. Hallyn, Alan Cox, lkml, James Morris, Chris Wright, Alexander Viro On Tue, 2004-10-12 at 18:35, Ulrich Drepper wrote: > Serge E. Hallyn wrote: > > > +If a private IP was specified for the jail, then > > + cat /proc/$$/attr/current > > How is this going to interact with SELinux? Currently SELinux uses > /proc/*/attr/current to report the current security context of the > process. libselinux expects the file to contain one string (not even a > newline) which is the textual representation of the context. Now with > your changes you want to change this. libselinux as-is would break > miserably. libselinux is_selinux_enabled() checks /proc/filesystems for selinuxfs first, and SELinux userland checks is_selinux_enabled(). As security modules cannot presently be stacked if they both use the security fields, this is sufficient. There were patches floated on rhselinux-list circa Oct 2003 to add a "selinux/" prefix to the /proc/pid/attr values to explicitly identify the security module, ala the "security.selinux" attribute name for the file extended attribute, but the consensus at that time was that it was sufficient to test for the presence of SELinux via /proc/filesystems. > I don't know the history of the file and who is hijacking the file. > Fact is that the file content is currently unstructured and libselinux > couldn't possibly determine what part is of interest to itself. The /proc/pid/attr interface was submitted by us based on Al Viro's recommendations when the SELinux API was overhauled. 
We attempted to keep it sufficiently general that other security modules could also use it, but not at the same time, as shared use of LSM security fields wasn't supported anyway. We had earlier proposed [gs]etprocattr calls ala [gs]etxattr calls with distinguished attribute names, but were directed to use /proc instead. > So, either you use another file, SELinux uses another file, or the file > gets tagged lines like > > selinux: user_u:user_r:user_t One value per file seems preferred, but /proc/pid doesn't lend itself to dynamic extension by modules. [gs]etprocattr calls ala [gs]etxattr calls would be simpler if we want to export multiple attribute names, but that was also suggested earlier and rejected. Side bar: Any change here also affects upstream procps, which presently directly takes the /proc/pid/attr/current value and displays it as a single field. -- Stephen Smalley <sds@epoch.ncsc.mil> National Security Agency ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch 3/3] lsm: add bsdjail documentation 2004-10-06 20:21 (patch 1/3) lsm: add control over /proc/<pid> visibility Serge Hallyn 2004-10-06 20:24 ` [patch 1/3] lsm: add bsdjail module Serge Hallyn @ 2004-10-06 20:25 ` Serge Hallyn 2004-10-07 22:17 ` Matthias Urlichs 1 sibling, 1 reply; 37+ messages in thread From: Serge Hallyn @ 2004-10-06 20:25 UTC (permalink / raw) To: akpm; +Cc: Chris Wright, linux-kernel, serue Attached is a patch carrying the documentation for the bsdjail LSM. Please apply. Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> diff -Nrup linux-2.6.9-rc3-bk6/Documentation/bsdjail.txt linux-2.6.9-rc3-bk6-jail/Documentation/bsdjail.txt --- linux-2.6.9-rc3-bk6/Documentation/bsdjail.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-rc3-bk6-jail/Documentation/bsdjail.txt 2004-10-06 10:51:46.000000000 -0500 @@ -0,0 +1,99 @@ +BSD Jail Linux Security Module +Serge E. Hallyn <serue@us.ibm.com> + +Description: + +Implements a subset of the BSD Jail functionality as a Linux LSM. +What is currently implemented: + + If a process is in a jail, it: + + 1. Is locked under a chroot (as are all children) which is not + vulnerable to the well-known chdir(..)(etc)chroot(.) escape. + 2. Cannot mount or umount + 3. Cannot send signals outside of jail + 4. Cannot ptrace processes outside of jail + 5. Cannot create devices + 6. Cannot renice processes + 7. Cannot load or unload modules + 8. Cannot change network settings + 9. May be assigned a specific ip address which will be used + for all its socket binds. + 10. Cannot see contents of /proc/<pid> entries of processes not in the + same jail. (We hide their existence for convenience's sake, but + their existence can still be detected using, for instance, statfs) + 11. Has no CAP_SYS_RAWIO capability (no ioperm/iopl) + 12. May not share IPC resources with processes outside its own jail. + 13. May find its valid network address (if restricted) under + /proc/$$/attr/current. 
+ +WARNINGS: +The security of this module is very much dependent on the security +of the rest of the system. You must carefully think through your +use of the system. + +Some examples: + 1. If you leave /dev/hda1 in the jail, processes in the + jail can access that filesystem (i.e. /sbin/debugfs). + 2. If you provide root access within a jail, this can of + course be used to setuid binaries in the jail. Combined + with an unjailed regular user account, this gives jailed + users unjailed root access. (thanks to Brad Spender for + pointing this out). To protect against this, use jails + in private namespaces, with the jail filesystems mounted + ONLY within the jail namespaces. For instance: + +$ # (Make sure /dev/hdc5 is not mounted anywhere) +$ new_namespace_shell /bin/bash +$ mount /dev/hdc5 /opt +$ mount -t proc proc /opt/proc +$ echo -n "root /opt" > /proc/$$/attr/exec +$ echo -n "ip 9.53.94.111" > /proc/$$/attr/exec +$ exec /bin/sh +$ sshd +$ apachectl start +$ exit + +How to use: + 1. modprobe bsdjail + [ 1.5 /sbin/ifconfig eth0:0 2.2.2.2; + 1.6 /sbin/route add -host 2.2.2.2 dev eth0:0 + (optional) ] + 2. Make sure the root filesystem (ie /dev/hdc5) is not mounted + anywhere else. + 3. exec_private_namespace /bin/sh + 4. mount /dev/hdc5 /opt + 5. mount -t proc proc /opt/proc + 6. echo -n "root /opt" > /proc/$$/attr/exec + echo -n "ip 2.2.2.2" > /proc/$$/attr/exec (optional) + 7. exec /bin/sh + 8. sshd + 9. exit + +The new shell will now run in a private jail on the filesystem on +/dev/hdc5. If proc has been mounted under /dev/hdc5, then a "ps -auxw" +under the jailed shell will show only entries for processes started under +that jail. + +If a private IP was specified for the jail, then + cat /proc/$$/attr/current +will show the address for the private network device. Other network +devices will be visible through /sbin/ifconfig -a, but not usable. 
+ +If the reading process is not in a jail, then + cat /proc/$$/attr/current +returns information about the root and ip * for the target process, +or "Not Jailed" if the target process is not jailed. + +Cat /proc/$$/attr/exec gives a list of the valid keywords to cat into +/proc/$$/attr/exec when starting a jail. + +Current valid keywords for creating a jail are: + + root: Root of jail's fs + ip: Ip addr for this jail + nrtask: Number of tasks in this jail + nice: The nice level for this jail. (maybe should be min/max?) + slice: Max timeslice per process + data: Max size of DATA segment per process + memlock: Max size of memory which can be locked per process ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: lsm: add bsdjail documentation 2004-10-06 20:25 ` [patch 3/3] lsm: add bsdjail documentation Serge Hallyn @ 2004-10-07 22:17 ` Matthias Urlichs 2004-10-08 20:02 ` Serge E. Hallyn 0 siblings, 1 reply; 37+ messages in thread From: Matthias Urlichs @ 2004-10-07 22:17 UTC (permalink / raw) To: linux-kernel Hi, Serge Hallyn wrote: > + echo -n "ip 2.2.2.2" > /proc/$$/attr/exec (optional) Please use RFC private addresses in example code. That being said, bsdjail is a very good idea (which is why we're stealing it from BSD after all ...). It affords lightweight compartmentalization, in other words a chroot-on-steroids, which is exactly what I need to split one box into a couple of mostly-independent realms, and I assume that many ISP/ASP/whatever hosting people will agree. Anyway, that's my vote for adding it to the kernel. -- Matthias Urlichs ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: lsm: add bsdjail documentation 2004-10-07 22:17 ` Matthias Urlichs @ 2004-10-08 20:02 ` Serge E. Hallyn 0 siblings, 0 replies; 37+ messages in thread From: Serge E. Hallyn @ 2004-10-08 20:02 UTC (permalink / raw) To: Matthias Urlichs; +Cc: linux-kernel On Fri, Oct 08, 2004 at 12:17:09AM +0200, Matthias Urlichs wrote: > Hi, Serge Hallyn wrote: > > > + echo -n "ip 2.2.2.2" > /proc/$$/attr/exec (optional) > > Please use RFC private addresses in example code. The attached patch to 2.6.9-rc3-mm3 fixes up the documentation just a little. > Anyway, that's my vote for adding it to the kernel. Thanks! -serge Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> diff -Nrup linux-2.6.9-rc3-mm3/Documentation/bsdjail.txt linux-2.6.9-rc3-mm3-jail/Documentation/bsdjail.txt --- linux-2.6.9-rc3-mm3/Documentation/bsdjail.txt 2004-10-08 13:56:34.000000000 -0500 +++ linux-2.6.9-rc3-mm3-jail/Documentation/bsdjail.txt 2004-10-08 15:59:13.845998920 -0500 @@ -6,7 +6,7 @@ Description: Implements a subset of the BSD Jail functionality as a Linux LSM. What is currently implemented: - If a proces is in a jail, it: + If a process is in a jail, it: 1. Is locked under a chroot (as are all children) which is not vulnerable to the well-known chdir(..)(etc)chroot(.) escape. @@ -18,13 +18,13 @@ What is currently implemented: 7. Cannot load or unload modules 8. Cannot change network settings 9. May be assigned a specific ip address which will be used - for all it's socket binds. + for all its socket binds. 10. Cannot see contents of /proc/<pid> entries of processes not in the same jail. (We hide their existence for convenience's sake, but - their existance can still be detected using, for instance, statfs) + their existence can still be detected using, for instance, statfs) 11. Has no CAP_SYS_RAWIO capability (no ioperm/iopl) 12. May not share IPC resources with processes outside its own jail. - 13. May find it's valid network address (if restricted) under + 13. 
May find its valid network address (if restricted) under /proc/$$/attr/current. WARNINGS: @@ -48,7 +48,7 @@ $ new_namespace_shell /bin/bash $ mount /dev/hdc5 /opt $ mount -t proc proc /opt/proc $ echo -n "root /opt" > /proc/$$/attr/exec -$ echo -n "ip 9.53.94.111" > /proc/$$/attr/exec +$ echo -n "ip 192.168.0.2" > /proc/$$/attr/exec $ exec /bin/sh $ sshd $ apachectl start @@ -56,8 +56,8 @@ $ exit How to use: 1. modprobe bsdjail - [ 1.5 /sbin/ifconfig eth0:0 2.2.2.2; - 1.6 /sbin/route add -host 2.2.2.2 dev eth0:0 + [ 1.5 /sbin/ifconfig eth0:0 192.168.0.2; + 1.6 /sbin/route add -host 192.168.0.2 dev eth0:0 (optional) ] 2. Make sure the root filesystem (ie /dev/hdc5) is not mounted anywhere else. @@ -65,7 +65,8 @@ How to use: 4. mount /dev/hdc5 /opt 5. mount -t proc proc /opt/proc 6. echo -n "root /opt" > /proc/$$/attr/exec - echo -n "ip 2.2.2.2" > /proc/$$/attr/exec (optional) + echo -n "ip 192.168.0.2" > /proc/$$/attr/exec (optional) + echo -n "ip6 2002:a00:0:0:0:0:0:1" > /proc/$$/attr/exec (optional) 7. exec /bin/sh 8. sshd 9. exit @@ -79,6 +80,7 @@ If a private IP was specified for the ja cat /proc/$$/attr/current will show the address for the private network device. Other network devices will be visible through /sbin/ifconfig -a, but not usable. +A jail may be assigned one IPV4 and one IPV6 address. If the reading process is not in a jail, then cat /proc/$$/attr/current @@ -92,8 +94,9 @@ Current valid keywords for creating a ja root: Root of jail's fs ip: Ip addr for this jail + ip6: Ipv6 addr for this jail (may currently not be shorthand) nrtask: Number of tasks in this jail - nice: The nice level for this jail. (maybe should be min/max?) + nice: The nice level for this jail (maybe should be min/max?) slice: Max timeslice per process data: Max size of DATA segment per process memlock: Max size of memory which can be locked per process ^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads: [~2004-10-20 19:25 UTC | newest] Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2004-10-06 20:21 (patch 1/3) lsm: add control over /proc/<pid> visibility Serge Hallyn 2004-10-06 20:24 ` [patch 1/3] lsm: add bsdjail module Serge Hallyn 2004-10-06 23:26 ` Andrew Morton 2004-10-07 4:08 ` Serge E. Hallyn 2004-10-07 6:18 ` James Morris 2004-10-07 6:22 ` Andrew Morton 2004-10-07 16:06 ` Chris Wright 2004-10-07 18:40 ` Andrew Morton 2004-10-07 18:52 ` Chris Wright 2004-10-07 20:56 ` Serge E. Hallyn 2004-10-10 6:24 ` Herbert Poetzl 2004-10-07 12:06 ` Alan Cox 2004-10-07 19:01 ` [patch 2/3] " Serge E. Hallyn 2004-10-07 19:42 ` Chris Wright 2004-10-07 20:05 ` Andrew Morton 2004-10-08 18:05 ` Serge E. Hallyn 2004-10-10 10:41 ` Christoph Hellwig 2004-10-10 11:31 ` Serge E. Hallyn 2004-10-10 11:34 ` Christoph Hellwig 2004-10-11 13:47 ` Alan Cox 2004-10-12 7:00 ` Herbert Poetzl 2004-10-12 9:00 ` Christoph Hellwig 2004-10-12 12:27 ` Herbert Poetzl 2004-10-20 15:36 ` Christoph Hellwig 2004-10-20 19:18 ` Herbert Poetzl 2004-10-12 13:11 ` Serge E. Hallyn 2004-10-12 14:15 ` Christoph Hellwig 2004-10-12 22:35 ` Ulrich Drepper 2004-10-13 0:58 ` Serge E. Hallyn 2004-10-13 1:09 ` Ulrich Drepper 2004-10-13 1:22 ` Serge E. Hallyn 2004-10-13 15:26 ` Stephen Smalley 2004-10-13 1:11 ` Chris Wright 2004-10-13 14:25 ` Stephen Smalley 2004-10-06 20:25 ` [patch 3/3] lsm: add bsdjail documentation Serge Hallyn 2004-10-07 22:17 ` Matthias Urlichs 2004-10-08 20:02 ` Serge E. Hallyn
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.