* [patch] mlock-as-nonroot revisted
@ 2004-07-29 10:03 Arjan van de Ven
2004-07-29 21:28 ` Andrew Morton
2004-07-30 1:52 ` Chris Wright
0 siblings, 2 replies; 49+ messages in thread
From: Arjan van de Ven @ 2004-07-29 10:03 UTC (permalink / raw)
To: linux-kernel; +Cc: riel, akpm
Hi,
Hi,
Below is a fixed up patch to allow non-root to mlock memory (but only if the
rlimit allows it, which defaults to 0). This is needed/useful for oracle and
co to be allowed to mlock/use hugetlb fs running as non-privileged user.
Also setting the limit to 4Kb can be very useful for gnupg and similar apps.
Compared to the previous revision of this patch; shm accounting has been
changed to be per user struct, while keeping track of which user struct
allocated the shm segment in the first place. This is done in order to avoid
the security bug where one process/user could mlock and another munlock
which would screw up the accounting.
Greetings,
Arjan van de Ven
diff -purN linux-2.6.7/fs/hugetlbfs/inode.c linux/fs/hugetlbfs/inode.c
--- linux-2.6.7/fs/hugetlbfs/inode.c 2004-07-29 11:36:55.744448953 +0200
+++ linux/fs/hugetlbfs/inode.c 2004-07-29 11:38:04.292595263 +0200
@@ -722,7 +722,7 @@ struct file *hugetlb_zero_setup(size_t s
struct qstr quick_string;
char buf[16];
- if (!capable(CAP_IPC_LOCK))
+ if (!can_do_mlock())
return ERR_PTR(-EPERM);
if (!is_hugepage_mem_enough(size))
diff -purN linux-2.6.7/include/asm-alpha/resource.h linux/include/asm-alpha/resource.h
--- linux-2.6.7/include/asm-alpha/resource.h 2004-07-29 11:36:46.344513294 +0200
+++ linux/include/asm-alpha/resource.h 2004-07-29 11:38:04.310593182 +0200
@@ -41,7 +41,7 @@
{INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_NPROC */ \
- {LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \
+ {0, 0 }, /* RLIMIT_MEMLOCK */ \
{LONG_MAX, LONG_MAX}, /* RLIMIT_LOCKS */ \
{MAX_SIGPENDING, MAX_SIGPENDING}, /* RLIMIT_SIGPENDING */ \
{MQ_BYTES_MAX, MQ_BYTES_MAX}, /* RLIMIT_MSGQUEUE */ \
diff -purN linux-2.6.7/include/asm-arm/resource.h linux/include/asm-arm/resource.h
--- linux-2.6.7/include/asm-arm/resource.h 2004-07-29 11:36:46.380509225 +0200
+++ linux/include/asm-arm/resource.h 2004-07-29 11:38:04.292595263 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING}, \
diff -purN linux-2.6.7/include/asm-arm26/resource.h linux/include/asm-arm26/resource.h
--- linux-2.6.7/include/asm-arm26/resource.h 2004-07-29 11:36:46.383508886 +0200
+++ linux/include/asm-arm26/resource.h 2004-07-29 11:38:04.293595148 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING}, \
diff -purN linux-2.6.7/include/asm-cris/resource.h linux/include/asm-cris/resource.h
--- linux-2.6.7/include/asm-cris/resource.h 2004-07-29 11:36:46.384508773 +0200
+++ linux/include/asm-cris/resource.h 2004-07-29 11:38:04.310593182 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-h8300/resource.h linux/include/asm-h8300/resource.h
--- linux-2.6.7/include/asm-h8300/resource.h 2004-07-29 11:36:46.389508207 +0200
+++ linux/include/asm-h8300/resource.h 2004-07-29 11:38:04.294595032 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-i386/resource.h linux/include/asm-i386/resource.h
--- linux-2.6.7/include/asm-i386/resource.h 2004-07-29 11:36:46.409505947 +0200
+++ linux/include/asm-i386/resource.h 2004-07-29 11:38:04.295594916 +0200
@@ -40,7 +40,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-ia64/resource.h linux/include/asm-ia64/resource.h
--- linux-2.6.7/include/asm-ia64/resource.h 2004-07-29 11:36:46.418504929 +0200
+++ linux/include/asm-ia64/resource.h 2004-07-29 11:38:04.295594916 +0200
@@ -46,7 +46,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-m68k/resource.h linux/include/asm-m68k/resource.h
--- linux-2.6.7/include/asm-m68k/resource.h 2004-07-29 11:36:46.425504138 +0200
+++ linux/include/asm-m68k/resource.h 2004-07-29 11:38:04.296594801 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-parisc/resource.h linux/include/asm-parisc/resource.h
--- linux-2.6.7/include/asm-parisc/resource.h 2004-07-29 11:36:46.447501651 +0200
+++ linux/include/asm-parisc/resource.h 2004-07-29 11:38:04.297594685 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-ppc/resource.h linux/include/asm-ppc/resource.h
--- linux-2.6.7/include/asm-ppc/resource.h 2004-07-29 11:36:46.462499955 +0200
+++ linux/include/asm-ppc/resource.h 2004-07-29 11:38:04.298594570 +0200
@@ -36,7 +36,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-ppc64/resource.h linux/include/asm-ppc64/resource.h
--- linux-2.6.7/include/asm-ppc64/resource.h 2004-07-29 11:36:46.472498825 +0200
+++ linux/include/asm-ppc64/resource.h 2004-07-29 11:38:04.298594570 +0200
@@ -45,7 +45,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-s390/resource.h linux/include/asm-s390/resource.h
--- linux-2.6.7/include/asm-s390/resource.h 2004-07-29 11:36:46.479498034 +0200
+++ linux/include/asm-s390/resource.h 2004-07-29 11:38:04.299594454 +0200
@@ -47,7 +47,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-sh/resource.h linux/include/asm-sh/resource.h
--- linux-2.6.7/include/asm-sh/resource.h 2004-07-29 11:36:46.491496677 +0200
+++ linux/include/asm-sh/resource.h 2004-07-29 11:38:04.300594338 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-sparc/resource.h linux/include/asm-sparc/resource.h
--- linux-2.6.7/include/asm-sparc/resource.h 2004-07-29 11:36:46.537491477 +0200
+++ linux/include/asm-sparc/resource.h 2004-07-29 11:38:04.300594338 +0200
@@ -44,7 +44,7 @@
{ 0, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{INR_OPEN, INR_OPEN}, {0, 0}, \
- {RLIM_INFINITY, RLIM_INFINITY}, \
+ {0, 0}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{MAX_SIGPENDING, MAX_SIGPENDING}, \
diff -purN linux-2.6.7/include/asm-sparc64/resource.h linux/include/asm-sparc64/resource.h
--- linux-2.6.7/include/asm-sparc64/resource.h 2004-07-29 11:36:46.545490573 +0200
+++ linux/include/asm-sparc64/resource.h 2004-07-29 11:38:04.301594223 +0200
@@ -43,7 +43,7 @@
{ 0, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{INR_OPEN, INR_OPEN}, {0, 0}, \
- {RLIM_INFINITY, RLIM_INFINITY}, \
+ {0, 0 }, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{RLIM_INFINITY, RLIM_INFINITY}, \
{MAX_SIGPENDING, MAX_SIGPENDING}, \
diff -purN linux-2.6.7/include/asm-v850/resource.h linux/include/asm-v850/resource.h
--- linux-2.6.7/include/asm-v850/resource.h 2004-07-29 11:36:46.548490234 +0200
+++ linux/include/asm-v850/resource.h 2004-07-29 11:38:04.302594107 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/asm-x86_64/resource.h linux/include/asm-x86_64/resource.h
--- linux-2.6.7/include/asm-x86_64/resource.h 2004-07-29 11:36:46.553489668 +0200
+++ linux/include/asm-x86_64/resource.h 2004-07-29 11:38:04.303593991 +0200
@@ -39,7 +39,7 @@
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ 0, 0 }, \
{ INR_OPEN, INR_OPEN }, \
- { RLIM_INFINITY, RLIM_INFINITY }, \
+ { 0, 0 }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ RLIM_INFINITY, RLIM_INFINITY }, \
{ MAX_SIGPENDING, MAX_SIGPENDING }, \
diff -purN linux-2.6.7/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.6.7/include/linux/mm.h 2004-07-29 11:36:56.044414936 +0200
+++ linux/include/linux/mm.h 2004-07-29 11:38:04.310593182 +0200
@@ -496,9 +496,19 @@ int shmem_set_policy(struct vm_area_stru
struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
unsigned long addr);
struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags);
-void shmem_lock(struct file * file, int lock);
+int shmem_lock(struct file * file, int lock, struct user_struct *);
int shmem_zero_setup(struct vm_area_struct *);
+static inline int can_do_mlock(void)
+{
+ if (capable(CAP_IPC_LOCK))
+ return 1;
+ if (current->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
+ return 1;
+ return 0;
+}
+
+
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
diff -purN linux-2.6.7/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.6.7/include/linux/sched.h 2004-07-29 11:36:55.954425142 +0200
+++ linux/include/linux/sched.h 2004-07-29 11:38:04.311593066 +0200
@@ -331,6 +331,7 @@ struct user_struct {
atomic_t sigpending; /* How many pending signals does this user have? */
/* protected by mq_lock */
unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
+ unsigned long locked_shm; /* How many pages of mlocked shm ? */
/* Hash table maintenance information */
struct list_head uidhash_list;
diff -purN linux-2.6.7/include/linux/shm.h linux/include/linux/shm.h
--- linux-2.6.7/include/linux/shm.h 2004-06-16 07:19:43.000000000 +0200
+++ linux/include/linux/shm.h 2004-07-29 11:38:04.313592835 +0200
@@ -84,6 +84,7 @@ struct shmid_kernel /* private to the ke
time_t shm_ctim;
pid_t shm_cprid;
pid_t shm_lprid;
+ struct user_struct * mlock_user;
};
/* shm_mode upper byte flags */
diff -purN linux-2.6.7/ipc/shm.c linux/ipc/shm.c
--- linux-2.6.7/ipc/shm.c 2004-07-29 11:36:55.137517777 +0200
+++ linux/ipc/shm.c 2004-07-29 11:38:04.313592835 +0200
@@ -114,7 +114,7 @@ static void shm_destroy (struct shmid_ke
shm_rmid (shp->id);
shm_unlock(shp);
if (!is_file_hugepages(shp->shm_file))
- shmem_lock(shp->shm_file, 0);
+ shmem_lock(shp->shm_file, 0, shp->mlock_user);
fput (shp->shm_file);
security_shm_free(shp);
ipc_rcu_free(shp, sizeof(struct shmid_kernel));
@@ -221,6 +221,7 @@ static int newseg (key_t key, int shmflg
shp->shm_nattch = 0;
shp->id = shm_buildid(id,shp->shm_perm.seq);
shp->shm_file = file;
+ shp->mlock_user = NULL;
file->f_dentry->d_inode->i_ino = shp->id;
if (shmflg & SHM_HUGETLB)
set_file_hugepages(file);
@@ -504,14 +505,11 @@ asmlinkage long sys_shmctl (int shmid, i
case SHM_LOCK:
case SHM_UNLOCK:
{
-/* Allow superuser to lock segment in memory */
-/* Should the pages be faulted in here or leave it to user? */
-/* need to determine interaction with current->swappable */
- if (!capable(CAP_IPC_LOCK)) {
+ /* Allow superuser to lock segment in memory */
+ if (!can_do_mlock()) {
err = -EPERM;
goto out;
}
-
shp = shm_lock(shmid);
if(shp==NULL) {
err = -EINVAL;
@@ -526,12 +524,14 @@ asmlinkage long sys_shmctl (int shmid, i
goto out_unlock;
if(cmd==SHM_LOCK) {
- if (!is_file_hugepages(shp->shm_file))
- shmem_lock(shp->shm_file, 1);
- shp->shm_flags |= SHM_LOCKED;
+ if (!is_file_hugepages(shp->shm_file)) {
+ err = shmem_lock(shp->shm_file, 1, current->user);
+ if (!err)
+ shp->shm_flags |= SHM_LOCKED;
+ }
} else {
if (!is_file_hugepages(shp->shm_file))
- shmem_lock(shp->shm_file, 0);
+ shmem_lock(shp->shm_file, 0, shp->mlock_user);
shp->shm_flags &= ~SHM_LOCKED;
}
shm_unlock(shp);
diff -purN linux-2.6.7/ipc/util.c linux/ipc/util.c
--- linux-2.6.7/ipc/util.c 2004-07-29 11:36:55.137517777 +0200
+++ linux/ipc/util.c 2004-07-29 11:38:04.306593644 +0200
@@ -392,8 +392,11 @@ int ipcperms (struct kern_ipc_perm *ipcp
granted_mode >>= 3;
/* is there some bit set in requested_mode but not in granted_mode? */
if ((requested_mode & ~granted_mode & 0007) &&
- !capable(CAP_IPC_OWNER))
- return -1;
+ !capable(CAP_IPC_OWNER)) {
+ if (!can_do_mlock()) {
+ return -1;
+ }
+ }
return security_ipc_permission(ipcp, flag);
}
diff -purN linux-2.6.7/kernel/user.c linux/kernel/user.c
--- linux-2.6.7/kernel/user.c 2004-07-29 11:36:55.155515736 +0200
+++ linux/kernel/user.c 2004-07-29 11:38:04.314592719 +0200
@@ -32,7 +32,8 @@ struct user_struct root_user = {
.processes = ATOMIC_INIT(1),
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
- .mq_bytes = 0
+ .mq_bytes = 0,
+ .locked_shm = 0
};
/*
@@ -113,6 +114,7 @@ struct user_struct * alloc_uid(uid_t uid
atomic_set(&new->sigpending, 0);
new->mq_bytes = 0;
+ new->locked_shm = 0;
/*
* Before adding this, check whether we raced
diff -purN linux-2.6.7/mm/mlock.c linux/mm/mlock.c
--- linux-2.6.7/mm/mlock.c 2004-06-16 07:18:57.000000000 +0200
+++ linux/mm/mlock.c 2004-07-29 11:38:04.306593644 +0200
@@ -60,7 +60,7 @@ static int do_mlock(unsigned long start,
struct vm_area_struct * vma, * next;
int error;
- if (on && !capable(CAP_IPC_LOCK))
+ if (on && !can_do_mlock())
return -EPERM;
len = PAGE_ALIGN(len);
end = start + len;
@@ -118,7 +118,7 @@ asmlinkage long sys_mlock(unsigned long
lock_limit >>= PAGE_SHIFT;
/* check against resource limits */
- if (locked <= lock_limit)
+ if ( (locked <= lock_limit) || capable(CAP_IPC_LOCK))
error = do_mlock(start, len, 1);
up_write(¤t->mm->mmap_sem);
return error;
@@ -142,7 +142,7 @@ static int do_mlockall(int flags)
unsigned int def_flags;
struct vm_area_struct * vma;
- if (!capable(CAP_IPC_LOCK))
+ if (!can_do_mlock())
return -EPERM;
def_flags = 0;
@@ -177,7 +177,7 @@ asmlinkage long sys_mlockall(int flags)
lock_limit >>= PAGE_SHIFT;
ret = -ENOMEM;
- if (current->mm->total_vm <= lock_limit)
+ if ((current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK))
ret = do_mlockall(flags);
out:
up_write(¤t->mm->mmap_sem);
diff -purN linux-2.6.7/mm/mmap.c linux/mm/mmap.c
--- linux-2.6.7/mm/mmap.c 2004-07-29 11:36:55.950425596 +0200
+++ linux/mm/mmap.c 2004-07-29 11:38:04.307593529 +0200
@@ -806,15 +806,17 @@ unsigned long do_mmap_pgoff(struct file
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED) {
- if (!capable(CAP_IPC_LOCK))
+ if (!can_do_mlock())
return -EPERM;
vm_flags |= VM_LOCKED;
}
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
- unsigned long locked = mm->locked_vm << PAGE_SHIFT;
+ unsigned long locked, lock_limit;
+ locked = mm->locked_vm << PAGE_SHIFT;
+ lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += len;
- if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
@@ -1746,9 +1748,11 @@ unsigned long do_brk(unsigned long addr,
* mlock MCL_FUTURE?
*/
if (mm->def_flags & VM_LOCKED) {
- unsigned long locked = mm->locked_vm << PAGE_SHIFT;
+ unsigned long locked, lock_limit;
+ locked = mm->locked_vm << PAGE_SHIFT;
+ lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += len;
- if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
diff -purN linux-2.6.7/mm/mremap.c linux/mm/mremap.c
--- linux-2.6.7/mm/mremap.c 2004-07-29 11:36:55.169514149 +0200
+++ linux/mm/mremap.c 2004-07-29 11:38:04.308593413 +0200
@@ -324,10 +324,12 @@ unsigned long do_mremap(unsigned long ad
goto out;
}
if (vma->vm_flags & VM_LOCKED) {
- unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
+ unsigned long locked, lock_limit;
+ locked = current->mm->locked_vm << PAGE_SHIFT;
+ lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += new_len - old_len;
ret = -EAGAIN;
- if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
goto out;
}
ret = -ENOMEM;
diff -purN linux-2.6.7/mm/shmem.c linux/mm/shmem.c
--- linux-2.6.7/mm/shmem.c 2004-07-29 11:36:55.640460745 +0200
+++ linux/mm/shmem.c 2004-07-29 11:38:04.315592604 +0200
@@ -1151,17 +1151,43 @@ shmem_get_policy(struct vm_area_struct *
}
#endif
-void shmem_lock(struct file *file, int lock)
+/* Protects current->user->locked_shm from concurrent access */
+static spinlock_t shmem_lock_user = SPIN_LOCK_UNLOCKED;
+
+int shmem_lock(struct file *file, int lock, struct user_struct * user)
{
struct inode *inode = file->f_dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
+ unsigned long lock_limit, locked;
+ int retval = -ENOMEM;
spin_lock(&info->lock);
+ spin_lock(&shmem_lock_user);
+ if (lock && !(info->flags & VM_LOCKED)) {
+ locked = inode->i_size >> PAGE_SHIFT;
+ locked += user->locked_shm;
+ lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
+ lock_limit >>= PAGE_SHIFT;
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK))
+ goto out_nomem;
+ /* for this branch user == current->user so it won't go away under us */
+ atomic_inc(&user->__count);
+ user->locked_shm = locked;
+ }
+ if (!lock && (info->flags & VM_LOCKED) && user) {
+ locked = inode->i_size >> PAGE_SHIFT;
+ user->locked_shm -= locked;
+ free_uid(user);
+ }
if (lock)
info->flags |= VM_LOCKED;
else
info->flags &= ~VM_LOCKED;
+ retval = 0;
+out_nomem:
+ spin_unlock(&shmem_lock_user);
spin_unlock(&info->lock);
+ return retval;
}
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
^ permalink raw reply [flat|nested] 49+ messages in thread* Re: [patch] mlock-as-nonroot revisted 2004-07-29 10:03 [patch] mlock-as-nonroot revisted Arjan van de Ven @ 2004-07-29 21:28 ` Andrew Morton 2004-07-29 21:40 ` Andrea Arcangeli 2004-07-30 0:51 ` Rik van Riel 2004-07-30 1:52 ` Chris Wright 1 sibling, 2 replies; 49+ messages in thread From: Andrew Morton @ 2004-07-29 21:28 UTC (permalink / raw) To: Arjan van de Ven; +Cc: linux-kernel, riel, Andrea Arcangeli Arjan van de Ven <arjanv@redhat.com> wrote: > > Below is a fixed up patch to allow non-root to mlock memory I seem to recall that Andrea identified reasons why per-user mlock limits were fundamentally broken/unsuitable, but I forget the details. Perhaps he could remind us? As for this patch: it's a new capability which will get basically zero testing for the next year, which is a worry. How have you tested it, and how much? ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-29 21:28 ` Andrew Morton @ 2004-07-29 21:40 ` Andrea Arcangeli 2004-07-30 0:50 ` Rik van Riel 2004-07-30 0:51 ` Rik van Riel 1 sibling, 1 reply; 49+ messages in thread From: Andrea Arcangeli @ 2004-07-29 21:40 UTC (permalink / raw) To: Andrew Morton; +Cc: Arjan van de Ven, linux-kernel, riel On Thu, Jul 29, 2004 at 02:28:29PM -0700, Andrew Morton wrote: > Arjan van de Ven <arjanv@redhat.com> wrote: > > > > Below is a fixed up patch to allow non-root to mlock memory > > I seem to recall that Andrea identified reasons why per-user mlock limits > were fundamentally broken/unsuitable, but I forget the details. Perhaps he > could remind us? yep, the rlimit for mlocked stuff works only for the pagetables pinning. In turn it works perfectly for mlock. But shared memory or hugetlbfs obviously aren't pinned via the pagetables in the virtual address space, such things are persistent and non-swappable objects, even killing the task won't change a thing. So as described some month ago such patch is insecure and conceptually flawed since they're using rlimits to control persistent objects that have absolutely nothing to do with the task itself, which in turns make the rlimit useless. the very best one can do right now is the below. If you remove the shm/hugetlbfs brokeness from the rlimit patch that will become a safe feature for mlock (for mlock it works fine since mlock is all about pinning the pagetables, not about persistent objects that have nothing to do with the task), but it won't change almost anything for oracle standpoint since it doesn't allow hugetlbfs usage anyways. diff -U4 -Nrp linux-2.6.5/fs/hugetlbfs/inode.c linux-2.6.5.cap-mlock/fs/hugetlbfs/inode.c --- linux-2.6.5/fs/hugetlbfs/inode.c 2004-04-04 05:38:14.000000000 +0200 +++ linux-2.6.5.cap-mlock/fs/hugetlbfs/inode.c 2004-04-05 00:15:41.000000000 +0200 @@ -706,9 +706,9 @@ struct file *hugetlb_zero_setup(size_t s struct dentry *dentry, *root; struct qstr quick_string; char buf[16]; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return ERR_PTR(-EPERM); if (!is_hugepage_mem_enough(size)) return ERR_PTR(-ENOMEM); diff -U4 -Nrp linux-2.6.5/include/linux/sched.h linux-2.6.5.cap-mlock/include/linux/sched.h --- linux-2.6.5/include/linux/sched.h 2004-04-05 00:15:03.000000000 +0200 +++ linux-2.6.5.cap-mlock/include/linux/sched.h 2004-04-05 00:15:41.000000000 +0200 @@ -748,8 +748,14 @@ static inline int capable(int cap) return 0; } #endif +extern int sysctl_disable_cap_mlock; +static inline int can_do_mlock(void) +{ + return likely(sysctl_disable_cap_mlock || capable(CAP_IPC_LOCK)); +} + /* * Routines for handling mm_structs */ extern struct mm_struct * mm_alloc(void); diff -U4 -Nrp linux-2.6.5/include/linux/sysctl.h linux-2.6.5.cap-mlock/include/linux/sysctl.h --- linux-2.6.5/include/linux/sysctl.h 2004-04-05 00:15:03.000000000 +0200 +++ linux-2.6.5.cap-mlock/include/linux/sysctl.h 2004-04-05 00:16:19.000000000 +0200 @@ -166,8 +166,9 @@ enum VM_MIN_FREE_KBYTES=21, /* Minimum free kilobytes to maintain */ VM_MAX_MAP_COUNT=22, /* int: Maximum number of mmaps/address-space */ VM_LAPTOP_MODE=23, /* vm laptop mode */ VM_BLOCK_DUMP=24, /* block dump mode */ + VM_DISABLE_CAP_MLOCK=25,/* disable CAP_IPC_LOCK checking */ }; /* CTL_NET names: */ diff -U4 -Nrp linux-2.6.5/ipc/shm.c linux-2.6.5.cap-mlock/ipc/shm.c --- linux-2.6.5/ipc/shm.c 2004-04-05 00:15:03.000000000 +0200 +++ linux-2.6.5.cap-mlock/ipc/shm.c 2004-04-05 00:15:41.000000000 +0200 @@ -502,9 +502,9 @@ asmlinkage long sys_shmctl (int shmid, i { /* Allow superuser to lock segment in memory */ /* Should the pages be faulted in here or leave it to user? */ /* need to determine interaction with current->swappable */ - if (!capable(CAP_IPC_LOCK)) { + if (!can_do_mlock()) { err = -EPERM; goto out; } diff -U4 -Nrp linux-2.6.5/kernel/capability.c linux-2.6.5.cap-mlock/kernel/capability.c --- linux-2.6.5/kernel/capability.c 2004-04-04 05:37:36.000000000 +0200 +++ linux-2.6.5.cap-mlock/kernel/capability.c 2004-04-05 00:15:41.000000000 +0200 @@ -13,8 +13,9 @@ #include <asm/uaccess.h> unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ kernel_cap_t cap_bset = CAP_INIT_EFF_SET; +int sysctl_disable_cap_mlock = 0; EXPORT_SYMBOL(securebits); EXPORT_SYMBOL(cap_bset); diff -U4 -Nrp linux-2.6.5/kernel/sysctl.c linux-2.6.5.cap-mlock/kernel/sysctl.c --- linux-2.6.5/kernel/sysctl.c 2004-04-05 00:15:03.000000000 +0200 +++ linux-2.6.5.cap-mlock/kernel/sysctl.c 2004-04-05 00:18:01.000000000 +0200 @@ -821,9 +821,17 @@ static ctl_table vm_table[] = { .proc_handler = &proc_dointvec, .strategy = &sysctl_intvec, .extra1 = &zero, }, - { .ctl_name = 0 } + { + .ctl_name = VM_DISABLE_CAP_MLOCK, + .procname = "disable_cap_mlock", + .data = &sysctl_disable_cap_mlock, + .maxlen = sizeof(sysctl_disable_cap_mlock), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } }; static ctl_table proc_table[] = { { .ctl_name = 0 } diff -U4 -Nrp linux-2.6.5/mm/mlock.c linux-2.6.5.cap-mlock/mm/mlock.c --- linux-2.6.5/mm/mlock.c 2004-04-04 05:36:16.000000000 +0200 +++ linux-2.6.5.cap-mlock/mm/mlock.c 2004-04-05 00:15:41.000000000 +0200 @@ -56,9 +56,9 @@ static int do_mlock(unsigned long start, unsigned long nstart, end, tmp; struct vm_area_struct * vma, * next; int error; - if (on && !capable(CAP_IPC_LOCK)) + if (on && !can_do_mlock()) return -EPERM; len = PAGE_ALIGN(len); end = start + len; if (end < start) @@ -138,9 +138,9 @@ static int do_mlockall(int flags) int error; unsigned int def_flags; struct vm_area_struct * vma; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return -EPERM; def_flags = 0; if (flags & MCL_FUTURE) diff -U4 -Nrp linux-2.6.5/mm/mmap.c linux-2.6.5.cap-mlock/mm/mmap.c --- linux-2.6.5/mm/mmap.c 2004-04-05 00:15:06.000000000 +0200 +++ linux-2.6.5.cap-mlock/mm/mmap.c 2004-04-05 00:15:41.000000000 +0200 @@ -574,9 +574,9 @@ unsigned long __do_mmap_pgoff(struct mm_ vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) { - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return -EPERM; vm_flags |= VM_LOCKED; } /* mlock MCL_FUTURE? */ ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-29 21:40 ` Andrea Arcangeli @ 2004-07-30 0:50 ` Rik van Riel 2004-07-30 2:16 ` Andrea Arcangeli 0 siblings, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-07-30 0:50 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Andrew Morton, Arjan van de Ven, linux-kernel On Thu, 29 Jul 2004, Andrea Arcangeli wrote: > So as described some month ago such patch is insecure and conceptually > flawed since they're using rlimits to control persistent objects that > have absolutely nothing to do with the task itself, which in turns make > the rlimit useless. Which is why shared memory segments are accounted against the USER, instead of against the TASK. Your criticism from a few months ago was noted and the patch got fixed. If you want to criticise the new code, please read it first, especially the changes to shmem.c ;) -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-30 0:50 ` Rik van Riel @ 2004-07-30 2:16 ` Andrea Arcangeli 0 siblings, 0 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-07-30 2:16 UTC (permalink / raw) To: Rik van Riel; +Cc: Andrew Morton, Arjan van de Ven, linux-kernel On Thu, Jul 29, 2004 at 08:50:16PM -0400, Rik van Riel wrote: > please read it first, especially the changes to shmem.c ;) I read it, and it was obviously still broken. I got CC direct from Andrew and I read only Andrew's comment and the code, and I could stop reading the code pretty quick since hugetlbfs is at the top and very well visibly broken. chown on tmpfs and hugetlbfs sounds "interesting" with your approch, I'm looking forward to the next fixed revision. I'm not against per-user myself, but it's not like doing it for transient memory or transient objects associated with the task itself. Furthemore I'm not convinced rlimits should be used for such persistent things that have nothing to do with running tasks but ok, I can live with it if it works. I tend to believe if something we should use fs_quota in tmpfs and hugetlbfs for such things. That is the place to catch chown etc.. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-29 21:28 ` Andrew Morton 2004-07-29 21:40 ` Andrea Arcangeli @ 2004-07-30 0:51 ` Rik van Riel 2004-07-30 2:17 ` Andrea Arcangeli 1 sibling, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-07-30 0:51 UTC (permalink / raw) To: Andrew Morton; +Cc: Arjan van de Ven, linux-kernel, Andrea Arcangeli On Thu, 29 Jul 2004, Andrew Morton wrote: > I seem to recall that Andrea identified reasons why per-user mlock > limits were fundamentally broken/unsuitable, but I forget the details. > Perhaps he could remind us? Fixed in the current patch. > As for this patch: it's a new capability which will get basically zero > testing for the next year, which is a worry. How have you tested it, and > how much? It's been in RHEL3 for a while now. First with the the mistake Andrea identified, but that code's been fixed too now... -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-30 0:51 ` Rik van Riel @ 2004-07-30 2:17 ` Andrea Arcangeli 0 siblings, 0 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-07-30 2:17 UTC (permalink / raw) To: Rik van Riel; +Cc: Andrew Morton, Arjan van de Ven, linux-kernel On Thu, Jul 29, 2004 at 08:51:27PM -0400, Rik van Riel wrote: > the mistake Andrea identified, but that code's been > fixed too now... I'm afraid not. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-29 10:03 [patch] mlock-as-nonroot revisted Arjan van de Ven 2004-07-29 21:28 ` Andrew Morton @ 2004-07-30 1:52 ` Chris Wright 2004-07-30 2:09 ` Andrea Arcangeli ` (3 more replies) 1 sibling, 4 replies; 49+ messages in thread From: Chris Wright @ 2004-07-30 1:52 UTC (permalink / raw) To: Arjan van de Ven; +Cc: linux-kernel, riel, akpm, andrea * Arjan van de Ven (arjanv@redhat.com) wrote: > Compared to the previous revision of this patch; shm accounting has been > changed to be per user struct, while keeping track of which user struct > allocated the shm segment in the first place. This is done in order to avoid > the security bug where one process/user could mlock and another munlock > which would screw up the accounting. I think this one still needs a bit of work. Unless I'm mistaken, I see three problems: 1) hugetlb accounting is not done. so it's only simple change to checking permission, but the acutal usage is not tracked (gets back to problem andrea pointed out). with this patch, wouldn't !capable(CAP_IPC_LOCK) && rlim[RLIMIT_MEMLOCK].rlim_cur == 1 be enough to get all the hugepages a user would want (i.e. security hole)? 2) mlock_user isn't ever set, so SHM_LOCK accounting looks broken (trivial to fix). 3) now the RLIMIT_MEMLOCK value represents at best half of what a user can acutally lock. because half of the accounting (mlock) is done against locked_vm, and the other half against locked_shm. and as i mentioned above, seems that hugetlb is unaccounted for. I do agree, however, that storing in user struct allows for quota like accounting that matches the shm_lock and hugetlb use cases. > diff -purN linux-2.6.7/fs/hugetlbfs/inode.c linux/fs/hugetlbfs/inode.c > --- linux-2.6.7/fs/hugetlbfs/inode.c 2004-07-29 11:36:55.744448953 +0200 > +++ linux/fs/hugetlbfs/inode.c 2004-07-29 11:38:04.292595263 +0200 > @@ -722,7 +722,7 @@ struct file *hugetlb_zero_setup(size_t s > struct qstr quick_string; > char buf[16]; > > - if (!capable(CAP_IPC_LOCK)) > + if (!can_do_mlock()) > return ERR_PTR(-EPERM); Hrm, this looks stale, does it actually apply? > +++ linux/include/linux/shm.h 2004-07-29 11:38:04.313592835 +0200 > @@ -84,6 +84,7 @@ struct shmid_kernel /* private to the ke > time_t shm_ctim; > pid_t shm_cprid; > pid_t shm_lprid; > + struct user_struct * mlock_user; This is never set to anything other than NULL. > diff -purN linux-2.6.7/ipc/shm.c linux/ipc/shm.c > --- linux-2.6.7/ipc/shm.c 2004-07-29 11:36:55.137517777 +0200 > +++ linux/ipc/shm.c 2004-07-29 11:38:04.313592835 +0200 > @@ -114,7 +114,7 @@ static void shm_destroy (struct shmid_ke > shm_rmid (shp->id); > shm_unlock(shp); > if (!is_file_hugepages(shp->shm_file)) > - shmem_lock(shp->shm_file, 0); > + shmem_lock(shp->shm_file, 0, shp->mlock_user); So this is NULL. > fput (shp->shm_file); > security_shm_free(shp); > ipc_rcu_free(shp, sizeof(struct shmid_kernel)); > @@ -221,6 +221,7 @@ static int newseg (key_t key, int shmflg > shp->shm_nattch = 0; > shp->id = shm_buildid(id,shp->shm_perm.seq); > shp->shm_file = file; > + shp->mlock_user = NULL; > file->f_dentry->d_inode->i_ino = shp->id; > if (shmflg & SHM_HUGETLB) > set_file_hugepages(file); > @@ -504,14 +505,11 @@ asmlinkage long sys_shmctl (int shmid, i > case SHM_LOCK: > case SHM_UNLOCK: > { > -/* Allow superuser to lock segment in memory */ > -/* Should the pages be faulted in here or leave it to user? */ > -/* need to determine interaction with current->swappable */ > - if (!capable(CAP_IPC_LOCK)) { > + /* Allow superuser to lock segment in memory */ > + if (!can_do_mlock()) { > err = -EPERM; > goto out; > } > - > shp = shm_lock(shmid); > if(shp==NULL) { > err = -EINVAL; > @@ -526,12 +524,14 @@ asmlinkage long sys_shmctl (int shmid, i > goto out_unlock; > > if(cmd==SHM_LOCK) { > - if (!is_file_hugepages(shp->shm_file)) > - shmem_lock(shp->shm_file, 1); > - shp->shm_flags |= SHM_LOCKED; > + if (!is_file_hugepages(shp->shm_file)) { > + err = shmem_lock(shp->shm_file, 1, current->user); > + if (!err) > + shp->shm_flags |= SHM_LOCKED; > + } > } else { > if (!is_file_hugepages(shp->shm_file)) > - shmem_lock(shp->shm_file, 0); > + shmem_lock(shp->shm_file, 0, shp->mlock_user); And this would be NULL. > shp->shm_flags &= ~SHM_LOCKED; > } > shm_unlock(shp); > diff -purN linux-2.6.7/ipc/util.c linux/ipc/util.c > --- linux-2.6.7/ipc/util.c 2004-07-29 11:36:55.137517777 +0200 > +++ linux/ipc/util.c 2004-07-29 11:38:04.306593644 +0200 > @@ -392,8 +392,11 @@ int ipcperms (struct kern_ipc_perm *ipcp > granted_mode >>= 3; > /* is there some bit set in requested_mode but not in granted_mode? */ > if ((requested_mode & ~granted_mode & 0007) && > - !capable(CAP_IPC_OWNER)) > - return -1; > + !capable(CAP_IPC_OWNER)) { > + if (!can_do_mlock()) { I don't quite see the need for this check for every ipc type. Isn't the SHM_LOCK case sufficient? > + return -1; > + } > + } > > return security_ipc_permission(ipcp, flag); > } > diff -purN linux-2.6.7/mm/shmem.c linux/mm/shmem.c > --- linux-2.6.7/mm/shmem.c 2004-07-29 11:36:55.640460745 +0200 > +++ linux/mm/shmem.c 2004-07-29 11:38:04.315592604 +0200 > @@ -1151,17 +1151,43 @@ shmem_get_policy(struct vm_area_struct * > } > #endif > > -void shmem_lock(struct file *file, int lock) > +/* Protects current->user->locked_shm from concurrent access */ > +static spinlock_t shmem_lock_user = SPIN_LOCK_UNLOCKED; > + > +int shmem_lock(struct file *file, int lock, struct user_struct * user) > { > struct inode *inode = file->f_dentry->d_inode; > struct shmem_inode_info *info = SHMEM_I(inode); > + unsigned long lock_limit, locked; > + int retval = -ENOMEM; > > spin_lock(&info->lock); > + spin_lock(&shmem_lock_user); > + if (lock && !(info->flags & VM_LOCKED)) { > + locked = inode->i_size >> PAGE_SHIFT; > + locked += user->locked_shm; > + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; > + lock_limit >>= PAGE_SHIFT; > + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) > + goto out_nomem; > + /* for this branch user == current->user so it won't go away under us */ > + atomic_inc(&user->__count); > + user->locked_shm = locked; > + } > + if (!lock && (info->flags & VM_LOCKED) && user) { So, I think this branch is never excercised because user will be NULL. > + locked = inode->i_size >> PAGE_SHIFT; > + user->locked_shm -= locked; > + free_uid(user); > + } > if (lock) > info->flags |= VM_LOCKED; > else > info->flags &= ~VM_LOCKED; > + retval = 0; > +out_nomem: > + spin_unlock(&shmem_lock_user); > spin_unlock(&info->lock); > + return retval; > } > > static int shmem_mmap(struct file *file, struct vm_area_struct *vma) thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-30 1:52 ` Chris Wright @ 2004-07-30 2:09 ` Andrea Arcangeli 2004-07-30 2:46 ` Rik van Riel ` (2 subsequent siblings) 3 siblings, 0 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-07-30 2:09 UTC (permalink / raw) To: Chris Wright; +Cc: Arjan van de Ven, linux-kernel, riel, akpm On Thu, Jul 29, 2004 at 06:52:15PM -0700, Chris Wright wrote: > 1) hugetlb accounting is not done. so it's only simple change to checking > permission, but the acutal usage is not tracked (gets back to problem > andrea pointed out). with this patch, wouldn't !capable(CAP_IPC_LOCK) > && rlim[RLIMIT_MEMLOCK].rlim_cur == 1 be enough to get all the hugepages > a user would want (i.e. security hole)? exactly, you beaten me on reply-speed ;). And this patch is needed primarly to get access to hugetlbfs without IPC_CAP_LOCK as Arjan mentioned. > I do agree, however, that storing in user struct allows for quota like > accounting that matches the shm_lock and hugetlb use cases. Looking forward to see hugetlbfs working with user quota too... rlimit user-quota is certainly a reasonable approach, though I'm not sure what happens if root runs chown, that's funny not? Comments? ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-30 1:52 ` Chris Wright 2004-07-30 2:09 ` Andrea Arcangeli @ 2004-07-30 2:46 ` Rik van Riel 2004-08-03 20:54 ` Rik van Riel 2004-08-03 20:55 ` Rik van Riel 3 siblings, 0 replies; 49+ messages in thread From: Rik van Riel @ 2004-07-30 2:46 UTC (permalink / raw) To: Chris Wright; +Cc: Arjan van de Ven, linux-kernel, akpm, andrea On Thu, 29 Jul 2004, Chris Wright wrote: > 2) mlock_user isn't ever set, so SHM_LOCK accounting looks broken > (trivial to fix). Woops, looks like Arjan's patch is missed a little piece of the code there when going throug the forward port... > 3) now the RLIMIT_MEMLOCK value represents at best half of what a user > can acutally lock. because half of the accounting (mlock) is done against > locked_vm, and the other half against locked_shm. and as i mentioned > above, seems that hugetlb is unaccounted for. Well, the RLIMIT_MEMLOCK is a per-process limit anyway so the total amount of memory a user can mlock isn't really limited by it. The user_struct itself just gets the same limit as each of the user's processes, to account for memory that doesn't have the same life time as processes. > I do agree, however, that storing in user struct allows for quota like > accounting that matches the shm_lock and hugetlb use cases. Ok, cool ;) -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-30 1:52 ` Chris Wright 2004-07-30 2:09 ` Andrea Arcangeli 2004-07-30 2:46 ` Rik van Riel @ 2004-08-03 20:54 ` Rik van Riel 2004-08-03 21:45 ` Chris Wright 2004-08-03 20:55 ` Rik van Riel 3 siblings, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-03 20:54 UTC (permalink / raw) To: Chris Wright; +Cc: Arjan van de Ven, linux-kernel, akpm, andrea On Thu, 29 Jul 2004, Chris Wright wrote: > 2) mlock_user isn't ever set, so SHM_LOCK accounting looks broken > (trivial to fix). Should be fixed by the patch below. Does this look acceptable ? --- ipc/shm.c.mlock 2004-08-03 10:19:49.470575891 -0400 +++ ipc/shm.c 2004-08-03 11:44:10.047294881 -0400 @@ -524,15 +524,19 @@ goto out_unlock; if(cmd==SHM_LOCK) { + struct user_struct * user = current->user; if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, current->user); - if (!err) + if (!err) { shp->shm_flags |= SHM_LOCKED; + shp->mlock_user = user; + } } } else { if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0, shp->mlock_user); shp->shm_flags &= ~SHM_LOCKED; + shp->mlock_user = NULL; } shm_unlock(shp); goto out; ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 20:54 ` Rik van Riel @ 2004-08-03 21:45 ` Chris Wright 0 siblings, 0 replies; 49+ messages in thread From: Chris Wright @ 2004-08-03 21:45 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm, andrea * Rik van Riel (riel@redhat.com) wrote: > On Thu, 29 Jul 2004, Chris Wright wrote: > > > 2) mlock_user isn't ever set, so SHM_LOCK accounting looks broken > > (trivial to fix). > > Should be fixed by the patch below. Does this look > acceptable ? Yeah, sure does. Could also pass the cached user struct instead of current->user to shmem_lock, might fit in 80 cols. ;-) thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-07-30 1:52 ` Chris Wright ` (2 preceding siblings ...) 2004-08-03 20:54 ` Rik van Riel @ 2004-08-03 20:55 ` Rik van Riel 2004-08-03 21:07 ` Andrea Arcangeli 3 siblings, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-03 20:55 UTC (permalink / raw) To: Chris Wright; +Cc: Arjan van de Ven, linux-kernel, akpm, andrea On Thu, 29 Jul 2004, Chris Wright wrote: > 1) hugetlb accounting is not done. so it's only simple change to checking > permission, but the acutal usage is not tracked This patch should fix that. It's incremental with Arjan's patch and the "set shp->mlock_user" patch I just sent. Are you ok with how this is implemented ? Please shoot holes in it, so I can make this better... ;) --- linux-2.6.7/include/linux/mm.h.htlb 2004-08-03 16:29:07.362393143 -0400 +++ linux-2.6.7/include/linux/mm.h 2004-08-03 16:31:20.179095875 -0400 @@ -509,7 +509,8 @@ return 1; return 0; } - +extern int user_can_mlock(size_t, struct user_struct *); +extern void user_subtract_mlock(size_t, struct user_struct *); /* * Parameter block passed down to zap_pte_range in exceptional cases. --- linux-2.6.7/fs/hugetlbfs/inode.c.htlb 2004-08-03 13:33:26.433750664 -0400 +++ linux-2.6.7/fs/hugetlbfs/inode.c 2004-08-03 14:08:43.636761209 -0400 @@ -718,7 +718,7 @@ struct file *hugetlb_zero_setup(size_t size) { - int error; + int error = -ENOMEM; struct file *file; struct inode *inode; struct dentry *dentry, *root; @@ -731,6 +731,9 @@ if (!is_hugepage_mem_enough(size)) return ERR_PTR(-ENOMEM); + if (!user_can_mlock(size, current->user)) + return ERR_PTR(-ENOMEM); + root = hugetlbfs_vfsmount->mnt_root; snprintf(buf, 16, "%lu", hugetlbfs_counter()); quick_string.name = buf; @@ -738,7 +741,7 @@ quick_string.hash = 0; dentry = d_alloc(root, &quick_string); if (!dentry) - return ERR_PTR(-ENOMEM); + goto out_subtract_mlock; error = -ENFILE; file = get_empty_filp(); @@ -765,6 +768,8 @@ put_filp(file); out_dentry: dput(dentry); +out_subtract_mlock: + user_subtract_mlock(size, current->user); return ERR_PTR(error); } --- linux-2.6.7/ipc/shm.c.htlb 2004-08-03 11:46:08.383363603 -0400 +++ linux-2.6.7/ipc/shm.c 2004-08-03 16:48:26.150674657 -0400 @@ -115,6 +115,9 @@ shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0, shp->mlock_user); + else + user_subtract_mlock(shp->shm_file->f_dentry->d_inode->i_size, + shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); ipc_rcu_free(shp, sizeof(struct shmid_kernel)); @@ -198,9 +201,11 @@ return error; } - if (shmflg & SHM_HUGETLB) + if (shmflg & SHM_HUGETLB) { + /* hugetlb_zero_setup takes care of mlock user accounting */ file = hugetlb_zero_setup(size); - else { + shp->mlock_user = current->user; + } else { sprintf (name, "SYSV%08x", key); file = shmem_file_setup(name, size, VM_ACCOUNT); } --- linux-2.6.7/mm/shmem.c.htlb 2004-08-03 14:08:49.277094888 -0400 +++ linux-2.6.7/mm/shmem.c 2004-08-03 16:32:41.215119384 -0400 @@ -1151,41 +1151,27 @@ } #endif -/* Protects current->user->locked_shm from concurrent access */ -static spinlock_t shmem_lock_user = SPIN_LOCK_UNLOCKED; - int shmem_lock(struct file *file, int lock, struct user_struct * user) { struct inode *inode = file->f_dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long lock_limit, locked; int retval = -ENOMEM; + if (!can_do_mlock()) + return -EPERM; + spin_lock(&info->lock); - spin_lock(&shmem_lock_user); if (lock && !(info->flags & VM_LOCKED)) { - locked = inode->i_size >> PAGE_SHIFT; - locked += user->locked_shm; - lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) + if (!user_can_mlock(inode->i_size, user) && !capable(CAP_IPC_LOCK)) goto out_nomem; - /* for this branch user == current->user so it won't go away under us */ - atomic_inc(&user->__count); - user->locked_shm = locked; + info->flags |= VM_LOCKED; } if (!lock && (info->flags & VM_LOCKED) && user) { - locked = inode->i_size >> PAGE_SHIFT; - user->locked_shm -= locked; - free_uid(user); - } - if (lock) - info->flags |= VM_LOCKED; - else + user_subtract_mlock(inode->i_size, user); info->flags &= ~VM_LOCKED; + } retval = 0; out_nomem: - spin_unlock(&shmem_lock_user); spin_unlock(&info->lock); return retval; } --- linux-2.6.7/mm/mlock.c.htlb 2004-08-03 14:27:35.270370974 -0400 +++ linux-2.6.7/mm/mlock.c 2004-08-03 16:32:05.650641994 -0400 @@ -193,3 +193,38 @@ up_write(¤t->mm->mmap_sem); return ret; } + +/* + * Objects with different lifetime than processes (mlocked shm segments + * and hugetlb files) get accounted against the user_struct instead. + */ +static spinlock_t mlock_user_lock = SPIN_LOCK_UNLOCKED; + +int user_can_mlock(size_t size, struct user_struct * user) +{ + unsigned long lock_limit, locked; + int allowed = 0; + + spin_lock(&mlock_user_lock); + locked = size >> PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + if (locked + user->locked_shm > lock_limit) + goto out; + atomic_inc(&user->__count); + user->locked_shm += locked; + allowed = 1; +out: + spin_unlock(&mlock_user_lock); + return allowed; +} + +void user_subtract_mlock(size_t size, struct user_struct * user) +{ + if (user) { + spin_lock(&mlock_user_lock); + user->locked_shm -= (size >> PAGE_SHIFT); + spin_unlock(&mlock_user_lock); + free_uid(user); + } +} ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 20:55 ` Rik van Riel @ 2004-08-03 21:07 ` Andrea Arcangeli 2004-08-03 21:13 ` Arjan van de Ven 2004-08-03 21:13 ` Rik van Riel 0 siblings, 2 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 21:07 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 04:55:49PM -0400, Rik van Riel wrote: > @@ -198,9 +201,11 @@ > return error; > } > > - if (shmflg & SHM_HUGETLB) > + if (shmflg & SHM_HUGETLB) { > + /* hugetlb_zero_setup takes care of mlock user accounting */ > file = hugetlb_zero_setup(size); > - else { > + shp->mlock_user = current->user; > + } else { > sprintf (name, "SYSV%08x", key); > file = shmem_file_setup(name, size, VM_ACCOUNT); > } where do you change mlock_user in chown? ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:07 ` Andrea Arcangeli @ 2004-08-03 21:13 ` Arjan van de Ven 2004-08-03 21:36 ` Andrea Arcangeli 2004-08-03 21:13 ` Rik van Riel 1 sibling, 1 reply; 49+ messages in thread From: Arjan van de Ven @ 2004-08-03 21:13 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Rik van Riel, Chris Wright, linux-kernel, akpm [-- Attachment #1: Type: text/plain, Size: 959 bytes --] On Tue, Aug 03, 2004 at 11:07:37PM +0200, Andrea Arcangeli wrote: > On Tue, Aug 03, 2004 at 04:55:49PM -0400, Rik van Riel wrote: > > @@ -198,9 +201,11 @@ > > return error; > > } > > > > - if (shmflg & SHM_HUGETLB) > > + if (shmflg & SHM_HUGETLB) { > > + /* hugetlb_zero_setup takes care of mlock user accounting */ > > file = hugetlb_zero_setup(size); > > - else { > > + shp->mlock_user = current->user; > > + } else { > > sprintf (name, "SYSV%08x", key); > > file = shmem_file_setup(name, size, VM_ACCOUNT); > > } > > where do you change mlock_user in chown? ok silly question maybe, but why would you? The user that mlock'd gets to pay for it, and gets his credits back at munlock. Chown doesn't really matter in that regard..... The thing that does matter of course is that the user who "paid" in credits gets them back in the end.. and that this does. But maybe you see a useful use pattern that I'm missing? Please convince me :) [-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:13 ` Arjan van de Ven @ 2004-08-03 21:36 ` Andrea Arcangeli 2004-08-03 21:38 ` Arjan van de Ven 0 siblings, 1 reply; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 21:36 UTC (permalink / raw) To: Arjan van de Ven; +Cc: Rik van Riel, Chris Wright, linux-kernel, akpm On Tue, Aug 03, 2004 at 11:13:39PM +0200, Arjan van de Ven wrote: > The user that mlock'd gets to pay for it, and gets his credits back at > munlock. Chown doesn't really matter in that regard..... The thing that does what? he cannot get credits when it munlock if this is hugetlbfs. If it does you're again into the insecure DoS scenario. btw, I don't see where you call user_subtract_mlock when the file is truncated. what exactly are you trying to do in that patch? I thought you were binding the storage to a certain user structure _persistently_. Which breaks badly with chown since if the admin chown the file to root.root then you cannot truncate it anymore but you're locked up completely and you cannot use mlock anymore and you've to send email to the admin asking to reassing the file to you so that you can truncate it. If you're calling user_subtract_mlock at unlock time (not at truncate time) then it has the same issues that the patch I've rejected some months ago when I wrote the disable_cap_mlock. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:36 ` Andrea Arcangeli @ 2004-08-03 21:38 ` Arjan van de Ven 2004-08-03 21:51 ` Andrea Arcangeli 0 siblings, 1 reply; 49+ messages in thread From: Arjan van de Ven @ 2004-08-03 21:38 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Rik van Riel, Chris Wright, linux-kernel, akpm [-- Attachment #1: Type: text/plain, Size: 525 bytes --] On Tue, Aug 03, 2004 at 11:36:34PM +0200, Andrea Arcangeli wrote: > On Tue, Aug 03, 2004 at 11:13:39PM +0200, Arjan van de Ven wrote: > > The user that mlock'd gets to pay for it, and gets his credits back at > > munlock. Chown doesn't really matter in that regard..... The thing that does > > what? he cannot get credits when it munlock if this is hugetlbfs. If it > does you're again into the insecure DoS scenario. not if you keep track of who locked in the first place and give the credit back to *that* user (struct). [-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:38 ` Arjan van de Ven @ 2004-08-03 21:51 ` Andrea Arcangeli 2004-08-03 22:01 ` Chris Wright 0 siblings, 1 reply; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 21:51 UTC (permalink / raw) To: Arjan van de Ven; +Cc: Rik van Riel, Chris Wright, linux-kernel, akpm On Tue, Aug 03, 2004 at 11:38:57PM +0200, Arjan van de Ven wrote: > On Tue, Aug 03, 2004 at 11:36:34PM +0200, Andrea Arcangeli wrote: > > On Tue, Aug 03, 2004 at 11:13:39PM +0200, Arjan van de Ven wrote: > > > The user that mlock'd gets to pay for it, and gets his credits back at > > > munlock. Chown doesn't really matter in that regard..... The thing that does > > > > what? he cannot get credits when it munlock if this is hugetlbfs. If it > > does you're again into the insecure DoS scenario. > > not if you keep track of who locked in the first place and give the credit > back to *that* user (struct). I wasn't talking about chown above. I mean, where's the truncate that releases the user-struct-bind? You just can't release the user-struct-bind from munlock/exit/whatever-task-opeartion-different-from-truncate-or-chown. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:51 ` Andrea Arcangeli @ 2004-08-03 22:01 ` Chris Wright 2004-08-03 22:11 ` Andrea Arcangeli 0 siblings, 1 reply; 49+ messages in thread From: Chris Wright @ 2004-08-03 22:01 UTC (permalink / raw) To: Andrea Arcangeli Cc: Arjan van de Ven, Rik van Riel, Chris Wright, linux-kernel, akpm * Andrea Arcangeli (andrea@suse.de) wrote: > On Tue, Aug 03, 2004 at 11:38:57PM +0200, Arjan van de Ven wrote: > > not if you keep track of who locked in the first place and give the credit > > back to *that* user (struct). > > I wasn't talking about chown above. I mean, where's the truncate that > releases the user-struct-bind? I'm not sure what you mean. Truncate should only update the accounting, not break the binding, right? > You just can't release the user-struct-bind from > munlock/exit/whatever-task-opeartion-different-from-truncate-or-chown. It's meant to be done at object destruction. thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 22:01 ` Chris Wright @ 2004-08-03 22:11 ` Andrea Arcangeli 2004-08-03 22:33 ` Chris Wright 2004-08-04 1:21 ` Rik van Riel 0 siblings, 2 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 22:11 UTC (permalink / raw) To: Chris Wright; +Cc: Arjan van de Ven, Rik van Riel, linux-kernel, akpm On Tue, Aug 03, 2004 at 03:01:18PM -0700, Chris Wright wrote: > * Andrea Arcangeli (andrea@suse.de) wrote: > > On Tue, Aug 03, 2004 at 11:38:57PM +0200, Arjan van de Ven wrote: > > > not if you keep track of who locked in the first place and give the credit > > > back to *that* user (struct). > > > > I wasn't talking about chown above. I mean, where's the truncate that > > releases the user-struct-bind? > > I'm not sure what you mean. Truncate should only update the accounting, > not break the binding, right? yep, update the accounting. And with that I meant "releasing" part of it (accordingly to the size of the truncate, a truncate(0) should release it all). > It's meant to be done at object destruction. where? Maybe it's just that those are incremental patches and I'm missing the other part of the patch, but reading those patches I can't see where the user_subtract_mlock happens when I truncate an hugetlbfs file (or delete it or whatever). Sure it can't be munlock releasing/_updating_ the user-struct accounting for fs persistent storage. But if other code takes care of it then maybe you want to delete the user_subtract_mlock function and use the other piece that already existed for truncate. Anyways my overall picture of this is that you're trying to do filesystem quotas with rlimit which sounds quite flawed. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 22:11 ` Andrea Arcangeli @ 2004-08-03 22:33 ` Chris Wright 2004-08-03 22:42 ` Andrea Arcangeli 2004-08-04 1:21 ` Rik van Riel 1 sibling, 1 reply; 49+ messages in thread From: Chris Wright @ 2004-08-03 22:33 UTC (permalink / raw) To: Andrea Arcangeli Cc: Chris Wright, Arjan van de Ven, Rik van Riel, linux-kernel, akpm * Andrea Arcangeli (andrea@suse.de) wrote: > On Tue, Aug 03, 2004 at 03:01:18PM -0700, Chris Wright wrote: > > I'm not sure what you mean. Truncate should only update the accounting, > > not break the binding, right? > > yep, update the accounting. And with that I meant "releasing" part of > it (accordingly to the size of the truncate, a truncate(0) should > release it all). OK, good. I thought you meant drop binding to user, rather then reduce the accounting. > > It's meant to be done at object destruction. > > where? I just mean in general the only time it's valid to drop the binding (which includes dropping refcount on the user struct) should be when the object is destroyed. > Maybe it's just that those are incremental patches and I'm missing the > other part of the patch, but reading those patches I can't see where the > user_subtract_mlock happens when I truncate an hugetlbfs file (or delete > it or whatever). Sure it can't be munlock releasing/_updating_ the user-struct > accounting for fs persistent storage. But if other code takes care of it > then maybe you want to delete the user_subtract_mlock function and use > the other piece that already existed for truncate. Heh, yeah in a place like hugetlb_put_quota? > Anyways my overall picture of this is that you're trying to do > filesystem quotas with rlimit which sounds quite flawed. It's so tempting because of the similarity (and hence ease of administration) with mlocked pages. And if they can be merged, user_struct being a fine placeholder, then it's perhaps simpler. thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 22:33 ` Chris Wright @ 2004-08-03 22:42 ` Andrea Arcangeli 2004-08-03 22:52 ` Chris Wright 0 siblings, 1 reply; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 22:42 UTC (permalink / raw) To: Chris Wright; +Cc: Arjan van de Ven, Rik van Riel, linux-kernel, akpm On Tue, Aug 03, 2004 at 03:33:35PM -0700, Chris Wright wrote: > I just mean in general the only time it's valid to drop the binding > (which includes dropping refcount on the user struct) should be when > the object is destroyed. yep, agreed. > > Maybe it's just that those are incremental patches and I'm missing the > > other part of the patch, but reading those patches I can't see where the > > user_subtract_mlock happens when I truncate an hugetlbfs file (or delete > > it or whatever). Sure it can't be munlock releasing/_updating_ the user-struct > > accounting for fs persistent storage. But if other code takes care of it > > then maybe you want to delete the user_subtract_mlock function and use > > the other piece that already existed for truncate. > > Heh, yeah in a place like hugetlb_put_quota? yep. that's the kind of function I was looking for to update/release the accounting, but it's not there, and sure it wasn't there in the previous patch either. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 22:42 ` Andrea Arcangeli @ 2004-08-03 22:52 ` Chris Wright 0 siblings, 0 replies; 49+ messages in thread From: Chris Wright @ 2004-08-03 22:52 UTC (permalink / raw) To: Andrea Arcangeli Cc: Chris Wright, Arjan van de Ven, Rik van Riel, linux-kernel, akpm * Andrea Arcangeli (andrea@suse.de) wrote: > On Tue, Aug 03, 2004 at 03:33:35PM -0700, Chris Wright wrote: > > Heh, yeah in a place like hugetlb_put_quota? > > yep. that's the kind of function I was looking for to update/release the > accounting, but it's not there, and sure it wasn't there in the previous > patch either. I didn't see it there either. But I think it's fixable. thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 22:11 ` Andrea Arcangeli 2004-08-03 22:33 ` Chris Wright @ 2004-08-04 1:21 ` Rik van Riel 2004-08-04 1:53 ` Andrea Arcangeli 1 sibling, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-04 1:21 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Wed, 4 Aug 2004, Andrea Arcangeli wrote: > Anyways my overall picture of this is that you're trying to do > filesystem quotas with rlimit which sounds quite flawed. This is exactly why named hugetlb files are NOT included in this accounting, only the ones created through the SHM interface are. -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 1:21 ` Rik van Riel @ 2004-08-04 1:53 ` Andrea Arcangeli 2004-08-04 2:01 ` Rik van Riel 2004-08-04 2:07 ` Chris Wright 0 siblings, 2 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-04 1:53 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 09:21:34PM -0400, Rik van Riel wrote: > This is exactly why named hugetlb files are NOT included > in this accounting, only the ones created through the SHM > interface are. but you're allowing everybody to alloc all RAM in hugetlb files with the change in the previos patch posted by Arjan (you're currently posted incremental patches against Arjan's patch at the top of the thread I hope), so you must definitely apply "this" accounting to hugetlb files too or it's still insecure as far as I can tell. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 1:53 ` Andrea Arcangeli @ 2004-08-04 2:01 ` Rik van Riel 2004-08-04 2:13 ` Andrea Arcangeli 2004-08-04 2:07 ` Chris Wright 1 sibling, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-04 2:01 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Wed, 4 Aug 2004, Andrea Arcangeli wrote: > On Tue, Aug 03, 2004 at 09:21:34PM -0400, Rik van Riel wrote: > > This is exactly why named hugetlb files are NOT included > > in this accounting, only the ones created through the SHM > > interface are. > > but you're allowing everybody to alloc all RAM in hugetlb files with > the change in the previos patch posted by Arjan Nope, Arjan's patch (and my incremental) touch hugetlb_zero_setup, which only seems to be called from ipc/shm.c Normal hugetlb file creation (through the filesystem) isn't touched by these patches. > (you're currently posted incremental patches against Arjan's patch at > the top of the thread I hope), so you must definitely apply "this" > accounting to hugetlb files too or it's still insecure as far as I can > tell. The patch only touches "anonymous" hugetlb files, ie. the ones created through ipc/shm.c. I'm not sure why you claim that the others would be affected by this patch... -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:01 ` Rik van Riel @ 2004-08-04 2:13 ` Andrea Arcangeli 2004-08-04 2:20 ` William Lee Irwin III ` (2 more replies) 0 siblings, 3 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-04 2:13 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 10:01:12PM -0400, Rik van Riel wrote: > On Wed, 4 Aug 2004, Andrea Arcangeli wrote: > > On Tue, Aug 03, 2004 at 09:21:34PM -0400, Rik van Riel wrote: > > > This is exactly why named hugetlb files are NOT included > > > in this accounting, only the ones created through the SHM > > > interface are. > > > > but you're allowing everybody to alloc all RAM in hugetlb files with > > the change in the previos patch posted by Arjan > > Nope, Arjan's patch (and my incremental) touch hugetlb_zero_setup, > which only seems to be called from ipc/shm.c > > Normal hugetlb file creation (through the filesystem) isn't touched > by these patches. it is: diff -purN linux-2.6.7/fs/hugetlbfs/inode.c linux/fs/hugetlbfs/inode.c --- linux-2.6.7/fs/hugetlbfs/inode.c 2004-07-29 11:36:55.744448953 +0200 +++ linux/fs/hugetlbfs/inode.c 2004-07-29 11:38:04.292595263 +0200 @@ -722,7 +722,7 @@ struct file *hugetlb_zero_setup(size_t s struct qstr quick_string; char buf[16]; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return ERR_PTR(-EPERM); if (!is_hugepage_mem_enough(size)) this breaks local security if you set the rlimit to 1 byte (well, 1 byte == disable_cap_mlock). ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:13 ` Andrea Arcangeli @ 2004-08-04 2:20 ` William Lee Irwin III 2004-08-04 2:22 ` Rik van Riel 2004-08-04 2:25 ` Chris Wright 2 siblings, 0 replies; 49+ messages in thread From: William Lee Irwin III @ 2004-08-04 2:20 UTC (permalink / raw) To: Andrea Arcangeli Cc: Rik van Riel, Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 10:01:12PM -0400, Rik van Riel wrote: >> Nope, Arjan's patch (and my incremental) touch hugetlb_zero_setup, >> which only seems to be called from ipc/shm.c >> Normal hugetlb file creation (through the filesystem) isn't touched >> by these patches. On Wed, Aug 04, 2004 at 04:13:32AM +0200, Andrea Arcangeli wrote: > it is: hugetlb_zero_setup() is only used for shm last I checked. -- wli ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:13 ` Andrea Arcangeli 2004-08-04 2:20 ` William Lee Irwin III @ 2004-08-04 2:22 ` Rik van Riel 2004-08-04 2:31 ` William Lee Irwin III 2004-08-04 3:13 ` Andrea Arcangeli 2004-08-04 2:25 ` Chris Wright 2 siblings, 2 replies; 49+ messages in thread From: Rik van Riel @ 2004-08-04 2:22 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Wed, 4 Aug 2004, Andrea Arcangeli wrote: > > Normal hugetlb file creation (through the filesystem) isn't touched > > by these patches. > > it is: Hugetlb file creation through the filesystem never calls hugetlb_zero_setup! What are you talking about ? > diff -purN linux-2.6.7/fs/hugetlbfs/inode.c linux/fs/hugetlbfs/inode.c > --- linux-2.6.7/fs/hugetlbfs/inode.c 2004-07-29 11:36:55.744448953 > +0200 > +++ linux/fs/hugetlbfs/inode.c 2004-07-29 11:38:04.292595263 +0200 > @@ -722,7 +722,7 @@ struct file *hugetlb_zero_setup(size_t s > struct qstr quick_string; > char buf[16]; > > - if (!capable(CAP_IPC_LOCK)) > + if (!can_do_mlock()) > return ERR_PTR(-EPERM); > this breaks local security if you set the rlimit to 1 byte (well, 1 byte > == disable_cap_mlock). Please read my incremental patch. It adds a quota check right after this code segment. -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:22 ` Rik van Riel @ 2004-08-04 2:31 ` William Lee Irwin III 2004-08-04 2:56 ` Rik van Riel 2004-08-04 3:13 ` Andrea Arcangeli 1 sibling, 1 reply; 49+ messages in thread From: William Lee Irwin III @ 2004-08-04 2:31 UTC (permalink / raw) To: Rik van Riel Cc: Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 10:22:30PM -0400, Rik van Riel wrote: > Please read my incremental patch. It adds a quota check > right after this code segment. Actually, some of the confusion may be losing pieces of the incremental patches (I'm certainly missing the full picture). By any chance could you repost a complete patch and/or series? -- wli ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:31 ` William Lee Irwin III @ 2004-08-04 2:56 ` Rik van Riel 2004-08-04 6:06 ` Chris Wright 0 siblings, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-04 2:56 UTC (permalink / raw) To: William Lee Irwin III Cc: Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, Andrew Morton On Tue, 3 Aug 2004, William Lee Irwin III wrote: > By any chance could you repost a complete patch and/or series? Here you are. It is fits into the Fedora kernel rpm, but I guess it should apply to upstream and -mm too. Please check if there are any spots left where this patch did something wrong. I'd like to get this merged ASAP, so I will fix any actual errors people find. If people find additional stuff that needs work in the same area, I can look at that later in follow-up patches. --- linux-2.6.7/include/asm-ppc64/resource.h.mlock 2004-08-03 22:46:28.030210685 -0400 +++ linux-2.6.7/include/asm-ppc64/resource.h 2004-08-03 22:46:43.185742010 -0400 @@ -45,7 +45,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-m68k/resource.h.mlock 2004-08-03 22:46:27.194457109 -0400 +++ linux-2.6.7/include/asm-m68k/resource.h 2004-08-03 22:46:43.189740831 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-parisc/resource.h.mlock 2004-08-03 22:46:27.751292925 -0400 +++ linux-2.6.7/include/asm-parisc/resource.h 2004-08-03 22:46:43.192739946 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-ppc/resource.h.mlock 2004-08-03 22:46:27.918243699 -0400 +++ linux-2.6.7/include/asm-ppc/resource.h 2004-08-03 22:46:43.196738767 -0400 @@ -36,7 +36,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-x86_64/resource.h.mlock 2004-08-03 22:46:28.754996979 -0400 +++ linux-2.6.7/include/asm-x86_64/resource.h 2004-08-03 22:46:43.199737883 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE , PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-ia64/resource.h.mlock 2004-08-03 22:46:27.039502798 -0400 +++ linux-2.6.7/include/asm-ia64/resource.h 2004-08-03 22:46:43.202736999 -0400 @@ -46,7 +46,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/linux/mm.h.mlock 2004-08-03 22:46:42.762866697 -0400 +++ linux-2.6.7/include/linux/mm.h 2004-08-03 22:46:43.209734935 -0400 @@ -498,9 +498,20 @@ int shmem_set_policy(struct vm_area_stru struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr); struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags); -void shmem_lock(struct file * file, int lock); +int shmem_lock(struct file * file, int lock, struct user_struct *); int shmem_zero_setup(struct vm_area_struct *); +static inline int can_do_mlock(void) +{ + if (capable(CAP_IPC_LOCK)) + return 1; + if (current->rlim[RLIMIT_MEMLOCK].rlim_cur != 0) + return 1; + return 0; +} +extern int user_can_mlock(size_t, struct user_struct *); +extern void user_subtract_mlock(size_t, struct user_struct *); + /* * Parameter block passed down to zap_pte_range in exceptional cases. */ --- linux-2.6.7/include/linux/shm.h.mlock 2004-06-16 01:19:43.000000000 -0400 +++ linux-2.6.7/include/linux/shm.h 2004-08-03 22:46:43.241725503 -0400 @@ -84,6 +84,7 @@ struct shmid_kernel /* private to the ke time_t shm_ctim; pid_t shm_cprid; pid_t shm_lprid; + struct user_struct * mlock_user; }; /* shm_mode upper byte flags */ --- linux-2.6.7/include/linux/sched.h.mlock 2004-08-03 22:46:42.140050338 -0400 +++ linux-2.6.7/include/linux/sched.h 2004-08-03 22:46:43.252722260 -0400 @@ -331,6 +331,7 @@ struct user_struct { atomic_t sigpending; /* How many pending signals does this user have? */ /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ + unsigned long locked_shm; /* How many pages of mlocked shm ? */ /* Hash table maintenance information */ struct list_head uidhash_list; --- linux-2.6.7/include/asm-arm/resource.h.mlock 2004-08-03 22:46:26.638620999 -0400 +++ linux-2.6.7/include/asm-arm/resource.h 2004-08-03 22:46:43.358691015 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-sparc/resource.h.mlock 2004-08-03 22:46:28.557055343 -0400 +++ linux-2.6.7/include/asm-sparc/resource.h 2004-08-03 22:46:43.362689836 -0400 @@ -44,7 +44,7 @@ { 0, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {INR_OPEN, INR_OPEN}, {0, 0}, \ - {RLIM_INFINITY, RLIM_INFINITY}, \ + {PAGE_SIZE, PAGE_SIZE}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-alpha/resource.h.mlock 2004-08-03 22:46:26.332711198 -0400 +++ linux-2.6.7/include/asm-alpha/resource.h 2004-08-03 22:46:43.365688951 -0400 @@ -41,7 +41,7 @@ {INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_NPROC */ \ - {LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \ + {PAGE_SIZE, PAGE_SIZE}, /* RLIMIT_MEMLOCK */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_LOCKS */ \ {MAX_SIGPENDING, MAX_SIGPENDING}, /* RLIMIT_SIGPENDING */ \ {MQ_BYTES_MAX, MQ_BYTES_MAX}, /* RLIMIT_MSGQUEUE */ \ --- linux-2.6.7/include/asm-h8300/resource.h.mlock 2004-08-03 22:46:26.781578848 -0400 +++ linux-2.6.7/include/asm-h8300/resource.h 2004-08-03 22:46:43.368688067 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-i386/resource.h.mlock 2004-08-03 22:46:26.959526379 -0400 +++ linux-2.6.7/include/asm-i386/resource.h 2004-08-03 22:46:43.371687183 -0400 @@ -40,7 +40,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-s390/resource.h.mlock 2004-08-03 22:46:28.102189462 -0400 +++ linux-2.6.7/include/asm-s390/resource.h 2004-08-03 22:46:43.373686593 -0400 @@ -47,7 +47,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-cris/resource.h.mlock 2004-08-03 22:46:26.721596534 -0400 +++ linux-2.6.7/include/asm-cris/resource.h 2004-08-03 22:46:43.376685709 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-sparc64/resource.h.mlock 2004-08-03 22:46:28.636032057 -0400 +++ linux-2.6.7/include/asm-sparc64/resource.h 2004-08-03 22:46:43.379684825 -0400 @@ -43,7 +43,7 @@ { 0, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {INR_OPEN, INR_OPEN}, {0, 0}, \ - {RLIM_INFINITY, RLIM_INFINITY}, \ + {PAGE_SIZE, PAGE_SIZE }, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-arm26/resource.h.mlock 2004-08-03 22:46:26.671611272 -0400 +++ linux-2.6.7/include/asm-arm26/resource.h 2004-08-03 22:46:43.382683940 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-v850/resource.h.mlock 2004-08-03 22:46:28.683018203 -0400 +++ linux-2.6.7/include/asm-v850/resource.h 2004-08-03 22:46:43.385683056 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-sh/resource.h.mlock 2004-08-03 22:46:28.228152321 -0400 +++ linux-2.6.7/include/asm-sh/resource.h 2004-08-03 22:46:43.387682466 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/fs/hugetlbfs/inode.c.mlock 2004-08-03 22:46:41.332288511 -0400 +++ linux-2.6.7/fs/hugetlbfs/inode.c 2004-08-03 22:46:43.394680403 -0400 @@ -718,19 +718,22 @@ static unsigned long hugetlbfs_counter(v struct file *hugetlb_zero_setup(size_t size) { - int error; + int error = -ENOMEM; struct file *file; struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; char buf[16]; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return ERR_PTR(-EPERM); if (!is_hugepage_mem_enough(size)) return ERR_PTR(-ENOMEM); + if (!user_can_mlock(size, current->user)) + return ERR_PTR(-ENOMEM); + root = hugetlbfs_vfsmount->mnt_root; snprintf(buf, 16, "%lu", hugetlbfs_counter()); quick_string.name = buf; @@ -738,7 +741,7 @@ struct file *hugetlb_zero_setup(size_t s quick_string.hash = 0; dentry = d_alloc(root, &quick_string); if (!dentry) - return ERR_PTR(-ENOMEM); + goto out_subtract_mlock; error = -ENFILE; file = get_empty_filp(); @@ -765,6 +768,8 @@ out_file: put_filp(file); out_dentry: dput(dentry); +out_subtract_mlock: + user_subtract_mlock(size, current->user); return ERR_PTR(error); } --- linux-2.6.7/kernel/user.c.mlock 2004-08-03 22:46:30.113596392 -0400 +++ linux-2.6.7/kernel/user.c 2004-08-03 22:46:43.396679813 -0400 @@ -32,7 +32,8 @@ struct user_struct root_user = { .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), - .mq_bytes = 0 + .mq_bytes = 0, + .locked_shm = 0 }; /* @@ -113,6 +114,7 @@ struct user_struct * alloc_uid(uid_t uid atomic_set(&new->sigpending, 0); new->mq_bytes = 0; + new->locked_shm = 0; /* * Before adding this, check whether we raced --- linux-2.6.7/ipc/shm.c.mlock 2004-08-03 22:46:29.848674505 -0400 +++ linux-2.6.7/ipc/shm.c 2004-08-03 22:46:43.399678929 -0400 @@ -114,7 +114,10 @@ static void shm_destroy (struct shmid_ke shm_rmid (shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0); + shmem_lock(shp->shm_file, 0, shp->mlock_user); + else + user_subtract_mlock(shp->shm_file->f_dentry->d_inode->i_size, + shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); ipc_rcu_free(shp, sizeof(struct shmid_kernel)); @@ -198,9 +201,11 @@ static int newseg (key_t key, int shmflg return error; } - if (shmflg & SHM_HUGETLB) + if (shmflg & SHM_HUGETLB) { + /* hugetlb_zero_setup takes care of mlock user accounting */ file = hugetlb_zero_setup(size); - else { + shp->mlock_user = current->user; + } else { sprintf (name, "SYSV%08x", key); file = shmem_file_setup(name, size, VM_ACCOUNT); } @@ -221,6 +226,7 @@ static int newseg (key_t key, int shmflg shp->shm_nattch = 0; shp->id = shm_buildid(id,shp->shm_perm.seq); shp->shm_file = file; + shp->mlock_user = NULL; file->f_dentry->d_inode->i_ino = shp->id; if (shmflg & SHM_HUGETLB) set_file_hugepages(file); @@ -504,14 +510,11 @@ asmlinkage long sys_shmctl (int shmid, i case SHM_LOCK: case SHM_UNLOCK: { -/* Allow superuser to lock segment in memory */ -/* Should the pages be faulted in here or leave it to user? */ -/* need to determine interaction with current->swappable */ - if (!capable(CAP_IPC_LOCK)) { + /* Allow superuser to lock segment in memory */ + if (!can_do_mlock()) { err = -EPERM; goto out; } - shp = shm_lock(shmid); if(shp==NULL) { err = -EINVAL; @@ -526,13 +529,19 @@ asmlinkage long sys_shmctl (int shmid, i goto out_unlock; if(cmd==SHM_LOCK) { - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 1); - shp->shm_flags |= SHM_LOCKED; + struct user_struct * user = current->user; + if (!is_file_hugepages(shp->shm_file)) { + err = shmem_lock(shp->shm_file, 1, current->user); + if (!err) { + shp->shm_flags |= SHM_LOCKED; + shp->mlock_user = user; + } + } } else { if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0); + shmem_lock(shp->shm_file, 0, shp->mlock_user); shp->shm_flags &= ~SHM_LOCKED; + shp->mlock_user = NULL; } shm_unlock(shp); goto out; --- linux-2.6.7/ipc/util.c.mlock 2004-08-03 22:46:29.851673621 -0400 +++ linux-2.6.7/ipc/util.c 2004-08-03 22:46:43.402678045 -0400 @@ -392,8 +392,11 @@ int ipcperms (struct kern_ipc_perm *ipcp granted_mode >>= 3; /* is there some bit set in requested_mode but not in granted_mode? */ if ((requested_mode & ~granted_mode & 0007) && - !capable(CAP_IPC_OWNER)) - return -1; + !capable(CAP_IPC_OWNER)) { + if (!can_do_mlock()) { + return -1; + } + } return security_ipc_permission(ipcp, flag); } --- linux-2.6.7/mm/mlock.c.mlock 2004-06-16 01:18:57.000000000 -0400 +++ linux-2.6.7/mm/mlock.c 2004-08-03 22:46:43.431669497 -0400 @@ -60,7 +60,7 @@ static int do_mlock(unsigned long start, struct vm_area_struct * vma, * next; int error; - if (on && !capable(CAP_IPC_LOCK)) + if (on && !can_do_mlock()) return -EPERM; len = PAGE_ALIGN(len); end = start + len; @@ -118,7 +118,7 @@ asmlinkage long sys_mlock(unsigned long lock_limit >>= PAGE_SHIFT; /* check against resource limits */ - if (locked <= lock_limit) + if ( (locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); up_write(¤t->mm->mmap_sem); return error; @@ -142,7 +142,7 @@ static int do_mlockall(int flags) unsigned int def_flags; struct vm_area_struct * vma; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return -EPERM; def_flags = 0; @@ -177,7 +177,7 @@ asmlinkage long sys_mlockall(int flags) lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; - if (current->mm->total_vm <= lock_limit) + if ((current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); out: up_write(¤t->mm->mmap_sem); @@ -193,3 +193,38 @@ asmlinkage long sys_munlockall(void) up_write(¤t->mm->mmap_sem); return ret; } + +/* + * Objects with different lifetime than processes (mlocked shm segments + * and hugetlb files) get accounted against the user_struct instead. + */ +static spinlock_t mlock_user_lock = SPIN_LOCK_UNLOCKED; + +int user_can_mlock(size_t size, struct user_struct * user) +{ + unsigned long lock_limit, locked; + int allowed = 0; + + spin_lock(&mlock_user_lock); + locked = size >> PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + if (locked + user->locked_shm > lock_limit) + goto out; + atomic_inc(&user->__count); + user->locked_shm += locked; + allowed = 1; +out: + spin_unlock(&mlock_user_lock); + return allowed; +} + +void user_subtract_mlock(size_t size, struct user_struct * user) +{ + if (user) { + spin_lock(&mlock_user_lock); + user->locked_shm -= (size >> PAGE_SHIFT); + spin_unlock(&mlock_user_lock); + free_uid(user); + } +} --- linux-2.6.7/mm/mmap.c.mlock 2004-08-03 22:46:42.101061834 -0400 +++ linux-2.6.7/mm/mmap.c 2004-08-03 22:46:43.436668023 -0400 @@ -806,15 +806,17 @@ unsigned long do_mmap_pgoff(struct file mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) { - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return -EPERM; vm_flags |= VM_LOCKED; } /* mlock MCL_FUTURE? */ if (vm_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked, lock_limit; + locked = mm->locked_vm << PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; locked += len; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } @@ -1746,9 +1748,11 @@ unsigned long do_brk(unsigned long addr, * mlock MCL_FUTURE? */ if (mm->def_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked, lock_limit; + locked = mm->locked_vm << PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; locked += len; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } --- linux-2.6.7/mm/mremap.c.mlock 2004-08-03 22:46:30.312537733 -0400 +++ linux-2.6.7/mm/mremap.c 2004-08-03 22:46:43.439667138 -0400 @@ -324,10 +324,12 @@ unsigned long do_mremap(unsigned long ad goto out; } if (vma->vm_flags & VM_LOCKED) { - unsigned long locked = current->mm->locked_vm << PAGE_SHIFT; + unsigned long locked, lock_limit; + locked = current->mm->locked_vm << PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; locked += new_len - old_len; ret = -EAGAIN; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) goto out; } ret = -ENOMEM; --- linux-2.6.7/mm/shmem.c.mlock 2004-08-03 22:46:39.416853287 -0400 +++ linux-2.6.7/mm/shmem.c 2004-08-03 22:46:43.444665665 -0400 @@ -1151,17 +1151,29 @@ shmem_get_policy(struct vm_area_struct * } #endif -void shmem_lock(struct file *file, int lock) +int shmem_lock(struct file *file, int lock, struct user_struct * user) { struct inode *inode = file->f_dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); + int retval = -ENOMEM; + + if (!can_do_mlock()) + return -EPERM; spin_lock(&info->lock); - if (lock) + if (lock && !(info->flags & VM_LOCKED)) { + if (!user_can_mlock(inode->i_size, user) && !capable(CAP_IPC_LOCK)) + goto out_nomem; info->flags |= VM_LOCKED; - else + } + if (!lock && (info->flags & VM_LOCKED) && user) { + user_subtract_mlock(inode->i_size, user); info->flags &= ~VM_LOCKED; + } + retval = 0; +out_nomem: spin_unlock(&info->lock); + return retval; } static int shmem_mmap(struct file *file, struct vm_area_struct *vma) ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:56 ` Rik van Riel @ 2004-08-04 6:06 ` Chris Wright 2004-08-04 13:31 ` Rik van Riel 0 siblings, 1 reply; 49+ messages in thread From: Chris Wright @ 2004-08-04 6:06 UTC (permalink / raw) To: Rik van Riel Cc: William Lee Irwin III, Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, Andrew Morton * Rik van Riel (riel@redhat.com) wrote: > On Tue, 3 Aug 2004, William Lee Irwin III wrote: > > > By any chance could you repost a complete patch and/or series? > > Here you are. It is fits into the Fedora kernel rpm, but > I guess it should apply to upstream and -mm too. Not quite. The hugetlb_shm_group change is upstream so there's a small conflict. I've got a diff. It begs the question of whether to keep the hugetlb_shm_group bits at all if this is merged. > Please check if there are any spots left where this patch > did something wrong. I'd like to get this merged ASAP, so > I will fix any actual errors people find. Few spots below. > --- linux-2.6.7/ipc/shm.c.mlock 2004-08-03 22:46:29.848674505 -0400 > +++ linux-2.6.7/ipc/shm.c 2004-08-03 22:46:43.399678929 -0400 > @@ -114,7 +114,10 @@ static void shm_destroy (struct shmid_ke > shm_rmid (shp->id); > shm_unlock(shp); > if (!is_file_hugepages(shp->shm_file)) > - shmem_lock(shp->shm_file, 0); > + shmem_lock(shp->shm_file, 0, shp->mlock_user); > + else > + user_subtract_mlock(shp->shm_file->f_dentry->d_inode->i_size, > + shp->mlock_user); > fput (shp->shm_file); > security_shm_free(shp); > ipc_rcu_free(shp, sizeof(struct shmid_kernel)); > @@ -198,9 +201,11 @@ static int newseg (key_t key, int shmflg > return error; > } > > - if (shmflg & SHM_HUGETLB) > + if (shmflg & SHM_HUGETLB) { > + /* hugetlb_zero_setup takes care of mlock user accounting */ > file = hugetlb_zero_setup(size); > - else { > + shp->mlock_user = current->user; This gets overwritten by NULL in the hunk below. So the accouting will never be undone. > + } else { > sprintf (name, "SYSV%08x", key); > file = shmem_file_setup(name, size, VM_ACCOUNT); > } > @@ -221,6 +226,7 @@ static int newseg (key_t key, int shmflg > shp->shm_nattch = 0; > shp->id = shm_buildid(id,shp->shm_perm.seq); > shp->shm_file = file; > + shp->mlock_user = NULL; > file->f_dentry->d_inode->i_ino = shp->id; > if (shmflg & SHM_HUGETLB) > set_file_hugepages(file); > @@ -504,14 +510,11 @@ asmlinkage long sys_shmctl (int shmid, i > case SHM_LOCK: > case SHM_UNLOCK: > { > -/* Allow superuser to lock segment in memory */ > -/* Should the pages be faulted in here or leave it to user? */ > -/* need to determine interaction with current->swappable */ > - if (!capable(CAP_IPC_LOCK)) { > + /* Allow superuser to lock segment in memory */ > + if (!can_do_mlock()) { I actually think this is too restrictive. Why not be able to unlock is the rlimit has been reset to zero? It's also called 2 or 3 times during SHM_LOCK. > err = -EPERM; > goto out; > } > - > shp = shm_lock(shmid); > if(shp==NULL) { > err = -EINVAL; > @@ -526,13 +529,19 @@ asmlinkage long sys_shmctl (int shmid, i > goto out_unlock; > > if(cmd==SHM_LOCK) { > - if (!is_file_hugepages(shp->shm_file)) > - shmem_lock(shp->shm_file, 1); > - shp->shm_flags |= SHM_LOCKED; > + struct user_struct * user = current->user; > + if (!is_file_hugepages(shp->shm_file)) { > + err = shmem_lock(shp->shm_file, 1, current->user); > + if (!err) { > + shp->shm_flags |= SHM_LOCKED; Slight change in behaviour. Used to set SHM_LOCKED on hugetlb backed segments as well. I don't see any purpose for the old behaviour though. > + shp->mlock_user = user; > + } > + } > } else { > if (!is_file_hugepages(shp->shm_file)) > - shmem_lock(shp->shm_file, 0); > + shmem_lock(shp->shm_file, 0, shp->mlock_user); > shp->shm_flags &= ~SHM_LOCKED; This doesn't match behaviour above. > + shp->mlock_user = NULL; This means that SHM_UNLOCK on SHM_HUGETLB segment will never get unaccounted during segment destruction (since mlock_user will errnoeously be NULL). I think both of these should be under !is_file_hugepages condition. I don't see the point of SHM_{UN,}LOCK on SHM_HUGETLB segment. > } > shm_unlock(shp); > goto out; > --- linux-2.6.7/ipc/util.c.mlock 2004-08-03 22:46:29.851673621 -0400 > +++ linux-2.6.7/ipc/util.c 2004-08-03 22:46:43.402678045 -0400 > @@ -392,8 +392,11 @@ int ipcperms (struct kern_ipc_perm *ipcp > granted_mode >>= 3; > /* is there some bit set in requested_mode but not in granted_mode? */ > if ((requested_mode & ~granted_mode & 0007) && > - !capable(CAP_IPC_OWNER)) > - return -1; > + !capable(CAP_IPC_OWNER)) { > + if (!can_do_mlock()) { > + return -1; > + } > + } I still don't see the use for this one. I believe it duplicates SHM_HUGETLB check that's already there. > +int user_can_mlock(size_t size, struct user_struct * user) > +{ > + unsigned long lock_limit, locked; > + int allowed = 0; > + > + spin_lock(&mlock_user_lock); > + locked = size >> PAGE_SHIFT; > + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; > + lock_limit >>= PAGE_SHIFT; > + if (locked + user->locked_shm > lock_limit) > + goto out; > + atomic_inc(&user->__count); There is a trivial get_uid wrapper. Although, that's arguable whether it's useful here. > + user->locked_shm += locked; > + allowed = 1; > +out: > + spin_unlock(&mlock_user_lock); > + return allowed; > +} > + > +void user_subtract_mlock(size_t size, struct user_struct * user) > +{ > + if (user) { Hmm, is !user ever valid? Perhaps it should start out as BUG_ON? > + spin_lock(&mlock_user_lock); > + user->locked_shm -= (size >> PAGE_SHIFT); > + spin_unlock(&mlock_user_lock); > + free_uid(user); > + } > +} > --- linux-2.6.7/mm/shmem.c.mlock 2004-08-03 22:46:39.416853287 -0400 > +++ linux-2.6.7/mm/shmem.c 2004-08-03 22:46:43.444665665 -0400 > @@ -1151,17 +1151,29 @@ shmem_get_policy(struct vm_area_struct * > } > #endif > > -void shmem_lock(struct file *file, int lock) > +int shmem_lock(struct file *file, int lock, struct user_struct * user) > { > struct inode *inode = file->f_dentry->d_inode; > struct shmem_inode_info *info = SHMEM_I(inode); > + int retval = -ENOMEM; > + > + if (!can_do_mlock()) > + return -EPERM; I see no point in checking this when !lock. In fact, the error will be silently ignored, and the accounting will never be undone if ulimit is reset to 0 before removing the segment. In the case of lock == 1, user_can_mlock basically duplicates the check. Maybe it should just be removed. > spin_lock(&info->lock); > - if (lock) > + if (lock && !(info->flags & VM_LOCKED)) { > + if (!user_can_mlock(inode->i_size, user) && !capable(CAP_IPC_LOCK)) > + goto out_nomem; > info->flags |= VM_LOCKED; > - else > + } > + if (!lock && (info->flags & VM_LOCKED) && user) { > + user_subtract_mlock(inode->i_size, user); > info->flags &= ~VM_LOCKED; > + } > + retval = 0; > +out_nomem: > spin_unlock(&info->lock); > + return retval; > } > > static int shmem_mmap(struct file *file, struct vm_area_struct *vma) -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 6:06 ` Chris Wright @ 2004-08-04 13:31 ` Rik van Riel 2004-08-04 13:51 ` Arjan van de Ven 0 siblings, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-04 13:31 UTC (permalink / raw) To: Chris Wright Cc: William Lee Irwin III, Andrea Arcangeli, Arjan van de Ven, linux-kernel, Andrew Morton On Tue, 3 Aug 2004, Chris Wright wrote: > This gets overwritten by NULL in the hunk below. So the accouting will > never be undone. Fixed in the incremental below. > > @@ -504,14 +510,11 @@ asmlinkage long sys_shmctl (int shmid, i > > case SHM_LOCK: > > case SHM_UNLOCK: > > { > > -/* Allow superuser to lock segment in memory */ > > -/* Should the pages be faulted in here or leave it to user? */ > > -/* need to determine interaction with current->swappable */ > > - if (!capable(CAP_IPC_LOCK)) { > > + /* Allow superuser to lock segment in memory */ > > + if (!can_do_mlock()) { > > I actually think this is too restrictive. Why not be able > to unlock is the rlimit has been reset to zero? It's also > called 2 or 3 times during SHM_LOCK. Good point. I've made unlock an unprivileged operation in the incremental patch below. Let me know if this is what you wanted ;) > > @@ -526,13 +529,19 @@ asmlinkage long sys_shmctl (int shmid, i > > goto out_unlock; > > > > if(cmd==SHM_LOCK) { > > - if (!is_file_hugepages(shp->shm_file)) > > - shmem_lock(shp->shm_file, 1); > > - shp->shm_flags |= SHM_LOCKED; > > + struct user_struct * user = current->user; > > + if (!is_file_hugepages(shp->shm_file)) { > > + err = shmem_lock(shp->shm_file, 1, current->user); > > + if (!err) { > > + shp->shm_flags |= SHM_LOCKED; > > Slight change in behaviour. Used to set SHM_LOCKED on hugetlb backed > segments as well. I don't see any purpose for the old behaviour though. It's a needed change though, since shmem_lock could fail due to the user not having enough quota left. > > } else { > > if (!is_file_hugepages(shp->shm_file)) > > - shmem_lock(shp->shm_file, 0); > > + shmem_lock(shp->shm_file, 0, shp->mlock_user); > > shp->shm_flags &= ~SHM_LOCKED; > > This doesn't match behaviour above. > > + shp->mlock_user = NULL; > > This means that SHM_UNLOCK on SHM_HUGETLB segment will never get > unaccounted during segment destruction (since mlock_user will > errnoeously be NULL). I think both of these should be under > !is_file_hugepages condition. I don't see the point of SHM_{UN,}LOCK on > SHM_HUGETLB segment. Fixed in the incremental, I guess... And yeah, I'm not quite sure why we go through the motions with SHM_HUGETLB segments myself ... I'm pondering a: if (is_file_hugepages(shp->shm_file) return -EINVAL; earlier on in the function. That would get rid of these issues and clean up the code somewhat ;) > > --- linux-2.6.7/ipc/util.c.mlock 2004-08-03 22:46:29.851673621 -0400 > > +++ linux-2.6.7/ipc/util.c 2004-08-03 22:46:43.402678045 -0400 > > @@ -392,8 +392,11 @@ int ipcperms (struct kern_ipc_perm *ipcp > > granted_mode >>= 3; > > /* is there some bit set in requested_mode but not in granted_mode? */ > > if ((requested_mode & ~granted_mode & 0007) && > > - !capable(CAP_IPC_OWNER)) > > - return -1; > > + !capable(CAP_IPC_OWNER)) { > > + if (!can_do_mlock()) { > > + return -1; > > + } > > + } > > I still don't see the use for this one. I believe it duplicates > SHM_HUGETLB check that's already there. I'm not sure about your comments here. However, I'm also not quite sure about this piece of code. Arjan ? ;) > > +int user_can_mlock(size_t size, struct user_struct * user) > > +{ > > + unsigned long lock_limit, locked; > > + int allowed = 0; > > + > > + spin_lock(&mlock_user_lock); > > + locked = size >> PAGE_SHIFT; > > + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; > > + lock_limit >>= PAGE_SHIFT; > > + if (locked + user->locked_shm > lock_limit) > > + goto out; > > + atomic_inc(&user->__count); > > There is a trivial get_uid wrapper. Although, that's arguable whether > it's useful here. It's certainly more beautiful. Fixed in the incremental ... > > +void user_subtract_mlock(size_t size, struct user_struct * user) > > +{ > > + if (user) { > > Hmm, is !user ever valid? Perhaps it should start out as BUG_ON? Hmmm, I guess you're right. Hugetlb segments always should have an shp->mlock_user and while shmem_lock gets called blindly from shm_destroy, it should only call user_subtract_mlock when the shm segment really was locked in memory... I just verified the 3 callers and they appear correct wrt this. > > +int shmem_lock(struct file *file, int lock, struct user_struct * user) > > { > > struct inode *inode = file->f_dentry->d_inode; > > struct shmem_inode_info *info = SHMEM_I(inode); > > + int retval = -ENOMEM; > > + > > + if (!can_do_mlock()) > > + return -EPERM; > > I see no point in checking this when !lock. Agreed, fixed in the incremental. Please let me know if you find any more issues. --- linux-2.6.7/ipc/shm.c.incremental 2004-08-04 08:24:41.000000000 -0400 +++ linux-2.6.7/ipc/shm.c 2004-08-04 08:32:39.000000000 -0400 @@ -193,6 +193,7 @@ static int newseg (key_t key, int shmflg shp->shm_perm.key = key; shp->shm_flags = (shmflg & S_IRWXUGO); + shp->mlock_user = NULL; shp->shm_perm.security = NULL; error = security_shm_alloc(shp); @@ -226,7 +227,6 @@ static int newseg (key_t key, int shmflg shp->shm_nattch = 0; shp->id = shm_buildid(id,shp->shm_perm.seq); shp->shm_file = file; - shp->mlock_user = NULL; file->f_dentry->d_inode->i_ino = shp->id; if (shmflg & SHM_HUGETLB) set_file_hugepages(file); @@ -511,7 +511,7 @@ asmlinkage long sys_shmctl (int shmid, i case SHM_UNLOCK: { /* Allow superuser to lock segment in memory */ - if (!can_do_mlock()) { + if (!can_do_mlock() && cmd == SHM_LOCK) { err = -EPERM; goto out; } @@ -537,9 +537,8 @@ asmlinkage long sys_shmctl (int shmid, i shp->mlock_user = user; } } - } else { - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0, shp->mlock_user); + } else if (!is_file_hugepages(shp->shm_file)) { + shmem_lock(shp->shm_file, 0, shp->mlock_user); shp->shm_flags &= ~SHM_LOCKED; shp->mlock_user = NULL; } --- linux-2.6.7/mm/mlock.c.incremental 2004-08-04 09:22:11.594669582 -0400 +++ linux-2.6.7/mm/mlock.c 2004-08-04 09:28:14.417102357 -0400 @@ -211,7 +211,7 @@ int user_can_mlock(size_t size, struct u lock_limit >>= PAGE_SHIFT; if (locked + user->locked_shm > lock_limit) goto out; - atomic_inc(&user->__count); + get_uid(user); user->locked_shm += locked; allowed = 1; out: @@ -221,10 +221,8 @@ out: void user_subtract_mlock(size_t size, struct user_struct * user) { - if (user) { - spin_lock(&mlock_user_lock); - user->locked_shm -= (size >> PAGE_SHIFT); - spin_unlock(&mlock_user_lock); - free_uid(user); - } + spin_lock(&mlock_user_lock); + user->locked_shm -= (size >> PAGE_SHIFT); + spin_unlock(&mlock_user_lock); + free_uid(user); } --- linux-2.6.7/mm/shmem.c.incremental 2004-08-04 09:29:10.606445758 -0400 +++ linux-2.6.7/mm/shmem.c 2004-08-04 09:29:47.337557579 -0400 @@ -1157,7 +1157,7 @@ int shmem_lock(struct file *file, int lo struct shmem_inode_info *info = SHMEM_I(inode); int retval = -ENOMEM; - if (!can_do_mlock()) + if (lock && !can_do_mlock()) return -EPERM; spin_lock(&info->lock); ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 13:31 ` Rik van Riel @ 2004-08-04 13:51 ` Arjan van de Ven 2004-08-04 13:56 ` Rik van Riel 0 siblings, 1 reply; 49+ messages in thread From: Arjan van de Ven @ 2004-08-04 13:51 UTC (permalink / raw) To: Rik van Riel Cc: Chris Wright, William Lee Irwin III, Andrea Arcangeli, linux-kernel, Andrew Morton [-- Attachment #1: Type: text/plain, Size: 753 bytes --] On Wed, Aug 04, 2004 at 09:31:54AM -0400, Rik van Riel wrote: > > > @@ -392,8 +392,11 @@ int ipcperms (struct kern_ipc_perm *ipcp > > > granted_mode >>= 3; > > > /* is there some bit set in requested_mode but not in granted_mode? */ > > > if ((requested_mode & ~granted_mode & 0007) && > > > - !capable(CAP_IPC_OWNER)) > > > - return -1; > > > + !capable(CAP_IPC_OWNER)) { > > > + if (!can_do_mlock()) { > > > + return -1; > > > + } > > > + } > > > > I still don't see the use for this one. I believe it duplicates > > SHM_HUGETLB check that's already there. > > I'm not sure about your comments here. However, I'm also not > quite sure about this piece of code. Arjan ? ;) hmmm looks bullshit now that I look at it again. [-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 13:51 ` Arjan van de Ven @ 2004-08-04 13:56 ` Rik van Riel 0 siblings, 0 replies; 49+ messages in thread From: Rik van Riel @ 2004-08-04 13:56 UTC (permalink / raw) To: Arjan van de Ven Cc: Chris Wright, William Lee Irwin III, Andrea Arcangeli, linux-kernel, Andrew Morton On Wed, 4 Aug 2004, Arjan van de Ven wrote: > hmmm looks bullshit now that I look at it again. OK, I removed the ipc/util.c chunk from the patch now. Here's the complete patch again, addressing all of Chris's latest issues. If nobody else has big objections, I'd like to get this merged upstream ;) --- linux-2.6.7/include/asm-ppc64/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-ppc64/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -45,7 +45,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-m68k/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-m68k/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-parisc/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-parisc/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-ppc/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-ppc/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -36,7 +36,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-x86_64/resource.h.mlock 2004-08-04 07:46:43.000000000 -0400 +++ linux-2.6.7/include/asm-x86_64/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE , PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-ia64/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-ia64/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -46,7 +46,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/linux/mm.h.mlock 2004-08-04 07:47:05.000000000 -0400 +++ linux-2.6.7/include/linux/mm.h 2004-08-04 07:47:06.000000000 -0400 @@ -498,9 +498,20 @@ int shmem_set_policy(struct vm_area_stru struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr); struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags); -void shmem_lock(struct file * file, int lock); +int shmem_lock(struct file * file, int lock, struct user_struct *); int shmem_zero_setup(struct vm_area_struct *); +static inline int can_do_mlock(void) +{ + if (capable(CAP_IPC_LOCK)) + return 1; + if (current->rlim[RLIMIT_MEMLOCK].rlim_cur != 0) + return 1; + return 0; +} +extern int user_can_mlock(size_t, struct user_struct *); +extern void user_subtract_mlock(size_t, struct user_struct *); + /* * Parameter block passed down to zap_pte_range in exceptional cases. */ --- linux-2.6.7/include/linux/shm.h.mlock 2004-06-16 01:19:43.000000000 -0400 +++ linux-2.6.7/include/linux/shm.h 2004-08-04 07:47:06.000000000 -0400 @@ -84,6 +84,7 @@ struct shmid_kernel /* private to the ke time_t shm_ctim; pid_t shm_cprid; pid_t shm_lprid; + struct user_struct * mlock_user; }; /* shm_mode upper byte flags */ --- linux-2.6.7/include/linux/sched.h.mlock 2004-08-04 07:47:04.000000000 -0400 +++ linux-2.6.7/include/linux/sched.h 2004-08-04 07:47:06.000000000 -0400 @@ -331,6 +331,7 @@ struct user_struct { atomic_t sigpending; /* How many pending signals does this user have? */ /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ + unsigned long locked_shm; /* How many pages of mlocked shm ? */ /* Hash table maintenance information */ struct list_head uidhash_list; --- linux-2.6.7/include/asm-arm/resource.h.mlock 2004-08-04 07:46:41.000000000 -0400 +++ linux-2.6.7/include/asm-arm/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-sparc/resource.h.mlock 2004-08-04 07:46:43.000000000 -0400 +++ linux-2.6.7/include/asm-sparc/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -44,7 +44,7 @@ { 0, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {INR_OPEN, INR_OPEN}, {0, 0}, \ - {RLIM_INFINITY, RLIM_INFINITY}, \ + {PAGE_SIZE, PAGE_SIZE}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-alpha/resource.h.mlock 2004-08-04 07:46:41.000000000 -0400 +++ linux-2.6.7/include/asm-alpha/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -41,7 +41,7 @@ {INR_OPEN, INR_OPEN}, /* RLIMIT_NOFILE */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_AS */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_NPROC */ \ - {LONG_MAX, LONG_MAX}, /* RLIMIT_MEMLOCK */ \ + {PAGE_SIZE, PAGE_SIZE}, /* RLIMIT_MEMLOCK */ \ {LONG_MAX, LONG_MAX}, /* RLIMIT_LOCKS */ \ {MAX_SIGPENDING, MAX_SIGPENDING}, /* RLIMIT_SIGPENDING */ \ {MQ_BYTES_MAX, MQ_BYTES_MAX}, /* RLIMIT_MSGQUEUE */ \ --- linux-2.6.7/include/asm-h8300/resource.h.mlock 2004-08-04 07:46:41.000000000 -0400 +++ linux-2.6.7/include/asm-h8300/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-i386/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-i386/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -40,7 +40,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-s390/resource.h.mlock 2004-08-04 07:46:42.000000000 -0400 +++ linux-2.6.7/include/asm-s390/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -47,7 +47,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-cris/resource.h.mlock 2004-08-04 07:46:41.000000000 -0400 +++ linux-2.6.7/include/asm-cris/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-sparc64/resource.h.mlock 2004-08-04 07:46:43.000000000 -0400 +++ linux-2.6.7/include/asm-sparc64/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -43,7 +43,7 @@ { 0, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {INR_OPEN, INR_OPEN}, {0, 0}, \ - {RLIM_INFINITY, RLIM_INFINITY}, \ + {PAGE_SIZE, PAGE_SIZE }, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {RLIM_INFINITY, RLIM_INFINITY}, \ {MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-arm26/resource.h.mlock 2004-08-04 07:46:41.000000000 -0400 +++ linux-2.6.7/include/asm-arm26/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING}, \ --- linux-2.6.7/include/asm-v850/resource.h.mlock 2004-08-04 07:46:43.000000000 -0400 +++ linux-2.6.7/include/asm-v850/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/include/asm-sh/resource.h.mlock 2004-08-04 07:46:43.000000000 -0400 +++ linux-2.6.7/include/asm-sh/resource.h 2004-08-04 07:47:06.000000000 -0400 @@ -39,7 +39,7 @@ { RLIM_INFINITY, RLIM_INFINITY }, \ { 0, 0 }, \ { INR_OPEN, INR_OPEN }, \ - { RLIM_INFINITY, RLIM_INFINITY }, \ + { PAGE_SIZE, PAGE_SIZE }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { RLIM_INFINITY, RLIM_INFINITY }, \ { MAX_SIGPENDING, MAX_SIGPENDING }, \ --- linux-2.6.7/fs/hugetlbfs/inode.c.mlock 2004-08-04 07:47:03.000000000 -0400 +++ linux-2.6.7/fs/hugetlbfs/inode.c 2004-08-04 07:47:06.000000000 -0400 @@ -718,19 +718,22 @@ static unsigned long hugetlbfs_counter(v struct file *hugetlb_zero_setup(size_t size) { - int error; + int error = -ENOMEM; struct file *file; struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; char buf[16]; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return ERR_PTR(-EPERM); if (!is_hugepage_mem_enough(size)) return ERR_PTR(-ENOMEM); + if (!user_can_mlock(size, current->user)) + return ERR_PTR(-ENOMEM); + root = hugetlbfs_vfsmount->mnt_root; snprintf(buf, 16, "%lu", hugetlbfs_counter()); quick_string.name = buf; @@ -738,7 +741,7 @@ struct file *hugetlb_zero_setup(size_t s quick_string.hash = 0; dentry = d_alloc(root, &quick_string); if (!dentry) - return ERR_PTR(-ENOMEM); + goto out_subtract_mlock; error = -ENFILE; file = get_empty_filp(); @@ -765,6 +768,8 @@ out_file: put_filp(file); out_dentry: dput(dentry); +out_subtract_mlock: + user_subtract_mlock(size, current->user); return ERR_PTR(error); } --- linux-2.6.7/kernel/user.c.mlock 2004-08-04 07:46:45.000000000 -0400 +++ linux-2.6.7/kernel/user.c 2004-08-04 07:47:06.000000000 -0400 @@ -32,7 +32,8 @@ struct user_struct root_user = { .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), - .mq_bytes = 0 + .mq_bytes = 0, + .locked_shm = 0 }; /* @@ -113,6 +114,7 @@ struct user_struct * alloc_uid(uid_t uid atomic_set(&new->sigpending, 0); new->mq_bytes = 0; + new->locked_shm = 0; /* * Before adding this, check whether we raced --- linux-2.6.7/ipc/shm.c.mlock 2004-08-04 07:46:45.000000000 -0400 +++ linux-2.6.7/ipc/shm.c 2004-08-04 08:32:39.000000000 -0400 @@ -114,7 +114,10 @@ static void shm_destroy (struct shmid_ke shm_rmid (shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0); + shmem_lock(shp->shm_file, 0, shp->mlock_user); + else + user_subtract_mlock(shp->shm_file->f_dentry->d_inode->i_size, + shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); ipc_rcu_free(shp, sizeof(struct shmid_kernel)); @@ -190,6 +193,7 @@ static int newseg (key_t key, int shmflg shp->shm_perm.key = key; shp->shm_flags = (shmflg & S_IRWXUGO); + shp->mlock_user = NULL; shp->shm_perm.security = NULL; error = security_shm_alloc(shp); @@ -198,9 +202,11 @@ static int newseg (key_t key, int shmflg return error; } - if (shmflg & SHM_HUGETLB) + if (shmflg & SHM_HUGETLB) { + /* hugetlb_zero_setup takes care of mlock user accounting */ file = hugetlb_zero_setup(size); - else { + shp->mlock_user = current->user; + } else { sprintf (name, "SYSV%08x", key); file = shmem_file_setup(name, size, VM_ACCOUNT); } @@ -504,14 +510,11 @@ asmlinkage long sys_shmctl (int shmid, i case SHM_LOCK: case SHM_UNLOCK: { -/* Allow superuser to lock segment in memory */ -/* Should the pages be faulted in here or leave it to user? */ -/* need to determine interaction with current->swappable */ - if (!capable(CAP_IPC_LOCK)) { + /* Allow superuser to lock segment in memory */ + if (!can_do_mlock() && cmd == SHM_LOCK) { err = -EPERM; goto out; } - shp = shm_lock(shmid); if(shp==NULL) { err = -EINVAL; @@ -526,13 +529,18 @@ asmlinkage long sys_shmctl (int shmid, i goto out_unlock; if(cmd==SHM_LOCK) { - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 1); - shp->shm_flags |= SHM_LOCKED; - } else { - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0); + struct user_struct * user = current->user; + if (!is_file_hugepages(shp->shm_file)) { + err = shmem_lock(shp->shm_file, 1, current->user); + if (!err) { + shp->shm_flags |= SHM_LOCKED; + shp->mlock_user = user; + } + } + } else if (!is_file_hugepages(shp->shm_file)) { + shmem_lock(shp->shm_file, 0, shp->mlock_user); shp->shm_flags &= ~SHM_LOCKED; + shp->mlock_user = NULL; } shm_unlock(shp); goto out; --- linux-2.6.7/mm/mlock.c.mlock 2004-06-16 01:18:57.000000000 -0400 +++ linux-2.6.7/mm/mlock.c 2004-08-04 09:28:14.417102357 -0400 @@ -60,7 +60,7 @@ static int do_mlock(unsigned long start, struct vm_area_struct * vma, * next; int error; - if (on && !capable(CAP_IPC_LOCK)) + if (on && !can_do_mlock()) return -EPERM; len = PAGE_ALIGN(len); end = start + len; @@ -118,7 +118,7 @@ asmlinkage long sys_mlock(unsigned long lock_limit >>= PAGE_SHIFT; /* check against resource limits */ - if (locked <= lock_limit) + if ( (locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); up_write(¤t->mm->mmap_sem); return error; @@ -142,7 +142,7 @@ static int do_mlockall(int flags) unsigned int def_flags; struct vm_area_struct * vma; - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return -EPERM; def_flags = 0; @@ -177,7 +177,7 @@ asmlinkage long sys_mlockall(int flags) lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; - if (current->mm->total_vm <= lock_limit) + if ((current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); out: up_write(¤t->mm->mmap_sem); @@ -193,3 +193,36 @@ asmlinkage long sys_munlockall(void) up_write(¤t->mm->mmap_sem); return ret; } + +/* + * Objects with different lifetime than processes (mlocked shm segments + * and hugetlb files) get accounted against the user_struct instead. + */ +static spinlock_t mlock_user_lock = SPIN_LOCK_UNLOCKED; + +int user_can_mlock(size_t size, struct user_struct * user) +{ + unsigned long lock_limit, locked; + int allowed = 0; + + spin_lock(&mlock_user_lock); + locked = size >> PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + if (locked + user->locked_shm > lock_limit) + goto out; + get_uid(user); + user->locked_shm += locked; + allowed = 1; +out: + spin_unlock(&mlock_user_lock); + return allowed; +} + +void user_subtract_mlock(size_t size, struct user_struct * user) +{ + spin_lock(&mlock_user_lock); + user->locked_shm -= (size >> PAGE_SHIFT); + spin_unlock(&mlock_user_lock); + free_uid(user); +} --- linux-2.6.7/mm/mmap.c.mlock 2004-08-04 07:47:04.000000000 -0400 +++ linux-2.6.7/mm/mmap.c 2004-08-04 07:47:06.000000000 -0400 @@ -806,15 +806,17 @@ unsigned long do_mmap_pgoff(struct file mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) { - if (!capable(CAP_IPC_LOCK)) + if (!can_do_mlock()) return -EPERM; vm_flags |= VM_LOCKED; } /* mlock MCL_FUTURE? */ if (vm_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked, lock_limit; + locked = mm->locked_vm << PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; locked += len; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } @@ -1746,9 +1748,11 @@ unsigned long do_brk(unsigned long addr, * mlock MCL_FUTURE? */ if (mm->def_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked, lock_limit; + locked = mm->locked_vm << PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; locked += len; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) return -EAGAIN; } --- linux-2.6.7/mm/mremap.c.mlock 2004-08-04 07:46:45.000000000 -0400 +++ linux-2.6.7/mm/mremap.c 2004-08-04 07:47:06.000000000 -0400 @@ -324,10 +324,12 @@ unsigned long do_mremap(unsigned long ad goto out; } if (vma->vm_flags & VM_LOCKED) { - unsigned long locked = current->mm->locked_vm << PAGE_SHIFT; + unsigned long locked, lock_limit; + locked = current->mm->locked_vm << PAGE_SHIFT; + lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; locked += new_len - old_len; ret = -EAGAIN; - if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) goto out; } ret = -ENOMEM; --- linux-2.6.7/mm/shmem.c.mlock 2004-08-04 07:46:58.000000000 -0400 +++ linux-2.6.7/mm/shmem.c 2004-08-04 09:29:47.337557579 -0400 @@ -1151,17 +1151,29 @@ shmem_get_policy(struct vm_area_struct * } #endif -void shmem_lock(struct file *file, int lock) +int shmem_lock(struct file *file, int lock, struct user_struct * user) { struct inode *inode = file->f_dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); + int retval = -ENOMEM; + + if (lock && !can_do_mlock()) + return -EPERM; spin_lock(&info->lock); - if (lock) + if (lock && !(info->flags & VM_LOCKED)) { + if (!user_can_mlock(inode->i_size, user) && !capable(CAP_IPC_LOCK)) + goto out_nomem; info->flags |= VM_LOCKED; - else + } + if (!lock && (info->flags & VM_LOCKED) && user) { + user_subtract_mlock(inode->i_size, user); info->flags &= ~VM_LOCKED; + } + retval = 0; +out_nomem: spin_unlock(&info->lock); + return retval; } static int shmem_mmap(struct file *file, struct vm_area_struct *vma) ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:22 ` Rik van Riel 2004-08-04 2:31 ` William Lee Irwin III @ 2004-08-04 3:13 ` Andrea Arcangeli 1 sibling, 0 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-04 3:13 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 10:22:30PM -0400, Rik van Riel wrote: > On Wed, 4 Aug 2004, Andrea Arcangeli wrote: > > > > Normal hugetlb file creation (through the filesystem) isn't touched > > > by these patches. > > > > it is: > > Hugetlb file creation through the filesystem never calls > hugetlb_zero_setup! What are you talking about ? > > > diff -purN linux-2.6.7/fs/hugetlbfs/inode.c linux/fs/hugetlbfs/inode.c > > --- linux-2.6.7/fs/hugetlbfs/inode.c 2004-07-29 11:36:55.744448953 > > +0200 > > +++ linux/fs/hugetlbfs/inode.c 2004-07-29 11:38:04.292595263 +0200 > > @@ -722,7 +722,7 @@ struct file *hugetlb_zero_setup(size_t s > > struct qstr quick_string; > > char buf[16]; > > > > - if (!capable(CAP_IPC_LOCK)) > > + if (!can_do_mlock()) > > return ERR_PTR(-EPERM); > > > this breaks local security if you set the rlimit to 1 byte (well, 1 byte > > == disable_cap_mlock). > > Please read my incremental patch. It adds a quota check > right after this code segment. I thought the check was applied to files too, and such code would not have worked correctly with files. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:13 ` Andrea Arcangeli 2004-08-04 2:20 ` William Lee Irwin III 2004-08-04 2:22 ` Rik van Riel @ 2004-08-04 2:25 ` Chris Wright 2 siblings, 0 replies; 49+ messages in thread From: Chris Wright @ 2004-08-04 2:25 UTC (permalink / raw) To: Andrea Arcangeli Cc: Rik van Riel, Chris Wright, Arjan van de Ven, linux-kernel, akpm * Andrea Arcangeli (andrea@suse.de) wrote: > diff -purN linux-2.6.7/fs/hugetlbfs/inode.c linux/fs/hugetlbfs/inode.c > --- linux-2.6.7/fs/hugetlbfs/inode.c 2004-07-29 11:36:55.744448953 > +0200 > +++ linux/fs/hugetlbfs/inode.c 2004-07-29 11:38:04.292595263 +0200 > @@ -722,7 +722,7 @@ struct file *hugetlb_zero_setup(size_t s > struct qstr quick_string; > char buf[16]; > > - if (!capable(CAP_IPC_LOCK)) > + if (!can_do_mlock()) > return ERR_PTR(-EPERM); > > if (!is_hugepage_mem_enough(size)) > > this breaks local security if you set the rlimit to 1 byte (well, 1 byte > == disable_cap_mlock). Right, that's true only for SHM_HUGETLB though, which uses hugetlb_zero_setup. And _that_ bit is what Rik's follow on patch is trying to fix. The normal hugetlbfs method using mmap() is unaffected, AFAICT. And has always been pretty open to using all of max_huge_pages. thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 1:53 ` Andrea Arcangeli 2004-08-04 2:01 ` Rik van Riel @ 2004-08-04 2:07 ` Chris Wright 2004-08-04 2:18 ` Andrea Arcangeli 1 sibling, 1 reply; 49+ messages in thread From: Chris Wright @ 2004-08-04 2:07 UTC (permalink / raw) To: Andrea Arcangeli Cc: Rik van Riel, Chris Wright, Arjan van de Ven, linux-kernel, akpm * Andrea Arcangeli (andrea@suse.de) wrote: > On Tue, Aug 03, 2004 at 09:21:34PM -0400, Rik van Riel wrote: > > This is exactly why named hugetlb files are NOT included > > in this accounting, only the ones created through the SHM > > interface are. > > but you're allowing everybody to alloc all RAM in hugetlb files with > the change in the previos patch posted by Arjan (you're currently posted > incremental patches against Arjan's patch at the top of the thread I > hope), so you must definitely apply "this" accounting to hugetlb files > too or it's still insecure as far as I can tell. I think it's no different from current behaviour. Which for fs based allocations is only guarded by fsuid and max_huge_pages. thanks, -chris -- Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 2:07 ` Chris Wright @ 2004-08-04 2:18 ` Andrea Arcangeli 0 siblings, 0 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-04 2:18 UTC (permalink / raw) To: Chris Wright; +Cc: Rik van Riel, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 07:07:30PM -0700, Chris Wright wrote: > I think it's no different from current behaviour. Which for fs based > allocations is only guarded by fsuid and max_huge_pages. sorry, what does prevent people from creating a 2M file, filling it with pagefaults, closing creating another one and so on? Either "this" accounting is applied to hugetlb files too or it's insecure. Now if they didn't change the creation of the hugetlb files to be allowed if the rlimit is !=0 then it wouldn't be insecure, but they changed that in the first patch. could you post a final patch once you're ready so that I will apply and verify that I can exploit it? If it doesn't break it'll be still messed up w.r.t. chown semantics (a true quota would get that right). ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:07 ` Andrea Arcangeli 2004-08-03 21:13 ` Arjan van de Ven @ 2004-08-03 21:13 ` Rik van Riel 2004-08-03 21:22 ` Andrea Arcangeli 1 sibling, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-03 21:13 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, 3 Aug 2004, Andrea Arcangeli wrote: > > - if (shmflg & SHM_HUGETLB) > > + if (shmflg & SHM_HUGETLB) { > > + /* hugetlb_zero_setup takes care of mlock user accounting */ > > file = hugetlb_zero_setup(size); > > + shp->mlock_user = current->user; > > + } else { > where do you change mlock_user in chown? You don't. Normal users aren't allowed to chown each other's files, nor are they allowed to "give away" one of their files to somebody else. On unlock the quota gets deducted from the user who created the hugetlbfs file. This means there shouldn't be security issues with this approach. Let me know if I've overlooked one. -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:13 ` Rik van Riel @ 2004-08-03 21:22 ` Andrea Arcangeli 2004-08-03 21:24 ` Arjan van de Ven 2004-08-03 21:31 ` Rik van Riel 0 siblings, 2 replies; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 21:22 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 05:13:56PM -0400, Rik van Riel wrote: > On Tue, 3 Aug 2004, Andrea Arcangeli wrote: > > > > - if (shmflg & SHM_HUGETLB) > > > + if (shmflg & SHM_HUGETLB) { > > > + /* hugetlb_zero_setup takes care of mlock user accounting */ > > > file = hugetlb_zero_setup(size); > > > + shp->mlock_user = current->user; > > > + } else { > > > where do you change mlock_user in chown? > > You don't. Normal users aren't allowed to chown each > other's files, nor are they allowed to "give away" one > of their files to somebody else. > > On unlock the quota gets deducted from the user who > created the hugetlbfs file. > > This means there shouldn't be security issues with this > approach. Let me know if I've overlooked one. I agree there aren't security issues, but it's still very wrong to charge the old user if the admin gives the locked ram to a new user. This erratic behaviour shows how much the rlimit approch is flawed for named fs objects that have nothing to do with the transient task that created them. ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:22 ` Andrea Arcangeli @ 2004-08-03 21:24 ` Arjan van de Ven 2004-08-03 21:31 ` Rik van Riel 1 sibling, 0 replies; 49+ messages in thread From: Arjan van de Ven @ 2004-08-03 21:24 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Rik van Riel, Chris Wright, linux-kernel, akpm [-- Attachment #1: Type: text/plain, Size: 315 bytes --] On Tue, Aug 03, 2004 at 11:22:31PM +0200, Andrea Arcangeli wrote: > > I agree there aren't security issues, but it's still very wrong to > charge the old user if the admin gives the locked ram to a new user. no he gives the FILE to the new user. Which 1) is rare and 2) doesn't mean the old user stopped using it [-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:22 ` Andrea Arcangeli 2004-08-03 21:24 ` Arjan van de Ven @ 2004-08-03 21:31 ` Rik van Riel 2004-08-03 21:39 ` Andrea Arcangeli 2004-08-03 22:18 ` Gerrit Huizenga 1 sibling, 2 replies; 49+ messages in thread From: Rik van Riel @ 2004-08-03 21:31 UTC (permalink / raw) To: Andrea Arcangeli; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, 3 Aug 2004, Andrea Arcangeli wrote: > I agree there aren't security issues, but it's still very wrong to > charge the old user if the admin gives the locked ram to a new user. > This erratic behaviour shows how much the rlimit approch is flawed for > named fs objects that have nothing to do with the transient task that > created them. If root wants to screw over a user, there's nothing we can do. I am not worried about the scenario you describe because hugetlbfs seems to be used only by Oracle anyway, so you won't run into issues like you describe. It would be different for a general purpose filesystem, but I'd like to see a usage case for your scenario before making the code overly complex. -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:31 ` Rik van Riel @ 2004-08-03 21:39 ` Andrea Arcangeli 2004-08-04 1:56 ` William Lee Irwin III 2004-08-03 22:18 ` Gerrit Huizenga 1 sibling, 1 reply; 49+ messages in thread From: Andrea Arcangeli @ 2004-08-03 21:39 UTC (permalink / raw) To: Rik van Riel; +Cc: Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 05:31:08PM -0400, Rik van Riel wrote: > If root wants to screw over a user, there's nothing we > can do. I am not worried about the scenario you describe > because hugetlbfs seems to be used only by Oracle anyway, > so you won't run into issues like you describe. hugetlbfs isn't only used by oracle. Anyways if you were right then why is there a IPC_CAP_LOCK in hugetlbfs in the first place? If Oracle is the only user then just drop such check and stop binding rlimits to persistent fs objects. > It would be different for a general purpose filesystem, > but I'd like to see a usage case for your scenario before > making the code overly complex. if calling chown on hugetlbfs makes no sense then why is chown available in the first place? ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:39 ` Andrea Arcangeli @ 2004-08-04 1:56 ` William Lee Irwin III 0 siblings, 0 replies; 49+ messages in thread From: William Lee Irwin III @ 2004-08-04 1:56 UTC (permalink / raw) To: Andrea Arcangeli Cc: Rik van Riel, Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, Aug 03, 2004 at 05:31:08PM -0400, Rik van Riel wrote: >> If root wants to screw over a user, there's nothing we >> can do. I am not worried about the scenario you describe >> because hugetlbfs seems to be used only by Oracle anyway, >> so you won't run into issues like you describe. On Tue, Aug 03, 2004 at 11:39:42PM +0200, Andrea Arcangeli wrote: > hugetlbfs isn't only used by oracle. Anyways if you were right then why > is there a IPC_CAP_LOCK in hugetlbfs in the first place? If Oracle is > the only user then just drop such check and stop binding rlimits to > persistent fs objects. Excellent. It appears the check for the ability to mlock is now checking the amount of memory to be locked. -- wli ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 21:31 ` Rik van Riel 2004-08-03 21:39 ` Andrea Arcangeli @ 2004-08-03 22:18 ` Gerrit Huizenga 2004-08-04 1:22 ` Rik van Riel 1 sibling, 1 reply; 49+ messages in thread From: Gerrit Huizenga @ 2004-08-03 22:18 UTC (permalink / raw) To: Rik van Riel Cc: Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, 03 Aug 2004 17:31:08 EDT, Rik van Riel wrote: > On Tue, 3 Aug 2004, Andrea Arcangeli wrote: > > > I agree there aren't security issues, but it's still very wrong to > > charge the old user if the admin gives the locked ram to a new user. > > This erratic behaviour shows how much the rlimit approch is flawed for > > named fs objects that have nothing to do with the transient task that > > created them. > > If root wants to screw over a user, there's nothing we > can do. I am not worried about the scenario you describe > because hugetlbfs seems to be used only by Oracle anyway, > so you won't run into issues like you describe. > > It would be different for a general purpose filesystem, > but I'd like to see a usage case for your scenario before > making the code overly complex. DB2, JVM also use hugetlbfs, other uses have been tried with some success. gerrit ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-03 22:18 ` Gerrit Huizenga @ 2004-08-04 1:22 ` Rik van Riel 2004-08-04 1:37 ` Gerrit Huizenga 0 siblings, 1 reply; 49+ messages in thread From: Rik van Riel @ 2004-08-04 1:22 UTC (permalink / raw) To: Gerrit Huizenga Cc: Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, akpm On Tue, 3 Aug 2004, Gerrit Huizenga wrote: > DB2, JVM also use hugetlbfs, other uses have been tried with > some success. OK. Do any of those do the "root chowns an unnamed hugetlbfs file" scenario ? ;) -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." - Brian W. Kernighan ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 1:22 ` Rik van Riel @ 2004-08-04 1:37 ` Gerrit Huizenga 2004-08-04 1:55 ` William Lee Irwin III 0 siblings, 1 reply; 49+ messages in thread From: Gerrit Huizenga @ 2004-08-04 1:37 UTC (permalink / raw) To: Rik van Riel Cc: Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, akpm, pbadari On Tue, 03 Aug 2004 21:22:45 EDT, Rik van Riel wrote: > On Tue, 3 Aug 2004, Gerrit Huizenga wrote: > > > DB2, JVM also use hugetlbfs, other uses have been tried with > > some success. > > OK. Do any of those do the "root chowns an unnamed > hugetlbfs file" scenario ? ;) Badari will probably know the access method for DB2 better than I do. I know they go quite out of their way to avoid having root permissions at any point in time. How they accomplish this in the current source base, I don't know. They were using capabilities for things like this for a while. gerrit ^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [patch] mlock-as-nonroot revisted 2004-08-04 1:37 ` Gerrit Huizenga @ 2004-08-04 1:55 ` William Lee Irwin III 0 siblings, 0 replies; 49+ messages in thread From: William Lee Irwin III @ 2004-08-04 1:55 UTC (permalink / raw) To: Gerrit Huizenga Cc: Rik van Riel, Andrea Arcangeli, Chris Wright, Arjan van de Ven, linux-kernel, akpm, pbadari On Tue, 03 Aug 2004 21:22:45 EDT, Rik van Riel wrote: >> OK. Do any of those do the "root chowns an unnamed >> hugetlbfs file" scenario ? ;) On Tue, Aug 03, 2004 at 06:37:02PM -0700, Gerrit Huizenga wrote: > Badari will probably know the access method for DB2 better than > I do. I know they go quite out of their way to avoid having > root permissions at any point in time. How they accomplish this > in the current source base, I don't know. They were using > capabilities for things like this for a while. IIRC the program launcher acquires the capabilities prior to dropping root privileges and acquires the shm segment prior to exec. -- wli ^ permalink raw reply [flat|nested] 49+ messages in thread
end of thread, other threads:[~2004-08-04 13:57 UTC | newest] Thread overview: 49+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2004-07-29 10:03 [patch] mlock-as-nonroot revisted Arjan van de Ven 2004-07-29 21:28 ` Andrew Morton 2004-07-29 21:40 ` Andrea Arcangeli 2004-07-30 0:50 ` Rik van Riel 2004-07-30 2:16 ` Andrea Arcangeli 2004-07-30 0:51 ` Rik van Riel 2004-07-30 2:17 ` Andrea Arcangeli 2004-07-30 1:52 ` Chris Wright 2004-07-30 2:09 ` Andrea Arcangeli 2004-07-30 2:46 ` Rik van Riel 2004-08-03 20:54 ` Rik van Riel 2004-08-03 21:45 ` Chris Wright 2004-08-03 20:55 ` Rik van Riel 2004-08-03 21:07 ` Andrea Arcangeli 2004-08-03 21:13 ` Arjan van de Ven 2004-08-03 21:36 ` Andrea Arcangeli 2004-08-03 21:38 ` Arjan van de Ven 2004-08-03 21:51 ` Andrea Arcangeli 2004-08-03 22:01 ` Chris Wright 2004-08-03 22:11 ` Andrea Arcangeli 2004-08-03 22:33 ` Chris Wright 2004-08-03 22:42 ` Andrea Arcangeli 2004-08-03 22:52 ` Chris Wright 2004-08-04 1:21 ` Rik van Riel 2004-08-04 1:53 ` Andrea Arcangeli 2004-08-04 2:01 ` Rik van Riel 2004-08-04 2:13 ` Andrea Arcangeli 2004-08-04 2:20 ` William Lee Irwin III 2004-08-04 2:22 ` Rik van Riel 2004-08-04 2:31 ` William Lee Irwin III 2004-08-04 2:56 ` Rik van Riel 2004-08-04 6:06 ` Chris Wright 2004-08-04 13:31 ` Rik van Riel 2004-08-04 13:51 ` Arjan van de Ven 2004-08-04 13:56 ` Rik van Riel 2004-08-04 3:13 ` Andrea Arcangeli 2004-08-04 2:25 ` Chris Wright 2004-08-04 2:07 ` Chris Wright 2004-08-04 2:18 ` Andrea Arcangeli 2004-08-03 21:13 ` Rik van Riel 2004-08-03 21:22 ` Andrea Arcangeli 2004-08-03 21:24 ` Arjan van de Ven 2004-08-03 21:31 ` Rik van Riel 2004-08-03 21:39 ` Andrea Arcangeli 2004-08-04 1:56 ` William Lee Irwin III 2004-08-03 22:18 ` Gerrit Huizenga 2004-08-04 1:22 ` Rik van Riel 2004-08-04 1:37 ` Gerrit Huizenga 2004-08-04 1:55 ` William Lee Irwin III
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox