* [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc
@ 2004-07-03 17:35 Manfred Spraul
2004-07-03 20:22 ` Andrew Morton
0 siblings, 1 reply; 5+ messages in thread
From: Manfred Spraul @ 2004-07-03 17:35 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 437 bytes --]
Hi,
the lifetime of the ipc objects (sem array, msg queue, shm mapping) is
controlled by kern_ipc_perms->lock - a spinlock. There is no simple way
to reacquire this spinlock after it was dropped to
schedule()/kmalloc/copy_{to,from}_user/whatever.
The attached patch adds a reference count as a preparation to get rid of
sem_revalidate().
Andrew, could you add it to -mm?
Signed-Off-By: Manfred Spraul <manfred@colorfullife.com>
[-- Attachment #2: patch-ipc-01-rcu_refcount --]
[-- Type: text/plain, Size: 6131 bytes --]
diff -u 2.6/ipc/msg.c build-2.6/ipc/msg.c
--- 2.6/ipc/msg.c 2004-07-03 19:08:47.273962648 +0200
+++ build-2.6/ipc/msg.c 2004-07-03 19:16:03.445654472 +0200
@@ -100,14 +100,14 @@
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
if (retval) {
- ipc_rcu_free(msq, sizeof(*msq));
+ ipc_rcu_putref(msq);
return retval;
}
id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
if(id == -1) {
security_msg_queue_free(msq);
- ipc_rcu_free(msq, sizeof(*msq));
+ ipc_rcu_putref(msq);
return -ENOSPC;
}
@@ -193,7 +193,7 @@
}
atomic_sub(msq->q_cbytes, &msg_bytes);
security_msg_queue_free(msq);
- ipc_rcu_free(msq, sizeof(struct msg_queue));
+ ipc_rcu_putref(msq);
}
asmlinkage long sys_msgget (key_t key, int msgflg)
diff -u 2.6/ipc/sem.c build-2.6/ipc/sem.c
--- 2.6/ipc/sem.c 2004-07-03 19:08:47.274962496 +0200
+++ build-2.6/ipc/sem.c 2004-07-03 19:16:08.845833520 +0200
@@ -179,14 +179,14 @@
sma->sem_perm.security = NULL;
retval = security_sem_alloc(sma);
if (retval) {
- ipc_rcu_free(sma, size);
+ ipc_rcu_putref(sma);
return retval;
}
id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
if(id == -1) {
security_sem_free(sma);
- ipc_rcu_free(sma, size);
+ ipc_rcu_putref(sma);
return -ENOSPC;
}
used_sems += nsems;
@@ -473,7 +473,7 @@
used_sems -= sma->sem_nsems;
size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
security_sem_free(sma);
- ipc_rcu_free(sma, size);
+ ipc_rcu_putref(sma);
}
static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
diff -u 2.6/ipc/shm.c build-2.6/ipc/shm.c
--- 2.6/ipc/shm.c 2004-07-03 19:08:47.281961432 +0200
+++ build-2.6/ipc/shm.c 2004-07-03 19:09:16.646497344 +0200
@@ -117,7 +117,7 @@
shmem_lock(shp->shm_file, 0);
fput (shp->shm_file);
security_shm_free(shp);
- ipc_rcu_free(shp, sizeof(struct shmid_kernel));
+ ipc_rcu_putref(shp);
}
/*
@@ -194,7 +194,7 @@
shp->shm_perm.security = NULL;
error = security_shm_alloc(shp);
if (error) {
- ipc_rcu_free(shp, sizeof(*shp));
+ ipc_rcu_putref(shp);
return error;
}
@@ -234,7 +234,7 @@
fput(file);
no_file:
security_shm_free(shp);
- ipc_rcu_free(shp, sizeof(*shp));
+ ipc_rcu_putref(shp);
return error;
}
diff -u 2.6/ipc/util.c build-2.6/ipc/util.c
--- 2.6/ipc/util.c 2004-07-03 19:08:47.286960672 +0200
+++ build-2.6/ipc/util.c 2004-07-03 19:20:00.162668016 +0200
@@ -135,7 +135,6 @@
new[i].p = NULL;
}
old = ids->entries;
- i = ids->size;
/*
* before setting the ids->entries to the new array, there must be a
@@ -147,7 +146,7 @@
smp_wmb(); /* prevent indexing into old array based on new size. */
ids->size = newsize;
- ipc_rcu_free(old, sizeof(struct ipc_id)*i);
+ ipc_rcu_putref(old);
return ids->size;
}
@@ -292,10 +291,21 @@
void *data[0];
};
+struct ipc_rcu_hdr
+{
+ int refcount;
+ int is_vmalloc;
+};
+
+#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_kmalloc) > sizeof(struct ipc_rcu_hdr) ? \
+ sizeof(struct ipc_rcu_kmalloc) : sizeof(struct ipc_rcu_hdr))
+#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_vmalloc) > sizeof(struct ipc_rcu_hdr) ? \
+ sizeof(struct ipc_rcu_vmalloc) : sizeof(struct ipc_rcu_hdr))
+
static inline int rcu_use_vmalloc(int size)
{
/* Too big for a single page? */
- if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE)
+ if (HDRLEN_KMALLOC + size > PAGE_SIZE)
return 1;
return 0;
}
@@ -317,16 +327,29 @@
* workqueue if necessary (for vmalloc).
*/
if (rcu_use_vmalloc(size)) {
- out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size);
- if (out) out += sizeof(struct ipc_rcu_vmalloc);
+ out = vmalloc(HDRLEN_VMALLOC + size);
+ if (out) {
+ out += HDRLEN_VMALLOC;
+ ((struct ipc_rcu_hdr *)out)[-1].is_vmalloc = 1;
+ ((struct ipc_rcu_hdr *)out)[-1].refcount = 1;
+ }
} else {
- out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL);
- if (out) out += sizeof(struct ipc_rcu_kmalloc);
+ out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+ if (out) {
+ out += HDRLEN_KMALLOC;
+ ((struct ipc_rcu_hdr *)out)[-1].is_vmalloc = 0;
+ ((struct ipc_rcu_hdr *)out)[-1].refcount = 1;
+ }
}
return out;
}
+void ipc_rcu_getref(void *ptr)
+{
+ ((struct ipc_rcu_hdr *)ptr)[-1].refcount++;
+}
+
/**
* ipc_schedule_free - free ipc + rcu space
*
@@ -355,11 +378,12 @@
kfree(free);
}
-
-
-void ipc_rcu_free(void* ptr, int size)
+void ipc_rcu_putref(void *ptr)
{
- if (rcu_use_vmalloc(size)) {
+ if (--((struct ipc_rcu_hdr *)ptr)[-1].refcount > 0)
+ return;
+
+ if (((struct ipc_rcu_hdr *)ptr)[-1].is_vmalloc) {
struct ipc_rcu_vmalloc *free;
free = ptr - sizeof(*free);
call_rcu(&free->rcu, ipc_schedule_free);
@@ -368,7 +392,6 @@
free = ptr - sizeof(*free);
call_rcu(&free->rcu, ipc_immediate_free);
}
-
}
/**
@@ -506,6 +529,12 @@
return out;
}
+void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+ rcu_read_lock();
+ spin_lock(&perm->lock);
+}
+
void ipc_unlock(struct kern_ipc_perm* perm)
{
spin_unlock(&perm->lock);
diff -u 2.6/ipc/util.h build-2.6/ipc/util.h
--- 2.6/ipc/util.h 2004-07-03 19:08:47.287960520 +0200
+++ build-2.6/ipc/util.h 2004-07-03 19:09:16.647497192 +0200
@@ -45,14 +45,20 @@
*/
void* ipc_alloc(int size);
void ipc_free(void* ptr, int size);
-/* for allocation that need to be freed by RCU
- * both function can sleep
+
+/*
+ * For allocation that need to be freed by RCU.
+ * Objects are reference counted, they start with reference count 1.
+ * getref increases the refcount, the putref call that reduces the recount
+ * to 0 schedules the rcu destruction. Caller must guarantee locking.
*/
void* ipc_rcu_alloc(int size);
-void ipc_rcu_free(void* arg, int size);
+void ipc_rcu_getref(void *ptr);
+void ipc_rcu_putref(void *ptr);
struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
+void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
void ipc_unlock(struct kern_ipc_perm* perm);
int ipc_buildid(struct ipc_ids* ids, int id, int seq);
int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc 2004-07-03 17:35 [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc Manfred Spraul @ 2004-07-03 20:22 ` Andrew Morton 2004-07-03 21:52 ` Manfred Spraul 0 siblings, 1 reply; 5+ messages in thread From: Andrew Morton @ 2004-07-03 20:22 UTC (permalink / raw) To: Manfred Spraul; +Cc: linux-kernel Manfred Spraul <manfred@colorfullife.com> wrote: > > the lifetime of the ipc objects (sem array, msg queue, shm mapping) is > controlled by kern_ipc_perms->lock - a spinlock. There is no simple way > to reacquire this spinlock after it was dropped to > schedule()/kmalloc/copy_{to,from}_user/whatever. > > The attached patch adds a reference count as a preparation to get rid of > sem_revalidate(). The pointer offsetting tricks are rather unattractive. Is it not possible to simple aggregate the refcount into the refcounted structure, use container_of()? ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc 2004-07-03 20:22 ` Andrew Morton @ 2004-07-03 21:52 ` Manfred Spraul 2004-07-03 22:44 ` Andrew Morton 0 siblings, 1 reply; 5+ messages in thread From: Manfred Spraul @ 2004-07-03 21:52 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel [-- Attachment #1: Type: text/plain, Size: 421 bytes --] Andrew Morton wrote: >The pointer offsetting tricks are rather unattractive. Is it not possible >to simple aggregate the refcount into the refcounted structure, use >container_of()? > > Patch attached: it worked quite well and I've used the chance to reduce the size of the vmalloc header a bit: the work_struct and the rcu_head structures can be a union. Signed-Off-By: Manfred Spraul <manfred@colorfullife.com> [-- Attachment #2: patch-ipc-01-rcu_refcount --] [-- Type: text/plain, Size: 7908 bytes --] diff -u 2.6/ipc/msg.c build-2.6/ipc/msg.c --- 2.6/ipc/msg.c 2004-07-03 22:47:11.994484152 +0200 +++ build-2.6/ipc/msg.c 2004-07-03 22:46:31.652617048 +0200 @@ -100,14 +100,14 @@ msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); if (retval) { - ipc_rcu_free(msq, sizeof(*msq)); + ipc_rcu_putref(msq); return retval; } id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni); if(id == -1) { security_msg_queue_free(msq); - ipc_rcu_free(msq, sizeof(*msq)); + ipc_rcu_putref(msq); return -ENOSPC; } @@ -193,7 +193,7 @@ } atomic_sub(msq->q_cbytes, &msg_bytes); security_msg_queue_free(msq); - ipc_rcu_free(msq, sizeof(struct msg_queue)); + ipc_rcu_putref(msq); } asmlinkage long sys_msgget (key_t key, int msgflg) diff -u 2.6/ipc/sem.c build-2.6/ipc/sem.c --- 2.6/ipc/sem.c 2004-07-03 22:47:11.995484000 +0200 +++ build-2.6/ipc/sem.c 2004-07-03 22:46:36.307909336 +0200 @@ -179,14 +179,14 @@ sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); if (retval) { - ipc_rcu_free(sma, size); + ipc_rcu_putref(sma); return retval; } id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni); if(id == -1) { security_sem_free(sma); - ipc_rcu_free(sma, size); + ipc_rcu_putref(sma); return -ENOSPC; } used_sems += nsems; @@ -473,7 +473,7 @@ used_sems -= sma->sem_nsems; size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem); security_sem_free(sma); - ipc_rcu_free(sma, size); + ipc_rcu_putref(sma); } static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) diff -u 2.6/ipc/shm.c build-2.6/ipc/shm.c --- 2.6/ipc/shm.c 2004-07-03 22:47:11.996483848 +0200 +++ build-2.6/ipc/shm.c 2004-07-03 19:24:57.000000000 +0200 @@ -117,7 +117,7 @@ shmem_lock(shp->shm_file, 0); fput (shp->shm_file); security_shm_free(shp); - ipc_rcu_free(shp, sizeof(struct shmid_kernel)); + ipc_rcu_putref(shp); } /* @@ -194,7 +194,7 @@ shp->shm_perm.security = NULL; error = security_shm_alloc(shp); if (error) { - ipc_rcu_free(shp, sizeof(*shp)); + ipc_rcu_putref(shp); return error; } @@ -234,7 +234,7 @@ fput(file); no_file: security_shm_free(shp); - ipc_rcu_free(shp, sizeof(*shp)); + ipc_rcu_putref(shp); return error; } diff -u 2.6/ipc/util.c build-2.6/ipc/util.c --- 2.6/ipc/util.c 2004-07-03 22:47:11.998483544 +0200 +++ build-2.6/ipc/util.c 2004-07-03 23:38:50.813392072 +0200 @@ -135,7 +135,6 @@ new[i].p = NULL; } old = ids->entries; - i = ids->size; /* * before setting the ids->entries to the new array, there must be a @@ -147,7 +146,7 @@ smp_wmb(); /* prevent indexing into old array based on new size. */ ids->size = newsize; - ipc_rcu_free(old, sizeof(struct ipc_id)*i); + ipc_rcu_putref(old); return ids->size; } @@ -277,25 +276,47 @@ kfree(ptr); } -struct ipc_rcu_kmalloc +/* + * rcu allocations: + * There are three headers that are prepended to the actual allocation: + * - during use: ipc_rcu_hdr. + * - during the rcu grace period: ipc_rcu_grace. + * - [only if vmalloc]: ipc_rcu_sched. + * Their lifetime doesn't overlap, thus the headers share the same memory. + * Unlike a normal union, they are right-aligned, thus some container_of + * forward/backward casting is necessary: + */ +struct ipc_rcu_hdr +{ + int refcount; + int is_vmalloc; + void *data[0]; +}; + + +struct ipc_rcu_grace { struct rcu_head rcu; /* "void *" makes sure alignment of following data is sane. */ void *data[0]; }; -struct ipc_rcu_vmalloc +struct ipc_rcu_sched { - struct rcu_head rcu; struct work_struct work; /* "void *" makes sure alignment of following data is sane. */ void *data[0]; }; +#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \ + sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr)) +#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \ + sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC) + static inline int rcu_use_vmalloc(int size) { /* Too big for a single page? */ - if (sizeof(struct ipc_rcu_kmalloc) + size > PAGE_SIZE) + if (HDRLEN_KMALLOC + size > PAGE_SIZE) return 1; return 0; } @@ -317,16 +338,29 @@ * workqueue if necessary (for vmalloc). */ if (rcu_use_vmalloc(size)) { - out = vmalloc(sizeof(struct ipc_rcu_vmalloc) + size); - if (out) out += sizeof(struct ipc_rcu_vmalloc); + out = vmalloc(HDRLEN_VMALLOC + size); + if (out) { + out += HDRLEN_VMALLOC; + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1; + container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; + } } else { - out = kmalloc(sizeof(struct ipc_rcu_kmalloc)+size, GFP_KERNEL); - if (out) out += sizeof(struct ipc_rcu_kmalloc); + out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL); + if (out) { + out += HDRLEN_KMALLOC; + container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0; + container_of(out, struct ipc_rcu_hdr, data)->refcount = 1; + } } return out; } +void ipc_rcu_getref(void *ptr) +{ + container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; +} + /** * ipc_schedule_free - free ipc + rcu space * @@ -335,11 +369,13 @@ */ static void ipc_schedule_free(struct rcu_head *head) { - struct ipc_rcu_vmalloc *free = - container_of(head, struct ipc_rcu_vmalloc, rcu); + struct ipc_rcu_grace *grace = + container_of(head, struct ipc_rcu_grace, rcu); + struct ipc_rcu_sched *sched = + container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]); - INIT_WORK(&free->work, vfree, free); - schedule_work(&free->work); + INIT_WORK(&sched->work, vfree, sched); + schedule_work(&sched->work); } /** @@ -350,25 +386,23 @@ */ static void ipc_immediate_free(struct rcu_head *head) { - struct ipc_rcu_kmalloc *free = - container_of(head, struct ipc_rcu_kmalloc, rcu); + struct ipc_rcu_grace *free = + container_of(head, struct ipc_rcu_grace, rcu); kfree(free); } - - -void ipc_rcu_free(void* ptr, int size) +void ipc_rcu_putref(void *ptr) { - if (rcu_use_vmalloc(size)) { - struct ipc_rcu_vmalloc *free; - free = ptr - sizeof(*free); - call_rcu(&free->rcu, ipc_schedule_free); + if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0) + return; + + if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) { + call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, + ipc_schedule_free); } else { - struct ipc_rcu_kmalloc *free; - free = ptr - sizeof(*free); - call_rcu(&free->rcu, ipc_immediate_free); + call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu, + ipc_immediate_free); } - } /** @@ -506,6 +540,12 @@ return out; } +void ipc_lock_by_ptr(struct kern_ipc_perm *perm) +{ + rcu_read_lock(); + spin_lock(&perm->lock); +} + void ipc_unlock(struct kern_ipc_perm* perm) { spin_unlock(&perm->lock); diff -u 2.6/ipc/util.h build-2.6/ipc/util.h --- 2.6/ipc/util.h 2004-07-03 22:47:11.999483392 +0200 +++ build-2.6/ipc/util.h 2004-07-03 19:24:57.000000000 +0200 @@ -45,14 +45,20 @@ */ void* ipc_alloc(int size); void ipc_free(void* ptr, int size); -/* for allocation that need to be freed by RCU - * both function can sleep + +/* + * For allocation that need to be freed by RCU. + * Objects are reference counted, they start with reference count 1. + * getref increases the refcount, the putref call that reduces the recount + * to 0 schedules the rcu destruction. Caller must guarantee locking. */ void* ipc_rcu_alloc(int size); -void ipc_rcu_free(void* arg, int size); +void ipc_rcu_getref(void *ptr); +void ipc_rcu_putref(void *ptr); struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id); struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id); +void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp); void ipc_unlock(struct kern_ipc_perm* perm); int ipc_buildid(struct ipc_ids* ids, int id, int seq); int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid); ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc 2004-07-03 21:52 ` Manfred Spraul @ 2004-07-03 22:44 ` Andrew Morton 2004-07-03 23:10 ` Manfred Spraul 0 siblings, 1 reply; 5+ messages in thread From: Andrew Morton @ 2004-07-03 22:44 UTC (permalink / raw) To: Manfred Spraul; +Cc: linux-kernel Manfred Spraul <manfred@colorfullife.com> wrote: > > +struct ipc_rcu_hdr > +{ > + int refcount; > + int is_vmalloc; > + void *data[0]; > +}; That's not what I meant ;) struct ipc_rcu_hdr { int refcount; int is_vmalloc; }; Then place one of these inside struct msg_queue, one inside struct sem_array, etc. #define ipc_rcu_putref(p) if (--p->rcu_hdr.refcount == 0) ipc_rcu_putref_final(p, &p->rcu_hdr) or whatever. That's assuming struct kref isn't suitable. I guess you don't want the atomic_t. ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc 2004-07-03 22:44 ` Andrew Morton @ 2004-07-03 23:10 ` Manfred Spraul 0 siblings, 0 replies; 5+ messages in thread From: Manfred Spraul @ 2004-07-03 23:10 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel Andrew Morton wrote: >Manfred Spraul <manfred@colorfullife.com> wrote: > > >>+struct ipc_rcu_hdr >> +{ >> + int refcount; >> + int is_vmalloc; >> + void *data[0]; >> +}; >> >> > >That's not what I meant ;) > >struct ipc_rcu_hdr >{ > int refcount; > int is_vmalloc; >}; > >Then place one of these inside struct msg_queue, one inside struct >sem_array, etc. > > > No. The size of the headers depends on the allocation size: 8 bytes for allocs < 4088 bytes, 60 bytes for larger allocs. I don't want to expose that implementation detail outside of util.c. ipc_rcu_alloc allocates an arbitrary sized refcounted memory block that is released through the rcu framework. Everything else is hidden in util.c. -- Manfred ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2004-07-03 23:11 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2004-07-03 17:35 [PATCH] ipc 1/3: Add refcount to ipc_rcu_alloc Manfred Spraul 2004-07-03 20:22 ` Andrew Morton 2004-07-03 21:52 ` Manfred Spraul 2004-07-03 22:44 ` Andrew Morton 2004-07-03 23:10 ` Manfred Spraul
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox