* [PATCH] checkpoint/restart of robust futex lists
@ 2009-06-03 4:19 Matt Helsley
[not found] ` <20090603041919.GO9285-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Matt Helsley @ 2009-06-03 4:19 UTC (permalink / raw)
To: Containers
Save and restore the [compat_]robust_list member of the task struct.
These lists record which futexes the task holds. To keep the overhead of
robust futexes low the list is kept in userspace. When the task exits the
kernel carefully walks these lists to recover held futexes that
other tasks may be attempting to acquire with FUTEX_WAIT.
Because they point to userspace memory that is saved/restored by
checkpoint/restart saving the list pointers works.
While saving the pointers works during checkpoint, restart is tricky
because the robust futex ABI contains provisions for changes based on
checking the size of the list head. So we need to save the length of
the list head too in order to make sure that the kernel used during
restart is capable of handling that ABI. Since there is only one ABI
supported at the moment taking the list head's size is simple. Should
the ABI change we will need to use the same size as specified during
sys_set_robust_list() and hence some new means of determining the length
of this userspace structure in sys_checkpoint would be required.
Rather than rewrite the logic that checks and handles the ABI we reuse
sys_set_robust_list() by factoring out the body of the function and
calling it during restart.
Signed-off-by: Matt Helsley <matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
diff --git a/checkpoint/process.c b/checkpoint/process.c
index b604a85..084a2e4 100644
--- a/checkpoint/process.c
+++ b/checkpoint/process.c
@@ -42,6 +42,17 @@ static int checkpoint_task_struct(struct ckpt_ctx *ctx, struct task_struct *t)
h->task_comm_len = TASK_COMM_LEN;
+#ifdef CONFIG_FUTEX
+ /* These are __user pointers and can be saved without the objhash. */
+ h->robust_futex_list = t->robust_list;
+ h->robust_futex_head_len = sizeof(t->robust_list);
+#ifdef CONFIG_COMPAT
+ h->compat_robust_futex_list = t->compat_robust_list;
+ h->compat_robust_futex_head_len = sizeof(t->compat_robust_list);
+#endif
+ /* FIXME save pi futex state?? */
+#endif
+
/* FIXME: save remaining relevant task_struct fields */
ret = ckpt_write_obj(ctx, &h->h);
@@ -366,6 +377,20 @@ static int restore_task_struct(struct ckpt_ctx *ctx)
memset(t->comm, 0, TASK_COMM_LEN);
ret = _ckpt_read_string(ctx, t->comm, h->task_comm_len);
+#ifdef CONFIG_FUTEX
+ /* Since we restore the memory map the address remains the same and
+ * this is safe. This is the same as [compat_]sys_set_robust_list() */
+ if (h->robust_futex_list) {
+ struct robust_list_head __user *rfl = h->robust_futex_list;
+ do_set_robust_list(rfl, h->robust_futex_head_len);
+ }
+#ifdef CONFIG_COMPAT
+ if (h->compat_robust_futex_list) {
+ struct compat_robust_list_head __user *crfl = h->compat_robust_futex_list;
+ do_compat_set_robust_list(crfl, h->compat_robust_futex_head_len);
+ }
+#endif
+#endif
/* FIXME: restore remaining relevant task_struct fields */
out:
ckpt_hdr_put(ctx, h);
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index cd427d8..f226f8f 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -163,6 +163,12 @@ struct ckpt_hdr_task {
__u32 exit_signal;
__u32 task_comm_len;
+
+ __u32 robust_futex_head_len;
+ __u32 compat_robust_futex_head_len;
+ __u64 robust_futex_list; /* a __user * */
+ __u64 compat_robust_futex_list; /* a __user * */
+ /* FIXME need futex prio inheritance state? */
} __attribute__((aligned(8)));
/* namespaces */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f2ded21..f311e36 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -165,7 +165,8 @@ struct compat_robust_list_head {
};
extern void compat_exit_robust_list(struct task_struct *curr);
-
+extern long do_compat_set_robust_list(struct compat_robust_list_head __user *head,
+ compat_size_t len);
asmlinkage long
compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
compat_size_t len);
diff --git a/include/linux/futex.h b/include/linux/futex.h
index dd0e06b..8685d1c 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -178,6 +178,7 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
#ifdef CONFIG_FUTEX
+extern long do_set_robust_list(struct robust_list_head __user *head, size_t len);
extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr);
extern int futex_cmpxchg_enabled;
diff --git a/kernel/futex.c b/kernel/futex.c
index f405c73..cf80e7c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1680,13 +1680,7 @@ pi_faulted:
* the list. There can only be one such pending lock.
*/
-/**
- * sys_set_robust_list - set the robust-futex list head of a task
- * @head: pointer to the list-head
- * @len: length of the list-head, as userspace expects
- */
-SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
- size_t, len)
+long do_set_robust_list(struct robust_list_head __user *head, size_t len)
{
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -1702,6 +1696,17 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
}
/**
+ * sys_set_robust_list - set the robust-futex list head of a task
+ * @head: pointer to the list-head
+ * @len: length of the list-head, as userspace expects
+ */
+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
+ size_t, len)
+{
+ return do_set_robust_list(head, len);
+}
+
+/**
* sys_get_robust_list - get the robust-futex list head of a task
* @pid: pid of the process [zero for current task]
* @head_ptr: pointer to a list-head pointer, the kernel fills it in
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b..eac734c 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -114,9 +114,9 @@ void compat_exit_robust_list(struct task_struct *curr)
}
}
-asmlinkage long
-compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
- compat_size_t len)
+long
+do_compat_set_robust_list(struct compat_robust_list_head __user *head,
+ compat_size_t len)
{
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -130,6 +130,13 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
}
asmlinkage long
+compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
+ compat_size_t len)
+{
+ return do_compat_set_robust_list(head, len);
+}
+
+asmlinkage long
compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
compat_size_t __user *len_ptr)
{
^ permalink raw reply related [flat|nested] 11+ messages in thread[parent not found: <20090603041919.GO9285-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <20090603041919.GO9285-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2009-06-03 15:58 ` Serge E. Hallyn [not found] ` <20090603155804.GA7848-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2009-06-08 2:37 ` Oren Laadan 2009-06-18 17:12 ` Oren Laadan 2 siblings, 1 reply; 11+ messages in thread From: Serge E. Hallyn @ 2009-06-03 15:58 UTC (permalink / raw) To: Matt Helsley; +Cc: Containers Quoting Matt Helsley (matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org): > diff --git a/checkpoint/process.c b/checkpoint/process.c > index b604a85..084a2e4 100644 > --- a/checkpoint/process.c > +++ b/checkpoint/process.c > @@ -42,6 +42,17 @@ static int checkpoint_task_struct(struct ckpt_ctx *ctx, struct task_struct *t) > > h->task_comm_len = TASK_COMM_LEN; > > +#ifdef CONFIG_FUTEX > + /* These are __user pointers and can be saved without the objhash. */ > + h->robust_futex_list = t->robust_list; > + h->robust_futex_head_len = sizeof(t->robust_list); > +#ifdef CONFIG_COMPAT > + h->compat_robust_futex_list = t->compat_robust_list; > + h->compat_robust_futex_head_len = sizeof(t->compat_robust_list); > +#endif > + /* FIXME save pi futex state?? */ > +#endif > + So, I'm torn on this, but this does look like a prime example of code which is destined to go stale and out of sync with the main futex code. On the other hand, if we define futex_checkpoint() and futex_restart(), and do that for every little thing we c/r, that could get out of hand... But I think it's a risk worth taking. What do you think? Also, could you send out your testcase so I can add it to cr_tests? thanks, -serge ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <20090603155804.GA7848-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <20090603155804.GA7848-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2009-06-08 2:28 ` Oren Laadan 0 siblings, 0 replies; 11+ messages in thread From: Oren Laadan @ 2009-06-08 2:28 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Containers Serge E. Hallyn wrote: > Quoting Matt Helsley (matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org): >> diff --git a/checkpoint/process.c b/checkpoint/process.c >> index b604a85..084a2e4 100644 >> --- a/checkpoint/process.c >> +++ b/checkpoint/process.c >> @@ -42,6 +42,17 @@ static int checkpoint_task_struct(struct ckpt_ctx *ctx, struct task_struct *t) >> >> h->task_comm_len = TASK_COMM_LEN; >> >> +#ifdef CONFIG_FUTEX >> + /* These are __user pointers and can be saved without the objhash. */ >> + h->robust_futex_list = t->robust_list; >> + h->robust_futex_head_len = sizeof(t->robust_list); >> +#ifdef CONFIG_COMPAT >> + h->compat_robust_futex_list = t->compat_robust_list; >> + h->compat_robust_futex_head_len = sizeof(t->compat_robust_list); >> +#endif >> + /* FIXME save pi futex state?? */ >> +#endif >> + > > So, I'm torn on this, but this does look like a prime example of code > which is destined to go stale and out of sync with the main futex code. > > On the other hand, if we define futex_checkpoint() and futex_restart(), > and do that for every little thing we c/r, that could get out of hand... > > But I think it's a risk worth taking. What do you think? Which risk is the one worth taking ? -- I assume splitting the c/r code to smaller pieces near/at related subsystems source code. The question is: where do you draw the line ? (This is also related to Andrew Morgan's concern about capabilities). I think that for a "self-contained" object (e.g. capabilities), it makes much sense to separate it. I'm not so sure about passing struct ckpt_task_struct_hdr around to small functions to fill it... can become very scattered. And a dedicated "futex" object doesn't make sense either. If anything, perhaps use a 'struct ckpt_task_futex' which will be embedded in ckpt_task_struct_hdr: struct ckpt_task_struct_hdr { struct ckpt_hdr h; ... struct ckpt_task_futex futex; ... }; There are more examples, for instance: * nsproxy: in checkpoint/process.c or in kernel/nxproxy.c ? * uts_ns: in checkpoint/process.c or in kernel/utsname.c ? And also: * files/fd: in checkpoint/files.c or in fs/checkpoint.c ? * memory: in checkpoint/memory.c or in mm/checkpoint.c ? Originally it seemed natural to keep everything under checkpoint/ subdir. The cost is (a) risks getting out of sync, and (b) need to export functions that could remain static/private otherwise. Comments ? Oren. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <20090603041919.GO9285-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2009-06-03 15:58 ` Serge E. Hallyn @ 2009-06-08 2:37 ` Oren Laadan [not found] ` <4A2C7972.9090404-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> 2009-06-18 17:12 ` Oren Laadan 2 siblings, 1 reply; 11+ messages in thread From: Oren Laadan @ 2009-06-08 2:37 UTC (permalink / raw) To: Matt Helsley; +Cc: Containers Matt Helsley wrote: > Save and restore the [compat_]robust_list member of the task struct. > > These lists record which futexes the task holds. To keep the overhead of > robust futexes low the list is kept in userspace. When the task exits the > kernel carefully walks these lists to recover held futexes that > other tasks may be attempting to acquire with FUTEX_WAIT. > > Because they point to userspace memory that is saved/restored by > checkpoint/restart saving the list pointers works. > > While saving the pointers works during checkpoint, restart is tricky > because the robust futex ABI contains provisions for changes based on > checking the size of the list head. So we need to save the length of > the list head too in order to make sure that the kernel used during > restart is capable of handling that ABI. Since there is only one ABI > supported at the moment taking the list head's size is simple. Should > the ABI change we will need to use the same size as specified during > sys_set_robust_list() and hence some new means of determining the length > of this userspace structure in sys_checkpoint would be required. > > Rather than rewrite the logic that checks and handles the ABI we reuse > sys_set_robust_list() by factoring out the body of the function and > calling it during restart. > > Signed-off-by: Matt Helsley <matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> Patch looks good. Too bad we don't support futex, yet... Oren. ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <4A2C7972.9090404-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <4A2C7972.9090404-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> @ 2009-06-08 14:08 ` Serge E. Hallyn [not found] ` <20090608140810.GB29432-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 0 siblings, 1 reply; 11+ messages in thread From: Serge E. Hallyn @ 2009-06-08 14:08 UTC (permalink / raw) To: Oren Laadan; +Cc: Containers Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): > > > Matt Helsley wrote: > > Save and restore the [compat_]robust_list member of the task struct. > > > > These lists record which futexes the task holds. To keep the overhead of > > robust futexes low the list is kept in userspace. When the task exits the > > kernel carefully walks these lists to recover held futexes that > > other tasks may be attempting to acquire with FUTEX_WAIT. > > > > Because they point to userspace memory that is saved/restored by > > checkpoint/restart saving the list pointers works. > > > > While saving the pointers works during checkpoint, restart is tricky > > because the robust futex ABI contains provisions for changes based on > > checking the size of the list head. So we need to save the length of > > the list head too in order to make sure that the kernel used during > > restart is capable of handling that ABI. Since there is only one ABI > > supported at the moment taking the list head's size is simple. Should > > the ABI change we will need to use the same size as specified during > > sys_set_robust_list() and hence some new means of determining the length > > of this userspace structure in sys_checkpoint would be required. > > > > Rather than rewrite the logic that checks and handles the ABI we reuse > > sys_set_robust_list() by factoring out the body of the function and > > calling it during restart. > > > > Signed-off-by: Matt Helsley <matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > > Patch looks good. Too bad we don't support futex, yet... ? IIUC (from Matt and Dave), after this patch, you might need something for PI futexes, but otherwise non-contended cases "just work" because there is no kernel involvement. -serge ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <20090608140810.GB29432-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <20090608140810.GB29432-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2009-06-08 16:31 ` Oren Laadan [not found] ` <4A2D3CE3.6030400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> 0 siblings, 1 reply; 11+ messages in thread From: Oren Laadan @ 2009-06-08 16:31 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Containers Serge E. Hallyn wrote: > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): >> >> Matt Helsley wrote: >>> Save and restore the [compat_]robust_list member of the task struct. >>> >>> These lists record which futexes the task holds. To keep the overhead of >>> robust futexes low the list is kept in userspace. When the task exits the >>> kernel carefully walks these lists to recover held futexes that >>> other tasks may be attempting to acquire with FUTEX_WAIT. >>> >>> Because they point to userspace memory that is saved/restored by >>> checkpoint/restart saving the list pointers works. >>> >>> While saving the pointers works during checkpoint, restart is tricky >>> because the robust futex ABI contains provisions for changes based on >>> checking the size of the list head. So we need to save the length of >>> the list head too in order to make sure that the kernel used during >>> restart is capable of handling that ABI. Since there is only one ABI >>> supported at the moment taking the list head's size is simple. Should >>> the ABI change we will need to use the same size as specified during >>> sys_set_robust_list() and hence some new means of determining the length >>> of this userspace structure in sys_checkpoint would be required. >>> >>> Rather than rewrite the logic that checks and handles the ABI we reuse >>> sys_set_robust_list() by factoring out the body of the function and >>> calling it during restart. >>> >>> Signed-off-by: Matt Helsley <matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> >> Patch looks good. Too bad we don't support futex, yet... > > ? > > IIUC (from Matt and Dave), after this patch, you might need something > for PI futexes, but otherwise non-contended cases "just work" because > there is no kernel involvement. That's what I thought. But I also thought that a checkpoint would fail anyway as soon as it hits the futex-file-descriptor. Or am I missing something ? Oren. ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <4A2D3CE3.6030400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <4A2D3CE3.6030400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> @ 2009-06-08 16:59 ` Serge E. Hallyn [not found] ` <20090608165951.GA3610-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 0 siblings, 1 reply; 11+ messages in thread From: Serge E. Hallyn @ 2009-06-08 16:59 UTC (permalink / raw) To: Oren Laadan; +Cc: Containers Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): > > > Serge E. Hallyn wrote: > > IIUC (from Matt and Dave), after this patch, you might need something > > for PI futexes, but otherwise non-contended cases "just work" because > > there is no kernel involvement. > > That's what I thought. But I also thought that a checkpoint would > fail anyway as soon as it hits the futex-file-descriptor. Or am I > missing something ? Yes - FUTEX_FD no longer exists :) -serge ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <20090608165951.GA3610-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <20090608165951.GA3610-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2009-06-08 18:34 ` Oren Laadan 0 siblings, 0 replies; 11+ messages in thread From: Oren Laadan @ 2009-06-08 18:34 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: Containers Serge E. Hallyn wrote: > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): >> >> Serge E. Hallyn wrote: >>> IIUC (from Matt and Dave), after this patch, you might need something >>> for PI futexes, but otherwise non-contended cases "just work" because >>> there is no kernel involvement. >> That's what I thought. But I also thought that a checkpoint would >> fail anyway as soon as it hits the futex-file-descriptor. Or am I >> missing something ? > > Yes - FUTEX_FD no longer exists :) Heh .. need to catch up ! Oren. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <20090603041919.GO9285-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 2009-06-03 15:58 ` Serge E. Hallyn 2009-06-08 2:37 ` Oren Laadan @ 2009-06-18 17:12 ` Oren Laadan [not found] ` <4A3A7572.9030907-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> 2 siblings, 1 reply; 11+ messages in thread From: Oren Laadan @ 2009-06-18 17:12 UTC (permalink / raw) To: Matt Helsley; +Cc: Containers Matt Helsley wrote: > Save and restore the [compat_]robust_list member of the task struct. > > These lists record which futexes the task holds. To keep the overhead of > robust futexes low the list is kept in userspace. When the task exits the > kernel carefully walks these lists to recover held futexes that > other tasks may be attempting to acquire with FUTEX_WAIT. > > Because they point to userspace memory that is saved/restored by > checkpoint/restart saving the list pointers works. > > While saving the pointers works during checkpoint, restart is tricky > because the robust futex ABI contains provisions for changes based on > checking the size of the list head. So we need to save the length of > the list head too in order to make sure that the kernel used during > restart is capable of handling that ABI. Since there is only one ABI > supported at the moment taking the list head's size is simple. Should > the ABI change we will need to use the same size as specified during > sys_set_robust_list() and hence some new means of determining the length > of this userspace structure in sys_checkpoint would be required. > > Rather than rewrite the logic that checks and handles the ABI we reuse > sys_set_robust_list() by factoring out the body of the function and > calling it during restart. > > Signed-off-by: Matt Helsley <matthltc-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> What happens if we checkpoint on a system with CONFIG_FUTEX and restart on a a system with !CONFIG_FUTEX ? Clearly, if processes used futex, restart should fail. If they didn't - then all we know is that at the time of the checkpoint they didn't. It could be that they didn't and will never try, so restarting on a futex-less kernel is correct. But it could be that they perhaps did and perhaps will attempt again later on. In this case, restarting on a futex-less kernel is incorrect, because it isn't compatible with the (intention) of the original execution. I suppose it's time to come up with some scheme to encode the capabilities of a kernel in the checkpoint image. At restart we can test compatibility of current kernel with original one, and decide what to do. The decision itself, or part of it - like the question above - can be done in userspace. The opposite case (checkpoint with !CONFIG_FUTEX and restart on CONFIG_FUTEX) is always ok, because it will overwrite with null values on the restarting tasks. Oren. > > diff --git a/checkpoint/process.c b/checkpoint/process.c > index b604a85..084a2e4 100644 > --- a/checkpoint/process.c > +++ b/checkpoint/process.c > @@ -42,6 +42,17 @@ static int checkpoint_task_struct(struct ckpt_ctx *ctx, struct task_struct *t) > > h->task_comm_len = TASK_COMM_LEN; > > +#ifdef CONFIG_FUTEX > + /* These are __user pointers and can be saved without the objhash. */ > + h->robust_futex_list = t->robust_list; > + h->robust_futex_head_len = sizeof(t->robust_list); > +#ifdef CONFIG_COMPAT > + h->compat_robust_futex_list = t->compat_robust_list; > + h->compat_robust_futex_head_len = sizeof(t->compat_robust_list); > +#endif > + /* FIXME save pi futex state?? */ > +#endif > + > /* FIXME: save remaining relevant task_struct fields */ > > ret = ckpt_write_obj(ctx, &h->h); > @@ -366,6 +377,20 @@ static int restore_task_struct(struct ckpt_ctx *ctx) > memset(t->comm, 0, TASK_COMM_LEN); > ret = _ckpt_read_string(ctx, t->comm, h->task_comm_len); > > +#ifdef CONFIG_FUTEX > + /* Since we restore the memory map the address remains the same and > + * this is safe. This is the same as [compat_]sys_set_robust_list() */ > + if (h->robust_futex_list) { > + struct robust_list_head __user *rfl = h->robust_futex_list; > + do_set_robust_list(rfl, h->robust_futex_head_len); > + } > +#ifdef CONFIG_COMPAT > + if (h->compat_robust_futex_list) { > + struct compat_robust_list_head __user *crfl = h->compat_robust_futex_list; > + do_compat_set_robust_list(crfl, h->compat_robust_futex_head_len); > + } > +#endif > +#endif > /* FIXME: restore remaining relevant task_struct fields */ > out: > ckpt_hdr_put(ctx, h); > diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h > index cd427d8..f226f8f 100644 > --- a/include/linux/checkpoint_hdr.h > +++ b/include/linux/checkpoint_hdr.h > @@ -163,6 +163,12 @@ struct ckpt_hdr_task { > __u32 exit_signal; > > __u32 task_comm_len; > + > + __u32 robust_futex_head_len; > + __u32 compat_robust_futex_head_len; > + __u64 robust_futex_list; /* a __user * */ > + __u64 compat_robust_futex_list; /* a __user * */ > + /* FIXME need futex prio inheritance state? */ > } __attribute__((aligned(8))); > > /* namespaces */ > diff --git a/include/linux/compat.h b/include/linux/compat.h > index f2ded21..f311e36 100644 > --- a/include/linux/compat.h > +++ b/include/linux/compat.h > @@ -165,7 +165,8 @@ struct compat_robust_list_head { > }; > > extern void compat_exit_robust_list(struct task_struct *curr); > - > +extern long do_compat_set_robust_list(struct compat_robust_list_head __user *head, > + compat_size_t len); > asmlinkage long > compat_sys_set_robust_list(struct compat_robust_list_head __user *head, > compat_size_t len); > diff --git a/include/linux/futex.h b/include/linux/futex.h > index dd0e06b..8685d1c 100644 > --- a/include/linux/futex.h > +++ b/include/linux/futex.h > @@ -178,6 +178,7 @@ union futex_key { > #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } > > #ifdef CONFIG_FUTEX > +extern long do_set_robust_list(struct robust_list_head __user *head, size_t len); > extern void exit_robust_list(struct task_struct *curr); > extern void exit_pi_state_list(struct task_struct *curr); > extern int futex_cmpxchg_enabled; > diff --git a/kernel/futex.c b/kernel/futex.c > index f405c73..cf80e7c 100644 > --- a/kernel/futex.c > +++ b/kernel/futex.c > @@ -1680,13 +1680,7 @@ pi_faulted: > * the list. There can only be one such pending lock. > */ > > -/** > - * sys_set_robust_list - set the robust-futex list head of a task > - * @head: pointer to the list-head > - * @len: length of the list-head, as userspace expects > - */ > -SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, > - size_t, len) > +long do_set_robust_list(struct robust_list_head __user *head, size_t len) > { > if (!futex_cmpxchg_enabled) > return -ENOSYS; > @@ -1702,6 +1696,17 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, > } > > /** > + * sys_set_robust_list - set the robust-futex list head of a task > + * @head: pointer to the list-head > + * @len: length of the list-head, as userspace expects > + */ > +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, > + size_t, len) > +{ > + return do_set_robust_list(head, len); > +} > + > +/** > * sys_get_robust_list - get the robust-futex list head of a task > * @pid: pid of the process [zero for current task] > * @head_ptr: pointer to a list-head pointer, the kernel fills it in > diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c > index d607a5b..eac734c 100644 > --- a/kernel/futex_compat.c > +++ b/kernel/futex_compat.c > @@ -114,9 +114,9 @@ void compat_exit_robust_list(struct task_struct *curr) > } > } > > -asmlinkage long > -compat_sys_set_robust_list(struct compat_robust_list_head __user *head, > - compat_size_t len) > +long > +do_compat_set_robust_list(struct compat_robust_list_head __user *head, > + compat_size_t len) > { > if (!futex_cmpxchg_enabled) > return -ENOSYS; > @@ -130,6 +130,13 @@ compat_sys_set_robust_list(struct compat_robust_list_head __user *head, > } > > asmlinkage long > +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, > + compat_size_t len) > +{ > + return do_compat_set_robust_list(head, len); > +} > + > +asmlinkage long > compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, > compat_size_t __user *len_ptr) > { > ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <4A3A7572.9030907-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <4A3A7572.9030907-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> @ 2009-06-18 19:24 ` Nathan Lynch [not found] ` <m31vphgxa0.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org> 0 siblings, 1 reply; 11+ messages in thread From: Nathan Lynch @ 2009-06-18 19:24 UTC (permalink / raw) To: Oren Laadan; +Cc: Containers Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> writes: > What happens if we checkpoint on a system with CONFIG_FUTEX and > restart on a a system with !CONFIG_FUTEX ? You can't disable FUTEX unless EMBEDDED=y, and EMBEDDED is the "I know what I'm doing" knob. Is this really worth worrying about? ^ permalink raw reply [flat|nested] 11+ messages in thread
[parent not found: <m31vphgxa0.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH] checkpoint/restart of robust futex lists [not found] ` <m31vphgxa0.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org> @ 2009-06-19 3:41 ` Oren Laadan 0 siblings, 0 replies; 11+ messages in thread From: Oren Laadan @ 2009-06-19 3:41 UTC (permalink / raw) To: Nathan Lynch; +Cc: Containers Nathan Lynch wrote: > Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> writes: >> What happens if we checkpoint on a system with CONFIG_FUTEX and >> restart on a a system with !CONFIG_FUTEX ? > > You can't disable FUTEX unless EMBEDDED=y, and EMBEDDED is the "I know > what I'm doing" knob. Is this really worth worrying about? > Good point. Still, a warning won't hurt, and can be done in userspace. Even if you "know what you're doing", you may miss this detail. But more importantly, this brings up the issue of how to encode the configuration of the kernel used when a checkpoint is taken. This information would indicate, in a sense, the set of assumptions on the environments that can possibly restart from that checkpoint. This can be checked by user space against the current kernel at restart, and at the very least issue a warning about possible incompatibility. User could override and proceed as is, or modify the image, etc. Coincidentally a similar discussion starts in the other thread on combinations of namespace config options - "[PATCH 1/1] cr: fix compilation with CONFIG_UTS_NS=n", so I'll raise it there. Oren. ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2009-06-19 3:41 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-06-03 4:19 [PATCH] checkpoint/restart of robust futex lists Matt Helsley
[not found] ` <20090603041919.GO9285-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-03 15:58 ` Serge E. Hallyn
[not found] ` <20090603155804.GA7848-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-08 2:28 ` Oren Laadan
2009-06-08 2:37 ` Oren Laadan
[not found] ` <4A2C7972.9090404-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-06-08 14:08 ` Serge E. Hallyn
[not found] ` <20090608140810.GB29432-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-08 16:31 ` Oren Laadan
[not found] ` <4A2D3CE3.6030400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-06-08 16:59 ` Serge E. Hallyn
[not found] ` <20090608165951.GA3610-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-06-08 18:34 ` Oren Laadan
2009-06-18 17:12 ` Oren Laadan
[not found] ` <4A3A7572.9030907-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-06-18 19:24 ` Nathan Lynch
[not found] ` <m31vphgxa0.fsf-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2009-06-19 3:41 ` Oren Laadan
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.