* [PATCH] c/r: pin down modules registered to obj-ops
@ 2010-04-29 22:41 Oren Laadan
[not found] ` <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Oren Laadan @ 2010-04-29 22:41 UTC (permalink / raw)
To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
To ensure that a module does not unload in the middle of a checkpoint
or a restart operation, pin (module_get) all the modules during either
operation. For that, add a new memeber ->owner in ckpt_obj_ops.
Also add a counter that keeps track of how many module_gets we have
going on, to properly handle new modules that register when an
operation is underway.
This is a proof of concept, applies on top of the patch that introduces
objhash on rc7 (patch 33).
Todo:
- I put the initialization part in init_*_ctx(), but it could be moved
to ckpt_ctx_alloc() which is common to both checkpoint and restart.
- If this is ok, then additional patches should adjust those modules
that indeed register...
Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
---
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 2f5af3c..4ed8a8c 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd,
#define CKPT_CTX_RESTART_BIT 1
#define CKPT_CTX_SUCCESS_BIT 2
#define CKPT_CTX_ERROR_BIT 3
+#define CKPT_CTX_PINNED_BIT 4
#define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT)
#define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT)
#define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT)
#define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT)
+#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT)
/* ckpt_ctx: uflags */
#define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE
@@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx)
extern void restore_notify_error(struct ckpt_ctx *ctx);
/* obj_hash */
+extern int ckpt_obj_module_get(void);
+extern void ckpt_obj_module_put(void);
+
extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
@@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
struct ckpt_obj_ops;
extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops);
+extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops);
#else /* CONFIG_CHEKCPOINT */
@@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
return 0;
}
+static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
+{
+}
+
#endif /* CONFIG_CHECKPOINT */
#endif /* __KERNEL__ */
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index 912e06a..8035556 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -79,6 +79,7 @@ struct ckpt_obj_ops {
int (*ref_grab)(void *ptr);
int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr);
void *(*restore)(struct ckpt_ctx *ctx);
+ struct module *owner;
};
diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c
index f451f8f..1a08bfb 100644
--- a/kernel/checkpoint/checkpoint.c
+++ b/kernel/checkpoint/checkpoint.c
@@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
{
struct task_struct *task;
struct nsproxy *nsproxy;
+ int ret;
/*
* No need for explicit cleanup here, because if an error
@@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
return -EINVAL; /* cleanup by ckpt_ctx_free() */
}
+ /* pin down modules - cleanup by ckpt_ctx_free() */
+ ret = ckpt_obj_module_get();
+ if (ret < 0)
+ return ret;
+ ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
+
return 0;
}
diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
index 1ee06d0..db795e3 100644
--- a/kernel/checkpoint/objhash.c
+++ b/kernel/checkpoint/objhash.c
@@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = {
[CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops,
};
+static int ckpt_obj_pinned_count;
+static DEFINE_SPINLOCK(ckpt_obj_pinned_lock);
+
int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
{
+ int ret = -EINVAL;
+ int i;
+
if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
return -EINVAL;
- if (ckpt_obj_ops[ops->obj_type] != NULL)
- return -EINVAL;
- ckpt_obj_ops[ops->obj_type] = ops;
- return 0;
+ spin_lock(&ckpt_obj_pinned_lock);
+ if (ckpt_obj_ops[ops->obj_type] == NULL) {
+ if (ops->owner) {
+ for (i = 0; i < ckpt_obj_pinned_count)
+ __module_get(owner->module);
+ }
+ ckpt_obj_ops[ops->obj_type] = ops;
+ ret = 0;
+ }
+ spin_unlock(&ckpt_obj_pinned_lock);
+ return ret;
}
EXPORT_SYMBOL(register_checkpoint_obj);
+void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
+{
+ if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
+ return;
+ spin_lock(&ckpt_obj_pinned_lock);
+ if (ckpt_obj_ops[ops->obj_type] == ops)
+ ckpt_obj_ops[ops->obj_type] = NULL;
+ spin_unlock(&ckpt_obj_pinned_lock);
+ return ret;
+}
+EXPORT_SYMBOL(unregister_checkpoint_obj);
+
+static void _ckpt_obj_module_put(int last)
+{
+ int i;
+
+ for (i = 0; i < last; i++) {
+ if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
+ continue;
+ module_put(ckpt_obj_ops[i]->owner);
+ }
+}
+void ckpt_obj_module_put(void)
+{
+ spin_lock(&ckpt_obj_pinned_lock);
+ _ckpt_obj_module_put(CKPT_OBJ_MAX);
+ ckpt_obj_pinned_count--;
+ spin_unlock(&ckpt_obj_pinned_lock);
+}
+
+int ckpt_obj_module_get(void)
+{
+ int i, ret = 0;
+
+ spin_lock(&ckpt_obj_pinned_lock);
+ for (i = 0; i < CKPT_OBJ_MAX; i++) {
+ if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
+ continue;
+ ret = try_module_get(ckpt_obj_ops[i]->owner);
+ if (ret < 0)
+ break;
+ }
+ if (ret < 0)
+ _ckpt_obj_module_put(i);
+ else
+ ckpt_obj_pinned_count++;
+ spin_unlock(&ckpt_obj_pinned_lock);
+ return ret;
+}
+
#define CKPT_OBJ_HASH_NBITS 10
#define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS)
diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
index 437de4f..582e1f1 100644
--- a/kernel/checkpoint/restart.c
+++ b/kernel/checkpoint/restart.c
@@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid)
ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */
+ /* pin down modules - cleanup by ckpt_ctx_free() */
+ ret = ckpt_obj_module_get();
+ if (ret < 0)
+ return ret;
+ ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
+
return 0;
}
diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c
index 5e84915..e1a1f96 100644
--- a/kernel/checkpoint/sys.c
+++ b/kernel/checkpoint/sys.c
@@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
kfree(ctx->pids_arr);
+ /* un-pin modules */
+ if (ctx->kflags & CKPT_CTX_PINNED)
+ ckpt_obj_module_put();
+
kfree(ctx);
}
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] c/r: pin down modules registered to obj-ops
[not found] ` <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-04-29 23:11 ` Serge E. Hallyn
[not found] ` <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Serge E. Hallyn @ 2010-04-29 23:11 UTC (permalink / raw)
To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
> To ensure that a module does not unload in the middle of a checkpoint
> or a restart operation, pin (module_get) all the modules during either
> operation. For that, add a new memeber ->owner in ckpt_obj_ops.
>
> Also add a counter that keeps track of how many module_gets we have
> going on, to properly handle new modules that register when an
> operation is underway.
>
> This is a proof of concept, applies on top of the patch that introduces
> objhash on rc7 (patch 33).
>
> Todo:
>
> - I put the initialization part in init_*_ctx(), but it could be moved
> to ckpt_ctx_alloc() which is common to both checkpoint and restart.
>
> - If this is ok, then additional patches should adjust those modules
> that indeed register...
>
> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
Yes this looks good to me
Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
Were you going to stick this into v21 too, or queue that up as
first patch after the patchbomb?
thanks,
-serge
>
> ---
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 2f5af3c..4ed8a8c 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd,
> #define CKPT_CTX_RESTART_BIT 1
> #define CKPT_CTX_SUCCESS_BIT 2
> #define CKPT_CTX_ERROR_BIT 3
> +#define CKPT_CTX_PINNED_BIT 4
>
> #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT)
> #define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT)
> #define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT)
> #define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT)
> +#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT)
>
> /* ckpt_ctx: uflags */
> #define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE
> @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx)
> extern void restore_notify_error(struct ckpt_ctx *ctx);
>
> /* obj_hash */
> +extern int ckpt_obj_module_get(void);
> +extern void ckpt_obj_module_put(void);
> +
> extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
> extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
>
> @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
>
> struct ckpt_obj_ops;
> extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops);
> +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops);
>
> #else /* CONFIG_CHEKCPOINT */
>
> @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
> return 0;
> }
>
> +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
> +{
> +}
> +
> #endif /* CONFIG_CHECKPOINT */
> #endif /* __KERNEL__ */
>
> diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
> index 912e06a..8035556 100644
> --- a/include/linux/checkpoint_types.h
> +++ b/include/linux/checkpoint_types.h
> @@ -79,6 +79,7 @@ struct ckpt_obj_ops {
> int (*ref_grab)(void *ptr);
> int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr);
> void *(*restore)(struct ckpt_ctx *ctx);
> + struct module *owner;
> };
>
>
> diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c
> index f451f8f..1a08bfb 100644
> --- a/kernel/checkpoint/checkpoint.c
> +++ b/kernel/checkpoint/checkpoint.c
> @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
> {
> struct task_struct *task;
> struct nsproxy *nsproxy;
> + int ret;
>
> /*
> * No need for explicit cleanup here, because if an error
> @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
> return -EINVAL; /* cleanup by ckpt_ctx_free() */
> }
>
> + /* pin down modules - cleanup by ckpt_ctx_free() */
> + ret = ckpt_obj_module_get();
> + if (ret < 0)
> + return ret;
> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
> +
> return 0;
> }
>
> diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
> index 1ee06d0..db795e3 100644
> --- a/kernel/checkpoint/objhash.c
> +++ b/kernel/checkpoint/objhash.c
> @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = {
> [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops,
> };
>
> +static int ckpt_obj_pinned_count;
> +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock);
> +
> int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
> {
> + int ret = -EINVAL;
> + int i;
> +
> if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
> return -EINVAL;
> - if (ckpt_obj_ops[ops->obj_type] != NULL)
> - return -EINVAL;
> - ckpt_obj_ops[ops->obj_type] = ops;
> - return 0;
> + spin_lock(&ckpt_obj_pinned_lock);
> + if (ckpt_obj_ops[ops->obj_type] == NULL) {
> + if (ops->owner) {
> + for (i = 0; i < ckpt_obj_pinned_count)
> + __module_get(owner->module);
> + }
> + ckpt_obj_ops[ops->obj_type] = ops;
> + ret = 0;
> + }
> + spin_unlock(&ckpt_obj_pinned_lock);
> + return ret;
> }
> EXPORT_SYMBOL(register_checkpoint_obj);
>
> +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
> +{
> + if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
> + return;
> + spin_lock(&ckpt_obj_pinned_lock);
> + if (ckpt_obj_ops[ops->obj_type] == ops)
> + ckpt_obj_ops[ops->obj_type] = NULL;
> + spin_unlock(&ckpt_obj_pinned_lock);
> + return ret;
> +}
> +EXPORT_SYMBOL(unregister_checkpoint_obj);
> +
> +static void _ckpt_obj_module_put(int last)
> +{
> + int i;
> +
> + for (i = 0; i < last; i++) {
> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
> + continue;
> + module_put(ckpt_obj_ops[i]->owner);
> + }
> +}
> +void ckpt_obj_module_put(void)
> +{
> + spin_lock(&ckpt_obj_pinned_lock);
> + _ckpt_obj_module_put(CKPT_OBJ_MAX);
> + ckpt_obj_pinned_count--;
> + spin_unlock(&ckpt_obj_pinned_lock);
> +}
> +
> +int ckpt_obj_module_get(void)
> +{
> + int i, ret = 0;
> +
> + spin_lock(&ckpt_obj_pinned_lock);
> + for (i = 0; i < CKPT_OBJ_MAX; i++) {
> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
> + continue;
> + ret = try_module_get(ckpt_obj_ops[i]->owner);
> + if (ret < 0)
> + break;
> + }
> + if (ret < 0)
> + _ckpt_obj_module_put(i);
> + else
> + ckpt_obj_pinned_count++;
> + spin_unlock(&ckpt_obj_pinned_lock);
> + return ret;
> +}
> +
> #define CKPT_OBJ_HASH_NBITS 10
> #define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS)
>
> diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
> index 437de4f..582e1f1 100644
> --- a/kernel/checkpoint/restart.c
> +++ b/kernel/checkpoint/restart.c
> @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid)
>
> ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */
>
> + /* pin down modules - cleanup by ckpt_ctx_free() */
> + ret = ckpt_obj_module_get();
> + if (ret < 0)
> + return ret;
> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
> +
> return 0;
> }
>
> diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c
> index 5e84915..e1a1f96 100644
> --- a/kernel/checkpoint/sys.c
> +++ b/kernel/checkpoint/sys.c
> @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
>
> kfree(ctx->pids_arr);
>
> + /* un-pin modules */
> + if (ctx->kflags & CKPT_CTX_PINNED)
> + ckpt_obj_module_put();
> +
> kfree(ctx);
> }
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] c/r: pin down modules registered to obj-ops
[not found] ` <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
@ 2010-04-30 4:34 ` Serge E. Hallyn
[not found] ` <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Serge E. Hallyn @ 2010-04-30 4:34 UTC (permalink / raw)
To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
How can it crash kernels? At the moment every ckpt_ops that is
registered is done so from files which are not parts of modules.
Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
>
> I don't feel comfortable with posting a version that we know
> can crash a kernel, and even less so if we say that's it's broken...
> So I prefer it inside.
>
> Oren
>
> Serge E. Hallyn wrote:
> > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
> >> To ensure that a module does not unload in the middle of a checkpoint
> >> or a restart operation, pin (module_get) all the modules during either
> >> operation. For that, add a new memeber ->owner in ckpt_obj_ops.
> >>
> >> Also add a counter that keeps track of how many module_gets we have
> >> going on, to properly handle new modules that register when an
> >> operation is underway.
> >>
> >> This is a proof of concept, applies on top of the patch that introduces
> >> objhash on rc7 (patch 33).
> >>
> >> Todo:
> >>
> >> - I put the initialization part in init_*_ctx(), but it could be moved
> >> to ckpt_ctx_alloc() which is common to both checkpoint and restart.
> >>
> >> - If this is ok, then additional patches should adjust those modules
> >> that indeed register...
> >>
> >> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
> >
> > Yes this looks good to me
> >
> > Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> >
> > Were you going to stick this into v21 too, or queue that up as
> > first patch after the patchbomb?
> >
> > thanks,
> > -serge
> >
> >> ---
> >> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> >> index 2f5af3c..4ed8a8c 100644
> >> --- a/include/linux/checkpoint.h
> >> +++ b/include/linux/checkpoint.h
> >> @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd,
> >> #define CKPT_CTX_RESTART_BIT 1
> >> #define CKPT_CTX_SUCCESS_BIT 2
> >> #define CKPT_CTX_ERROR_BIT 3
> >> +#define CKPT_CTX_PINNED_BIT 4
> >>
> >> #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT)
> >> #define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT)
> >> #define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT)
> >> #define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT)
> >> +#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT)
> >>
> >> /* ckpt_ctx: uflags */
> >> #define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE
> >> @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx)
> >> extern void restore_notify_error(struct ckpt_ctx *ctx);
> >>
> >> /* obj_hash */
> >> +extern int ckpt_obj_module_get(void);
> >> +extern void ckpt_obj_module_put(void);
> >> +
> >> extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
> >> extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
> >>
> >> @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
> >>
> >> struct ckpt_obj_ops;
> >> extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops);
> >> +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops);
> >>
> >> #else /* CONFIG_CHEKCPOINT */
> >>
> >> @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
> >> return 0;
> >> }
> >>
> >> +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
> >> +{
> >> +}
> >> +
> >> #endif /* CONFIG_CHECKPOINT */
> >> #endif /* __KERNEL__ */
> >>
> >> diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
> >> index 912e06a..8035556 100644
> >> --- a/include/linux/checkpoint_types.h
> >> +++ b/include/linux/checkpoint_types.h
> >> @@ -79,6 +79,7 @@ struct ckpt_obj_ops {
> >> int (*ref_grab)(void *ptr);
> >> int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr);
> >> void *(*restore)(struct ckpt_ctx *ctx);
> >> + struct module *owner;
> >> };
> >>
> >>
> >> diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c
> >> index f451f8f..1a08bfb 100644
> >> --- a/kernel/checkpoint/checkpoint.c
> >> +++ b/kernel/checkpoint/checkpoint.c
> >> @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
> >> {
> >> struct task_struct *task;
> >> struct nsproxy *nsproxy;
> >> + int ret;
> >>
> >> /*
> >> * No need for explicit cleanup here, because if an error
> >> @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
> >> return -EINVAL; /* cleanup by ckpt_ctx_free() */
> >> }
> >>
> >> + /* pin down modules - cleanup by ckpt_ctx_free() */
> >> + ret = ckpt_obj_module_get();
> >> + if (ret < 0)
> >> + return ret;
> >> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
> >> +
> >> return 0;
> >> }
> >>
> >> diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
> >> index 1ee06d0..db795e3 100644
> >> --- a/kernel/checkpoint/objhash.c
> >> +++ b/kernel/checkpoint/objhash.c
> >> @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = {
> >> [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops,
> >> };
> >>
> >> +static int ckpt_obj_pinned_count;
> >> +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock);
> >> +
> >> int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
> >> {
> >> + int ret = -EINVAL;
> >> + int i;
> >> +
> >> if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
> >> return -EINVAL;
> >> - if (ckpt_obj_ops[ops->obj_type] != NULL)
> >> - return -EINVAL;
> >> - ckpt_obj_ops[ops->obj_type] = ops;
> >> - return 0;
> >> + spin_lock(&ckpt_obj_pinned_lock);
> >> + if (ckpt_obj_ops[ops->obj_type] == NULL) {
> >> + if (ops->owner) {
> >> + for (i = 0; i < ckpt_obj_pinned_count)
> >> + __module_get(owner->module);
> >> + }
> >> + ckpt_obj_ops[ops->obj_type] = ops;
> >> + ret = 0;
> >> + }
> >> + spin_unlock(&ckpt_obj_pinned_lock);
> >> + return ret;
> >> }
> >> EXPORT_SYMBOL(register_checkpoint_obj);
> >>
> >> +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
> >> +{
> >> + if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
> >> + return;
> >> + spin_lock(&ckpt_obj_pinned_lock);
> >> + if (ckpt_obj_ops[ops->obj_type] == ops)
> >> + ckpt_obj_ops[ops->obj_type] = NULL;
> >> + spin_unlock(&ckpt_obj_pinned_lock);
> >> + return ret;
> >> +}
> >> +EXPORT_SYMBOL(unregister_checkpoint_obj);
> >> +
> >> +static void _ckpt_obj_module_put(int last)
> >> +{
> >> + int i;
> >> +
> >> + for (i = 0; i < last; i++) {
> >> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
> >> + continue;
> >> + module_put(ckpt_obj_ops[i]->owner);
> >> + }
> >> +}
> >> +void ckpt_obj_module_put(void)
> >> +{
> >> + spin_lock(&ckpt_obj_pinned_lock);
> >> + _ckpt_obj_module_put(CKPT_OBJ_MAX);
> >> + ckpt_obj_pinned_count--;
> >> + spin_unlock(&ckpt_obj_pinned_lock);
> >> +}
> >> +
> >> +int ckpt_obj_module_get(void)
> >> +{
> >> + int i, ret = 0;
> >> +
> >> + spin_lock(&ckpt_obj_pinned_lock);
> >> + for (i = 0; i < CKPT_OBJ_MAX; i++) {
> >> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
> >> + continue;
> >> + ret = try_module_get(ckpt_obj_ops[i]->owner);
> >> + if (ret < 0)
> >> + break;
> >> + }
> >> + if (ret < 0)
> >> + _ckpt_obj_module_put(i);
> >> + else
> >> + ckpt_obj_pinned_count++;
> >> + spin_unlock(&ckpt_obj_pinned_lock);
> >> + return ret;
> >> +}
> >> +
> >> #define CKPT_OBJ_HASH_NBITS 10
> >> #define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS)
> >>
> >> diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
> >> index 437de4f..582e1f1 100644
> >> --- a/kernel/checkpoint/restart.c
> >> +++ b/kernel/checkpoint/restart.c
> >> @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid)
> >>
> >> ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */
> >>
> >> + /* pin down modules - cleanup by ckpt_ctx_free() */
> >> + ret = ckpt_obj_module_get();
> >> + if (ret < 0)
> >> + return ret;
> >> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
> >> +
> >> return 0;
> >> }
> >>
> >> diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c
> >> index 5e84915..e1a1f96 100644
> >> --- a/kernel/checkpoint/sys.c
> >> +++ b/kernel/checkpoint/sys.c
> >> @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
> >>
> >> kfree(ctx->pids_arr);
> >>
> >> + /* un-pin modules */
> >> + if (ctx->kflags & CKPT_CTX_PINNED)
> >> + ckpt_obj_module_put();
> >> +
> >> kfree(ctx);
> >> }
> >
> _______________________________________________
> Containers mailing list
> Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] c/r: pin down modules registered to obj-ops
[not found] ` <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-04-30 4:34 ` Oren Laadan
[not found] ` <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Oren Laadan @ 2010-04-30 4:34 UTC (permalink / raw)
To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
I don't feel comfortable with posting a version that we know
can crash a kernel, and even less so if we say that's it's broken...
So I prefer it inside.
Oren
Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
>> To ensure that a module does not unload in the middle of a checkpoint
>> or a restart operation, pin (module_get) all the modules during either
>> operation. For that, add a new memeber ->owner in ckpt_obj_ops.
>>
>> Also add a counter that keeps track of how many module_gets we have
>> going on, to properly handle new modules that register when an
>> operation is underway.
>>
>> This is a proof of concept, applies on top of the patch that introduces
>> objhash on rc7 (patch 33).
>>
>> Todo:
>>
>> - I put the initialization part in init_*_ctx(), but it could be moved
>> to ckpt_ctx_alloc() which is common to both checkpoint and restart.
>>
>> - If this is ok, then additional patches should adjust those modules
>> that indeed register...
>>
>> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
>
> Yes this looks good to me
>
> Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
>
> Were you going to stick this into v21 too, or queue that up as
> first patch after the patchbomb?
>
> thanks,
> -serge
>
>> ---
>> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
>> index 2f5af3c..4ed8a8c 100644
>> --- a/include/linux/checkpoint.h
>> +++ b/include/linux/checkpoint.h
>> @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd,
>> #define CKPT_CTX_RESTART_BIT 1
>> #define CKPT_CTX_SUCCESS_BIT 2
>> #define CKPT_CTX_ERROR_BIT 3
>> +#define CKPT_CTX_PINNED_BIT 4
>>
>> #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT)
>> #define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT)
>> #define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT)
>> #define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT)
>> +#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT)
>>
>> /* ckpt_ctx: uflags */
>> #define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE
>> @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx)
>> extern void restore_notify_error(struct ckpt_ctx *ctx);
>>
>> /* obj_hash */
>> +extern int ckpt_obj_module_get(void);
>> +extern void ckpt_obj_module_put(void);
>> +
>> extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
>> extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
>>
>> @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
>>
>> struct ckpt_obj_ops;
>> extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops);
>> +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops);
>>
>> #else /* CONFIG_CHEKCPOINT */
>>
>> @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
>> return 0;
>> }
>>
>> +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
>> +{
>> +}
>> +
>> #endif /* CONFIG_CHECKPOINT */
>> #endif /* __KERNEL__ */
>>
>> diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
>> index 912e06a..8035556 100644
>> --- a/include/linux/checkpoint_types.h
>> +++ b/include/linux/checkpoint_types.h
>> @@ -79,6 +79,7 @@ struct ckpt_obj_ops {
>> int (*ref_grab)(void *ptr);
>> int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr);
>> void *(*restore)(struct ckpt_ctx *ctx);
>> + struct module *owner;
>> };
>>
>>
>> diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c
>> index f451f8f..1a08bfb 100644
>> --- a/kernel/checkpoint/checkpoint.c
>> +++ b/kernel/checkpoint/checkpoint.c
>> @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
>> {
>> struct task_struct *task;
>> struct nsproxy *nsproxy;
>> + int ret;
>>
>> /*
>> * No need for explicit cleanup here, because if an error
>> @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
>> return -EINVAL; /* cleanup by ckpt_ctx_free() */
>> }
>>
>> + /* pin down modules - cleanup by ckpt_ctx_free() */
>> + ret = ckpt_obj_module_get();
>> + if (ret < 0)
>> + return ret;
>> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
>> +
>> return 0;
>> }
>>
>> diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
>> index 1ee06d0..db795e3 100644
>> --- a/kernel/checkpoint/objhash.c
>> +++ b/kernel/checkpoint/objhash.c
>> @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = {
>> [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops,
>> };
>>
>> +static int ckpt_obj_pinned_count;
>> +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock);
>> +
>> int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
>> {
>> + int ret = -EINVAL;
>> + int i;
>> +
>> if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
>> return -EINVAL;
>> - if (ckpt_obj_ops[ops->obj_type] != NULL)
>> - return -EINVAL;
>> - ckpt_obj_ops[ops->obj_type] = ops;
>> - return 0;
>> + spin_lock(&ckpt_obj_pinned_lock);
>> + if (ckpt_obj_ops[ops->obj_type] == NULL) {
>> + if (ops->owner) {
>> + for (i = 0; i < ckpt_obj_pinned_count)
>> + __module_get(owner->module);
>> + }
>> + ckpt_obj_ops[ops->obj_type] = ops;
>> + ret = 0;
>> + }
>> + spin_unlock(&ckpt_obj_pinned_lock);
>> + return ret;
>> }
>> EXPORT_SYMBOL(register_checkpoint_obj);
>>
>> +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
>> +{
>> + if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
>> + return;
>> + spin_lock(&ckpt_obj_pinned_lock);
>> + if (ckpt_obj_ops[ops->obj_type] == ops)
>> + ckpt_obj_ops[ops->obj_type] = NULL;
>> + spin_unlock(&ckpt_obj_pinned_lock);
>> + return ret;
>> +}
>> +EXPORT_SYMBOL(unregister_checkpoint_obj);
>> +
>> +static void _ckpt_obj_module_put(int last)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < last; i++) {
>> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
>> + continue;
>> + module_put(ckpt_obj_ops[i]->owner);
>> + }
>> +}
>> +void ckpt_obj_module_put(void)
>> +{
>> + spin_lock(&ckpt_obj_pinned_lock);
>> + _ckpt_obj_module_put(CKPT_OBJ_MAX);
>> + ckpt_obj_pinned_count--;
>> + spin_unlock(&ckpt_obj_pinned_lock);
>> +}
>> +
>> +int ckpt_obj_module_get(void)
>> +{
>> + int i, ret = 0;
>> +
>> + spin_lock(&ckpt_obj_pinned_lock);
>> + for (i = 0; i < CKPT_OBJ_MAX; i++) {
>> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
>> + continue;
>> + ret = try_module_get(ckpt_obj_ops[i]->owner);
>> + if (ret < 0)
>> + break;
>> + }
>> + if (ret < 0)
>> + _ckpt_obj_module_put(i);
>> + else
>> + ckpt_obj_pinned_count++;
>> + spin_unlock(&ckpt_obj_pinned_lock);
>> + return ret;
>> +}
>> +
>> #define CKPT_OBJ_HASH_NBITS 10
>> #define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS)
>>
>> diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
>> index 437de4f..582e1f1 100644
>> --- a/kernel/checkpoint/restart.c
>> +++ b/kernel/checkpoint/restart.c
>> @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid)
>>
>> ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */
>>
>> + /* pin down modules - cleanup by ckpt_ctx_free() */
>> + ret = ckpt_obj_module_get();
>> + if (ret < 0)
>> + return ret;
>> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
>> +
>> return 0;
>> }
>>
>> diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c
>> index 5e84915..e1a1f96 100644
>> --- a/kernel/checkpoint/sys.c
>> +++ b/kernel/checkpoint/sys.c
>> @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
>>
>> kfree(ctx->pids_arr);
>>
>> + /* un-pin modules */
>> + if (ctx->kflags & CKPT_CTX_PINNED)
>> + ckpt_obj_module_put();
>> +
>> kfree(ctx);
>> }
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] c/r: pin down modules registered to obj-ops
[not found] ` <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
@ 2010-04-30 4:55 ` Oren Laadan
0 siblings, 0 replies; 5+ messages in thread
From: Oren Laadan @ 2010-04-30 4:55 UTC (permalink / raw)
To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
Serge E. Hallyn wrote:
> How can it crash kernels? At the moment every ckpt_ops that is
> registered is done so from files which are not parts of modules.
... that's a good point.
Ok then - the patch will wait.
Oren.
>
> Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org):
>> I don't feel comfortable with posting a version that we know
>> can crash a kernel, and even less so if we say that's it's broken...
>> So I prefer it inside.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-04-30 4:55 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-29 22:41 [PATCH] c/r: pin down modules registered to obj-ops Oren Laadan
[not found] ` <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-29 23:11 ` Serge E. Hallyn
[not found] ` <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-30 4:34 ` Oren Laadan
[not found] ` <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-30 4:34 ` Serge E. Hallyn
[not found] ` <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
2010-04-30 4:55 ` Oren Laadan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.