* [PATCH] c/r: pin down modules registered to obj-ops
@ 2010-04-29 22:41 Oren Laadan
[not found] ` <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Oren Laadan @ 2010-04-29 22:41 UTC (permalink / raw)
To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
To ensure that a module does not unload in the middle of a checkpoint
or a restart operation, pin (module_get) all the modules during either
operation. For that, add a new memeber ->owner in ckpt_obj_ops.
Also add a counter that keeps track of how many module_gets we have
going on, to properly handle new modules that register when an
operation is underway.
This is a proof of concept, applies on top of the patch that introduces
objhash on rc7 (patch 33).
Todo:
- I put the initialization part in init_*_ctx(), but it could be moved
to ckpt_ctx_alloc() which is common to both checkpoint and restart.
- If this is ok, then additional patches should adjust those modules
that indeed register...
Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
---
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 2f5af3c..4ed8a8c 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd,
#define CKPT_CTX_RESTART_BIT 1
#define CKPT_CTX_SUCCESS_BIT 2
#define CKPT_CTX_ERROR_BIT 3
+#define CKPT_CTX_PINNED_BIT 4
#define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT)
#define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT)
#define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT)
#define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT)
+#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT)
/* ckpt_ctx: uflags */
#define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE
@@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx)
extern void restore_notify_error(struct ckpt_ctx *ctx);
/* obj_hash */
+extern int ckpt_obj_module_get(void);
+extern void ckpt_obj_module_put(void);
+
extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
@@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
struct ckpt_obj_ops;
extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops);
+extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops);
#else /* CONFIG_CHEKCPOINT */
@@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
return 0;
}
+static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
+{
+}
+
#endif /* CONFIG_CHECKPOINT */
#endif /* __KERNEL__ */
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index 912e06a..8035556 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -79,6 +79,7 @@ struct ckpt_obj_ops {
int (*ref_grab)(void *ptr);
int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr);
void *(*restore)(struct ckpt_ctx *ctx);
+ struct module *owner;
};
diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c
index f451f8f..1a08bfb 100644
--- a/kernel/checkpoint/checkpoint.c
+++ b/kernel/checkpoint/checkpoint.c
@@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
{
struct task_struct *task;
struct nsproxy *nsproxy;
+ int ret;
/*
* No need for explicit cleanup here, because if an error
@@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid)
return -EINVAL; /* cleanup by ckpt_ctx_free() */
}
+ /* pin down modules - cleanup by ckpt_ctx_free() */
+ ret = ckpt_obj_module_get();
+ if (ret < 0)
+ return ret;
+ ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
+
return 0;
}
diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
index 1ee06d0..db795e3 100644
--- a/kernel/checkpoint/objhash.c
+++ b/kernel/checkpoint/objhash.c
@@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = {
[CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops,
};
+static int ckpt_obj_pinned_count;
+static DEFINE_SPINLOCK(ckpt_obj_pinned_lock);
+
int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
{
+ int ret = -EINVAL;
+ int i;
+
if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
return -EINVAL;
- if (ckpt_obj_ops[ops->obj_type] != NULL)
- return -EINVAL;
- ckpt_obj_ops[ops->obj_type] = ops;
- return 0;
+ spin_lock(&ckpt_obj_pinned_lock);
+ if (ckpt_obj_ops[ops->obj_type] == NULL) {
+ if (ops->owner) {
+ for (i = 0; i < ckpt_obj_pinned_count)
+ __module_get(owner->module);
+ }
+ ckpt_obj_ops[ops->obj_type] = ops;
+ ret = 0;
+ }
+ spin_unlock(&ckpt_obj_pinned_lock);
+ return ret;
}
EXPORT_SYMBOL(register_checkpoint_obj);
+void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
+{
+ if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
+ return;
+ spin_lock(&ckpt_obj_pinned_lock);
+ if (ckpt_obj_ops[ops->obj_type] == ops)
+ ckpt_obj_ops[ops->obj_type] = NULL;
+ spin_unlock(&ckpt_obj_pinned_lock);
+ return ret;
+}
+EXPORT_SYMBOL(unregister_checkpoint_obj);
+
+static void _ckpt_obj_module_put(int last)
+{
+ int i;
+
+ for (i = 0; i < last; i++) {
+ if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
+ continue;
+ module_put(ckpt_obj_ops[i]->owner);
+ }
+}
+void ckpt_obj_module_put(void)
+{
+ spin_lock(&ckpt_obj_pinned_lock);
+ _ckpt_obj_module_put(CKPT_OBJ_MAX);
+ ckpt_obj_pinned_count--;
+ spin_unlock(&ckpt_obj_pinned_lock);
+}
+
+int ckpt_obj_module_get(void)
+{
+ int i, ret = 0;
+
+ spin_lock(&ckpt_obj_pinned_lock);
+ for (i = 0; i < CKPT_OBJ_MAX; i++) {
+ if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
+ continue;
+ ret = try_module_get(ckpt_obj_ops[i]->owner);
+ if (ret < 0)
+ break;
+ }
+ if (ret < 0)
+ _ckpt_obj_module_put(i);
+ else
+ ckpt_obj_pinned_count++;
+ spin_unlock(&ckpt_obj_pinned_lock);
+ return ret;
+}
+
#define CKPT_OBJ_HASH_NBITS 10
#define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS)
diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
index 437de4f..582e1f1 100644
--- a/kernel/checkpoint/restart.c
+++ b/kernel/checkpoint/restart.c
@@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid)
ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */
+ /* pin down modules - cleanup by ckpt_ctx_free() */
+ ret = ckpt_obj_module_get();
+ if (ret < 0)
+ return ret;
+ ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
+
return 0;
}
diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c
index 5e84915..e1a1f96 100644
--- a/kernel/checkpoint/sys.c
+++ b/kernel/checkpoint/sys.c
@@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
kfree(ctx->pids_arr);
+ /* un-pin modules */
+ if (ctx->kflags & CKPT_CTX_PINNED)
+ ckpt_obj_module_put();
+
kfree(ctx);
}
^ permalink raw reply related [flat|nested] 5+ messages in thread[parent not found: <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>]
* Re: [PATCH] c/r: pin down modules registered to obj-ops [not found] ` <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> @ 2010-04-29 23:11 ` Serge E. Hallyn [not found] ` <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> 0 siblings, 1 reply; 5+ messages in thread From: Serge E. Hallyn @ 2010-04-29 23:11 UTC (permalink / raw) To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): > To ensure that a module does not unload in the middle of a checkpoint > or a restart operation, pin (module_get) all the modules during either > operation. For that, add a new memeber ->owner in ckpt_obj_ops. > > Also add a counter that keeps track of how many module_gets we have > going on, to properly handle new modules that register when an > operation is underway. > > This is a proof of concept, applies on top of the patch that introduces > objhash on rc7 (patch 33). > > Todo: > > - I put the initialization part in init_*_ctx(), but it could be moved > to ckpt_ctx_alloc() which is common to both checkpoint and restart. > > - If this is ok, then additional patches should adjust those modules > that indeed register... > > Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> Yes this looks good to me Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> Were you going to stick this into v21 too, or queue that up as first patch after the patchbomb? thanks, -serge > > --- > diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h > index 2f5af3c..4ed8a8c 100644 > --- a/include/linux/checkpoint.h > +++ b/include/linux/checkpoint.h > @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd, > #define CKPT_CTX_RESTART_BIT 1 > #define CKPT_CTX_SUCCESS_BIT 2 > #define CKPT_CTX_ERROR_BIT 3 > +#define CKPT_CTX_PINNED_BIT 4 > > #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT) > #define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT) > #define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT) > #define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT) > +#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT) > > /* ckpt_ctx: uflags */ > #define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE > @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx) > extern void restore_notify_error(struct ckpt_ctx *ctx); > > /* obj_hash */ > +extern int ckpt_obj_module_get(void); > +extern void ckpt_obj_module_put(void); > + > extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx); > extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx); > > @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx); > > struct ckpt_obj_ops; > extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops); > +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops); > > #else /* CONFIG_CHEKCPOINT */ > > @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops) > return 0; > } > > +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops) > +{ > +} > + > #endif /* CONFIG_CHECKPOINT */ > #endif /* __KERNEL__ */ > > diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h > index 912e06a..8035556 100644 > --- a/include/linux/checkpoint_types.h > +++ b/include/linux/checkpoint_types.h > @@ -79,6 +79,7 @@ struct ckpt_obj_ops { > int (*ref_grab)(void *ptr); > int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr); > void *(*restore)(struct ckpt_ctx *ctx); > + struct module *owner; > }; > > > diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c > index f451f8f..1a08bfb 100644 > --- a/kernel/checkpoint/checkpoint.c > +++ b/kernel/checkpoint/checkpoint.c > @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid) > { > struct task_struct *task; > struct nsproxy *nsproxy; > + int ret; > > /* > * No need for explicit cleanup here, because if an error > @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid) > return -EINVAL; /* cleanup by ckpt_ctx_free() */ > } > > + /* pin down modules - cleanup by ckpt_ctx_free() */ > + ret = ckpt_obj_module_get(); > + if (ret < 0) > + return ret; > + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED); > + > return 0; > } > > diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c > index 1ee06d0..db795e3 100644 > --- a/kernel/checkpoint/objhash.c > +++ b/kernel/checkpoint/objhash.c > @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = { > [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops, > }; > > +static int ckpt_obj_pinned_count; > +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock); > + > int register_checkpoint_obj(const struct ckpt_obj_ops *ops) > { > + int ret = -EINVAL; > + int i; > + > if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX) > return -EINVAL; > - if (ckpt_obj_ops[ops->obj_type] != NULL) > - return -EINVAL; > - ckpt_obj_ops[ops->obj_type] = ops; > - return 0; > + spin_lock(&ckpt_obj_pinned_lock); > + if (ckpt_obj_ops[ops->obj_type] == NULL) { > + if (ops->owner) { > + for (i = 0; i < ckpt_obj_pinned_count) > + __module_get(owner->module); > + } > + ckpt_obj_ops[ops->obj_type] = ops; > + ret = 0; > + } > + spin_unlock(&ckpt_obj_pinned_lock); > + return ret; > } > EXPORT_SYMBOL(register_checkpoint_obj); > > +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops) > +{ > + if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX) > + return; > + spin_lock(&ckpt_obj_pinned_lock); > + if (ckpt_obj_ops[ops->obj_type] == ops) > + ckpt_obj_ops[ops->obj_type] = NULL; > + spin_unlock(&ckpt_obj_pinned_lock); > + return ret; > +} > +EXPORT_SYMBOL(unregister_checkpoint_obj); > + > +static void _ckpt_obj_module_put(int last) > +{ > + int i; > + > + for (i = 0; i < last; i++) { > + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner) > + continue; > + module_put(ckpt_obj_ops[i]->owner); > + } > +} > +void ckpt_obj_module_put(void) > +{ > + spin_lock(&ckpt_obj_pinned_lock); > + _ckpt_obj_module_put(CKPT_OBJ_MAX); > + ckpt_obj_pinned_count--; > + spin_unlock(&ckpt_obj_pinned_lock); > +} > + > +int ckpt_obj_module_get(void) > +{ > + int i, ret = 0; > + > + spin_lock(&ckpt_obj_pinned_lock); > + for (i = 0; i < CKPT_OBJ_MAX; i++) { > + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner) > + continue; > + ret = try_module_get(ckpt_obj_ops[i]->owner); > + if (ret < 0) > + break; > + } > + if (ret < 0) > + _ckpt_obj_module_put(i); > + else > + ckpt_obj_pinned_count++; > + spin_unlock(&ckpt_obj_pinned_lock); > + return ret; > +} > + > #define CKPT_OBJ_HASH_NBITS 10 > #define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS) > > diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c > index 437de4f..582e1f1 100644 > --- a/kernel/checkpoint/restart.c > +++ b/kernel/checkpoint/restart.c > @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid) > > ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */ > > + /* pin down modules - cleanup by ckpt_ctx_free() */ > + ret = ckpt_obj_module_get(); > + if (ret < 0) > + return ret; > + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED); > + > return 0; > } > > diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c > index 5e84915..e1a1f96 100644 > --- a/kernel/checkpoint/sys.c > +++ b/kernel/checkpoint/sys.c > @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx) > > kfree(ctx->pids_arr); > > + /* un-pin modules */ > + if (ctx->kflags & CKPT_CTX_PINNED) > + ckpt_obj_module_put(); > + > kfree(ctx); > } ^ permalink raw reply [flat|nested] 5+ messages in thread
[parent not found: <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] c/r: pin down modules registered to obj-ops [not found] ` <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> @ 2010-04-30 4:34 ` Oren Laadan [not found] ` <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> 0 siblings, 1 reply; 5+ messages in thread From: Oren Laadan @ 2010-04-30 4:34 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA I don't feel comfortable with posting a version that we know can crash a kernel, and even less so if we say that's it's broken... So I prefer it inside. Oren Serge E. Hallyn wrote: > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): >> To ensure that a module does not unload in the middle of a checkpoint >> or a restart operation, pin (module_get) all the modules during either >> operation. For that, add a new memeber ->owner in ckpt_obj_ops. >> >> Also add a counter that keeps track of how many module_gets we have >> going on, to properly handle new modules that register when an >> operation is underway. >> >> This is a proof of concept, applies on top of the patch that introduces >> objhash on rc7 (patch 33). >> >> Todo: >> >> - I put the initialization part in init_*_ctx(), but it could be moved >> to ckpt_ctx_alloc() which is common to both checkpoint and restart. >> >> - If this is ok, then additional patches should adjust those modules >> that indeed register... >> >> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> > > Yes this looks good to me > > Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > > Were you going to stick this into v21 too, or queue that up as > first patch after the patchbomb? > > thanks, > -serge > >> --- >> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h >> index 2f5af3c..4ed8a8c 100644 >> --- a/include/linux/checkpoint.h >> +++ b/include/linux/checkpoint.h >> @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd, >> #define CKPT_CTX_RESTART_BIT 1 >> #define CKPT_CTX_SUCCESS_BIT 2 >> #define CKPT_CTX_ERROR_BIT 3 >> +#define CKPT_CTX_PINNED_BIT 4 >> >> #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT) >> #define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT) >> #define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT) >> #define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT) >> +#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT) >> >> /* ckpt_ctx: uflags */ >> #define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE >> @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx) >> extern void restore_notify_error(struct ckpt_ctx *ctx); >> >> /* obj_hash */ >> +extern int ckpt_obj_module_get(void); >> +extern void ckpt_obj_module_put(void); >> + >> extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx); >> extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx); >> >> @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx); >> >> struct ckpt_obj_ops; >> extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops); >> +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops); >> >> #else /* CONFIG_CHEKCPOINT */ >> >> @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops) >> return 0; >> } >> >> +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops) >> +{ >> +} >> + >> #endif /* CONFIG_CHECKPOINT */ >> #endif /* __KERNEL__ */ >> >> diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h >> index 912e06a..8035556 100644 >> --- a/include/linux/checkpoint_types.h >> +++ b/include/linux/checkpoint_types.h >> @@ -79,6 +79,7 @@ struct ckpt_obj_ops { >> int (*ref_grab)(void *ptr); >> int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr); >> void *(*restore)(struct ckpt_ctx *ctx); >> + struct module *owner; >> }; >> >> >> diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c >> index f451f8f..1a08bfb 100644 >> --- a/kernel/checkpoint/checkpoint.c >> +++ b/kernel/checkpoint/checkpoint.c >> @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid) >> { >> struct task_struct *task; >> struct nsproxy *nsproxy; >> + int ret; >> >> /* >> * No need for explicit cleanup here, because if an error >> @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid) >> return -EINVAL; /* cleanup by ckpt_ctx_free() */ >> } >> >> + /* pin down modules - cleanup by ckpt_ctx_free() */ >> + ret = ckpt_obj_module_get(); >> + if (ret < 0) >> + return ret; >> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED); >> + >> return 0; >> } >> >> diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c >> index 1ee06d0..db795e3 100644 >> --- a/kernel/checkpoint/objhash.c >> +++ b/kernel/checkpoint/objhash.c >> @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = { >> [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops, >> }; >> >> +static int ckpt_obj_pinned_count; >> +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock); >> + >> int register_checkpoint_obj(const struct ckpt_obj_ops *ops) >> { >> + int ret = -EINVAL; >> + int i; >> + >> if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX) >> return -EINVAL; >> - if (ckpt_obj_ops[ops->obj_type] != NULL) >> - return -EINVAL; >> - ckpt_obj_ops[ops->obj_type] = ops; >> - return 0; >> + spin_lock(&ckpt_obj_pinned_lock); >> + if (ckpt_obj_ops[ops->obj_type] == NULL) { >> + if (ops->owner) { >> + for (i = 0; i < ckpt_obj_pinned_count) >> + __module_get(owner->module); >> + } >> + ckpt_obj_ops[ops->obj_type] = ops; >> + ret = 0; >> + } >> + spin_unlock(&ckpt_obj_pinned_lock); >> + return ret; >> } >> EXPORT_SYMBOL(register_checkpoint_obj); >> >> +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops) >> +{ >> + if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX) >> + return; >> + spin_lock(&ckpt_obj_pinned_lock); >> + if (ckpt_obj_ops[ops->obj_type] == ops) >> + ckpt_obj_ops[ops->obj_type] = NULL; >> + spin_unlock(&ckpt_obj_pinned_lock); >> + return ret; >> +} >> +EXPORT_SYMBOL(unregister_checkpoint_obj); >> + >> +static void _ckpt_obj_module_put(int last) >> +{ >> + int i; >> + >> + for (i = 0; i < last; i++) { >> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner) >> + continue; >> + module_put(ckpt_obj_ops[i]->owner); >> + } >> +} >> +void ckpt_obj_module_put(void) >> +{ >> + spin_lock(&ckpt_obj_pinned_lock); >> + _ckpt_obj_module_put(CKPT_OBJ_MAX); >> + ckpt_obj_pinned_count--; >> + spin_unlock(&ckpt_obj_pinned_lock); >> +} >> + >> +int ckpt_obj_module_get(void) >> +{ >> + int i, ret = 0; >> + >> + spin_lock(&ckpt_obj_pinned_lock); >> + for (i = 0; i < CKPT_OBJ_MAX; i++) { >> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner) >> + continue; >> + ret = try_module_get(ckpt_obj_ops[i]->owner); >> + if (ret < 0) >> + break; >> + } >> + if (ret < 0) >> + _ckpt_obj_module_put(i); >> + else >> + ckpt_obj_pinned_count++; >> + spin_unlock(&ckpt_obj_pinned_lock); >> + return ret; >> +} >> + >> #define CKPT_OBJ_HASH_NBITS 10 >> #define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS) >> >> diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c >> index 437de4f..582e1f1 100644 >> --- a/kernel/checkpoint/restart.c >> +++ b/kernel/checkpoint/restart.c >> @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid) >> >> ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */ >> >> + /* pin down modules - cleanup by ckpt_ctx_free() */ >> + ret = ckpt_obj_module_get(); >> + if (ret < 0) >> + return ret; >> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED); >> + >> return 0; >> } >> >> diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c >> index 5e84915..e1a1f96 100644 >> --- a/kernel/checkpoint/sys.c >> +++ b/kernel/checkpoint/sys.c >> @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx) >> >> kfree(ctx->pids_arr); >> >> + /* un-pin modules */ >> + if (ctx->kflags & CKPT_CTX_PINNED) >> + ckpt_obj_module_put(); >> + >> kfree(ctx); >> } > ^ permalink raw reply [flat|nested] 5+ messages in thread
[parent not found: <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>]
* Re: [PATCH] c/r: pin down modules registered to obj-ops [not found] ` <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> @ 2010-04-30 4:34 ` Serge E. Hallyn [not found] ` <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> 0 siblings, 1 reply; 5+ messages in thread From: Serge E. Hallyn @ 2010-04-30 4:34 UTC (permalink / raw) To: Oren Laadan; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA How can it crash kernels? At the moment every ckpt_ops that is registered is done so from files which are not parts of modules. Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): > > I don't feel comfortable with posting a version that we know > can crash a kernel, and even less so if we say that's it's broken... > So I prefer it inside. > > Oren > > Serge E. Hallyn wrote: > > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): > >> To ensure that a module does not unload in the middle of a checkpoint > >> or a restart operation, pin (module_get) all the modules during either > >> operation. For that, add a new memeber ->owner in ckpt_obj_ops. > >> > >> Also add a counter that keeps track of how many module_gets we have > >> going on, to properly handle new modules that register when an > >> operation is underway. > >> > >> This is a proof of concept, applies on top of the patch that introduces > >> objhash on rc7 (patch 33). > >> > >> Todo: > >> > >> - I put the initialization part in init_*_ctx(), but it could be moved > >> to ckpt_ctx_alloc() which is common to both checkpoint and restart. > >> > >> - If this is ok, then additional patches should adjust those modules > >> that indeed register... > >> > >> Signed-off-by: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org> > > > > Yes this looks good to me > > > > Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org> > > > > Were you going to stick this into v21 too, or queue that up as > > first patch after the patchbomb? > > > > thanks, > > -serge > > > >> --- > >> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h > >> index 2f5af3c..4ed8a8c 100644 > >> --- a/include/linux/checkpoint.h > >> +++ b/include/linux/checkpoint.h > >> @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd, > >> #define CKPT_CTX_RESTART_BIT 1 > >> #define CKPT_CTX_SUCCESS_BIT 2 > >> #define CKPT_CTX_ERROR_BIT 3 > >> +#define CKPT_CTX_PINNED_BIT 4 > >> > >> #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT) > >> #define CKPT_CTX_RESTART (1 << CKPT_CTX_RESTART_BIT) > >> #define CKPT_CTX_SUCCESS (1 << CKPT_CTX_SUCCESS_BIT) > >> #define CKPT_CTX_ERROR (1 << CKPT_CTX_ERROR_BIT) > >> +#define CKPT_CTX_PINNED (1 << CKPT_CTX_PINNED_BIT) > >> > >> /* ckpt_ctx: uflags */ > >> #define CHECKPOINT_USER_FLAGS CHECKPOINT_SUBTREE > >> @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx) > >> extern void restore_notify_error(struct ckpt_ctx *ctx); > >> > >> /* obj_hash */ > >> +extern int ckpt_obj_module_get(void); > >> +extern void ckpt_obj_module_put(void); > >> + > >> extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx); > >> extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx); > >> > >> @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx); > >> > >> struct ckpt_obj_ops; > >> extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops); > >> +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops); > >> > >> #else /* CONFIG_CHEKCPOINT */ > >> > >> @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct ckpt_obj_ops *ops) > >> return 0; > >> } > >> > >> +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops) > >> +{ > >> +} > >> + > >> #endif /* CONFIG_CHECKPOINT */ > >> #endif /* __KERNEL__ */ > >> > >> diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h > >> index 912e06a..8035556 100644 > >> --- a/include/linux/checkpoint_types.h > >> +++ b/include/linux/checkpoint_types.h > >> @@ -79,6 +79,7 @@ struct ckpt_obj_ops { > >> int (*ref_grab)(void *ptr); > >> int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr); > >> void *(*restore)(struct ckpt_ctx *ctx); > >> + struct module *owner; > >> }; > >> > >> > >> diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c > >> index f451f8f..1a08bfb 100644 > >> --- a/kernel/checkpoint/checkpoint.c > >> +++ b/kernel/checkpoint/checkpoint.c > >> @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid) > >> { > >> struct task_struct *task; > >> struct nsproxy *nsproxy; > >> + int ret; > >> > >> /* > >> * No need for explicit cleanup here, because if an error > >> @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, pid_t pid) > >> return -EINVAL; /* cleanup by ckpt_ctx_free() */ > >> } > >> > >> + /* pin down modules - cleanup by ckpt_ctx_free() */ > >> + ret = ckpt_obj_module_get(); > >> + if (ret < 0) > >> + return ret; > >> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED); > >> + > >> return 0; > >> } > >> > >> diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c > >> index 1ee06d0..db795e3 100644 > >> --- a/kernel/checkpoint/objhash.c > >> +++ b/kernel/checkpoint/objhash.c > >> @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops *ckpt_obj_ops[CKPT_OBJ_MAX] = { > >> [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops, > >> }; > >> > >> +static int ckpt_obj_pinned_count; > >> +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock); > >> + > >> int register_checkpoint_obj(const struct ckpt_obj_ops *ops) > >> { > >> + int ret = -EINVAL; > >> + int i; > >> + > >> if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX) > >> return -EINVAL; > >> - if (ckpt_obj_ops[ops->obj_type] != NULL) > >> - return -EINVAL; > >> - ckpt_obj_ops[ops->obj_type] = ops; > >> - return 0; > >> + spin_lock(&ckpt_obj_pinned_lock); > >> + if (ckpt_obj_ops[ops->obj_type] == NULL) { > >> + if (ops->owner) { > >> + for (i = 0; i < ckpt_obj_pinned_count) > >> + __module_get(owner->module); > >> + } > >> + ckpt_obj_ops[ops->obj_type] = ops; > >> + ret = 0; > >> + } > >> + spin_unlock(&ckpt_obj_pinned_lock); > >> + return ret; > >> } > >> EXPORT_SYMBOL(register_checkpoint_obj); > >> > >> +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops) > >> +{ > >> + if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX) > >> + return; > >> + spin_lock(&ckpt_obj_pinned_lock); > >> + if (ckpt_obj_ops[ops->obj_type] == ops) > >> + ckpt_obj_ops[ops->obj_type] = NULL; > >> + spin_unlock(&ckpt_obj_pinned_lock); > >> + return ret; > >> +} > >> +EXPORT_SYMBOL(unregister_checkpoint_obj); > >> + > >> +static void _ckpt_obj_module_put(int last) > >> +{ > >> + int i; > >> + > >> + for (i = 0; i < last; i++) { > >> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner) > >> + continue; > >> + module_put(ckpt_obj_ops[i]->owner); > >> + } > >> +} > >> +void ckpt_obj_module_put(void) > >> +{ > >> + spin_lock(&ckpt_obj_pinned_lock); > >> + _ckpt_obj_module_put(CKPT_OBJ_MAX); > >> + ckpt_obj_pinned_count--; > >> + spin_unlock(&ckpt_obj_pinned_lock); > >> +} > >> + > >> +int ckpt_obj_module_get(void) > >> +{ > >> + int i, ret = 0; > >> + > >> + spin_lock(&ckpt_obj_pinned_lock); > >> + for (i = 0; i < CKPT_OBJ_MAX; i++) { > >> + if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner) > >> + continue; > >> + ret = try_module_get(ckpt_obj_ops[i]->owner); > >> + if (ret < 0) > >> + break; > >> + } > >> + if (ret < 0) > >> + _ckpt_obj_module_put(i); > >> + else > >> + ckpt_obj_pinned_count++; > >> + spin_unlock(&ckpt_obj_pinned_lock); > >> + return ret; > >> +} > >> + > >> #define CKPT_OBJ_HASH_NBITS 10 > >> #define CKPT_OBJ_HASH_TOTAL (1UL << CKPT_OBJ_HASH_NBITS) > >> > >> diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c > >> index 437de4f..582e1f1 100644 > >> --- a/kernel/checkpoint/restart.c > >> +++ b/kernel/checkpoint/restart.c > >> @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t pid) > >> > >> ctx->active_pid = -1; /* see restore_activate_next, get_active_pid */ > >> > >> + /* pin down modules - cleanup by ckpt_ctx_free() */ > >> + ret = ckpt_obj_module_get(); > >> + if (ret < 0) > >> + return ret; > >> + ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED); > >> + > >> return 0; > >> } > >> > >> diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c > >> index 5e84915..e1a1f96 100644 > >> --- a/kernel/checkpoint/sys.c > >> +++ b/kernel/checkpoint/sys.c > >> @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx) > >> > >> kfree(ctx->pids_arr); > >> > >> + /* un-pin modules */ > >> + if (ctx->kflags & CKPT_CTX_PINNED) > >> + ckpt_obj_module_put(); > >> + > >> kfree(ctx); > >> } > > > _______________________________________________ > Containers mailing list > Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org > https://lists.linux-foundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 5+ messages in thread
[parent not found: <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>]
* Re: [PATCH] c/r: pin down modules registered to obj-ops [not found] ` <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> @ 2010-04-30 4:55 ` Oren Laadan 0 siblings, 0 replies; 5+ messages in thread From: Oren Laadan @ 2010-04-30 4:55 UTC (permalink / raw) To: Serge E. Hallyn; +Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA Serge E. Hallyn wrote: > How can it crash kernels? At the moment every ckpt_ops that is > registered is done so from files which are not parts of modules. ... that's a good point. Ok then - the patch will wait. Oren. > > Quoting Oren Laadan (orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org): >> I don't feel comfortable with posting a version that we know >> can crash a kernel, and even less so if we say that's it's broken... >> So I prefer it inside. ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-04-30 4:55 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-29 22:41 [PATCH] c/r: pin down modules registered to obj-ops Oren Laadan
[not found] ` <1272580878-7268-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-29 23:11 ` Serge E. Hallyn
[not found] ` <20100429231121.GA3862-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-04-30 4:34 ` Oren Laadan
[not found] ` <4BDA5DDA.3010701-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-04-30 4:34 ` Serge E. Hallyn
[not found] ` <20100430043432.GA15098-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
2010-04-30 4:55 ` Oren Laadan
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.