All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 0/4][cr]: Checkpoint/restart file-owner info
@ 2010-05-11 22:38 Sukadev Bhattiprolu
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
                   ` (4 more replies)
  0 siblings, 5 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Matt Helsley, serue, Containers, linux-fsdevel

Checkpoint/restart the file-owner information for each 'struct file'
in the application.

  [PATCH 1/4]	Add uid, euid params to f_modown()
  [PATCH 2/4]	Define __f_setown_uid()
  [PATCH 3/4]	Checkpoint file-owner information
  [PATCH 4/4]	Restore file_owner info

  fs/checkpoint.c                |   61 +++++++++++++++++++++++++++++----------
  fs/fcntl.c                     |   22 +++++++++-----
  include/linux/checkpoint_hdr.h |    5 +++
  include/linux/fs.h             |    2 +
  4 files changed, 66 insertions(+), 24 deletions(-)

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2010-05-11 22:38   ` Sukadev Bhattiprolu
  2010-05-11 22:38   ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

Checkpoint/restart of file-owner.

Add uid, euid parameters to f_modown(). These parameters will be needed
when restarting an application (and hence restoring the file information),
from a checkpoint image.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 fs/fcntl.c |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2079af0..aeab1f4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -197,7 +197,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 }
 
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
-                     int force)
+                     uid_t uid, uid_t euid, int force)
 {
 	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
@@ -206,9 +206,8 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 		filp->f_owner.pid_type = type;
 
 		if (pid) {
-			const struct cred *cred = current_cred();
-			filp->f_owner.uid = cred->uid;
-			filp->f_owner.euid = cred->euid;
+			filp->f_owner.uid = uid;
+			filp->f_owner.euid = euid;
 		}
 	}
 	write_unlock_irq(&filp->f_owner.lock);
@@ -223,7 +222,7 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 	if (err)
 		return err;
 
-	f_modown(filp, pid, type, force);
+	f_modown(filp, pid, type, current_uid(), current_euid(), force);
 	return 0;
 }
 EXPORT_SYMBOL(__f_setown);
@@ -249,7 +248,7 @@ EXPORT_SYMBOL(f_setown);
 
 void f_delown(struct file *filp)
 {
-	f_modown(filp, NULL, PIDTYPE_PID, 1);
+	f_modown(filp, NULL, PIDTYPE_PID, current_uid(), current_euid(), 1);
 }
 
 pid_t f_getown(struct file *filp)
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
  2010-05-11 22:38 [RFC][PATCH 0/4][cr]: Checkpoint/restart file-owner info Sukadev Bhattiprolu
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2010-05-11 22:38 ` Sukadev Bhattiprolu
       [not found]   ` <1273617500-13653-2-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2010-05-12 17:05   ` Jamie Lokier
  2010-05-11 22:38 ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Matt Helsley, serue, Containers, linux-fsdevel

Checkpoint/restart of file-owner.

Add uid, euid parameters to f_modown(). These parameters will be needed
when restarting an application (and hence restoring the file information),
from a checkpoint image.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 fs/fcntl.c |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2079af0..aeab1f4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -197,7 +197,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 }
 
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
-                     int force)
+                     uid_t uid, uid_t euid, int force)
 {
 	write_lock_irq(&filp->f_owner.lock);
 	if (force || !filp->f_owner.pid) {
@@ -206,9 +206,8 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 		filp->f_owner.pid_type = type;
 
 		if (pid) {
-			const struct cred *cred = current_cred();
-			filp->f_owner.uid = cred->uid;
-			filp->f_owner.euid = cred->euid;
+			filp->f_owner.uid = uid;
+			filp->f_owner.euid = euid;
 		}
 	}
 	write_unlock_irq(&filp->f_owner.lock);
@@ -223,7 +222,7 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 	if (err)
 		return err;
 
-	f_modown(filp, pid, type, force);
+	f_modown(filp, pid, type, current_uid(), current_euid(), force);
 	return 0;
 }
 EXPORT_SYMBOL(__f_setown);
@@ -249,7 +248,7 @@ EXPORT_SYMBOL(f_setown);
 
 void f_delown(struct file *filp)
 {
-	f_modown(filp, NULL, PIDTYPE_PID, 1);
+	f_modown(filp, NULL, PIDTYPE_PID, current_uid(), current_euid(), 1);
 }
 
 pid_t f_getown(struct file *filp)
-- 
1.6.0.4


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2010-05-11 22:38   ` [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown() Sukadev Bhattiprolu
@ 2010-05-11 22:38   ` Sukadev Bhattiprolu
  2010-05-11 22:38   ` [RFC][PATCH 3/4][cr]: Checkpoint file-owner information Sukadev Bhattiprolu
  2010-05-11 22:38   ` [RFC][PATCH 4/4][cr]: Restore file_owner info Sukadev Bhattiprolu
  3 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

__f_setown_uid() is same as __f_setown(), except that instead of assuming the
uid and euid of current process, it expects them to be passed in as parameters.

This interface will be useful when checkpointing and restarting an application
that has a 'file owner' specified for any of the application's open files.
The uid, euid of the process setting up the owner is saved in the checkpoint
image. When the application is restarted, the save uid and euid values are
restored.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 fs/fcntl.c         |   13 ++++++++++---
 include/linux/fs.h |    2 ++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index aeab1f4..6e5d2bc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -213,8 +213,8 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 	write_unlock_irq(&filp->f_owner.lock);
 }
 
-int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
-		int force)
+int __f_setown_uid(struct file *filp, struct pid *pid, enum pid_type type,
+		uid_t uid, uid_t euid, int force)
 {
 	int err;
 
@@ -222,9 +222,16 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 	if (err)
 		return err;
 
-	f_modown(filp, pid, type, current_uid(), current_euid(), force);
+	f_modown(filp, pid, type, uid, euid, force);
 	return 0;
 }
+
+int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
+		int force)
+{
+	return __f_setown_uid(filp, pid, type, current_uid(), current_euid(),
+			force);
+}
 EXPORT_SYMBOL(__f_setown);
 
 int f_setown(struct file *filp, unsigned long arg, int force)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ee725ff..8257b1a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1304,6 +1304,8 @@ extern void kill_fasync(struct fasync_struct **, int, int);
 /* only for net: no internal synchronization */
 extern void __kill_fasync(struct fasync_struct *, int, int);
 
+extern int __f_setown_uid(struct file *filp, struct pid *, enum pid_type,
+		uid_t uid, uid_t euid, int force);
 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
  2010-05-11 22:38 [RFC][PATCH 0/4][cr]: Checkpoint/restart file-owner info Sukadev Bhattiprolu
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2010-05-11 22:38 ` [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown() Sukadev Bhattiprolu
@ 2010-05-11 22:38 ` Sukadev Bhattiprolu
       [not found]   ` <1273617500-13653-3-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2010-05-12 14:07   ` Matthew Wilcox
  2010-05-11 22:38 ` [RFC][PATCH 3/4][cr]: Checkpoint file-owner information Sukadev Bhattiprolu
  2010-05-11 22:38 ` [RFC][PATCH 4/4][cr]: Restore file_owner info Sukadev Bhattiprolu
  4 siblings, 2 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Matt Helsley, serue, Containers, linux-fsdevel

__f_setown_uid() is same as __f_setown(), except that instead of assuming the
uid and euid of current process, it expects them to be passed in as parameters.

This interface will be useful when checkpointing and restarting an application
that has a 'file owner' specified for any of the application's open files.
The uid, euid of the process setting up the owner is saved in the checkpoint
image. When the application is restarted, the save uid and euid values are
restored.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 fs/fcntl.c         |   13 ++++++++++---
 include/linux/fs.h |    2 ++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index aeab1f4..6e5d2bc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -213,8 +213,8 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
 	write_unlock_irq(&filp->f_owner.lock);
 }
 
-int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
-		int force)
+int __f_setown_uid(struct file *filp, struct pid *pid, enum pid_type type,
+		uid_t uid, uid_t euid, int force)
 {
 	int err;
 
@@ -222,9 +222,16 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 	if (err)
 		return err;
 
-	f_modown(filp, pid, type, current_uid(), current_euid(), force);
+	f_modown(filp, pid, type, uid, euid, force);
 	return 0;
 }
+
+int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
+		int force)
+{
+	return __f_setown_uid(filp, pid, type, current_uid(), current_euid(),
+			force);
+}
 EXPORT_SYMBOL(__f_setown);
 
 int f_setown(struct file *filp, unsigned long arg, int force)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ee725ff..8257b1a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1304,6 +1304,8 @@ extern void kill_fasync(struct fasync_struct **, int, int);
 /* only for net: no internal synchronization */
 extern void __kill_fasync(struct fasync_struct *, int, int);
 
+extern int __f_setown_uid(struct file *filp, struct pid *, enum pid_type,
+		uid_t uid, uid_t euid, int force);
 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
-- 
1.6.0.4


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 3/4][cr]: Checkpoint file-owner information
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2010-05-11 22:38   ` [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown() Sukadev Bhattiprolu
  2010-05-11 22:38   ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
@ 2010-05-11 22:38   ` Sukadev Bhattiprolu
  2010-05-11 22:38   ` [RFC][PATCH 4/4][cr]: Restore file_owner info Sukadev Bhattiprolu
  3 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

Checkpoint the file->f_owner information for an open file. This
information will be used to restore the file-owner information
when the application is restarted from the checkpoint.

The file->f_owner information is "private" to each 'struct file' i.e.
fown_struct is not an external object shared with other file structures.
So the information can directly be added to the 'ckpt_hdr_file' object.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 fs/checkpoint.c                |   27 +++++++++++----------------
 include/linux/checkpoint_hdr.h |    5 +++++
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index e036a7a..0fa4ce8 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -168,12 +168,19 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 			   struct ckpt_hdr_file *h)
 {
 	struct cred *f_cred = (struct cred *) file->f_cred;
+	struct pid *pid = file->f_owner.pid;
 
 	h->f_flags = file->f_flags;
 	h->f_mode = file->f_mode;
 	h->f_pos = file->f_pos;
 	h->f_version = file->f_version;
 
+	h->f_owner_pid = pid_nr_ns(pid, ns_of_pid(pid));
+	h->f_owner_pid_type = file->f_owner.pid_type;
+	h->f_owner_uid = file->f_owner.uid;
+	h->f_owner_euid = file->f_owner.euid;
+	h->f_owner_signum = file->f_owner.signum;
+
 	h->f_credref = checkpoint_obj(ctx, f_cred, CKPT_OBJ_CRED);
 	if (h->f_credref < 0)
 		return h->f_credref;
@@ -184,10 +191,10 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 		return h->f_secref;
 	}
 
-	ckpt_debug("file %s credref %d secref %d\n",
-		file->f_dentry->d_name.name, h->f_credref, h->f_secref);
-
-	/* FIX: need also file->f_owner, etc */
+	ckpt_debug("file %s credref %d secref %d, fowner-pid %d, type %d, "
+			"fowner-signum %d\n", file->f_dentry->d_name.name,
+			h->f_credref, h->f_secref, h->f_owner_pid,
+			h->f_owner_pid_type, h->f_owner_signum);
 
 	return 0;
 }
@@ -267,7 +274,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
 	struct fdtable *fdt;
 	int objref, ret;
 	int coe = 0;	/* avoid gcc warning */
-	pid_t pid;
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_DESC);
 	if (!h)
@@ -302,17 +308,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
 	}
 
 	/*
-	 * TODO: Implement c/r of fowner and f_sigio.  Should be
-	 * trivial, but for now we just refuse its checkpoint
-	 */
-	pid = f_getown(file);
-	if (pid) {
-		ret = -EBUSY;
-		ckpt_err(ctx, ret, "%(T)fd %d has an owner (%d)\n", fd);
-		goto out;
-	}
-
-	/*
 	 * if seen first time, this will add 'file' to the objhash, keep
 	 * a reference to it, dump its state while at it.
 	 */
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 790214f..44e2a0d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -570,6 +570,11 @@ struct ckpt_hdr_file {
 	__u64 f_pos;
 	__u64 f_version;
 	__s32 f_secref;
+	__s32 f_owner_pid;
+	__u32 f_owner_pid_type;
+	__u32 f_owner_uid;
+	__u32 f_owner_euid;
+	__s32 f_owner_signum;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_file_generic {
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 3/4][cr]: Checkpoint file-owner information
  2010-05-11 22:38 [RFC][PATCH 0/4][cr]: Checkpoint/restart file-owner info Sukadev Bhattiprolu
                   ` (2 preceding siblings ...)
  2010-05-11 22:38 ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
@ 2010-05-11 22:38 ` Sukadev Bhattiprolu
  2010-05-11 22:38 ` [RFC][PATCH 4/4][cr]: Restore file_owner info Sukadev Bhattiprolu
  4 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Matt Helsley, serue, Containers, linux-fsdevel

Checkpoint the file->f_owner information for an open file. This
information will be used to restore the file-owner information
when the application is restarted from the checkpoint.

The file->f_owner information is "private" to each 'struct file' i.e.
fown_struct is not an external object shared with other file structures.
So the information can directly be added to the 'ckpt_hdr_file' object.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 fs/checkpoint.c                |   27 +++++++++++----------------
 include/linux/checkpoint_hdr.h |    5 +++++
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index e036a7a..0fa4ce8 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -168,12 +168,19 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 			   struct ckpt_hdr_file *h)
 {
 	struct cred *f_cred = (struct cred *) file->f_cred;
+	struct pid *pid = file->f_owner.pid;
 
 	h->f_flags = file->f_flags;
 	h->f_mode = file->f_mode;
 	h->f_pos = file->f_pos;
 	h->f_version = file->f_version;
 
+	h->f_owner_pid = pid_nr_ns(pid, ns_of_pid(pid));
+	h->f_owner_pid_type = file->f_owner.pid_type;
+	h->f_owner_uid = file->f_owner.uid;
+	h->f_owner_euid = file->f_owner.euid;
+	h->f_owner_signum = file->f_owner.signum;
+
 	h->f_credref = checkpoint_obj(ctx, f_cred, CKPT_OBJ_CRED);
 	if (h->f_credref < 0)
 		return h->f_credref;
@@ -184,10 +191,10 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 		return h->f_secref;
 	}
 
-	ckpt_debug("file %s credref %d secref %d\n",
-		file->f_dentry->d_name.name, h->f_credref, h->f_secref);
-
-	/* FIX: need also file->f_owner, etc */
+	ckpt_debug("file %s credref %d secref %d, fowner-pid %d, type %d, "
+			"fowner-signum %d\n", file->f_dentry->d_name.name,
+			h->f_credref, h->f_secref, h->f_owner_pid,
+			h->f_owner_pid_type, h->f_owner_signum);
 
 	return 0;
 }
@@ -267,7 +274,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
 	struct fdtable *fdt;
 	int objref, ret;
 	int coe = 0;	/* avoid gcc warning */
-	pid_t pid;
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_DESC);
 	if (!h)
@@ -302,17 +308,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
 	}
 
 	/*
-	 * TODO: Implement c/r of fowner and f_sigio.  Should be
-	 * trivial, but for now we just refuse its checkpoint
-	 */
-	pid = f_getown(file);
-	if (pid) {
-		ret = -EBUSY;
-		ckpt_err(ctx, ret, "%(T)fd %d has an owner (%d)\n", fd);
-		goto out;
-	}
-
-	/*
 	 * if seen first time, this will add 'file' to the objhash, keep
 	 * a reference to it, dump its state while at it.
 	 */
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 790214f..44e2a0d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -570,6 +570,11 @@ struct ckpt_hdr_file {
 	__u64 f_pos;
 	__u64 f_version;
 	__s32 f_secref;
+	__s32 f_owner_pid;
+	__u32 f_owner_pid_type;
+	__u32 f_owner_uid;
+	__u32 f_owner_euid;
+	__s32 f_owner_signum;
 } __attribute__((aligned(8)));
 
 struct ckpt_hdr_file_generic {
-- 
1.6.0.4


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 4/4][cr]: Restore file_owner info
       [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
                     ` (2 preceding siblings ...)
  2010-05-11 22:38   ` [RFC][PATCH 3/4][cr]: Checkpoint file-owner information Sukadev Bhattiprolu
@ 2010-05-11 22:38   ` Sukadev Bhattiprolu
  3 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

Restore the file-owner information for each 'struct file'.  This is
essentially is like a new fcntl(F_SETOWN) and fcntl(F_SETSIG) calls,
except that the pid, uid, euid and signum values are read from the
checkpoint image.

Signed-off-by: Sukadev Bhattiprolu <sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 fs/checkpoint.c |   34 ++++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index 0fa4ce8..73e2bc9 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -615,6 +615,36 @@ static int attach_file(struct file *file)
 	return fd;
 }
 
+static int restore_file_owner(struct ckpt_ctx *ctx, struct ckpt_hdr_file *h,
+		struct file *file)
+{
+	int ret;
+	struct pid *pid;
+
+	ckpt_debug("restore_file_owner(): uid %u, euid %u, pid %d, type %d\n",
+			h->f_owner_uid, h->f_owner_euid, h->f_owner_pid,
+			h->f_owner_pid_type);
+
+	rcu_read_lock();
+	pid = find_vpid(h->f_owner_pid);
+
+	/*
+	 * TODO: Do we need to force==1 or can it be 0 ? 'force' is used to
+	 * 	 modify the owner, if one is already set. Can it be set when
+	 * 	 we restart an application ?
+	 */
+	ret = __f_setown_uid(file, pid, h->f_owner_pid_type, h->f_owner_uid,
+			h->f_owner_euid, 1);
+	rcu_read_unlock();
+
+	file->f_owner.signum = h->f_owner_signum;
+
+	if (ret < 0)
+		ckpt_err(ctx, ret, "__fsetown_uid() failed\n");
+
+	return ret;
+}
+
 #define CKPT_SETFL_MASK  \
 	(O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
 
@@ -648,6 +678,10 @@ int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 	if (ret < 0)
 		return ret;
 
+	ret = restore_file_owner(ctx, h, file);
+	if (ret < 0)
+		return ret;
+
 	/*
 	 * Normally f_mode is set by open, and modified only via
 	 * fcntl(), so its value now should match that at checkpoint.
-- 
1.6.0.4

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC][PATCH 4/4][cr]: Restore file_owner info
  2010-05-11 22:38 [RFC][PATCH 0/4][cr]: Checkpoint/restart file-owner info Sukadev Bhattiprolu
                   ` (3 preceding siblings ...)
  2010-05-11 22:38 ` [RFC][PATCH 3/4][cr]: Checkpoint file-owner information Sukadev Bhattiprolu
@ 2010-05-11 22:38 ` Sukadev Bhattiprolu
  4 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-11 22:38 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Matt Helsley, serue, Containers, linux-fsdevel

Restore the file-owner information for each 'struct file'.  This is
essentially is like a new fcntl(F_SETOWN) and fcntl(F_SETSIG) calls,
except that the pid, uid, euid and signum values are read from the
checkpoint image.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 fs/checkpoint.c |   34 ++++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index 0fa4ce8..73e2bc9 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -615,6 +615,36 @@ static int attach_file(struct file *file)
 	return fd;
 }
 
+static int restore_file_owner(struct ckpt_ctx *ctx, struct ckpt_hdr_file *h,
+		struct file *file)
+{
+	int ret;
+	struct pid *pid;
+
+	ckpt_debug("restore_file_owner(): uid %u, euid %u, pid %d, type %d\n",
+			h->f_owner_uid, h->f_owner_euid, h->f_owner_pid,
+			h->f_owner_pid_type);
+
+	rcu_read_lock();
+	pid = find_vpid(h->f_owner_pid);
+
+	/*
+	 * TODO: Do we need to force==1 or can it be 0 ? 'force' is used to
+	 * 	 modify the owner, if one is already set. Can it be set when
+	 * 	 we restart an application ?
+	 */
+	ret = __f_setown_uid(file, pid, h->f_owner_pid_type, h->f_owner_uid,
+			h->f_owner_euid, 1);
+	rcu_read_unlock();
+
+	file->f_owner.signum = h->f_owner_signum;
+
+	if (ret < 0)
+		ckpt_err(ctx, ret, "__fsetown_uid() failed\n");
+
+	return ret;
+}
+
 #define CKPT_SETFL_MASK  \
 	(O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
 
@@ -648,6 +678,10 @@ int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
 	if (ret < 0)
 		return ret;
 
+	ret = restore_file_owner(ctx, h, file);
+	if (ret < 0)
+		return ret;
+
 	/*
 	 * Normally f_mode is set by open, and modified only via
 	 * fcntl(), so its value now should match that at checkpoint.
-- 
1.6.0.4


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found]   ` <1273617500-13653-3-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2010-05-12  8:06     ` Serge E. Hallyn
       [not found]       ` <20100512080629.GB2636-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2010-05-12 14:07     ` Matthew Wilcox
  1 sibling, 1 reply; 23+ messages in thread
From: Serge E. Hallyn @ 2010-05-12  8:06 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):

[From patch 2]

> @@ -222,9 +222,16 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
>  	if (err)
>  		return err;
> 
> -	f_modown(filp, pid, type, current_uid(), current_euid(), force);
> +	f_modown(filp, pid, type, uid, euid, force);
>  	return 0;
>  }
> +
> +int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
> +		int force)
> +{
> +	return __f_setown_uid(filp, pid, type, current_uid(), current_euid(),
> +			force);
> +}
>  EXPORT_SYMBOL(__f_setown);

[From patch 4]

> +	/*
> +	 * TODO: Do we need to force==1 or can it be 0 ? 'force' is used to
> +	 * 	 modify the owner, if one is already set. Can it be set when
> +	 * 	 we restart an application ?
> +	 */
> +	ret = __f_setown_uid(file, pid, h->f_owner_pid_type, h->f_owner_uid,
> +			h->f_owner_euid, 1);
> +	rcu_read_unlock();

I think you need to modify how __f_setown() is calling
security_file_set_fowner().  Though I guess noone looks at the
current_uid(), so maybe it's not so important at this point.

(I do wonder whether converting fowner to using a struct cred
is the way to go)

-serge

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found]       ` <20100512080629.GB2636-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-05-12  8:43         ` Serge E. Hallyn
       [not found]           ` <20100512084317.GA8842-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Serge E. Hallyn @ 2010-05-12  8:43 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers

Quoting Serge E. Hallyn (serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
> Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> 
> [From patch 2]
> 
> > @@ -222,9 +222,16 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
> >  	if (err)
> >  		return err;
> > 
> > -	f_modown(filp, pid, type, current_uid(), current_euid(), force);
> > +	f_modown(filp, pid, type, uid, euid, force);
> >  	return 0;
> >  }
> > +
> > +int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
> > +		int force)
> > +{
> > +	return __f_setown_uid(filp, pid, type, current_uid(), current_euid(),
> > +			force);
> > +}
> >  EXPORT_SYMBOL(__f_setown);
> 
> [From patch 4]
> 
> > +	/*
> > +	 * TODO: Do we need to force==1 or can it be 0 ? 'force' is used to
> > +	 * 	 modify the owner, if one is already set. Can it be set when
> > +	 * 	 we restart an application ?
> > +	 */
> > +	ret = __f_setown_uid(file, pid, h->f_owner_pid_type, h->f_owner_uid,
> > +			h->f_owner_euid, 1);
> > +	rcu_read_unlock();
> 
> I think you need to modify how __f_setown() is calling
> security_file_set_fowner().  Though I guess noone looks at the
> current_uid(), so maybe it's not so important at this point.
> 
> (I do wonder whether converting fowner to using a struct cred
> is the way to go)

Well you can probably skip LSM implications at this point.

But I'm worried about the fact that you do no check on uid here.
Note that now if a signal is to be sent, fown->pid will
get signal fow->signum sent by fown->uid.  So this looks like
a way for an unprivileged task to use root privs to kill a
task he shouldn't be able to.

-serge

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found]   ` <1273617500-13653-3-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
  2010-05-12  8:06     ` Serge E. Hallyn
@ 2010-05-12 14:07     ` Matthew Wilcox
  1 sibling, 0 replies; 23+ messages in thread
From: Matthew Wilcox @ 2010-05-12 14:07 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

On Tue, May 11, 2010 at 03:38:18PM -0700, Sukadev Bhattiprolu wrote:
> __f_setown_uid() is same as __f_setown(), except that instead of assuming the
> uid and euid of current process, it expects them to be passed in as parameters.
> 
> This interface will be useful when checkpointing and restarting an application
> that has a 'file owner' specified for any of the application's open files.
> The uid, euid of the process setting up the owner is saved in the checkpoint
> image. When the application is restarted, the save uid and euid values are
> restored.

There are only four callers of __f_setown in the kernel.  I'd rather see
__f_setown converted to take the arguments directly.

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
  2010-05-11 22:38 ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
       [not found]   ` <1273617500-13653-3-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2010-05-12 14:07   ` Matthew Wilcox
  2010-05-12 17:05     ` Sukadev Bhattiprolu
       [not found]     ` <20100512140741.GF10452-6jwH94ZQLHl74goWV3ctuw@public.gmane.org>
  1 sibling, 2 replies; 23+ messages in thread
From: Matthew Wilcox @ 2010-05-12 14:07 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: Oren Laadan, Matt Helsley, serue, Containers, linux-fsdevel

On Tue, May 11, 2010 at 03:38:18PM -0700, Sukadev Bhattiprolu wrote:
> __f_setown_uid() is same as __f_setown(), except that instead of assuming the
> uid and euid of current process, it expects them to be passed in as parameters.
> 
> This interface will be useful when checkpointing and restarting an application
> that has a 'file owner' specified for any of the application's open files.
> The uid, euid of the process setting up the owner is saved in the checkpoint
> image. When the application is restarted, the save uid and euid values are
> restored.

There are only four callers of __f_setown in the kernel.  I'd rather see
__f_setown converted to take the arguments directly.

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found]           ` <20100512084317.GA8842-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-05-12 16:59             ` Sukadev Bhattiprolu
       [not found]               ` <20100512165922.GA11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-12 16:59 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Containers

Serge E. Hallyn [serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org] wrote:
| Quoting Serge E. Hallyn (serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
| > Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
| > 
| > [From patch 2]
| > 
| > > @@ -222,9 +222,16 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
| > >  	if (err)
| > >  		return err;
| > > 
| > > -	f_modown(filp, pid, type, current_uid(), current_euid(), force);
| > > +	f_modown(filp, pid, type, uid, euid, force);
| > >  	return 0;
| > >  }
| > > +
| > > +int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
| > > +		int force)
| > > +{
| > > +	return __f_setown_uid(filp, pid, type, current_uid(), current_euid(),
| > > +			force);
| > > +}
| > >  EXPORT_SYMBOL(__f_setown);
| > 
| > [From patch 4]
| > 
| > > +	/*
| > > +	 * TODO: Do we need to force==1 or can it be 0 ? 'force' is used to
| > > +	 * 	 modify the owner, if one is already set. Can it be set when
| > > +	 * 	 we restart an application ?
| > > +	 */
| > > +	ret = __f_setown_uid(file, pid, h->f_owner_pid_type, h->f_owner_uid,
| > > +			h->f_owner_euid, 1);
| > > +	rcu_read_unlock();
| > 
| > I think you need to modify how __f_setown() is calling
| > security_file_set_fowner().  Though I guess noone looks at the
| > current_uid(), so maybe it's not so important at this point.
| > 
| > (I do wonder whether converting fowner to using a struct cred
| > is the way to go)
| 
| Well you can probably skip LSM implications at this point.
| 
| But I'm worried about the fact that you do no check on uid here.
| Note that now if a signal is to be sent, fown->pid will
| get signal fow->signum sent by fown->uid.  So this looks like
| a way for an unprivileged task to use root privs to kill a
| task he shouldn't be able to.

Yes, the uid should not be trusted since the checkpoint image can be
tampered.  Matt pointed it out too.

The process P1 that called fcntl(F_SETOWN) may have exited and hence
may not in the checkpoint-image. So during restart, some other process
will need to act for P1. Would requiring CAP_SETUID, like we do for
restoring creds be an overkill ?

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found]     ` <20100512140741.GF10452-6jwH94ZQLHl74goWV3ctuw@public.gmane.org>
@ 2010-05-12 17:05       ` Sukadev Bhattiprolu
  0 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-12 17:05 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

Matthew Wilcox [matthew-Ztpu424NOJ8@public.gmane.org] wrote:
| On Tue, May 11, 2010 at 03:38:18PM -0700, Sukadev Bhattiprolu wrote:
| > __f_setown_uid() is same as __f_setown(), except that instead of assuming the
| > uid and euid of current process, it expects them to be passed in as parameters.
| > 
| > This interface will be useful when checkpointing and restarting an application
| > that has a 'file owner' specified for any of the application's open files.
| > The uid, euid of the process setting up the owner is saved in the checkpoint
| > image. When the application is restarted, the save uid and euid values are
| > restored.
| 
| There are only four callers of __f_setown in the kernel.  I'd rather see
| __f_setown converted to take the arguments directly.

Ok.

Sukadev

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
  2010-05-12 14:07   ` Matthew Wilcox
@ 2010-05-12 17:05     ` Sukadev Bhattiprolu
       [not found]     ` <20100512140741.GF10452-6jwH94ZQLHl74goWV3ctuw@public.gmane.org>
  1 sibling, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-12 17:05 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Oren Laadan, Matt Helsley, serue, Containers, linux-fsdevel

Matthew Wilcox [matthew@wil.cx] wrote:
| On Tue, May 11, 2010 at 03:38:18PM -0700, Sukadev Bhattiprolu wrote:
| > __f_setown_uid() is same as __f_setown(), except that instead of assuming the
| > uid and euid of current process, it expects them to be passed in as parameters.
| > 
| > This interface will be useful when checkpointing and restarting an application
| > that has a 'file owner' specified for any of the application's open files.
| > The uid, euid of the process setting up the owner is saved in the checkpoint
| > image. When the application is restarted, the save uid and euid values are
| > restored.
| 
| There are only four callers of __f_setown in the kernel.  I'd rather see
| __f_setown converted to take the arguments directly.

Ok.

Sukadev

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
       [not found]   ` <1273617500-13653-2-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2010-05-12 17:05     ` Jamie Lokier
  0 siblings, 0 replies; 23+ messages in thread
From: Jamie Lokier @ 2010-05-12 17:05 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

Sukadev Bhattiprolu wrote:
> Checkpoint/restart of file-owner.
> 
> Add uid, euid parameters to f_modown(). These parameters will be needed
> when restarting an application (and hence restoring the file information),
> from a checkpoint image.

This is used to make sure I/O signals on sockets, ttys, devices and so
on are delivered to a particular process.

If any of those signals are lost when an event happens around the same
time as c/r (for example, more data arriving on a pipe, a device
becomes readable/writable, or room becoming available to write, or
urgent data on a socket), a process depending on it can get stuck -
unless the process knows that c/r happened, so it knows to call
select() on all those fds after the c/r.

Can you say if the c/r avoids that kind of race condition?

Thanks,
-- Jamie

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
  2010-05-11 22:38 ` [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown() Sukadev Bhattiprolu
       [not found]   ` <1273617500-13653-2-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
@ 2010-05-12 17:05   ` Jamie Lokier
  2010-05-12 17:30     ` Sukadev Bhattiprolu
       [not found]     ` <20100512170513.GD19314-yetKDKU6eevNLxjTenLetw@public.gmane.org>
  1 sibling, 2 replies; 23+ messages in thread
From: Jamie Lokier @ 2010-05-12 17:05 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: Oren Laadan, Matt Helsley, serue, Containers, linux-fsdevel

Sukadev Bhattiprolu wrote:
> Checkpoint/restart of file-owner.
> 
> Add uid, euid parameters to f_modown(). These parameters will be needed
> when restarting an application (and hence restoring the file information),
> from a checkpoint image.

This is used to make sure I/O signals on sockets, ttys, devices and so
on are delivered to a particular process.

If any of those signals are lost when an event happens around the same
time as c/r (for example, more data arriving on a pipe, a device
becomes readable/writable, or room becoming available to write, or
urgent data on a socket), a process depending on it can get stuck -
unless the process knows that c/r happened, so it knows to call
select() on all those fds after the c/r.

Can you say if the c/r avoids that kind of race condition?

Thanks,
-- Jamie

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
       [not found]     ` <20100512170513.GD19314-yetKDKU6eevNLxjTenLetw@public.gmane.org>
@ 2010-05-12 17:30       ` Sukadev Bhattiprolu
  0 siblings, 0 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-12 17:30 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers

Jamie Lokier [jamie-yetKDKU6eevNLxjTenLetw@public.gmane.org] wrote:
| Sukadev Bhattiprolu wrote:
| > Checkpoint/restart of file-owner.
| > 
| > Add uid, euid parameters to f_modown(). These parameters will be needed
| > when restarting an application (and hence restoring the file information),
| > from a checkpoint image.
| 
| This is used to make sure I/O signals on sockets, ttys, devices and so
| on are delivered to a particular process.

Good point.
| 
| If any of those signals are lost when an event happens around the same

Well, signals are not lost across C/R - if they were pending at
checkpoint, they will be pending on restart.

| time as c/r (for example, more data arriving on a pipe, a device
| becomes readable/writable, or room becoming available to write, or
| urgent data on a socket), a process depending on it can get stuck -
| unless the process knows that c/r happened, so it knows to call
| select() on all those fds after the c/r.

Real devices like ttys are still TBD from C/R perspective - so data
arriving from the tty is still a problem. Applications using such
devices cannot be checkpointed.

But for pipes, (and sockets ?) we expect that both ends are checkpointed
as a container. So before the container is frozen for checkpoint, either
both the write() and SIGIO (due to new data on the pipe) both happen or
neither.


Sukadev

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
  2010-05-12 17:05   ` Jamie Lokier
@ 2010-05-12 17:30     ` Sukadev Bhattiprolu
  2010-05-12 20:12       ` Oren Laadan
       [not found]       ` <20100512173048.GC11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
       [not found]     ` <20100512170513.GD19314-yetKDKU6eevNLxjTenLetw@public.gmane.org>
  1 sibling, 2 replies; 23+ messages in thread
From: Sukadev Bhattiprolu @ 2010-05-12 17:30 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: Oren Laadan, Matt Helsley, serue, Containers, linux-fsdevel

Jamie Lokier [jamie@shareable.org] wrote:
| Sukadev Bhattiprolu wrote:
| > Checkpoint/restart of file-owner.
| > 
| > Add uid, euid parameters to f_modown(). These parameters will be needed
| > when restarting an application (and hence restoring the file information),
| > from a checkpoint image.
| 
| This is used to make sure I/O signals on sockets, ttys, devices and so
| on are delivered to a particular process.

Good point.
| 
| If any of those signals are lost when an event happens around the same

Well, signals are not lost across C/R - if they were pending at
checkpoint, they will be pending on restart.

| time as c/r (for example, more data arriving on a pipe, a device
| becomes readable/writable, or room becoming available to write, or
| urgent data on a socket), a process depending on it can get stuck -
| unless the process knows that c/r happened, so it knows to call
| select() on all those fds after the c/r.

Real devices like ttys are still TBD from C/R perspective - so data
arriving from the tty is still a problem. Applications using such
devices cannot be checkpointed.

But for pipes, (and sockets ?) we expect that both ends are checkpointed
as a container. So before the container is frozen for checkpoint, either
both the write() and SIGIO (due to new data on the pipe) both happen or
neither.


Sukadev

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 2/4][cr]: Define __f_setown_uid()
       [not found]               ` <20100512165922.GA11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-05-12 17:54                 ` Serge E. Hallyn
  0 siblings, 0 replies; 23+ messages in thread
From: Serge E. Hallyn @ 2010-05-12 17:54 UTC (permalink / raw)
  To: Sukadev Bhattiprolu; +Cc: Containers

Quoting Sukadev Bhattiprolu (sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org):
> The process P1 that called fcntl(F_SETOWN) may have exited and hence
> may not in the checkpoint-image. So during restart, some other process
> will need to act for P1. Would requiring CAP_SETUID, like we do for
> restoring creds be an overkill ?

Yeah I think CAP_SETUID is overkill.  Yes, it's what would have been
needed to cause the condition originally, but the only real implication
is CAP_KILL.  And since the application might have originally run with
euid=1001 and suid=1002, done the fcntl, and then done
setresuid(1002,1002,1002), CAP_SETUID may not have originaly been
necessary (if I'm thinking straight).

In any case, CAP_KILL is what you can do with the result, so I think
that suffices.

-serge

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
       [not found]       ` <20100512173048.GC11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2010-05-12 20:12         ` Oren Laadan
  0 siblings, 0 replies; 23+ messages in thread
From: Oren Laadan @ 2010-05-12 20:12 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA, Containers, Jamie Lokier



Sukadev Bhattiprolu wrote:
> Jamie Lokier [jamie-yetKDKU6eevNLxjTenLetw@public.gmane.org] wrote:
> | Sukadev Bhattiprolu wrote:
> | > Checkpoint/restart of file-owner.
> | > 
> | > Add uid, euid parameters to f_modown(). These parameters will be needed
> | > when restarting an application (and hence restoring the file information),
> | > from a checkpoint image.
> | 
> | This is used to make sure I/O signals on sockets, ttys, devices and so
> | on are delivered to a particular process.
> 
> Good point.
> | 
> | If any of those signals are lost when an event happens around the same
> 
> Well, signals are not lost across C/R - if they were pending at
> checkpoint, they will be pending on restart.
> 
> | time as c/r (for example, more data arriving on a pipe, a device
> | becomes readable/writable, or room becoming available to write, or
> | urgent data on a socket), a process depending on it can get stuck -
> | unless the process knows that c/r happened, so it knows to call
> | select() on all those fds after the c/r.
> 
> Real devices like ttys are still TBD from C/R perspective - so data
> arriving from the tty is still a problem. Applications using such
> devices cannot be checkpointed.
> 
> But for pipes, (and sockets ?) we expect that both ends are checkpointed
> as a container. So before the container is frozen for checkpoint, either
> both the write() and SIGIO (due to new data on the pipe) both happen or
> neither.

To elaborate on this:

1) In a "full-container" checkpoint, everything must belong to the
container. Thus, pipes, sockets, and ptys are either all-in or
entirely out, and are inactive when the container is frozen - so
will not generate signals.

2) Only virtual devices can be checkpointed. Use of physical devices
will cause the checkpoint to fail. For instance, c/r supports ptys,
but not ttys. Virtual devices will have to behave nicely.

3) For network devices, the network must be frozen (in the case of
checkpoint for migration) in addition to the container. This protects
against signals due to incoming data (SIGIO, SIGURG).

4) The state of timers is saved atomically with signals.

5) Processes that waited in select/poll when checkpointed, will be
forced to re-invoke the syscall once restart succeeds.

----

All that said, it is still useful sometimes for userspace to learn
about a checkpoint or restart that took place. I plan to add some
interface by which a program can request to be notified about such
events (e.g. by a signal).

Oren.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown()
  2010-05-12 17:30     ` Sukadev Bhattiprolu
@ 2010-05-12 20:12       ` Oren Laadan
       [not found]       ` <20100512173048.GC11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  1 sibling, 0 replies; 23+ messages in thread
From: Oren Laadan @ 2010-05-12 20:12 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: Jamie Lokier, Matt Helsley, serue, Containers, linux-fsdevel



Sukadev Bhattiprolu wrote:
> Jamie Lokier [jamie@shareable.org] wrote:
> | Sukadev Bhattiprolu wrote:
> | > Checkpoint/restart of file-owner.
> | > 
> | > Add uid, euid parameters to f_modown(). These parameters will be needed
> | > when restarting an application (and hence restoring the file information),
> | > from a checkpoint image.
> | 
> | This is used to make sure I/O signals on sockets, ttys, devices and so
> | on are delivered to a particular process.
> 
> Good point.
> | 
> | If any of those signals are lost when an event happens around the same
> 
> Well, signals are not lost across C/R - if they were pending at
> checkpoint, they will be pending on restart.
> 
> | time as c/r (for example, more data arriving on a pipe, a device
> | becomes readable/writable, or room becoming available to write, or
> | urgent data on a socket), a process depending on it can get stuck -
> | unless the process knows that c/r happened, so it knows to call
> | select() on all those fds after the c/r.
> 
> Real devices like ttys are still TBD from C/R perspective - so data
> arriving from the tty is still a problem. Applications using such
> devices cannot be checkpointed.
> 
> But for pipes, (and sockets ?) we expect that both ends are checkpointed
> as a container. So before the container is frozen for checkpoint, either
> both the write() and SIGIO (due to new data on the pipe) both happen or
> neither.

To elaborate on this:

1) In a "full-container" checkpoint, everything must belong to the
container. Thus, pipes, sockets, and ptys are either all-in or
entirely out, and are inactive when the container is frozen - so
will not generate signals.

2) Only virtual devices can be checkpointed. Use of physical devices
will cause the checkpoint to fail. For instance, c/r supports ptys,
but not ttys. Virtual devices will have to behave nicely.

3) For network devices, the network must be frozen (in the case of
checkpoint for migration) in addition to the container. This protects
against signals due to incoming data (SIGIO, SIGURG).

4) The state of timers is saved atomically with signals.

5) Processes that waited in select/poll when checkpointed, will be
forced to re-invoke the syscall once restart succeeds.

----

All that said, it is still useful sometimes for userspace to learn
about a checkpoint or restart that took place. I plan to add some
interface by which a program can request to be notified about such
events (e.g. by a signal).

Oren.


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2010-05-12 20:12 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-11 22:38 [RFC][PATCH 0/4][cr]: Checkpoint/restart file-owner info Sukadev Bhattiprolu
     [not found] ` <1273617500-13653-1-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2010-05-11 22:38   ` [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown() Sukadev Bhattiprolu
2010-05-11 22:38   ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
2010-05-11 22:38   ` [RFC][PATCH 3/4][cr]: Checkpoint file-owner information Sukadev Bhattiprolu
2010-05-11 22:38   ` [RFC][PATCH 4/4][cr]: Restore file_owner info Sukadev Bhattiprolu
2010-05-11 22:38 ` [RFC][PATCH 1/4][cr]: Add uid, euid params to f_modown() Sukadev Bhattiprolu
     [not found]   ` <1273617500-13653-2-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2010-05-12 17:05     ` Jamie Lokier
2010-05-12 17:05   ` Jamie Lokier
2010-05-12 17:30     ` Sukadev Bhattiprolu
2010-05-12 20:12       ` Oren Laadan
     [not found]       ` <20100512173048.GC11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-05-12 20:12         ` Oren Laadan
     [not found]     ` <20100512170513.GD19314-yetKDKU6eevNLxjTenLetw@public.gmane.org>
2010-05-12 17:30       ` Sukadev Bhattiprolu
2010-05-11 22:38 ` [RFC][PATCH 2/4][cr]: Define __f_setown_uid() Sukadev Bhattiprolu
     [not found]   ` <1273617500-13653-3-git-send-email-sukadev-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2010-05-12  8:06     ` Serge E. Hallyn
     [not found]       ` <20100512080629.GB2636-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-05-12  8:43         ` Serge E. Hallyn
     [not found]           ` <20100512084317.GA8842-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-05-12 16:59             ` Sukadev Bhattiprolu
     [not found]               ` <20100512165922.GA11144-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2010-05-12 17:54                 ` Serge E. Hallyn
2010-05-12 14:07     ` Matthew Wilcox
2010-05-12 14:07   ` Matthew Wilcox
2010-05-12 17:05     ` Sukadev Bhattiprolu
     [not found]     ` <20100512140741.GF10452-6jwH94ZQLHl74goWV3ctuw@public.gmane.org>
2010-05-12 17:05       ` Sukadev Bhattiprolu
2010-05-11 22:38 ` [RFC][PATCH 3/4][cr]: Checkpoint file-owner information Sukadev Bhattiprolu
2010-05-11 22:38 ` [RFC][PATCH 4/4][cr]: Restore file_owner info Sukadev Bhattiprolu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.