* [PATCH 0/2] IPC: message queue checkpoint support
@ 2012-02-15 16:54 Stanislav Kinsbursky
2012-02-15 16:54 ` [PATCH 1/2] IPC: message queue receive cleanup Stanislav Kinsbursky
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Stanislav Kinsbursky @ 2012-02-15 16:54 UTC (permalink / raw)
To: akpm; +Cc: serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf,
dhowells
This patch set conatins v2 of patch
"IPC: message queue stealing feature introduced"
The following series consists of:
---
Stanislav Kinsbursky (2):
IPC: message queue receive cleanup
IPC: message queue stealing feature introduced
arch/tile/kernel/compat.c | 11 +-----
include/linux/compat.h | 2 +
include/linux/msg.h | 13 ++++++-
ipc/compat.c | 62 ++++++++++++++++++++++++--------
ipc/msg.c | 86 +++++++++++++++++++++++++++++++++++----------
5 files changed, 126 insertions(+), 48 deletions(-)
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 1/2] IPC: message queue receive cleanup 2012-02-15 16:54 [PATCH 0/2] IPC: message queue checkpoint support Stanislav Kinsbursky @ 2012-02-15 16:54 ` Stanislav Kinsbursky 2012-04-04 23:05 ` Andrew Morton 2012-02-15 16:54 ` [PATCH 2/2] IPC: message queue stealing feature introduced Stanislav Kinsbursky 2012-03-05 13:04 ` [CRIU] [PATCH 0/2] IPC: message queue checkpoint support Kinsbursky Stanislav 2 siblings, 1 reply; 11+ messages in thread From: Stanislav Kinsbursky @ 2012-02-15 16:54 UTC (permalink / raw) To: akpm; +Cc: serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf, dhowells This patch moves all message related manipulation into one function msg_fill(). Actually, two functions because of the compat one. Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> --- arch/tile/kernel/compat.c | 11 +---------- include/linux/compat.h | 2 ++ include/linux/msg.h | 5 +++-- ipc/compat.c | 33 +++++++++++++++++---------------- ipc/msg.c | 44 +++++++++++++++++++++++--------------------- 5 files changed, 46 insertions(+), 49 deletions(-) diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index bf5e9d7..d963b45 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -121,16 +121,7 @@ long tile_compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp, size_t msgsz, long msgtyp, int msgflg) { - long err, mtype; - - err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); - if (err < 0) - goto out; - - if (put_user(mtype, &msgp->mtype)) - err = -EFAULT; - out: - return err; + return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, compat_do_msg_fill); } /* Provide the compat syscall number to call mapping. */ diff --git a/include/linux/compat.h b/include/linux/compat.h index 41c9f65..7389209 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -224,6 +224,7 @@ struct compat_sysinfo; struct compat_sysctl_args; struct compat_kexec_segment; struct compat_mq_attr; +struct msg_msg; extern void compat_exit_robust_list(struct task_struct *curr); @@ -236,6 +237,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, long compat_sys_semctl(int first, int second, int third, void __user *uptr); long compat_sys_msgsnd(int first, int second, int third, void __user *uptr); +long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz); long compat_sys_msgrcv(int first, int second, int msgtyp, int third, int version, void __user *uptr); long compat_sys_msgctl(int first, int second, void __user *uptr); diff --git a/include/linux/msg.h b/include/linux/msg.h index 6689e73..9411b76 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -105,8 +105,9 @@ struct msg_queue { /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); -extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long msgtyp, int msgflg); +extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_fill)(void __user *, struct msg_msg *, size_t )); #endif /* __KERNEL__ */ diff --git a/ipc/compat.c b/ipc/compat.c index b828244..38c1ee5 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -328,13 +328,23 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) return do_msgsnd(first, type, up->mtext, second, third); } +long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) +{ + struct compat_msgbuf __user *msgp; + size_t msgsz; + + if (put_user(msg->m_type, &msgp->mtype)) + return -EFAULT; + + msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; + if (store_msg(msgp->mtext, msg, msgsz)) + return -EFAULT; + return msgsz; +} + long compat_sys_msgrcv(int first, int second, int msgtyp, int third, int version, void __user *uptr) { - struct compat_msgbuf __user *up; - long type; - int err; - if (first < 0) return -EINVAL; if (second < 0) @@ -342,23 +352,14 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, if (!version) { struct compat_ipc_kludge ipck; - err = -EINVAL; if (!uptr) - goto out; - err = -EFAULT; + return -EINVAL; if (copy_from_user (&ipck, uptr, sizeof(ipck))) - goto out; + return -EFAULT; uptr = compat_ptr(ipck.msgp); msgtyp = ipck.msgtyp; } - up = uptr; - err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third); - if (err < 0) - goto out; - if (put_user(type, &up->mtype)) - err = -EFAULT; -out: - return err; + return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill); } static inline int get_compat_msqid64(struct msqid64_ds *m64, diff --git a/ipc/msg.c b/ipc/msg.c index 303362b..1d34c11 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -762,15 +762,30 @@ static inline int convert_mode(long *msgtyp, int msgflg) return SEARCH_EQUAL; } -long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long msgtyp, int msgflg) +static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) +{ + struct msgbuf __user *msgp = dest; + size_t msgsz; + + if (put_user(msg->m_type, &msgp->mtype)) + return -EFAULT; + + msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; + if (store_msg(msgp->mtext, msg, msgsz)) + return -EFAULT; + return msgsz; +} + +long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_fill)(void __user *, struct msg_msg *, size_t )) { struct msg_queue *msq; struct msg_msg *msg; int mode; struct ipc_namespace *ns; - if (msqid < 0 || (long) msgsz < 0) + if (msqid < 0 || (long) bufsz < 0) return -EINVAL; mode = convert_mode(&msgtyp, msgflg); ns = current->nsproxy->ipc_ns; @@ -814,7 +829,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext, * Found a suitable message. * Unlink it from the queue. */ - if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { + if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { msg = ERR_PTR(-E2BIG); goto out_unlock; } @@ -841,7 +856,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext, if (msgflg & MSG_NOERROR) msr_d.r_maxsize = INT_MAX; else - msr_d.r_maxsize = msgsz; + msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); current->state = TASK_INTERRUPTIBLE; msg_unlock(msq); @@ -904,29 +919,16 @@ out_unlock: if (IS_ERR(msg)) return PTR_ERR(msg); - msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; - *pmtype = msg->m_type; - if (store_msg(mtext, msg, msgsz)) - msgsz = -EFAULT; - + bufsz = msg_fill(buf, msg, bufsz); free_msg(msg); - return msgsz; + return bufsz; } SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, long, msgtyp, int, msgflg) { - long err, mtype; - - err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); - if (err < 0) - goto out; - - if (put_user(mtype, &msgp->mtype)) - err = -EFAULT; -out: - return err; + return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); } #ifdef CONFIG_PROC_FS ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] IPC: message queue receive cleanup 2012-02-15 16:54 ` [PATCH 1/2] IPC: message queue receive cleanup Stanislav Kinsbursky @ 2012-04-04 23:05 ` Andrew Morton 2012-04-04 23:39 ` Chris Metcalf 0 siblings, 1 reply; 11+ messages in thread From: Andrew Morton @ 2012-04-04 23:05 UTC (permalink / raw) To: Stanislav Kinsbursky Cc: serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf, dhowells, Arnd Bergmann On Wed, 15 Feb 2012 20:54:31 +0400 Stanislav Kinsbursky <skinsbursky@parallels.com> wrote: > This patch moves all message related manipulation into one function msg_fill(). > Actually, two functions because of the compat one. > > Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> > > --- > arch/tile/kernel/compat.c | 11 +---------- > include/linux/compat.h | 2 ++ > include/linux/msg.h | 5 +++-- > ipc/compat.c | 33 +++++++++++++++++---------------- > ipc/msg.c | 44 +++++++++++++++++++++++--------------------- The arch/tile/kernel/compat.c change throws a big reject against current mainline because the code it is patching has disappeared and I didn't check where it went. Check this, please? Arnd, could you please pass an eye across this work? Thanks. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] IPC: message queue receive cleanup 2012-04-04 23:05 ` Andrew Morton @ 2012-04-04 23:39 ` Chris Metcalf 0 siblings, 0 replies; 11+ messages in thread From: Chris Metcalf @ 2012-04-04 23:39 UTC (permalink / raw) To: Andrew Morton Cc: Stanislav Kinsbursky, serge.hallyn, criu, lucas.demarchi, linux-kernel, dhowells, Arnd Bergmann On 4/4/2012 7:05 PM, Andrew Morton wrote: > On Wed, 15 Feb 2012 20:54:31 +0400 > Stanislav Kinsbursky <skinsbursky@parallels.com> wrote: > >> This patch moves all message related manipulation into one function msg_fill(). >> Actually, two functions because of the compat one. >> >> Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> >> >> --- >> arch/tile/kernel/compat.c | 11 +---------- >> include/linux/compat.h | 2 ++ >> include/linux/msg.h | 5 +++-- >> ipc/compat.c | 33 +++++++++++++++++---------------- >> ipc/msg.c | 44 +++++++++++++++++++++++--------------------- > The arch/tile/kernel/compat.c change throws a big reject against > current mainline because the code it is patching has disappeared and I > didn't check where it went. Check this, please? I merged that code into ipc/compat.c with a new CONFIG_ARCH_WANT_OLD_COMPAT_IPC guard in commit 48b25c43e6ee. > Arnd, could you please pass an eye across this work? Thanks. -- Chris Metcalf, Tilera Corp. http://www.tilera.com ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 2/2] IPC: message queue stealing feature introduced 2012-02-15 16:54 [PATCH 0/2] IPC: message queue checkpoint support Stanislav Kinsbursky 2012-02-15 16:54 ` [PATCH 1/2] IPC: message queue receive cleanup Stanislav Kinsbursky @ 2012-02-15 16:54 ` Stanislav Kinsbursky 2012-04-04 23:12 ` Andrew Morton 2012-03-05 13:04 ` [CRIU] [PATCH 0/2] IPC: message queue checkpoint support Kinsbursky Stanislav 2 siblings, 1 reply; 11+ messages in thread From: Stanislav Kinsbursky @ 2012-02-15 16:54 UTC (permalink / raw) To: akpm; +Cc: serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf, dhowells v2: 1) compat functions added. 2) message slot size in array is now aligned by struct msgbuf_a. 3) check for enough free space in buffer before message copying added. 4) if MSG_STEAL flag is set, then do_msgrcv() returns number of bytes written to buffer. 5) flag MSG_NOERROR is ignored if MSG_STEAL flag is set. This patch is required for checkpoint/restore in userspace. IOW, c/r requires some way to get all pending IPC messages without deleting them for the queue (checkpoint can fail and in this case tasks will be resumed, so queue have to be valid). To achive this, new operation flag MSG_STEAL for sys_msgrcv() system call introduced. If this flag is set, then passed struct msgbuf pointer will be used for storing array of structures: struct msgbuf_a { long mtype; /* type of message */ int msize; /* size of message */ char mtext[0]; /* message text */ }; each of which will be followed by corresponding message data. Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com> --- include/linux/msg.h | 8 ++++++++ ipc/compat.c | 31 ++++++++++++++++++++++++++++++- ipc/msg.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/include/linux/msg.h b/include/linux/msg.h index 9411b76..5eb43a2 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -11,6 +11,7 @@ /* msgrcv options */ #define MSG_NOERROR 010000 /* no error if message is too big */ #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ +#define MSG_STEAL 040000 /* copy (not remove) all queue messages */ /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct msqid_ds { @@ -38,6 +39,13 @@ struct msgbuf { char mtext[1]; /* message text */ }; +/* message buffer for msgrcv in case of array calls */ +struct msgbuf_a { + long mtype; /* type of message */ + int msize; /* size of message */ + char mtext[0]; /* message text */ +}; + /* buffer for msgctl calls IPC_INFO, MSG_INFO */ struct msginfo { int msgpool; diff --git a/ipc/compat.c b/ipc/compat.c index 38c1ee5..d2b34f8 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -38,6 +38,12 @@ struct compat_msgbuf { char mtext[1]; }; +struct compat_msgbuf_a { + compat_long_t mtype; + int msize; + char mtext[0]; +}; + struct compat_ipc_perm { key_t key; __compat_uid_t uid; @@ -328,6 +334,27 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) return do_msgsnd(first, type, up->mtext, second, third); } + +static long compat_do_msg_steal(void __user *dest, struct msg_msg *msg, size_t bufsz) +{ + struct compat_msgbuf_a __user *msgp = dest; + size_t msgsz; + + msgsz = roundup(sizeof(struct msgbuf_a) + msg->m_ts, + __alignof__(struct msgbuf_a)); + + if (bufsz < msgsz) + return -E2BIG; + + if (put_user(msg->m_type, &msgp->mtype)) + return -EFAULT; + if (put_user(msg->m_ts, &msgp->msize)) + return -EFAULT; + if (store_msg(msgp->mtext, msg, msg->m_ts)) + return -EFAULT; + return msgsz; +} + long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) { struct compat_msgbuf __user *msgp; @@ -359,7 +386,9 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, uptr = compat_ptr(ipck.msgp); msgtyp = ipck.msgtyp; } - return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill); + return do_msgrcv(first, uptr, second, msgtyp, third, + (third & MSG_STEAL) ? compat_do_msg_steal + : compat_do_msg_fill); } static inline int get_compat_msqid64(struct msqid64_ds *m64, diff --git a/ipc/msg.c b/ipc/msg.c index 1d34c11..64f83b6 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -762,6 +762,33 @@ static inline int convert_mode(long *msgtyp, int msgflg) return SEARCH_EQUAL; } +static long do_msg_steal(void __user *dest, struct msg_msg *msg, size_t bufsz) +{ + struct msgbuf_a __user *msgp = dest; + size_t msgsz; + + /* + * Message size have to be aligned. + */ + msgsz = roundup(sizeof(struct msgbuf_a) + msg->m_ts, + __alignof__(struct msgbuf_a)); + + /* + * No need to support MSG_NOERROR flag because truncated message array + * is useless. + */ + if (bufsz < msgsz) + return -E2BIG; + + if (put_user(msg->m_type, &msgp->mtype)) + return -EFAULT; + if (put_user(msg->m_ts, &msgp->msize)) + return -EFAULT; + if (store_msg(msgp->mtext, msg, msg->m_ts)) + return -EFAULT; + return msgsz; +} + static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) { struct msgbuf __user *msgp = dest; @@ -784,6 +811,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct msg_msg *msg; int mode; struct ipc_namespace *ns; + size_t arrsz = bufsz; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; @@ -817,6 +845,16 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, walk_msg->m_type != 1) { msg = walk_msg; msgtyp = walk_msg->m_type - 1; + } else if (msgflg & MSG_STEAL) { + long ret; + + ret = msg_fill(buf, msg, arrsz); + if (ret < 0) { + msg = ERR_PTR(ret); + goto out_unlock; + } + buf += ret; + arrsz -= ret; } else { msg = walk_msg; break; @@ -825,6 +863,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, tmp = tmp->next; } if (!IS_ERR(msg)) { + if (msgflg & MSG_STEAL) + goto out_unlock; /* * Found a suitable message. * Unlink it from the queue. @@ -919,6 +959,9 @@ out_unlock: if (IS_ERR(msg)) return PTR_ERR(msg); + if (msgflg & MSG_STEAL) + return bufsz - arrsz; + bufsz = msg_fill(buf, msg, bufsz); free_msg(msg); @@ -928,7 +971,8 @@ out_unlock: SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, long, msgtyp, int, msgflg) { - return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); + return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, + (msgflg & MSG_STEAL) ? do_msg_steal : do_msg_fill); } #ifdef CONFIG_PROC_FS ^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] IPC: message queue stealing feature introduced 2012-02-15 16:54 ` [PATCH 2/2] IPC: message queue stealing feature introduced Stanislav Kinsbursky @ 2012-04-04 23:12 ` Andrew Morton 2012-04-04 23:50 ` Michael Kerrisk (man-pages) 2012-04-05 10:53 ` Stanislav Kinsbursky 0 siblings, 2 replies; 11+ messages in thread From: Andrew Morton @ 2012-04-04 23:12 UTC (permalink / raw) To: Stanislav Kinsbursky Cc: serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf, dhowells, Michael Kerrisk, Arnd Bergmann On Wed, 15 Feb 2012 20:54:39 +0400 Stanislav Kinsbursky <skinsbursky@parallels.com> wrote: > v2: > 1) compat functions added. > 2) message slot size in array is now aligned by struct msgbuf_a. > 3) check for enough free space in buffer before message copying added. > 4) if MSG_STEAL flag is set, then do_msgrcv() returns number of bytes written > to buffer. > 5) flag MSG_NOERROR is ignored if MSG_STEAL flag is set. > > This patch is required for checkpoint/restore in userspace. > IOW, c/r requires some way to get all pending IPC messages without deleting > them for the queue (checkpoint can fail and in this case tasks will be resumed, > so queue have to be valid). > To achive this, new operation flag MSG_STEAL for sys_msgrcv() system call > introduced. > If this flag is set, then passed struct msgbuf pointer will be used for storing > array of structures: > > struct msgbuf_a { > long mtype; /* type of message */ > int msize; /* size of message */ > char mtext[0]; /* message text */ > }; > > each of which will be followed by corresponding message data. > I'd be a bit more comfortable if there was some sign that other c/r developers have reviewed and tested this and have successfully used it in c/r operation testing? We've been trying to isolate the c/r-specific functions inside #ifdef CONFIG_CHECKPOINT_RESTORE, but this patch doesn't do that. I have been encouraging this isolation so that people who aren't using c/r don't have to carry the overhead it adds and so that we can more easily hunt down and remove everything if the entire c/r project doesn't work out successfully. This patch modifies the sys_msgrcv() API and so we should update the manpage for that syscall. Please work with Michael on this. What does all the compat fiddling actually do? I guess it's needed for checkpoint and restore of 32-bit userspace on 64-bit kernels? Does c/r as a whole support that? It should. How well tested is this? ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] IPC: message queue stealing feature introduced 2012-04-04 23:12 ` Andrew Morton @ 2012-04-04 23:50 ` Michael Kerrisk (man-pages) 2012-04-04 23:59 ` Michael Kerrisk 2012-04-05 11:42 ` Stanislav Kinsbursky 2012-04-05 10:53 ` Stanislav Kinsbursky 1 sibling, 2 replies; 11+ messages in thread From: Michael Kerrisk (man-pages) @ 2012-04-04 23:50 UTC (permalink / raw) To: Andrew Morton Cc: Stanislav Kinsbursky, serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf, dhowells, Arnd Bergmann, Linux API On Thu, Apr 5, 2012 at 11:12 AM, Andrew Morton <akpm@linux-foundation.org> wrote: > On Wed, 15 Feb 2012 20:54:39 +0400 > Stanislav Kinsbursky <skinsbursky@parallels.com> wrote: Stanislav, Luckily, Andrew added me to CC. However, as noted in Documentation/SubmitChecklist, all patches that change the ABI/API should CC linux-api@vger.kernel.org. Please do that for the future. (CC added for this thread.) >> v2: >> 1) compat functions added. >> 2) message slot size in array is now aligned by struct msgbuf_a. >> 3) check for enough free space in buffer before message copying added. >> 4) if MSG_STEAL flag is set, then do_msgrcv() returns number of bytes written >> to buffer. By the way, why the name "MSG_STEAL"? At first glance, it sounds like that means we're taking messages that belong to someone else. But I gather you are in fact just flushing all messages from a queue. So I more intuitive name seems to be in order (MSG_READ_ALL, MSG_FLUSH_ALL?). >> 5) flag MSG_NOERROR is ignored if MSG_STEAL flag is set. Would it not be better to return an error if MSG_NOERROR is specified? >> This patch is required for checkpoint/restore in userspace. >> IOW, c/r requires some way to get all pending IPC messages without deleting >> them for the queue (checkpoint can fail and in this case tasks will be resumed, >> so queue have to be valid). >> To achive this, new operation flag MSG_STEAL for sys_msgrcv() system call >> introduced. >> If this flag is set, then passed struct msgbuf pointer will be used for storing >> array of structures: >> >> struct msgbuf_a { >> long mtype; /* type of message */ >> int msize; /* size of message */ >> char mtext[0]; /* message text */ >> }; So, I'm not clear on how things look here. We get back array like this: [0] [1] [2] | mtype | msize | mtext | mtype | msize | mtext | ... How do I calculate the offset of element 1 of the array? I assume it can't be just (&item[0].mtext + msize), since there'll be alignment issues to deal with, so that there are padding bytes after each mtext. In that case, msize on its own seems insufficient (since there is no null-terminator in msize). >> each of which will be followed by corresponding message data. >> > > I'd be a bit more comfortable if there was some sign that other c/r > developers have reviewed and tested this and have successfully used it > in c/r operation testing? Indeed, do you have a userspace test program that you could post? > We've been trying to isolate the c/r-specific functions inside #ifdef > CONFIG_CHECKPOINT_RESTORE, but this patch doesn't do that. I have been > encouraging this isolation so that people who aren't using c/r don't > have to carry the overhead it adds and so that we can more easily hunt > down and remove everything if the entire c/r project doesn't work out > successfully. > > This patch modifies the sys_msgrcv() API and so we should update the > manpage for that syscall. Please work with Michael on this. Yes please. Cheers, Michael > What does all the compat fiddling actually do? I guess it's needed for > checkpoint and restore of 32-bit userspace on 64-bit kernels? Does c/r > as a whole support that? It should. How well tested is this? > -- Michael Kerrisk Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/ Author of "The Linux Programming Interface"; http://man7.org/tlpi/ ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] IPC: message queue stealing feature introduced 2012-04-04 23:50 ` Michael Kerrisk (man-pages) @ 2012-04-04 23:59 ` Michael Kerrisk 2012-04-05 11:42 ` Stanislav Kinsbursky 1 sibling, 0 replies; 11+ messages in thread From: Michael Kerrisk @ 2012-04-04 23:59 UTC (permalink / raw) To: Andrew Morton Cc: Stanislav Kinsbursky, serge.hallyn, criu, lucas.demarchi, linux-kernel, cmetcalf, dhowells, Arnd Bergmann, Linux API Stanislav, I reread your mail, and see I missed a point. On Thu, Apr 5, 2012 at 11:50 AM, Michael Kerrisk (man-pages) <mtk.manpages@gmail.com> wrote: > On Thu, Apr 5, 2012 at 11:12 AM, Andrew Morton > <akpm@linux-foundation.org> wrote: >> On Wed, 15 Feb 2012 20:54:39 +0400 >> Stanislav Kinsbursky <skinsbursky@parallels.com> wrote: > > Stanislav, > > Luckily, Andrew added me to CC. However, as noted in > Documentation/SubmitChecklist, all patches that change the ABI/API > should CC linux-api@vger.kernel.org. Please do that for the future. > (CC added for this thread.) > >>> v2: >>> 1) compat functions added. >>> 2) message slot size in array is now aligned by struct msgbuf_a. >>> 3) check for enough free space in buffer before message copying added. >>> 4) if MSG_STEAL flag is set, then do_msgrcv() returns number of bytes written >>> to buffer. > > By the way, why the name "MSG_STEAL"? At first glance, it sounds like > that means we're taking messages that belong to someone else. But I > gather you are in fact just flushing all messages from a queue. So I > more intuitive name seems to be in order (MSG_READ_ALL, > MSG_FLUSH_ALL?). Looking at what you say below, perhaps a better name for this flag is MSG_PEEK_ALL (by analogy with recvmsg(2)'s MSG_PEEK_ALL)? >>> 5) flag MSG_NOERROR is ignored if MSG_STEAL flag is set. > > Would it not be better to return an error if MSG_NOERROR is specified? > >>> This patch is required for checkpoint/restore in userspace. >>> IOW, c/r requires some way to get all pending IPC messages without deleting >>> them for the queue (checkpoint can fail and in this case tasks will be resumed, >>> so queue have to be valid). >>> To achive this, new operation flag MSG_STEAL for sys_msgrcv() system call >>> introduced. >>> If this flag is set, then passed struct msgbuf pointer will be used for storing >>> array of structures: >>> >>> struct msgbuf_a { >>> long mtype; /* type of message */ >>> int msize; /* size of message */ >>> char mtext[0]; /* message text */ >>> }; > > So, I'm not clear on how things look here. We get back array like this: > > [0] [1] [2] > | mtype | msize | mtext | mtype | msize | mtext | ... > > How do I calculate the offset of element 1 of the array? I assume it > can't be just > (&item[0].mtext + msize), since there'll be alignment issues to deal > with, so that there are padding bytes after each mtext. In that case, > msize on its own seems insufficient (since there is no null-terminator > in msize). > >>> each of which will be followed by corresponding message data. >>> >> >> I'd be a bit more comfortable if there was some sign that other c/r >> developers have reviewed and tested this and have successfully used it >> in c/r operation testing? > > Indeed, do you have a userspace test program that you could post? > >> We've been trying to isolate the c/r-specific functions inside #ifdef >> CONFIG_CHECKPOINT_RESTORE, but this patch doesn't do that. I have been >> encouraging this isolation so that people who aren't using c/r don't >> have to carry the overhead it adds and so that we can more easily hunt >> down and remove everything if the entire c/r project doesn't work out >> successfully. >> >> This patch modifies the sys_msgrcv() API and so we should update the >> manpage for that syscall. Please work with Michael on this. > > Yes please. > > Cheers, > > Michael Thanks, Michael -- Michael Kerrisk Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/ Author of "The Linux Programming Interface", http://blog.man7.org/ ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] IPC: message queue stealing feature introduced 2012-04-04 23:50 ` Michael Kerrisk (man-pages) 2012-04-04 23:59 ` Michael Kerrisk @ 2012-04-05 11:42 ` Stanislav Kinsbursky 1 sibling, 0 replies; 11+ messages in thread From: Stanislav Kinsbursky @ 2012-04-05 11:42 UTC (permalink / raw) To: mtk.manpages@gmail.com Cc: Andrew Morton, serge.hallyn@canonical.com, criu@openvz.org, lucas.demarchi@profusion.mobi, linux-kernel@vger.kernel.org, cmetcalf@tilera.com, dhowells@redhat.com, Arnd Bergmann, Linux API 05.04.2012 03:50, Michael Kerrisk (man-pages) пишет: > On Thu, Apr 5, 2012 at 11:12 AM, Andrew Morton > <akpm@linux-foundation.org> wrote: >> On Wed, 15 Feb 2012 20:54:39 +0400 >> Stanislav Kinsbursky<skinsbursky@parallels.com> wrote: > > Stanislav, > > Luckily, Andrew added me to CC. However, as noted in > Documentation/SubmitChecklist, all patches that change the ABI/API > should CC linux-api@vger.kernel.org. Please do that for the future. > (CC added for this thread.) > Yep, sure. Next time I'll do that. Sorry. >>> v2: >>> 1) compat functions added. >>> 2) message slot size in array is now aligned by struct msgbuf_a. >>> 3) check for enough free space in buffer before message copying added. >>> 4) if MSG_STEAL flag is set, then do_msgrcv() returns number of bytes written >>> to buffer. > > By the way, why the name "MSG_STEAL"? At first glance, it sounds like > that means we're taking messages that belong to someone else. But I > gather you are in fact just flushing all messages from a queue. So I > more intuitive name seems to be in order (MSG_READ_ALL, > MSG_FLUSH_ALL?). > Nice to know, that it sounds to you exactly like it is desired to. I.e. this flag was designed to take messages that belong to someone else without any trace, visible to the owner of the messages. But (like you mentioned in following letter) MSG_PEEK_ALL sound better to me. Thus I'll replace MSG_STEAL with MSG_PEEK_ALL, if you don't mind. >>> 5) flag MSG_NOERROR is ignored if MSG_STEAL flag is set. > > Would it not be better to return an error if MSG_NOERROR is specified? > I don't think so. IOW, error can be returned in this case, but I don't see any practical reason for this. If you do - please share. >>> This patch is required for checkpoint/restore in userspace. >>> IOW, c/r requires some way to get all pending IPC messages without deleting >>> them for the queue (checkpoint can fail and in this case tasks will be resumed, >>> so queue have to be valid). >>> To achive this, new operation flag MSG_STEAL for sys_msgrcv() system call >>> introduced. >>> If this flag is set, then passed struct msgbuf pointer will be used for storing >>> array of structures: >>> >>> struct msgbuf_a { >>> long mtype; /* type of message */ >>> int msize; /* size of message */ >>> char mtext[0]; /* message text */ >>> }; > > So, I'm not clear on how things look here. We get back array like this: > > [0] [1] [2] > | mtype | msize | mtext | mtype | msize | mtext | ... > > How do I calculate the offset of element 1 of the array? I assume it > can't be just > (&item[0].mtext + msize), since there'll be alignment issues to deal > with, so that there are padding bytes after each mtext. In that case, > msize on its own seems insufficient (since there is no null-terminator > in msize). > This is easy. You can be sure, that struct msgbuf_a is aligned by sizeof(struct msgbuf_a). And since you know the size of this structure and length of data segment, then offset of any element of the queue can be calculated like this: addr_N+1 = addr_N + round_up(sizeof(struct msgbuf_a) + msize, sizeof(struct msgbuf_a)); >>> each of which will be followed by corresponding message data. >>> >> >> I'd be a bit more comfortable if there was some sign that other c/r >> developers have reviewed and tested this and have successfully used it >> in c/r operation testing? > > Indeed, do you have a userspace test program that you could post? > Yes, sure. I'll add it to the patch set. But please not, that this test (in it's current state) can be executed only as a part of out CRIU test suit. So you won't be able to execute it as is. >> We've been trying to isolate the c/r-specific functions inside #ifdef >> CONFIG_CHECKPOINT_RESTORE, but this patch doesn't do that. I have been >> encouraging this isolation so that people who aren't using c/r don't >> have to carry the overhead it adds and so that we can more easily hunt >> down and remove everything if the entire c/r project doesn't work out >> successfully. >> >> This patch modifies the sys_msgrcv() API and so we should update the >> manpage for that syscall. Please work with Michael on this. > > Yes please. > Sure. What do you want me to do? -- Best regards, Stanislav Kinsbursky ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] IPC: message queue stealing feature introduced 2012-04-04 23:12 ` Andrew Morton 2012-04-04 23:50 ` Michael Kerrisk (man-pages) @ 2012-04-05 10:53 ` Stanislav Kinsbursky 1 sibling, 0 replies; 11+ messages in thread From: Stanislav Kinsbursky @ 2012-04-05 10:53 UTC (permalink / raw) To: Andrew Morton Cc: serge.hallyn@canonical.com, criu@openvz.org, lucas.demarchi@profusion.mobi, linux-kernel@vger.kernel.org, cmetcalf@tilera.com, dhowells@redhat.com, Michael Kerrisk, Arnd Bergmann 05.04.2012 03:12, Andrew Morton пишет: > > I'd be a bit more comfortable if there was some sign that other c/r > developers have reviewed and tested this and have successfully used it > in c/r operation testing? > We have a user-space test (part of our regression testing test suite) for this functionality. Thus we run this test very ofter. > We've been trying to isolate the c/r-specific functions inside #ifdef > CONFIG_CHECKPOINT_RESTORE, but this patch doesn't do that. I have been > encouraging this isolation so that people who aren't using c/r don't > have to carry the overhead it adds and so that we can more easily hunt > down and remove everything if the entire c/r project doesn't work out > successfully. > Sorry. I'll add this ifdef's and send rebased patch set once more. > This patch modifies the sys_msgrcv() API and so we should update the > manpage for that syscall. Please work with Michael on this. > Sure. > What does all the compat fiddling actually do? I guess it's needed for > checkpoint and restore of 32-bit userspace on 64-bit kernels? Does c/r > as a whole support that? It should. How well tested is this? > CRIU doesn't support 32-bit processes migration yet. But we are going to add this support in future. -- Best regards, Stanislav Kinsbursky ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [CRIU] [PATCH 0/2] IPC: message queue checkpoint support 2012-02-15 16:54 [PATCH 0/2] IPC: message queue checkpoint support Stanislav Kinsbursky 2012-02-15 16:54 ` [PATCH 1/2] IPC: message queue receive cleanup Stanislav Kinsbursky 2012-02-15 16:54 ` [PATCH 2/2] IPC: message queue stealing feature introduced Stanislav Kinsbursky @ 2012-03-05 13:04 ` Kinsbursky Stanislav 2 siblings, 0 replies; 11+ messages in thread From: Kinsbursky Stanislav @ 2012-03-05 13:04 UTC (permalink / raw) To: akpm@linux-foundation.org Cc: serge.hallyn@canonical.com, dhowells@redhat.com, lucas.demarchi@profusion.mobi, linux-kernel@vger.kernel.org, cmetcalf@tilera.com, criu@openvz.org Hello, Andrew. Are you going to take this patch set? 15.02.2012 20:54, Stanislav Kinsbursky пишет: > This patch set conatins v2 of patch > "IPC: message queue stealing feature introduced" > > The following series consists of: > > --- > > Stanislav Kinsbursky (2): > IPC: message queue receive cleanup > IPC: message queue stealing feature introduced > > > arch/tile/kernel/compat.c | 11 +----- > include/linux/compat.h | 2 + > include/linux/msg.h | 13 ++++++- > ipc/compat.c | 62 ++++++++++++++++++++++++-------- > ipc/msg.c | 86 +++++++++++++++++++++++++++++++++++---------- > 5 files changed, 126 insertions(+), 48 deletions(-) > > _______________________________________________ > CRIU mailing list > CRIU@openvz.org > https://openvz.org/mailman/listinfo/criu -- Best regards, Stanislav Kinsbursky ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2012-04-05 11:43 UTC | newest] Thread overview: 11+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-02-15 16:54 [PATCH 0/2] IPC: message queue checkpoint support Stanislav Kinsbursky 2012-02-15 16:54 ` [PATCH 1/2] IPC: message queue receive cleanup Stanislav Kinsbursky 2012-04-04 23:05 ` Andrew Morton 2012-04-04 23:39 ` Chris Metcalf 2012-02-15 16:54 ` [PATCH 2/2] IPC: message queue stealing feature introduced Stanislav Kinsbursky 2012-04-04 23:12 ` Andrew Morton 2012-04-04 23:50 ` Michael Kerrisk (man-pages) 2012-04-04 23:59 ` Michael Kerrisk 2012-04-05 11:42 ` Stanislav Kinsbursky 2012-04-05 10:53 ` Stanislav Kinsbursky 2012-03-05 13:04 ` [CRIU] [PATCH 0/2] IPC: message queue checkpoint support Kinsbursky Stanislav
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox