* [PATCH] [2/2] posix message queues
@ 2003-10-03 15:59 Peter Wächtler
2003-10-03 18:16 ` Manfred Spraul
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Peter Wächtler @ 2003-10-03 15:59 UTC (permalink / raw)
To: linux-kernel; +Cc: akpm, torvalds, bo.z.li, manfred
[-- Attachment #1: Type: text/plain, Size: 244 bytes --]
[2/2] implements the interface in ipc/posixmsg.c through a set of
new syscalls and a new pseudo filesystem msgfs
the implementation passes the posixtestsuite 1.2 and has SIGEV_THREAD
support through userspace (taken from Benedyczak,Wronski)
[-- Attachment #2: px.fget --]
[-- Type: text/plain, Size: 31286 bytes --]
diff -X dontdiff -Nur vanilla-2.6.0-test6/arch/i386/kernel/entry.S linux-2.6.0-test6/arch/i386/kernel/entry.S
--- vanilla-2.6.0-test6/arch/i386/kernel/entry.S 2003-08-23 01:53:39.000000000 +0200
+++ linux-2.6.0-test6/arch/i386/kernel/entry.S 2003-09-07 21:48:07.000000000 +0200
@@ -879,5 +879,12 @@
.long sys_tgkill /* 270 */
.long sys_utimes
.long sys_fadvise64_64
+ .long sys_mq_open
+ .long sys_mq_unlink
+ .long sys_mq_timedsend /* 275 */
+ .long sys_mq_timedreceive
+ .long sys_mq_notify
+ .long sys_mq_getattr
+ .long sys_mq_setattr
nr_syscalls=(.-sys_call_table)/4
diff -X dontdiff -Nur vanilla-2.6.0-test6/CREDITS linux-2.6.0-test6/CREDITS
--- vanilla-2.6.0-test6/CREDITS 2003-10-01 20:10:35.000000000 +0200
+++ linux-2.6.0-test6/CREDITS 2003-10-03 14:10:31.000000000 +0200
@@ -3286,6 +3295,14 @@
S: 5554 GG Valkenswaard
S: The Netherlands
+N: Peter Wächtler
+E: pwaechtler@mac.com
+W: http://homepage.mac.com/pwaechtler/
+D: Posix message queues
+S: Fliederstr. 3
+S: 30167 Hannover
+S: Germany
+
N: Peter Shaobo Wang
E: pwang@mmdcorp.com
W: http://www.mmdcorp.com/pw/linux
diff -X dontdiff -Nur vanilla-2.6.0-test6/include/asm-i386/unistd.h linux-2.6.0-test6/include/asm-i386/unistd.h
--- vanilla-2.6.0-test6/include/asm-i386/unistd.h 2003-08-23 01:57:19.000000000 +0200
+++ linux-2.6.0-test6/include/asm-i386/unistd.h 2003-09-07 21:48:07.000000000 +0200
@@ -278,8 +278,15 @@
#define __NR_tgkill 270
#define __NR_utimes 271
#define __NR_fadvise64_64 272
+#define __NR_sys_mq_open 273
+#define __NR_sys_mq_unlink (__NR_sys_mq_open+1)
+#define __NR_mq_timedsend (__NR_sys_mq_open+2)
+#define __NR_mq_timedreceive (__NR_sys_mq_open+3)
+#define __NR_mq_notify (__NR_sys_mq_open+4)
+#define __NR_mq_getattr (__NR_sys_mq_open+5)
+#define __NR_mq_setattr (__NR_sys_mq_open+6)
-#define NR_syscalls 273
+#define NR_syscalls 279
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
diff -X dontdiff -Nur vanilla-2.6.0-test6/include/linux/pxqueue.h linux-2.6.0-test6/include/linux/pxqueue.h
--- vanilla-2.6.0-test6/include/linux/pxqueue.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.0-test6/include/linux/pxqueue.h 2003-09-28 22:30:48.000000000 +0200
@@ -0,0 +1,48 @@
+#ifndef _LINUX_MQUEUE_H
+#define _LINUX_MQUEUE_H
+
+#define MQ_MAXMSG 40 /* max number of messages in each queue */
+#define MQ_MAXSYSSIZE 1048576 /* max size that all m.q. can have together */
+#define MQ_PRIO_MAX 256 /* max priority */
+
+#define MSGFS_MAGIC 0x4D455347
+#define _POSIX_MESSAGE_PASSING 1
+
+typedef int mqd_t; /* message queue descriptor */
+
+struct mq_attr {
+ long mq_flags; /* message queue flags */
+ long mq_maxmsg; /* maximum number of messages */
+ long mq_msgsize; /* maximum message size */
+ long mq_curmsgs; /* number of messages currently queued */
+};
+
+asmlinkage mqd_t sys_mq_open(const char *u_path, int oflag, mode_t mode,
+ struct mq_attr *u_attr);
+asmlinkage int sys_mq_close(mqd_t mqdes);
+asmlinkage int sys_mq_unlink(const char *u_name);
+asmlinkage int sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr,
+ size_t msg_len, unsigned int msg_prio, struct timespec *utime);
+asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr,
+ size_t msg_len, unsigned int *msg_prio, struct timespec *utime);
+asmlinkage int sys_mq_notify(mqd_t mqdes,
+ const struct sigevent *u_notification);
+asmlinkage int sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat);
+asmlinkage int sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat,
+ struct mq_attr *u_omqstat);
+
+#ifdef __KERNEL__
+
+struct mqueue_ds { /* queue */
+ struct mq_attr attr;
+ struct msg_queue queue; /* ipc/msg */
+ spinlock_t lock;
+ wait_queue_head_t wait_recv;
+ wait_queue_head_t wait_send;
+
+ pid_t notify_pid; /* who we have to notify (or 0) */
+ struct sigevent notify; /* notification */
+};
+#endif /* __KERNEL__ */
+
+#endif
diff -X dontdiff -Nur vanilla-2.6.0-test6/init/Kconfig linux-2.6.0-test6/init/Kconfig
--- vanilla-2.6.0-test6/init/Kconfig 2003-10-01 20:10:58.000000000 +0200
+++ linux-2.6.0-test6/init/Kconfig 2003-10-01 20:05:34.000000000 +0200
@@ -91,6 +91,15 @@
section 6.4 of the Linux Programmer's Guide, available from
<http://www.tldp.org/docs.html#guide>.
+config POSIXMSG
+ bool "POSIX message queues"
+ ---help---
+ This gives you POSIX compliant interfaces for message queues.
+ For userspace stub look at
+ <http://homepage.mac.com/pwaechtler/mqueue.html>.
+
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
help
diff -X dontdiff -Nur vanilla-2.6.0-test6/ipc/Makefile linux-2.6.0-test6/ipc/Makefile
--- vanilla-2.6.0-test6/ipc/Makefile 2003-08-23 01:52:27.000000000 +0200
+++ linux-2.6.0-test6/ipc/Makefile 2003-09-07 23:07:47.000000000 +0200
@@ -5,3 +5,5 @@
obj-y := util.o
obj-$(CONFIG_SYSVIPC) += msg.o sem.o shm.o
+obj-$(CONFIG_POSIXMSG) += posixmsg.o
diff -X dontdiff -Nur vanilla-2.6.0-test6/ipc/posixmsg.c linux-2.6.0-test6/ipc/posixmsg.c
--- vanilla-2.6.0-test6/ipc/posixmsg.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.0-test6/ipc/posixmsg.c 2003-10-03 13:24:54.000000000 +0200
@@ -0,0 +1,945 @@
+/*
+ * linux/ipc/posixmsg.c
+ *
+ * Copyright 2002,2003 Peter Wächtler <pwaechtler@mac.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * root can mount the filesystem to see the names of the currently
+ * used mqueues. The i_size shows the number of msgs in the queue.
+ * The only shell operation on the "fifos" is an unlink() via rm(1)
+ * The queues are accessed through syscalls, poll is supported
+ *
+ * CHANGES:
+ * put MSGFS_MAGIC into mqueue.h
+ * update inode->m_time on send (yes, no posix semantics on this)
+ * update inode->a_time on recv
+ * translate absolute timeouts in kernel to prevent too long sleeps
+ * corrected spinlocks + [gs]etattr/mq_flags
+ * replaced spinlocks with inode->i_sem
+ * (don't know yet what's broken on SMP with spinlocks)
+ * use spinlocks, where was the problem?
+ * open coded the wait_event_interruptible_timeouts to avoid races
+ * fixed accmode tests
+ * fixed freespace check (meant a per queue limit?)
+ * fixed msg_len check in sys_mq_timedreceive
+ * defer check for negative timeout on mq_timedreceive
+ * don't put_user in timedreceive if msg_prio is NULL
+ * add macros to choose fget/fput or not (read comment at get_filp)
+ *
+ * * passes the posixtestsuite 1.2
+ *
+ * TODO:
+ * check for more sysv msg limits (or add some new, e.g. MQ_PRIO_MAX)?
+ * think about prio based waitqueues: simply enqueue them in order?
+ * bits/posix_opt.h claims to support _POSIX_PRIORITY_SCHEDULING
+ * make wake_up_interruptible_all sysctl'able
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h> /* PAGE_CACHE_SIZE */
+#include <linux/poll.h>
+#include <linux/security.h>
+#include <linux/pxqueue.h>
+#include <linux/msg.h>
+#include <asm/uaccess.h>
+#include "util.h"
+
+
+extern int msg_ctlmnb; /* default max size of a message queue (all msgs sum up) */
+extern int msg_ctlmni; /* max # of msg queue identifiers */
+extern int msg_ctlmax; /* max size of one message (bytes) */
+
+static atomic_t msg_bytes = ATOMIC_INIT(0);
+static struct vfsmount *msg_mnt;
+
+#define get_mqueue(filp)\
+ ((filp) ? filp->f_dentry->d_inode->u.generic_ip: filp)
+
+static struct msg_queue *
+init_queue(struct msg_queue *queue)
+{
+ int retval;
+
+ queue->q_perm.mode = 0;
+ queue->q_perm.key = 0;
+ queue->q_perm.security = NULL;
+ retval = security_msg_queue_alloc(queue);
+ if (retval)
+ return ERR_PTR(retval);;
+
+ queue->q_stime = queue->q_rtime = 0;
+ queue->q_qbytes = queue->q_cbytes = queue->q_qnum = 0;
+ queue->q_lspid = queue->q_lrpid = 0;
+ INIT_LIST_HEAD(&queue->q_messages);
+ INIT_LIST_HEAD(&queue->q_receivers); /* unused */
+ INIT_LIST_HEAD(&queue->q_senders); /* unused */
+
+ return queue;
+}
+
+static int
+mqueue_unlink (struct inode *dir, struct dentry *dentry)
+{
+ dentry->d_inode->i_nlink--;
+ dput(dentry);
+ return 0;
+}
+
+/* cleans up after close() or exit() if the refount drops to zero */
+static int
+mqueue_close(struct inode *inode, struct file *filp)
+{
+ struct mqueue_ds *q = inode->u.generic_ip;
+
+ if (!q || !filp)
+ return -EBADFD;
+
+ spin_lock(&q->lock);
+ /* remove possible notification;
+ * sys_getpid() returns the tgid if multithreaded */
+ if (q->notify_pid == current->pid) {
+ q->notify_pid = 0;
+ q->notify.sigev_signo = 0;
+ q->notify.sigev_notify = SIGEV_NONE;
+ }
+ spin_unlock(&q->lock);
+ return 0;
+}
+
+static unsigned int
+mqueue_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ struct mqueue_ds *q = get_mqueue(filp);
+ int ret = 0;
+
+ poll_wait(filp, &q->wait_recv, wait);
+ poll_wait(filp, &q->wait_send, wait);
+
+ if (q->queue.q_qnum)
+ ret = POLLIN | POLLRDNORM;
+
+ if (q->queue.q_qnum < (unsigned long)q->attr.mq_maxmsg)
+ ret |= POLLOUT | POLLWRNORM;
+
+ return ret;
+}
+
+static struct file_operations msg_fops = {
+ .llseek = no_llseek,
+ .release = mqueue_close,
+ .poll = mqueue_poll,
+};
+static struct inode_operations msg_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .unlink = mqueue_unlink,
+};
+
+static struct inode *
+get_msg_inode(struct super_block *sb, mode_t mode)
+{
+ struct mqueue_ds *q;
+ struct inode *inode = new_inode(sb);
+
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blksize = 1024;
+ switch (mode & S_IFMT) {
+ default:
+ case S_IFIFO:
+ case S_IFREG:
+ inode->i_fop = &msg_fops;
+ if ((q = kmalloc(sizeof (struct mqueue_ds), GFP_KERNEL))) {
+ if (!init_queue(&q->queue)) {
+ iput(inode);
+ inode = ERR_PTR(-EACCES);
+ }
+ inode->u.generic_ip = q;
+ spin_lock_init(&q->lock);
+ } else {
+ iput(inode);
+ inode = ERR_PTR(-ENOSPC);
+ }
+ break;
+ case S_IFDIR:
+ inode->i_op = &msg_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inode->i_nlink++;
+ break;
+ case S_IFLNK:
+ break;
+ }
+ } else
+ inode = ERR_PTR(-ENOSPC);
+
+ return inode;
+}
+
+#if 0
+/* don't use fget() to avoid the fput() for speed reason
+ * on create/open the refcount is 1 and decremented on close
+ * if you have a multithreaded app where one thread closes
+ * the mqueue while another thread operates on it -> possible crash
+ * the spec says the behavior is undefined
+ * separate processes are not affected
+ */
+static struct file *
+get_filp(mqd_t fd)
+{
+ struct files_struct *files = current->files;
+ struct file *rc = NULL;
+
+ spin_lock(&files->file_lock);
+ if (fd < files->max_fds && fd >= 0)
+ rc = files->fd[fd];
+ spin_unlock(&files->file_lock);
+ return rc;
+
+}
+#define ERROUT(x) return x;
+#define FPUTOUT
+
+#else
+
+#define get_filp(x) fget(x)
+#define ERROUT(x) { err = x; goto out_fput;}
+#define FPUTOUT out_fput: fput(filp)
+#endif
+
+static inline int
+freespace(struct mqueue_ds *q, size_t msg_len)
+{
+ return (int) ( q->queue.q_qnum < (unsigned long)q->attr.mq_maxmsg);
+}
+
+/* removes a queue - called from vfs when an inode gets deleted */
+static void
+mqueue_release(struct inode *inode)
+{
+ struct mqueue_ds *q = inode->u.generic_ip;
+ struct msg_queue *queue;
+ struct list_head *tmp;
+
+ if (q){
+ queue = &q->queue;
+ tmp = queue->q_messages.next;
+ while (tmp != &queue->q_messages) {
+ struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
+ tmp = tmp->next;
+ free_msg(msg);
+ }
+ atomic_sub(queue->q_cbytes, &msg_bytes);
+ security_msg_queue_free(queue);
+ inode->u.generic_ip = NULL;
+ kfree(q);
+ }
+ clear_inode(inode);
+}
+
+static int
+create_queue(struct dentry *dir, struct qstr *qname,
+ int oflag, mode_t mode, struct mq_attr *u_attr)
+{
+ int ret, fd;
+ struct file *filp;
+ struct mqueue_ds *q;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ inode = get_msg_inode(msg_mnt->mnt_sb, S_IFIFO | (mode & S_IRWXUGO));
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto out_ret;
+ }
+ q = inode->u.generic_ip;
+
+ if (u_attr != NULL) {
+ if (copy_from_user(&q->attr, u_attr, sizeof (struct mq_attr))) {
+ ret = -EFAULT;
+ goto out_inode;
+ }
+ if (q->attr.mq_maxmsg <= 0
+ || q->attr.mq_msgsize <= 0
+ || q->attr.mq_maxmsg > MQ_MAXMSG
+ || q->attr.mq_msgsize > msg_ctlmax) {
+ ret = -EINVAL;
+ goto out_inode;
+ }
+ } else { /* implementation defined */
+ q->attr.mq_maxmsg = MQ_MAXMSG;
+ q->attr.mq_msgsize = 1024 /*msg_ctlmax */ ;
+ }
+ q->attr.mq_flags = oflag & O_ACCMODE;
+ q->notify_pid = 0;
+ q->notify.sigev_signo = 0;
+ q->notify.sigev_notify = SIGEV_NONE;
+ init_waitqueue_head(&q->wait_send);
+ init_waitqueue_head(&q->wait_recv);
+
+ ret = -ENFILE;
+ if ((fd = get_unused_fd()) < 0)
+ goto out_inode;
+ if (!(filp = get_empty_filp()))
+ goto out_fd;
+
+ qname->hash = full_name_hash(qname->name, qname->len);
+ dentry = d_alloc(dir, qname);
+ if (!dentry) {
+ ret = -ENOMEM;
+ goto out_filp;
+ }
+ d_add(dentry, inode);
+ ret = get_write_access(inode);
+ if (ret)
+ goto out_filp;
+
+ filp->f_vfsmnt = mntget(msg_mnt);
+ filp->f_dentry = dget(dentry); /* leave it active */
+ filp->f_op = &msg_fops;
+ filp->f_mode = (q->attr.mq_flags +1 ) & O_ACCMODE;
+ filp->f_flags = oflag;
+
+ /* Now we map fd to filp, so userspace can access it */
+ fd_install(fd, filp);
+ ret = fd;
+ goto out_ret;
+
+out_filp:
+ put_filp(filp);
+out_fd:
+ put_unused_fd(fd);
+out_inode:
+ iput(inode);
+out_ret:
+ return ret;
+}
+
+/**
+ * sys_mq_open - opens a message queue associated with @u_name
+ * @mqdes: descriptor of mqueue
+ * @oflag: flags like O_CREAT, O_EXCL, O_RDWR
+ * @mode: when O_CREAT is specified, the permission bits
+ * @u_attr: pointer to the attributes, like max msgsize, when creating
+ *
+ * returns a descriptor to the opened queue or negative value on error
+ */
+asmlinkage mqd_t
+sys_mq_open(const char *u_name, int oflag, mode_t mode, struct mq_attr * u_attr)
+{
+ struct file *filp;
+ struct dentry *dentry;
+ struct qstr this;
+ static int oflag2acc[O_ACCMODE] =
+ { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE };
+ int fd, ret;
+
+ if (IS_ERR(this.name = getname(u_name)))
+ return PTR_ERR(this.name);
+ this.len = strlen(this.name);
+ down(&msg_mnt->mnt_root->d_inode->i_sem);
+ dentry = lookup_one_len(this.name, msg_mnt->mnt_root, this.len);
+
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out_ret;
+ }
+ if (oflag & O_CREAT) {
+ if (dentry->d_inode) {
+ /* entry exists already */
+ if (oflag & O_EXCL) {
+ ret = -EEXIST;
+ } else {
+ goto open_existing;
+ }
+ goto out_dput;
+ } else {
+ ret = create_queue(msg_mnt->mnt_root, &this,
+ oflag, mode, u_attr);
+ }
+
+ } else { /* O_CREAT */
+ if (!dentry->d_inode) {
+ ret = -ENOENT;
+ } else {
+open_existing:
+ if (permission
+ (dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+ ret = -EACCES;
+ } else {
+ fd = get_unused_fd();
+ if (fd >= 0) {
+ mntget(msg_mnt);
+ filp =
+ dentry_open(dentry, msg_mnt, oflag);
+ filp->f_op = &msg_fops;
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ put_unused_fd(fd);
+ goto out_dput;
+ }
+ dget(dentry);
+ fd_install(fd, filp);
+ }
+ ret = fd;
+ }
+ }
+ }
+out_dput:
+ dput(dentry);
+out_ret:
+ up(&msg_mnt->mnt_root->d_inode->i_sem);
+ putname(this.name);
+
+ return ret;
+}
+
+/**
+ * sys_mq_unlink - removes a message queue from the namespace
+ *
+ * @u_name: pointer to the name
+ */
+asmlinkage int
+sys_mq_unlink(const char *u_name)
+{
+ int err=0;
+ struct dentry *dentry;
+ char *name = getname(u_name);
+
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
+ down(&msg_mnt->mnt_root->d_inode->i_sem);
+ dentry = lookup_one_len(name, msg_mnt->mnt_root, strlen(name));
+ if (IS_ERR(dentry)){
+ err= PTR_ERR(dentry);
+ goto out_unlock;
+ }
+ if (!dentry->d_inode){
+ err= -ENOENT;
+ goto out_unlock;
+ }
+ err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+
+out_unlock:
+ up(&msg_mnt->mnt_root->d_inode->i_sem);
+ putname(name);
+ return err;
+}
+
+static inline long get_timeout( struct timespec *abs, int *err)
+{
+ struct timespec t;
+ *err = 0;
+ if (abs->tv_nsec >= 1000000000L || abs->tv_nsec < 0 || abs->tv_sec < 0)
+ return *err = -EINVAL;
+
+ t=current_kernel_time();
+ if (t.tv_sec > abs->tv_sec ||
+ (t.tv_sec == abs->tv_sec && t.tv_nsec > abs->tv_nsec))
+ return -ETIMEDOUT;
+
+ t.tv_sec = abs->tv_sec - t.tv_sec;
+ t.tv_nsec = abs->tv_nsec - t.tv_nsec;
+ if (t.tv_nsec < 0){
+ t.tv_sec--;
+ t.tv_nsec+= 1000000000;
+ }
+ return timespec_to_jiffies(&t) + 1;
+}
+
+static void local_add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock(&q->lock);
+ __add_wait_queue(q, wait);
+ spin_unlock(&q->lock);
+}
+
+static void local_remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ spin_lock(&q->lock);
+ __remove_wait_queue(q, wait);
+ spin_unlock(&q->lock);
+}
+
+/**
+ * sys_mq_timedsend - send a message to the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @msg_ptr: pointer to buffer holding the message
+ * @msg_len: length of the message
+ * @msg_prio: the priority of the message
+ * @utime: if !NULL the function will only block for specified time
+ */
+asmlinkage int
+sys_mq_timedsend(mqd_t mqdes,
+ const char *msg_ptr, size_t msg_len,
+ unsigned int msg_prio, struct timespec *utime)
+{
+ struct msg_msg *msg;
+ struct list_head *p;
+ struct msg_queue *queue;
+ int err;
+ long timeout;
+ struct timespec ts;
+ struct mqueue_ds *q;
+ struct file *filp = get_filp(mqdes);
+ struct inode *inode;
+
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+ if ( !(filp->f_mode & FMODE_WRITE))
+ ERROUT( -EBADF);
+ if ((unsigned int) msg_prio >= (unsigned int) MQ_PRIO_MAX)
+ ERROUT( -EINVAL);
+ if ((long)msg_len > q->attr.mq_msgsize)
+ ERROUT( -EMSGSIZE);
+
+ queue = &q->queue;
+ inode=filp->f_dentry->d_inode;
+ spin_lock(&q->lock);
+ if ((filp->f_flags & O_NONBLOCK) && !freespace(q, msg_len)){
+ spin_unlock(&q->lock);
+ ERROUT( -EAGAIN);
+ }
+ spin_unlock(&q->lock);
+ /* check if this message will exceed overall limit for messages */
+ if (atomic_read(&msg_bytes) + msg_len > MQ_MAXSYSSIZE)
+ ERROUT( -ENOMEM);
+
+ if (utime) {
+ if (copy_from_user(&ts, utime, sizeof (ts)))
+ ERROUT( -EFAULT);
+ timeout = get_timeout(&ts, &err);
+ if ( err == -EINVAL )
+ ERROUT( err);
+ /* don't timeout yet, check if queue is empty */
+ } else
+ timeout = 0L;
+
+ msg = get_msg((char *) msg_ptr, msg_len);
+ if (IS_ERR(msg))
+ ERROUT( PTR_ERR(msg));
+
+ msg->m_type = msg_prio;
+ msg->m_ts = msg_len;
+
+ if (!timeout)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+
+ /* open coded wait_event_interruptible_timeout()
+ * to avoid race on freespace()
+ */
+ spin_lock(&q->lock);
+ if (!freespace(q, msg_len) ){
+ if ( unlikely(timeout < 0) ){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ wait_queue_t __wait;
+ init_waitqueue_entry(&__wait, current);
+
+ local_add_wait_queue(&q->wait_send, &__wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( freespace(q, msg_len))
+ break;
+ if (!signal_pending(current)) {
+ spin_unlock(&q->lock);
+ timeout = schedule_timeout(timeout);
+ spin_lock(&q->lock);
+ if (!timeout)
+ break;
+ continue;
+ }
+ timeout = -ERESTARTSYS;
+ break;
+ }
+ current->state = TASK_RUNNING;
+ local_remove_wait_queue(&q->wait_send, &__wait);
+
+ if (unlikely(timeout == -ERESTARTSYS)){
+ err = -EINTR;
+ goto out_unlock;
+ }
+ if (unlikely(timeout == 0)){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ }
+ err = 0;
+ /* enqueue message in prio order */
+ p = queue->q_messages.next; /* used as flag if msg was queued */
+ if (msg_prio > 0 && !list_empty(&queue->q_messages)) {
+
+ list_for_each(p, &queue->q_messages) {
+ struct msg_msg *tmp =
+ list_entry(p, struct msg_msg, m_list);
+ if (tmp->m_type < (long)msg_prio) {
+ list_add_tail(&msg->m_list, p);
+ p = NULL;
+ break;
+ }
+ }
+ }
+ if (p) /* ok, put it at the end */
+ list_add_tail(&msg->m_list, &queue->q_messages);
+
+ queue->q_lspid = current->pid;
+ queue->q_cbytes += msg_len;
+ atomic_add(msg_len, &msg_bytes);
+ queue->q_qnum++;
+ inode->i_size = queue->q_qnum;
+ inode->i_mtime = CURRENT_TIME;
+
+ if (waitqueue_active(&q->wait_recv)) {
+ /* wake up all waiters to serve the highest prio waiter */
+ wake_up_interruptible_all(&q->wait_recv);
+ } else {
+ /* since there was no synchronously waiting process for message
+ * we notify it when the state of queue changed from
+ * empty to not empty */
+ if (q->notify_pid != 0 && queue->q_qnum == 1) {
+ /* TODO: Add support for sigev_notify==SIGEV_THREAD
+ * we should create a thread in userspace
+ */
+ if (q->notify.sigev_notify == SIGEV_THREAD) {
+
+ err = -ENOSYS;
+ pr_info("mq_*send: SIGEV_THREAD not supported\n");
+ }
+ /* sends signal */
+ if (q->notify.sigev_notify == SIGEV_SIGNAL) {
+ struct siginfo sig_i;
+ sig_i.si_signo = q->notify.sigev_signo;
+ sig_i.si_errno = 0;
+ sig_i.si_code = SI_MESGQ;
+ sig_i.si_pid = current->pid;
+ sig_i.si_uid = current->uid;
+ kill_proc_info(q->notify.sigev_signo,
+ &sig_i, q->notify_pid);
+ }
+ /* after notification unregister process */
+ q->notify_pid = 0;
+ }
+ }
+out_unlock:
+ spin_unlock(&q->lock);
+FPUTOUT;
+ return err;
+}
+
+/**
+ * sys_mq_timedreceive - receive a message from the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @msg_ptr: pointer to buffer to hold the message
+ * @msg_len: length of the userspace buffer
+ * @msg_prio: will hold the priority if a message was received
+ * @utime: if !NULL the function will only block for specified time
+ */
+asmlinkage ssize_t
+sys_mq_timedreceive(mqd_t mqdes,
+ char *msg_ptr, size_t msg_len,
+ unsigned int *msg_prio, struct timespec * utime)
+{
+ struct msg_queue *queue;
+ struct msg_msg *msg;
+ int err;
+ long timeout;
+ struct timespec ts;
+ struct mqueue_ds *q;
+ struct file *filp = get_filp(mqdes);
+ struct inode *inode;
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+ if ((long)msg_len < q->attr.mq_msgsize)
+ ERROUT( -EMSGSIZE);
+
+ if ( !(filp->f_mode & FMODE_READ))
+ ERROUT( -EBADF);
+
+ queue = &q->queue;
+ if ((filp->f_flags & O_NONBLOCK) && queue->q_qnum == 0)
+ ERROUT( -EAGAIN);
+
+ if (utime) {
+ if (copy_from_user(&ts, utime, sizeof (ts)))
+ ERROUT( -EFAULT);
+ timeout = get_timeout(&ts, &err);
+ if ( err == -EINVAL )
+ ERROUT( err);
+ /* we do not check yet for the timeout */
+ } else
+ timeout = 0L;
+ if (!timeout)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+
+ spin_lock(&q->lock);
+wait_on_msg:
+ if (queue->q_qnum == 0 ){
+ if (unlikely(timeout < 0)){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ wait_queue_t __wait;
+ init_waitqueue_entry(&__wait, current);
+
+ local_add_wait_queue(&q->wait_recv, &__wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( queue->q_qnum > 0)
+ break;
+ if (!signal_pending(current)) {
+ spin_unlock(&q->lock);
+ timeout = schedule_timeout(timeout);
+ spin_lock(&q->lock);
+ if (!timeout)
+ break;
+ continue;
+ }
+ timeout = -ERESTARTSYS;
+ break;
+ }
+ current->state = TASK_RUNNING;
+ local_remove_wait_queue(&q->wait_recv, &__wait);
+
+ if (unlikely(timeout == -ERESTARTSYS)){
+ err = -EINTR;
+ goto out_unlock;
+ }
+ if (unlikely(timeout == 0)){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ }
+ if (unlikely(list_empty(&queue->q_messages)))
+ goto wait_on_msg; /* another task was faster and removed the msg */
+
+ msg=list_entry(queue->q_messages.next, struct msg_msg, m_list);
+ list_del(&msg->m_list);
+ queue->q_lrpid = current->pid;
+ queue->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts, &msg_bytes);
+ queue->q_qnum--;
+ inode=filp->f_dentry->d_inode;
+ inode->i_size = queue->q_qnum;
+ inode->i_atime = CURRENT_TIME;
+ /* msg is removed from list, we're already safe */
+ spin_unlock(&q->lock);
+
+ wake_up_interruptible(&q->wait_send);
+
+ msg_len = (msg_len > (size_t)msg->m_ts) ? (size_t)msg->m_ts : msg_len;
+
+ if ( put_msg(msg_ptr, msg, msg_len) ||
+ ( msg_prio && put_user(msg->m_type, msg_prio))) {
+ msg_len = -EFAULT; /* hmh, now the msg is lost */
+ }
+ free_msg(msg);
+
+ fput(filp);
+ return msg_len;
+
+out_unlock:
+ spin_unlock(&q->lock);
+FPUTOUT;
+ return err;
+}
+
+/**
+ * sys_mq_notify - set or remove a notification on the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @u_notification: pointer to struct sigevent
+ *
+ */
+asmlinkage int
+sys_mq_notify(mqd_t mqdes, const struct sigevent *u_notification)
+{
+ struct sigevent notify;
+ struct file *filp = get_filp(mqdes);
+ struct mqueue_ds *q;
+ int err = 0;
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+ if (u_notification != NULL)
+ if (copy_from_user
+ (¬ify, u_notification, sizeof (struct sigevent)))
+ ERROUT( -EFAULT);
+
+ down(&filp->f_dentry->d_inode->i_sem);
+ if (q->notify_pid == current->pid
+ && (u_notification == NULL || notify.sigev_notify == SIGEV_NONE)) {
+ q->notify_pid = 0; /* remove notification */
+ q->notify.sigev_signo = 0;
+ q->notify.sigev_notify = SIGEV_NONE;
+ } else if (q->notify_pid > 0) {
+ err = -EBUSY;
+ } else if (u_notification != NULL) {
+ if (notify.sigev_notify == SIGEV_SIGNAL) {
+ /* add notification */
+ q->notify_pid = current->pid;
+ q->notify.sigev_signo = notify.sigev_signo;
+ q->notify.sigev_notify = notify.sigev_notify;
+ } else if (notify.sigev_notify == SIGEV_THREAD) {
+ err = -ENOSYS;
+ pr_info("mq_*send: SIGEV_THREAD not supported yet\n");
+ }
+ }
+ up(&filp->f_dentry->d_inode->i_sem);
+FPUTOUT;
+ return err;
+}
+
+static inline void
+fill_flags(struct mqueue_ds *q, struct file *filp)
+{
+ spin_lock(&q->lock);
+ q->attr.mq_flags = (filp->f_mode -1) & O_ACCMODE;
+ q->attr.mq_flags |= (filp->f_flags) & O_NONBLOCK;
+ q->attr.mq_curmsgs = q->queue.q_qnum;
+ spin_unlock(&q->lock);
+}
+
+/**
+ * sys_mq_getattr - get the attributes of the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @u_mqstat: pointer to struct holding the new values
+ *
+ */
+asmlinkage int
+sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat)
+{
+ int err = 0;
+ struct mqueue_ds *q;
+ struct file *filp = get_filp(mqdes);
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+
+ fill_flags(q,filp);
+ if (copy_to_user(u_mqstat, &q->attr, sizeof (struct mq_attr)))
+ err = -EFAULT;
+FPUTOUT;
+ return err;
+}
+
+/**
+ * sys_mq_setattr - set the attributes of the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @u_mqstat: pointer to struct holding the new values
+ * @u_omqstat: pointer to store the original attributes (if !NULL)
+ *
+ */
+asmlinkage int
+sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat,
+ struct mq_attr *u_omqstat)
+{
+ struct mq_attr mqstat;
+ struct mqueue_ds *q;
+ struct file *filp = get_filp(mqdes);
+ int err = 0;
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+
+ if (u_omqstat != NULL) {
+ fill_flags(q,filp);
+ if (copy_to_user(u_omqstat, &q->attr, sizeof (struct mq_attr)))
+ ERROUT( -EFAULT);
+ }
+ if (copy_from_user(&mqstat, u_mqstat, sizeof (struct mq_attr)))
+ ERROUT( -EFAULT);
+ if (mqstat.mq_flags & O_NONBLOCK)
+ filp->f_flags |= O_NONBLOCK;
+ else
+ filp->f_flags &= ~O_NONBLOCK;
+
+FPUTOUT;
+ return err;
+}
+
+static struct super_operations msg_s_ops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+ .delete_inode = mqueue_release,
+};
+
+static int
+msg_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *root;
+ struct dentry *root_dentry;
+ static const struct qstr this={ .name="msg:", .len=4, .hash=0 };
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = MSGFS_MAGIC;
+ sb->s_op = &msg_s_ops;
+
+ root = get_msg_inode(sb, S_IFDIR | S_IRWXUGO | S_ISVTX);
+ if (!root)
+ goto out;
+ root_dentry = d_alloc(NULL, &this);
+ if (!root_dentry)
+ goto out_iput;
+ sb->s_root = root_dentry;
+ sb->s_root->d_sb = sb;
+ sb->s_root->d_parent = sb->s_root;
+ d_instantiate(sb->s_root, root);
+ return 0;
+
+out_iput:
+ iput(root);
+out:
+ return -ENOMEM;
+}
+
+static struct super_block *
+msgfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return get_sb_single(fs_type, flags, data, msg_fill_super);
+}
+
+static struct file_system_type msg_fs_type = {
+ .name = "msgfs",
+ .get_sb = msgfs_get_sb,
+ .kill_sb = kill_anon_super,
+};
+
+static int __init
+mqueue_init(void)
+{
+ int err = register_filesystem(&msg_fs_type);
+
+ if (!err){
+ if (IS_ERR(msg_mnt = kern_mount(&msg_fs_type))){
+ unregister_filesystem(&msg_fs_type);
+ err= PTR_ERR(msg_mnt);
+ } else
+ err=0;
+ }
+
+ return err;
+}
+
+__initcall(mqueue_init);
diff -X dontdiff -Nur vanilla-2.6.0-test6/MAINTAINERS linux-2.6.0-test6/MAINTAINERS
--- vanilla-2.6.0-test6/MAINTAINERS 2003-10-01 20:10:36.000000000 +0200
+++ linux-2.6.0-test6/MAINTAINERS 2003-10-03 13:40:59.000000000 +0200
@@ -1558,6 +1558,11 @@
M: ambx1@neo.rr.com
S: Maintained
+POSIXMSG message queues
+P: Peter Wächtler
+M: pwaechtler@mac.com
+S: Maintained
+
PPP PROTOCOL DRIVERS AND COMPRESSORS
P: Paul Mackerras
M: paulus@samba.org
diff -X dontdiff -Nur vanilla-2.6.0-test6/ipc/util.c linux-2.6.0-test6/ipc/util.c
--- vanilla-2.6.0-test6/ipc/util.c 2003-10-03 17:17:58.000000000 +0200
+++ linux-2.6.0-test6/ipc/util.c 2003-10-03 14:58:35.000000000 +0200
@@ -24,6 +24,7 @@
#include <linux/security.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/pxqueue.h>
#if defined(CONFIG_SYSVIPC)
@@ -731,3 +732,50 @@
}
#endif /* CONFIG_SYSVIPC */
+
+#if !defined(CONFIG_POSIXMSG)
+/*
+ * Dummy functions when POSIXMSG isn't configured
+ */
+
+asmlinkage mqd_t sys_mq_open(const char *u_path, int oflag, mode_t mode,
+ struct mq_attr *u_attr)
+{
+ return (mqd_t) -ENOSYS;
+}
+
+asmlinkage int sys_mq_unlink(const char *u_name)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr,
+ size_t msg_len, unsigned int msg_prio, struct timespec *utime)
+{
+ return -ENOSYS;
+}
+
+asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr,
+ size_t msg_len, unsigned int *msg_prio, struct timespec *utime)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_notify(mqd_t mqdes,
+ const struct sigevent *u_notification)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat,
+ struct mq_attr *u_omqstat)
+{
+ return -ENOSYS;
+}
+
+#endif /* CONFIG_POSIXMSG */
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-03 15:59 [PATCH] [2/2] posix message queues Peter Wächtler
@ 2003-10-03 18:16 ` Manfred Spraul
2003-10-05 12:42 ` Peter Wächtler
2003-10-03 22:22 ` Jakub Jelinek
2003-10-04 6:39 ` Ulrich Drepper
2 siblings, 1 reply; 9+ messages in thread
From: Manfred Spraul @ 2003-10-03 18:16 UTC (permalink / raw)
To: Peter Wächtler; +Cc: linux-kernel, akpm, torvalds, bo.z.li
Peter Wächtler wrote:
>+
>+#if 0
>+/* don't use fget() to avoid the fput() for speed reason
>+ * on create/open the refcount is 1 and decremented on close
>+ * if you have a multithreaded app where one thread closes
>+ * the mqueue while another thread operates on it -> possible crash
>+ * the spec says the behavior is undefined
>+ * separate processes are not affected
>+ */
>
Could you remove that block, instead of just disabling it? Bugs spread
at an incredible rate...
The right approach to avoid the cost of the fget is fget_light. But
that's an optimization, it can be added later.
>+
>+static void local_remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
>+{
>+ spin_lock(&q->lock);
>+ __remove_wait_queue(q, wait);
>+ spin_unlock(&q->lock);
>+}
>
What's the difference between remove_wait_queue() and
local_remove_wait_queue?
>+ queue->q_lspid = current->pid;
>+ queue->q_cbytes += msg_len;
>+ atomic_add(msg_len, &msg_bytes);
>
You are accounting posix messages in the sysv msg variables. Is that
something we want, or should posix messages have their own accounting
variables? I don't know what's better, but it should be discussed.
>+ queue->q_qnum++;
>+ inode->i_size = queue->q_qnum;
>+ inode->i_mtime = CURRENT_TIME;
>+
>+ if (waitqueue_active(&q->wait_recv)) {
>+ /* wake up all waiters to serve the highest prio waiter */
>+ wake_up_interruptible_all(&q->wait_recv);
>
Would it be possible to sort the waiters according to their prio?
wake_all is always bad.
>+ } else {
>+ /* since there was no synchronously waiting process for message
>+ * we notify it when the state of queue changed from
>+ * empty to not empty */
>+ if (q->notify_pid != 0 && queue->q_qnum == 1) {
>+ /* TODO: Add support for sigev_notify==SIGEV_THREAD
>+ * we should create a thread in userspace
>+ */
>
Is that comment still correct? You wrote that it's supported in user space.
It looks good.
--
Manfred
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-03 15:59 [PATCH] [2/2] posix message queues Peter Wächtler
2003-10-03 18:16 ` Manfred Spraul
@ 2003-10-03 22:22 ` Jakub Jelinek
2003-10-05 12:42 ` Peter Wächtler
2003-10-04 6:39 ` Ulrich Drepper
2 siblings, 1 reply; 9+ messages in thread
From: Jakub Jelinek @ 2003-10-03 22:22 UTC (permalink / raw)
To: Peter Wächtler; +Cc: linux-kernel, akpm, torvalds, bo.z.li, manfred
On Fri, Oct 03, 2003 at 05:59:26PM +0200, Peter Wächtler wrote:
> diff -X dontdiff -Nur vanilla-2.6.0-test6/include/asm-i386/unistd.h linux-2.6.0-test6/include/asm-i386/unistd.h
> --- vanilla-2.6.0-test6/include/asm-i386/unistd.h 2003-08-23 01:57:19.000000000 +0200
> +++ linux-2.6.0-test6/include/asm-i386/unistd.h 2003-09-07 21:48:07.000000000 +0200
> @@ -278,8 +278,15 @@
> #define __NR_tgkill 270
> #define __NR_utimes 271
> #define __NR_fadvise64_64 272
> +#define __NR_sys_mq_open 273
> +#define __NR_sys_mq_unlink (__NR_sys_mq_open+1)
> +#define __NR_mq_timedsend (__NR_sys_mq_open+2)
> +#define __NR_mq_timedreceive (__NR_sys_mq_open+3)
> +#define __NR_mq_notify (__NR_sys_mq_open+4)
> +#define __NR_mq_getattr (__NR_sys_mq_open+5)
> +#define __NR_mq_setattr (__NR_sys_mq_open+6)
s/__NR_sys_mq/__NR_mq/g
Jakub
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-03 15:59 [PATCH] [2/2] posix message queues Peter Wächtler
2003-10-03 18:16 ` Manfred Spraul
2003-10-03 22:22 ` Jakub Jelinek
@ 2003-10-04 6:39 ` Ulrich Drepper
2 siblings, 0 replies; 9+ messages in thread
From: Ulrich Drepper @ 2003-10-04 6:39 UTC (permalink / raw)
To: Peter �; +Cc: linux-kernel, akpm, torvalds, bo.z.li, manfred
Peter Wächtler wrote:
> + if (q->notify.sigev_notify == SIGEV_THREAD) {
> +
> + err = -ENOSYS;
> + pr_info("mq_*send: SIGEV_THREAD not supported\n");
> + }
In all the SIGEV_THREAD cases I suggest that you expect to be passed a
futex address. Upon completion you increment the value and call
sys_futex with FUTEX_WAKE.
This gives plenty of freedom at userlevel to implement the actual work.
--
--------------. ,-. 444 Castro Street
Ulrich Drepper \ ,-----------------' \ Mountain View, CA 94041 USA
Red Hat `--' drepper at redhat.com `---------------------------
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-03 18:16 ` Manfred Spraul
@ 2003-10-05 12:42 ` Peter Wächtler
2003-10-05 14:39 ` Arjan van de Ven
2003-10-05 15:58 ` Ulrich Drepper
0 siblings, 2 replies; 9+ messages in thread
From: Peter Wächtler @ 2003-10-05 12:42 UTC (permalink / raw)
To: Manfred Spraul; +Cc: linux-kernel, akpm, torvalds, bo.z.li
Am Fre, 2003-10-03 um 20.16 schrieb Manfred Spraul:
> Peter Wächtler wrote:
>
> >+
> >+#if 0
> >+/* don't use fget() to avoid the fput() for speed reason
> >+ * on create/open the refcount is 1 and decremented on close
> >+ * if you have a multithreaded app where one thread closes
> >+ * the mqueue while another thread operates on it -> possible crash
> >+ * the spec says the behavior is undefined
> >+ * separate processes are not affected
> >+ */
> >
> Could you remove that block, instead of just disabling it? Bugs spread
> at an incredible rate...
> The right approach to avoid the cost of the fget is fget_light. But
> that's an optimization, it can be added later.
>
removed and replaced with fget_light/fput_light
> >+
> >+static void local_remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
> >+{
> >+ spin_lock(&q->lock);
> >+ __remove_wait_queue(q, wait);
> >+ spin_unlock(&q->lock);
> >+}
> >
> What's the difference between remove_wait_queue() and
> local_remove_wait_queue?
>
don't disable local_irq , because no irq involved
don't know how expensive a local_irq_save is on SMP
> >+ queue->q_lspid = current->pid;
> >+ queue->q_cbytes += msg_len;
> >+ atomic_add(msg_len, &msg_bytes);
> >
> You are accounting posix messages in the sysv msg variables. Is that
> something we want, or should posix messages have their own accounting
> variables? I don't know what's better, but it should be discussed.
msg_bytes is local to posixqueue.c
if I use the SysV queue code, I use its storage. What do you mean by
accounting? Whatever security_msg_msg_alloc() does?
We have no enforcable user limits on queues (in context of ulimits).
> >+ queue->q_qnum++;
> >+ inode->i_size = queue->q_qnum;
> >+ inode->i_mtime = CURRENT_TIME;
> >+
> >+ if (waitqueue_active(&q->wait_recv)) {
> >+ /* wake up all waiters to serve the highest prio waiter */
> >+ wake_up_interruptible_all(&q->wait_recv);
> >
> Would it be possible to sort the waiters according to their prio?
> wake_all is always bad.
>
yes, I will try that.
> >+ } else {
> >+ /* since there was no synchronously waiting process for message
> >+ * we notify it when the state of queue changed from
> >+ * empty to not empty */
> >+ if (q->notify_pid != 0 && queue->q_qnum == 1) {
> >+ /* TODO: Add support for sigev_notify==SIGEV_THREAD
> >+ * we should create a thread in userspace
> >+ */
> >
> Is that comment still correct? You wrote that it's supported in user space.
>
Userspace translates SIGEV_THREAD to something that uses SIGEV_SIGNAL.
Ulrich made a suggestion to use a futex, but I think of something even
more lightweight. Just put the requestor right to sleep.
No further syscall involved (and avoids a race inbetween sys_mq_notify
and sigsuspend).
--
Peter Wächtler http://homepage.mac.com/pwaechtler/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-03 22:22 ` Jakub Jelinek
@ 2003-10-05 12:42 ` Peter Wächtler
0 siblings, 0 replies; 9+ messages in thread
From: Peter Wächtler @ 2003-10-05 12:42 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: linux-kernel, akpm, torvalds, bo.z.li, manfred
[-- Attachment #1: Type: text/plain, Size: 378 bytes --]
s/__NR_sys_mq/__NR_mq/g in asm-i386/unistd.h
changed comment for SIGEV_THREAD
+ * removed unwanted/ugly macros and replaced with fget_light/fput_light
+ * this only helps single threaded apps
+ * removed wait_on_msg test (list_empty() can't happen here)
+ * removed wake_up_interruptible_all, since waiters are sorted in
+ * static_prio order on wait_queues when added
[-- Attachment #2: Type: text/plain, Size: 31578 bytes --]
diff -X dontdiff -Nur vanilla-2.6.0-test6/arch/i386/kernel/entry.S linux-2.6.0-test6/arch/i386/kernel/entry.S
--- vanilla-2.6.0-test6/arch/i386/kernel/entry.S 2003-08-23 01:53:39.000000000 +0200
+++ linux-2.6.0-test6/arch/i386/kernel/entry.S 2003-09-07 21:48:07.000000000 +0200
@@ -879,5 +879,12 @@
.long sys_tgkill /* 270 */
.long sys_utimes
.long sys_fadvise64_64
+ .long sys_mq_open
+ .long sys_mq_unlink
+ .long sys_mq_timedsend /* 275 */
+ .long sys_mq_timedreceive
+ .long sys_mq_notify
+ .long sys_mq_getattr
+ .long sys_mq_setattr
nr_syscalls=(.-sys_call_table)/4
diff -X dontdiff -Nur vanilla-2.6.0-test6/CREDITS linux-2.6.0-test6/CREDITS
--- vanilla-2.6.0-test6/CREDITS 2003-10-04 16:37:56.000000000 +0200
+++ linux-2.6.0-test6/CREDITS 2003-10-03 14:10:31.000000000 +0200
@@ -3286,6 +3295,14 @@
S: 5554 GG Valkenswaard
S: The Netherlands
+N: Peter Wächtler
+E: pwaechtler@mac.com
+W: http://homepage.mac.com/pwaechtler/
+D: Posix message queues
+S: Fliederstr. 3
+S: 30167 Hannover
+S: Germany
+
N: Peter Shaobo Wang
E: pwang@mmdcorp.com
W: http://www.mmdcorp.com/pw/linux
diff -X dontdiff -Nur vanilla-2.6.0-test6/include/asm-i386/unistd.h linux-2.6.0-test6/include/asm-i386/unistd.h
--- vanilla-2.6.0-test6/include/asm-i386/unistd.h 2003-08-23 01:57:19.000000000 +0200
+++ linux-2.6.0-test6/include/asm-i386/unistd.h 2003-10-04 17:55:41.000000000 +0200
@@ -278,8 +278,15 @@
#define __NR_tgkill 270
#define __NR_utimes 271
#define __NR_fadvise64_64 272
+#define __NR_mq_open 273
+#define __NR_mq_unlink (__NR_mq_open+1)
+#define __NR_mq_timedsend (__NR_mq_open+2)
+#define __NR_mq_timedreceive (__NR_mq_open+3)
+#define __NR_mq_notify (__NR_mq_open+4)
+#define __NR_mq_getattr (__NR_mq_open+5)
+#define __NR_mq_setattr (__NR_mq_open+6)
-#define NR_syscalls 273
+#define NR_syscalls 279
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
diff -X dontdiff -Nur vanilla-2.6.0-test6/include/linux/pxqueue.h linux-2.6.0-test6/include/linux/pxqueue.h
--- vanilla-2.6.0-test6/include/linux/pxqueue.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.0-test6/include/linux/pxqueue.h 2003-09-28 22:30:48.000000000 +0200
@@ -0,0 +1,48 @@
+#ifndef _LINUX_MQUEUE_H
+#define _LINUX_MQUEUE_H
+
+#define MQ_MAXMSG 40 /* max number of messages in each queue */
+#define MQ_MAXSYSSIZE 1048576 /* max size that all m.q. can have together */
+#define MQ_PRIO_MAX 256 /* max priority */
+
+#define MSGFS_MAGIC 0x4D455347
+#define _POSIX_MESSAGE_PASSING 1
+
+typedef int mqd_t; /* message queue descriptor */
+
+struct mq_attr {
+ long mq_flags; /* message queue flags */
+ long mq_maxmsg; /* maximum number of messages */
+ long mq_msgsize; /* maximum message size */
+ long mq_curmsgs; /* number of messages currently queued */
+};
+
+asmlinkage mqd_t sys_mq_open(const char *u_path, int oflag, mode_t mode,
+ struct mq_attr *u_attr);
+asmlinkage int sys_mq_close(mqd_t mqdes);
+asmlinkage int sys_mq_unlink(const char *u_name);
+asmlinkage int sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr,
+ size_t msg_len, unsigned int msg_prio, struct timespec *utime);
+asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr,
+ size_t msg_len, unsigned int *msg_prio, struct timespec *utime);
+asmlinkage int sys_mq_notify(mqd_t mqdes,
+ const struct sigevent *u_notification);
+asmlinkage int sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat);
+asmlinkage int sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat,
+ struct mq_attr *u_omqstat);
+
+#ifdef __KERNEL__
+
+struct mqueue_ds { /* queue */
+ struct mq_attr attr;
+ struct msg_queue queue; /* ipc/msg */
+ spinlock_t lock;
+ wait_queue_head_t wait_recv;
+ wait_queue_head_t wait_send;
+
+ pid_t notify_pid; /* who we have to notify (or 0) */
+ struct sigevent notify; /* notification */
+};
+#endif /* __KERNEL__ */
+
+#endif
diff -X dontdiff -Nur vanilla-2.6.0-test6/init/Kconfig linux-2.6.0-test6/init/Kconfig
--- vanilla-2.6.0-test6/init/Kconfig 2003-10-04 16:38:22.000000000 +0200
+++ linux-2.6.0-test6/init/Kconfig 2003-10-03 17:08:01.000000000 +0200
@@ -91,6 +91,14 @@
section 6.4 of the Linux Programmer's Guide, available from
<http://www.tldp.org/docs.html#guide>.
+config POSIXMSG
+ bool "POSIX message queues"
+ ---help---
+ This gives you POSIX compliant interfaces for message queues.
+ For userspace stub look at
+ <http://homepage.mac.com/pwaechtler/mqueue.html>.
+
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
help
diff -X dontdiff -Nur vanilla-2.6.0-test6/ipc/Makefile linux-2.6.0-test6/ipc/Makefile
--- vanilla-2.6.0-test6/ipc/Makefile 2003-10-05 14:31:50.000000000 +0200
+++ linux-2.6.0-test6/ipc/Makefile 2003-10-05 14:30:58.000000000 +0200
@@ -5,3 +5,4 @@
obj-y := util.o
obj-$(CONFIG_SYSVIPC) += msg.o sem.o shm.o
+obj-$(CONFIG_POSIXMSG) += posixmsg.o
diff -X dontdiff -Nur vanilla-2.6.0-test6/ipc/posixmsg.c linux-2.6.0-test6/ipc/posixmsg.c
--- vanilla-2.6.0-test6/ipc/posixmsg.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.0-test6/ipc/posixmsg.c 2003-10-05 14:16:43.000000000 +0200
@@ -0,0 +1,945 @@
+/*
+ * linux/ipc/posixmsg.c
+ *
+ * Copyright 2002,2003 Peter Wächtler <pwaechtler@mac.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * root can mount the filesystem to see the names of the currently
+ * used mqueues. The i_size shows the number of msgs in the queue.
+ * The only shell operation on the "fifos" is an unlink() via rm(1)
+ * The queues are accessed through syscalls, poll is supported
+ *
+ * CHANGES:
+ * put MSGFS_MAGIC into mqueue.h
+ * update inode->m_time on send (yes, no posix semantics on this)
+ * update inode->a_time on recv
+ * translate absolute timeouts in kernel to prevent too long sleeps
+ * corrected spinlocks + [gs]etattr/mq_flags
+ * replaced spinlocks with inode->i_sem
+ * (don't know yet what's broken on SMP with spinlocks)
+ * use spinlocks, where was the problem?
+ * open coded the wait_event_interruptible_timeouts to avoid races
+ * fixed accmode tests
+ * fixed freespace check (meant a per queue limit?)
+ * fixed msg_len check in sys_mq_timedreceive
+ * defer check for negative timeout on mq_timedreceive
+ * don't put_user in timedreceive if msg_prio is NULL
+ * add macros to choose fget/fput or not (read comment at get_filp)
+ * removed unwanted/ugly macros and replaced with fget_light/fput_light
+ * this only helps single threaded apps
+ * removed wait_on_msg test (list_empty() can't happen here)
+ * removed wake_up_interruptible_all, since waiters are sorted in
+ * static_prio order on wait_queues when added
+ *
+ * * passes the posixtestsuite 1.2
+ *
+ * TODO:
+ * check for more sysv msg limits (or add some new, e.g. MQ_PRIO_MAX)?
+ * rethink SIGEV_THREAD support (can't block immediatly for error handling)
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h> /* PAGE_CACHE_SIZE */
+#include <linux/poll.h>
+#include <linux/security.h>
+#include <linux/pxqueue.h>
+#include <linux/msg.h>
+#include <asm/uaccess.h>
+#include "util.h"
+
+
+extern int msg_ctlmnb; /* default max size of a message queue (all msgs sum up) */
+extern int msg_ctlmni; /* max # of msg queue identifiers */
+extern int msg_ctlmax; /* max size of one message (bytes) */
+
+static atomic_t msg_bytes = ATOMIC_INIT(0);
+static struct vfsmount *msg_mnt;
+
+#define get_mqueue(filp)\
+ ((filp) ? filp->f_dentry->d_inode->u.generic_ip: filp)
+
+static struct msg_queue *
+init_queue(struct msg_queue *queue)
+{
+ int retval;
+
+ queue->q_perm.mode = 0;
+ queue->q_perm.key = 0;
+ queue->q_perm.security = NULL;
+ retval = security_msg_queue_alloc(queue);
+ if (retval)
+ return ERR_PTR(retval);;
+
+ queue->q_stime = queue->q_rtime = 0;
+ queue->q_qbytes = queue->q_cbytes = queue->q_qnum = 0;
+ queue->q_lspid = queue->q_lrpid = 0;
+ INIT_LIST_HEAD(&queue->q_messages);
+ INIT_LIST_HEAD(&queue->q_receivers); /* unused */
+ INIT_LIST_HEAD(&queue->q_senders); /* unused */
+
+ return queue;
+}
+
+static int
+mqueue_unlink (struct inode *dir, struct dentry *dentry)
+{
+ dentry->d_inode->i_nlink--;
+ dput(dentry);
+ return 0;
+}
+
+/* cleans up after close() or exit() if the refount drops to zero */
+static int
+mqueue_close(struct inode *inode, struct file *filp)
+{
+ struct mqueue_ds *q = inode->u.generic_ip;
+
+ if (!q || !filp)
+ return -EBADFD;
+
+ spin_lock(&q->lock);
+ /* remove possible notification;
+ * sys_getpid() returns the tgid if multithreaded */
+ if (q->notify_pid == current->pid) {
+ q->notify_pid = 0;
+ q->notify.sigev_signo = 0;
+ q->notify.sigev_notify = SIGEV_NONE;
+ }
+ spin_unlock(&q->lock);
+ return 0;
+}
+
+static unsigned int
+mqueue_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ struct mqueue_ds *q = get_mqueue(filp);
+ int ret = 0;
+
+ poll_wait(filp, &q->wait_recv, wait);
+ poll_wait(filp, &q->wait_send, wait);
+
+ if (q->queue.q_qnum)
+ ret = POLLIN | POLLRDNORM;
+
+ if (q->queue.q_qnum < (unsigned long)q->attr.mq_maxmsg)
+ ret |= POLLOUT | POLLWRNORM;
+
+ return ret;
+}
+
+static struct file_operations msg_fops = {
+ .llseek = no_llseek,
+ .release = mqueue_close,
+ .poll = mqueue_poll,
+};
+static struct inode_operations msg_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .unlink = mqueue_unlink,
+};
+
+static struct inode *
+get_msg_inode(struct super_block *sb, mode_t mode)
+{
+ struct mqueue_ds *q;
+ struct inode *inode = new_inode(sb);
+
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_blksize = 1024;
+ switch (mode & S_IFMT) {
+ default:
+ case S_IFIFO:
+ case S_IFREG:
+ inode->i_fop = &msg_fops;
+ if ((q = kmalloc(sizeof (struct mqueue_ds), GFP_KERNEL))) {
+ if (!init_queue(&q->queue)) {
+ iput(inode);
+ inode = ERR_PTR(-EACCES);
+ }
+ inode->u.generic_ip = q;
+ spin_lock_init(&q->lock);
+ } else {
+ iput(inode);
+ inode = ERR_PTR(-ENOSPC);
+ }
+ break;
+ case S_IFDIR:
+ inode->i_op = &msg_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inode->i_nlink++;
+ break;
+ case S_IFLNK:
+ break;
+ }
+ } else
+ inode = ERR_PTR(-ENOSPC);
+
+ return inode;
+}
+
+static inline int
+freespace(struct mqueue_ds *q, size_t msg_len)
+{
+ return (int) ( q->queue.q_qnum < (unsigned long)q->attr.mq_maxmsg);
+}
+
+/* removes a queue - called from vfs when an inode gets deleted */
+static void
+mqueue_release(struct inode *inode)
+{
+ struct mqueue_ds *q = inode->u.generic_ip;
+ struct msg_queue *queue;
+ struct list_head *tmp;
+
+ if (q){
+ queue = &q->queue;
+ tmp = queue->q_messages.next;
+ while (tmp != &queue->q_messages) {
+ struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
+ tmp = tmp->next;
+ free_msg(msg);
+ }
+ atomic_sub(queue->q_cbytes, &msg_bytes);
+ security_msg_queue_free(queue);
+ inode->u.generic_ip = NULL;
+ kfree(q);
+ }
+ clear_inode(inode);
+}
+
+static int
+create_queue(struct dentry *dir, struct qstr *qname,
+ int oflag, mode_t mode, struct mq_attr *u_attr)
+{
+ int ret, fd;
+ struct file *filp;
+ struct mqueue_ds *q;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ inode = get_msg_inode(msg_mnt->mnt_sb, S_IFIFO | (mode & S_IRWXUGO));
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto out_ret;
+ }
+ q = inode->u.generic_ip;
+
+ if (u_attr != NULL) {
+ if (copy_from_user(&q->attr, u_attr, sizeof (struct mq_attr))) {
+ ret = -EFAULT;
+ goto out_inode;
+ }
+ if (q->attr.mq_maxmsg <= 0
+ || q->attr.mq_msgsize <= 0
+ || q->attr.mq_maxmsg > MQ_MAXMSG
+ || q->attr.mq_msgsize > msg_ctlmax) {
+ ret = -EINVAL;
+ goto out_inode;
+ }
+ } else { /* implementation defined */
+ q->attr.mq_maxmsg = MQ_MAXMSG;
+ q->attr.mq_msgsize = 1024 /*msg_ctlmax */ ;
+ }
+ q->attr.mq_flags = oflag & O_ACCMODE;
+ q->notify_pid = 0;
+ q->notify.sigev_signo = 0;
+ q->notify.sigev_notify = SIGEV_NONE;
+ init_waitqueue_head(&q->wait_send);
+ init_waitqueue_head(&q->wait_recv);
+
+ ret = -ENFILE;
+ if ((fd = get_unused_fd()) < 0)
+ goto out_inode;
+ if (!(filp = get_empty_filp()))
+ goto out_fd;
+
+ qname->hash = full_name_hash(qname->name, qname->len);
+ dentry = d_alloc(dir, qname);
+ if (!dentry) {
+ ret = -ENOMEM;
+ goto out_filp;
+ }
+ d_add(dentry, inode);
+ ret = get_write_access(inode);
+ if (ret)
+ goto out_filp;
+
+ filp->f_vfsmnt = mntget(msg_mnt);
+ filp->f_dentry = dget(dentry); /* leave it active */
+ filp->f_op = &msg_fops;
+ filp->f_mode = (q->attr.mq_flags +1 ) & O_ACCMODE;
+ filp->f_flags = oflag;
+
+ /* Now we map fd to filp, so userspace can access it */
+ fd_install(fd, filp);
+ ret = fd;
+ goto out_ret;
+
+out_filp:
+ put_filp(filp);
+out_fd:
+ put_unused_fd(fd);
+out_inode:
+ iput(inode);
+out_ret:
+ return ret;
+}
+
+/**
+ * sys_mq_open - opens a message queue associated with @u_name
+ * @mqdes: descriptor of mqueue
+ * @oflag: flags like O_CREAT, O_EXCL, O_RDWR
+ * @mode: when O_CREAT is specified, the permission bits
+ * @u_attr: pointer to the attributes, like max msgsize, when creating
+ *
+ * returns a descriptor to the opened queue or negative value on error
+ */
+asmlinkage mqd_t
+sys_mq_open(const char *u_name, int oflag, mode_t mode, struct mq_attr * u_attr)
+{
+ struct file *filp;
+ struct dentry *dentry;
+ struct qstr this;
+ static int oflag2acc[O_ACCMODE] =
+ { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE };
+ int fd, ret;
+
+ if (IS_ERR(this.name = getname(u_name)))
+ return PTR_ERR(this.name);
+ this.len = strlen(this.name);
+ down(&msg_mnt->mnt_root->d_inode->i_sem);
+ dentry = lookup_one_len(this.name, msg_mnt->mnt_root, this.len);
+
+ if (IS_ERR(dentry)) {
+ ret = PTR_ERR(dentry);
+ goto out_ret;
+ }
+ if (oflag & O_CREAT) {
+ if (dentry->d_inode) {
+ /* entry exists already */
+ if (oflag & O_EXCL) {
+ ret = -EEXIST;
+ } else {
+ goto open_existing;
+ }
+ goto out_dput;
+ } else {
+ ret = create_queue(msg_mnt->mnt_root, &this,
+ oflag, mode, u_attr);
+ }
+
+ } else { /* O_CREAT */
+ if (!dentry->d_inode) {
+ ret = -ENOENT;
+ } else {
+open_existing:
+ if (permission
+ (dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
+ ret = -EACCES;
+ } else {
+ fd = get_unused_fd();
+ if (fd >= 0) {
+ mntget(msg_mnt);
+ filp =
+ dentry_open(dentry, msg_mnt, oflag);
+ filp->f_op = &msg_fops;
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ put_unused_fd(fd);
+ goto out_dput;
+ }
+ dget(dentry);
+ fd_install(fd, filp);
+ }
+ ret = fd;
+ }
+ }
+ }
+out_dput:
+ dput(dentry);
+out_ret:
+ up(&msg_mnt->mnt_root->d_inode->i_sem);
+ putname(this.name);
+
+ return ret;
+}
+
+/**
+ * sys_mq_unlink - removes a message queue from the namespace
+ *
+ * @u_name: pointer to the name
+ */
+asmlinkage int
+sys_mq_unlink(const char *u_name)
+{
+ int err=0;
+ struct dentry *dentry;
+ char *name = getname(u_name);
+
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
+ down(&msg_mnt->mnt_root->d_inode->i_sem);
+ dentry = lookup_one_len(name, msg_mnt->mnt_root, strlen(name));
+ if (IS_ERR(dentry)){
+ err= PTR_ERR(dentry);
+ goto out_unlock;
+ }
+ if (!dentry->d_inode){
+ err= -ENOENT;
+ goto out_unlock;
+ }
+ err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+
+out_unlock:
+ up(&msg_mnt->mnt_root->d_inode->i_sem);
+ putname(name);
+ return err;
+}
+
+static inline long get_timeout( struct timespec *abs, int *err)
+{
+ struct timespec t;
+ *err = 0;
+ if (abs->tv_nsec >= 1000000000L || abs->tv_nsec < 0 || abs->tv_sec < 0)
+ return *err = -EINVAL;
+
+ t=current_kernel_time();
+ if (t.tv_sec > abs->tv_sec ||
+ (t.tv_sec == abs->tv_sec && t.tv_nsec > abs->tv_nsec))
+ return -ETIMEDOUT;
+
+ t.tv_sec = abs->tv_sec - t.tv_sec;
+ t.tv_nsec = abs->tv_nsec - t.tv_nsec;
+ if (t.tv_nsec < 0){
+ t.tv_sec--;
+ t.tv_nsec+= 1000000000;
+ }
+ return timespec_to_jiffies(&t) + 1;
+}
+
+static void local_add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ struct list_head *p;
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock(&q->lock);
+
+ /* enqueue waiters in prio order */
+ p = q->task_list.next; /* used as flag if msg was queued */
+ if ( !list_empty(&q->task_list)) {
+
+ list_for_each(p, &q->task_list) {
+ wait_queue_t *wtask =
+ list_entry(p, wait_queue_t, task_list);
+ if (wtask->task->static_prio < current->static_prio) {
+ list_add_tail(&wait->task_list, p);
+ p = NULL;
+ break;
+ }
+ }
+ }
+ if (p) /* ok, put it at the end */
+ list_add_tail(&wait->task_list, &q->task_list);
+
+ spin_unlock(&q->lock);
+}
+
+static void local_remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
+{
+ spin_lock(&q->lock);
+ __remove_wait_queue(q, wait);
+ spin_unlock(&q->lock);
+}
+
+/**
+ * sys_mq_timedsend - send a message to the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @msg_ptr: pointer to buffer holding the message
+ * @msg_len: length of the message
+ * @msg_prio: the priority of the message
+ * @utime: if !NULL the function will only block for specified time
+ */
+asmlinkage int
+sys_mq_timedsend(mqd_t mqdes,
+ const char *msg_ptr, size_t msg_len,
+ unsigned int msg_prio, struct timespec *utime)
+{
+ struct msg_msg *msg;
+ struct list_head *p;
+ struct msg_queue *queue;
+ int err;
+ long timeout;
+ struct timespec ts;
+ struct mqueue_ds *q;
+ int fput_needed;
+ struct file *filp = fget_light(mqdes, &fput_needed);
+ struct inode *inode;
+
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+ if ( !(filp->f_mode & FMODE_WRITE))
+ { err = -EBADF; goto out_fput;};
+ if ((unsigned int) msg_prio >= (unsigned int) MQ_PRIO_MAX)
+ { err = -EINVAL; goto out_fput;}
+ if ((long)msg_len > q->attr.mq_msgsize)
+ { err = -EMSGSIZE; goto out_fput;}
+
+ queue = &q->queue;
+ inode=filp->f_dentry->d_inode;
+ spin_lock(&q->lock);
+ if ((filp->f_flags & O_NONBLOCK) && !freespace(q, msg_len)){
+ spin_unlock(&q->lock);
+ { err = -EAGAIN; goto out_fput;}
+ }
+ spin_unlock(&q->lock);
+ /* check if this message will exceed overall limit for messages */
+ if (atomic_read(&msg_bytes) + msg_len > MQ_MAXSYSSIZE)
+ { err = -ENOMEM; goto out_fput;}
+
+ if (utime) {
+ if (copy_from_user(&ts, utime, sizeof (ts)))
+ { err = -EFAULT; goto out_fput;}
+ timeout = get_timeout(&ts, &err);
+ if ( err == -EINVAL )
+ goto out_fput;
+ /* don't timeout yet, check if queue is empty */
+ } else
+ timeout = 0L;
+
+ msg = get_msg((char *) msg_ptr, msg_len);
+ if (IS_ERR(msg))
+ { err = PTR_ERR(msg); goto out_fput;}
+
+ msg->m_type = msg_prio;
+ msg->m_ts = msg_len;
+
+ if (!timeout)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+
+ /* open coded wait_event_interruptible_timeout()
+ * to avoid race on freespace()
+ */
+ spin_lock(&q->lock);
+ if (!freespace(q, msg_len) ){
+ if ( unlikely(timeout < 0) ){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ wait_queue_t __wait;
+ init_waitqueue_entry(&__wait, current);
+
+ local_add_wait_queue(&q->wait_send, &__wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( freespace(q, msg_len))
+ break;
+ if (!signal_pending(current)) {
+ spin_unlock(&q->lock);
+ timeout = schedule_timeout(timeout);
+ spin_lock(&q->lock);
+ if (!timeout)
+ break;
+ continue;
+ }
+ timeout = -ERESTARTSYS;
+ break;
+ }
+ current->state = TASK_RUNNING;
+ local_remove_wait_queue(&q->wait_send, &__wait);
+
+ if (unlikely(timeout == -ERESTARTSYS)){
+ err = -EINTR;
+ goto out_unlock;
+ }
+ if (unlikely(timeout == 0)){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ }
+ err = 0;
+ /* enqueue message in prio order */
+ p = queue->q_messages.next; /* used as flag if msg was queued */
+ if (msg_prio > 0 && !list_empty(&queue->q_messages)) {
+
+ list_for_each(p, &queue->q_messages) {
+ struct msg_msg *tmp =
+ list_entry(p, struct msg_msg, m_list);
+ if (tmp->m_type < (long)msg_prio) {
+ list_add_tail(&msg->m_list, p);
+ p = NULL;
+ break;
+ }
+ }
+ }
+ if (p) /* ok, put it at the end */
+ list_add_tail(&msg->m_list, &queue->q_messages);
+
+ queue->q_lspid = current->pid;
+ queue->q_cbytes += msg_len;
+ atomic_add(msg_len, &msg_bytes);
+ queue->q_qnum++;
+ inode->i_size = queue->q_qnum;
+ inode->i_mtime = CURRENT_TIME;
+
+ if (waitqueue_active(&q->wait_recv)) {
+ /* wake up all waiters to serve the highest prio waiter */
+ wake_up_interruptible(&q->wait_recv);
+ } else {
+ /* since there was no synchronously waiting process for message
+ * we notify it when the state of queue changed from
+ * empty to not empty */
+ if (q->notify_pid != 0 && queue->q_qnum == 1) {
+ /* userspace translates SIGEV_THREAD into SIGEV_SIGNAL
+ * for now, perhaps we block the task on a passed in futex
+ * in the future
+ */
+ if (q->notify.sigev_notify == SIGEV_THREAD) {
+
+ err = -ENOSYS;
+ pr_info("mq_*send: SIGEV_THREAD not supported\n");
+ }
+ /* sends signal */
+ if (q->notify.sigev_notify == SIGEV_SIGNAL) {
+ struct siginfo sig_i;
+ sig_i.si_signo = q->notify.sigev_signo;
+ sig_i.si_errno = 0;
+ sig_i.si_code = SI_MESGQ;
+ sig_i.si_pid = current->pid;
+ sig_i.si_uid = current->uid;
+ kill_proc_info(q->notify.sigev_signo,
+ &sig_i, q->notify_pid);
+ }
+ /* after notification unregister process */
+ q->notify_pid = 0;
+ }
+ }
+out_unlock:
+ spin_unlock(&q->lock);
+out_fput:
+ fput_light(filp, fput_needed);
+
+ return err;
+}
+
+/**
+ * sys_mq_timedreceive - receive a message from the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @msg_ptr: pointer to buffer to hold the message
+ * @msg_len: length of the userspace buffer
+ * @msg_prio: will hold the priority if a message was received
+ * @utime: if !NULL the function will only block for specified time
+ */
+asmlinkage ssize_t
+sys_mq_timedreceive(mqd_t mqdes,
+ char *msg_ptr, size_t msg_len,
+ unsigned int *msg_prio, struct timespec * utime)
+{
+ struct msg_queue *queue;
+ struct msg_msg *msg;
+ int err;
+ long timeout;
+ struct timespec ts;
+ struct mqueue_ds *q;
+ int fput_needed;
+ struct file *filp = fget_light(mqdes, &fput_needed);
+ struct inode *inode;
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+ if ((long)msg_len < q->attr.mq_msgsize)
+ { err = -EMSGSIZE; goto out_fput;}
+
+ if ( !(filp->f_mode & FMODE_READ))
+ { err = -EBADF; goto out_fput;}
+
+ queue = &q->queue;
+ if ((filp->f_flags & O_NONBLOCK) && queue->q_qnum == 0)
+ { err = -EAGAIN; goto out_fput;}
+
+ if (utime) {
+ if (copy_from_user(&ts, utime, sizeof (ts)))
+ { err = -EFAULT; goto out_fput;}
+ timeout = get_timeout(&ts, &err);
+ if ( err == -EINVAL )
+ goto out_fput;
+ /* we do not check yet for the timeout */
+ } else
+ timeout = 0L;
+ if (!timeout)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+
+ spin_lock(&q->lock);
+
+ if (queue->q_qnum == 0 ){
+ if (unlikely(timeout < 0)){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ wait_queue_t __wait;
+ init_waitqueue_entry(&__wait, current);
+
+ local_add_wait_queue(&q->wait_recv, &__wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( queue->q_qnum > 0)
+ break;
+ if (!signal_pending(current)) {
+ spin_unlock(&q->lock);
+ timeout = schedule_timeout(timeout);
+ spin_lock(&q->lock);
+ if (!timeout)
+ break;
+ continue;
+ }
+ timeout = -ERESTARTSYS;
+ break;
+ }
+ current->state = TASK_RUNNING;
+ local_remove_wait_queue(&q->wait_recv, &__wait);
+
+ if (unlikely(timeout == -ERESTARTSYS)){
+ err = -EINTR;
+ goto out_unlock;
+ }
+ if (unlikely(timeout == 0)){
+ err = -ETIMEDOUT;
+ goto out_unlock;
+ }
+ }
+ msg=list_entry(queue->q_messages.next, struct msg_msg, m_list);
+ list_del(&msg->m_list);
+ queue->q_lrpid = current->pid;
+ queue->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts, &msg_bytes);
+ queue->q_qnum--;
+ inode=filp->f_dentry->d_inode;
+ inode->i_size = queue->q_qnum;
+ inode->i_atime = CURRENT_TIME;
+ /* msg is removed from list, we're already safe */
+ spin_unlock(&q->lock);
+
+ wake_up_interruptible(&q->wait_send);
+
+ msg_len = (msg_len > (size_t)msg->m_ts) ? (size_t)msg->m_ts : msg_len;
+
+ if ( put_msg(msg_ptr, msg, msg_len) ||
+ ( msg_prio && put_user(msg->m_type, msg_prio))) {
+ msg_len = -EFAULT; /* hmh, now the msg is lost */
+ }
+ free_msg(msg);
+
+ fput_light(filp, fput_needed);
+ return msg_len;
+
+out_unlock:
+ spin_unlock(&q->lock);
+out_fput:
+ fput_light(filp, fput_needed);
+ return err;
+}
+
+/**
+ * sys_mq_notify - set or remove a notification on the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @u_notification: pointer to struct sigevent
+ *
+ */
+asmlinkage int
+sys_mq_notify(mqd_t mqdes, const struct sigevent *u_notification)
+{
+ struct sigevent notify;
+ int fput_needed;
+ struct file *filp = fget_light(mqdes, &fput_needed);
+ struct mqueue_ds *q;
+ int err = 0;
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+ if (u_notification != NULL)
+ if (copy_from_user
+ (¬ify, u_notification, sizeof (struct sigevent)))
+ { err = -EFAULT; goto out_fput;}
+
+ down(&filp->f_dentry->d_inode->i_sem);
+ if (q->notify_pid == current->pid
+ && (u_notification == NULL || notify.sigev_notify == SIGEV_NONE)) {
+ q->notify_pid = 0; /* remove notification */
+ q->notify.sigev_signo = 0;
+ q->notify.sigev_notify = SIGEV_NONE;
+ } else if (q->notify_pid > 0) {
+ err = -EBUSY;
+ } else if (u_notification != NULL) {
+ if (notify.sigev_notify == SIGEV_SIGNAL) {
+ /* add notification */
+ q->notify_pid = current->pid;
+ q->notify.sigev_signo = notify.sigev_signo;
+ q->notify.sigev_notify = notify.sigev_notify;
+ } else if (notify.sigev_notify == SIGEV_THREAD) {
+ err = -ENOSYS;
+ pr_info("mq_*send: SIGEV_THREAD not supported yet\n");
+ }
+ }
+ up(&filp->f_dentry->d_inode->i_sem);
+out_fput:
+ fput_light(filp, fput_needed);
+
+ return err;
+}
+
+static inline void
+fill_flags(struct mqueue_ds *q, struct file *filp)
+{
+ spin_lock(&q->lock);
+ q->attr.mq_flags = (filp->f_mode -1) & O_ACCMODE;
+ q->attr.mq_flags |= (filp->f_flags) & O_NONBLOCK;
+ q->attr.mq_curmsgs = q->queue.q_qnum;
+ spin_unlock(&q->lock);
+}
+
+/**
+ * sys_mq_getattr - get the attributes of the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @u_mqstat: pointer to struct holding the new values
+ *
+ */
+asmlinkage int
+sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat)
+{
+ int err = 0;
+ struct mqueue_ds *q;
+ int fput_needed;
+ struct file *filp = fget_light(mqdes, &fput_needed);
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+
+ fill_flags(q,filp);
+ if (copy_to_user(u_mqstat, &q->attr, sizeof (struct mq_attr)))
+ err = -EFAULT;
+ fput_light(filp, fput_needed);
+
+ return err;
+}
+
+/**
+ * sys_mq_setattr - set the attributes of the queue associated
+ * with the descriptor mqdes
+ * @mqdes: descriptor of mqueue
+ * @u_mqstat: pointer to struct holding the new values
+ * @u_omqstat: pointer to store the original attributes (if !NULL)
+ *
+ */
+asmlinkage int
+sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat,
+ struct mq_attr *u_omqstat)
+{
+ struct mq_attr mqstat;
+ struct mqueue_ds *q;
+ int fput_needed;
+ struct file *filp = fget_light(mqdes, &fput_needed);
+ int err = 0;
+
+ if (!(q = get_mqueue(filp)))
+ return -EBADF;
+
+ if (u_omqstat != NULL) {
+ fill_flags(q,filp);
+ if (copy_to_user(u_omqstat, &q->attr, sizeof (struct mq_attr)))
+ { err = -EFAULT; goto out_fput;}
+ }
+ if (copy_from_user(&mqstat, u_mqstat, sizeof (struct mq_attr)))
+ { err = -EFAULT; goto out_fput;}
+ if (mqstat.mq_flags & O_NONBLOCK)
+ filp->f_flags |= O_NONBLOCK;
+ else
+ filp->f_flags &= ~O_NONBLOCK;
+
+out_fput:
+ fput_light(filp, fput_needed);
+
+ return err;
+}
+
+static struct super_operations msg_s_ops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+ .delete_inode = mqueue_release,
+};
+
+static int
+msg_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *root;
+ struct dentry *root_dentry;
+ static const struct qstr this={ .name="msg:", .len=4, .hash=0 };
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = MSGFS_MAGIC;
+ sb->s_op = &msg_s_ops;
+
+ root = get_msg_inode(sb, S_IFDIR | S_IRWXUGO | S_ISVTX);
+ if (!root)
+ goto out;
+ root_dentry = d_alloc(NULL, &this);
+ if (!root_dentry)
+ goto out_iput;
+ sb->s_root = root_dentry;
+ sb->s_root->d_sb = sb;
+ sb->s_root->d_parent = sb->s_root;
+ d_instantiate(sb->s_root, root);
+ return 0;
+
+out_iput:
+ iput(root);
+out:
+ return -ENOMEM;
+}
+
+static struct super_block *
+msgfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return get_sb_single(fs_type, flags, data, msg_fill_super);
+}
+
+static struct file_system_type msg_fs_type = {
+ .name = "msgfs",
+ .get_sb = msgfs_get_sb,
+ .kill_sb = kill_anon_super,
+};
+
+static int __init
+mqueue_init(void)
+{
+ int err = register_filesystem(&msg_fs_type);
+
+ if (!err){
+ if (IS_ERR(msg_mnt = kern_mount(&msg_fs_type))){
+ unregister_filesystem(&msg_fs_type);
+ err= PTR_ERR(msg_mnt);
+ } else
+ err=0;
+ }
+
+ return err;
+}
+
+__initcall(mqueue_init);
diff -X dontdiff -Nur vanilla-2.6.0-test6/ipc/util.c linux-2.6.0-test6/ipc/util.c
--- vanilla-2.6.0-test6/ipc/util.c 2003-10-05 14:32:25.000000000 +0200
+++ linux-2.6.0-test6/ipc/util.c 2003-10-03 14:58:35.000000000 +0200
@@ -24,6 +24,7 @@
#include <linux/security.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/pxqueue.h>
#if defined(CONFIG_SYSVIPC)
@@ -731,3 +732,50 @@
}
#endif /* CONFIG_SYSVIPC */
+
+#if !defined(CONFIG_POSIXMSG)
+/*
+ * Dummy functions when POSIXMSG isn't configured
+ */
+
+asmlinkage mqd_t sys_mq_open(const char *u_path, int oflag, mode_t mode,
+ struct mq_attr *u_attr)
+{
+ return (mqd_t) -ENOSYS;
+}
+
+asmlinkage int sys_mq_unlink(const char *u_name)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr,
+ size_t msg_len, unsigned int msg_prio, struct timespec *utime)
+{
+ return -ENOSYS;
+}
+
+asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr,
+ size_t msg_len, unsigned int *msg_prio, struct timespec *utime)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_notify(mqd_t mqdes,
+ const struct sigevent *u_notification)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat,
+ struct mq_attr *u_omqstat)
+{
+ return -ENOSYS;
+}
+
+#endif /* CONFIG_POSIXMSG */
diff -X dontdiff -Nur vanilla-2.6.0-test6/MAINTAINERS linux-2.6.0-test6/MAINTAINERS
--- vanilla-2.6.0-test6/MAINTAINERS 2003-10-04 16:37:57.000000000 +0200
+++ linux-2.6.0-test6/MAINTAINERS 2003-10-03 13:40:59.000000000 +0200
@@ -1558,6 +1558,11 @@
M: ambx1@neo.rr.com
S: Maintained
+POSIXMSG message queues
+P: Peter Wächtler
+M: pwaechtler@mac.com
+S: Maintained
+
PPP PROTOCOL DRIVERS AND COMPRESSORS
P: Paul Mackerras
M: paulus@samba.org
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-05 12:42 ` Peter Wächtler
@ 2003-10-05 14:39 ` Arjan van de Ven
2003-10-05 15:58 ` Ulrich Drepper
1 sibling, 0 replies; 9+ messages in thread
From: Arjan van de Ven @ 2003-10-05 14:39 UTC (permalink / raw)
To: Peter Wächtler; +Cc: Manfred Spraul, linux-kernel, akpm, torvalds, bo.z.li
[-- Attachment #1: Type: text/plain, Size: 299 bytes --]
On Sun, 2003-10-05 at 14:42, Peter Wächtler wrote:
> > What's the difference between remove_wait_queue() and
> > local_remove_wait_queue?
> >
>
> don't disable local_irq , because no irq involved
> don't know how expensive a local_irq_save is on SMP
like 5 to 7 cycles typically
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [2/2] posix message queues
2003-10-05 12:42 ` Peter Wächtler
2003-10-05 14:39 ` Arjan van de Ven
@ 2003-10-05 15:58 ` Ulrich Drepper
1 sibling, 0 replies; 9+ messages in thread
From: Ulrich Drepper @ 2003-10-05 15:58 UTC (permalink / raw)
To: Peter Wächtler; +Cc: Manfred Spraul, linux-kernel, akpm, torvalds, bo.z.li
Peter Wächtler wrote:
> Userspace translates SIGEV_THREAD to something that uses SIGEV_SIGNAL.
> Ulrich made a suggestion to use a futex, but I think of something even
> more lightweight. Just put the requestor right to sleep.
This has the disadvantage that it codifies the requirement of creating
the thread for the userlevel signaling ahead of time. While this is a
valid implementation it is not the best (resource-wise) and doesn't
allow for optimizations. Using a futex will allow to implement a scheme
where one single thread does the waiting for possibly many requests.
Imagine the benefits in case there are dozens of outstanding requests.
The additional system exit+entry isn't that expensive to justify
removing the flexibility.
--
--------------. ,-. 444 Castro Street
Ulrich Drepper \ ,-----------------' \ Mountain View, CA 94041 USA
Red Hat `--' drepper at redhat.com `---------------------------
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] 2/2 POSIX message queues
@ 2003-11-25 11:42 Michal Wronski
0 siblings, 0 replies; 9+ messages in thread
From: Michal Wronski @ 2003-11-25 11:42 UTC (permalink / raw)
To: linux-kernel; +Cc: Krzysztof Benedyczak
diff -urN 2.6.0-test10-orig_2/arch/i386/kernel/entry.S 2.6.0-test10-patched_2/arch/i386/kernel/entry.S
--- 2.6.0-test10-orig_2/arch/i386/kernel/entry.S 2003-11-24 16:28:52.000000000 +0100
+++ 2.6.0-test10-patched_2/arch/i386/kernel/entry.S 2003-11-24 16:46:44.000000000 +0100
@@ -882,5 +882,12 @@
.long sys_utimes
.long sys_fadvise64_64
.long sys_ni_syscall /* sys_vserver */
+ .long sys_mq_open
+ .long sys_mq_unlink /* 275 */
+ .long sys_mq_timedsend
+ .long sys_mq_timedreceive
+ .long sys_mq_notify
+ .long sys_mq_getattr
+ .long sys_mq_setattr
syscall_table_size=(.-sys_call_table)
diff -urN 2.6.0-test10-orig_2/CREDITS 2.6.0-test10-patched_2/CREDITS
--- 2.6.0-test10-orig_2/CREDITS 2003-11-07 17:07:13.000000000 +0100
+++ 2.6.0-test10-patched_2/CREDITS 2003-11-21 16:48:21.000000000 +0100
@@ -289,6 +289,15 @@
S: Terni 05100
S: Italy
+N: Krzysztof Benedyczak
+E: golbi@mat.uni.torun.pl
+W: http://www.mat.uni.torun.pl/~golbi
+D: POSIX message queues fs (with M. Wronski)
+S: ul. Podmiejska 52
+S: Radunica
+S: 83-000 Pruszcz Gdanski
+S: Poland
+
N: Randolph Bentson
E: bentson@grieg.seaslug.org
W: http://www.aa.net/~bentson/
@@ -3475,6 +3484,14 @@
S: Beaverton, OR 97005
S: USA
+N: Michal Wronski
+E: wrona@mat.uni.torun.pl
+W: http://www.mat.uni.torun.pl/~wrona
+D: POSIX message queues fs (with K. Benedyczak)
+S: ul. Teczowa 23/12
+S: 80-680 Gdansk-Sobieszewo
+S: Poland
+
N: Frank Xia
E: qx@math.columbia.edu
D: Xiafs filesystem [defunct]
diff -urN 2.6.0-test10-orig_2/Documentation/filesystems/proc.txt 2.6.0-test10-patched_2/Documentation/filesystems/proc.txt
--- 2.6.0-test10-orig_2/Documentation/filesystems/proc.txt 2003-11-07 17:07:13.000000000 +0100
+++ 2.6.0-test10-patched_2/Documentation/filesystems/proc.txt 2003-11-24 17:37:30.000000000 +0100
@@ -38,6 +38,7 @@
2.8 /proc/sys/net/ipv4 - IPV4 settings
2.9 Appletalk
2.10 IPX
+ 2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem
------------------------------------------------------------------------------
Preface
@@ -1805,6 +1806,30 @@
gives the destination network, the router node (or Directly) and the network
address of the router (or Connected) for internal networks.
+2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem
+----------------------------------------------------------
+
+The "mqueue" filesystem provides the necessary kernel features to enable the
+creation of a user space library that implements the POSIX message queues
+API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
+Interfaces specification.)
+
+The "mqueue" filesystem contains values for determining/setting the amount of
+resources used by the file system.
+
+/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
+maximum number of message queues allowed on the system.
+
+/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
+maximum number of messages in a queue value. In fact it is the limiting value
+for another (user) limit which is set in mq_open invocation. This attribute of
+a queue must be less or equal then msg_max.
+
+/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
+maximum message size value (it is every message queue's attribute set during
+its creation).
+
+
------------------------------------------------------------------------------
Summary
------------------------------------------------------------------------------
diff -urN 2.6.0-test10-orig_2/fs/Kconfig 2.6.0-test10-patched_2/fs/Kconfig
--- 2.6.0-test10-orig_2/fs/Kconfig 2003-11-07 17:07:13.000000000 +0100
+++ 2.6.0-test10-patched_2/fs/Kconfig 2003-11-21 16:59:14.000000000 +0100
@@ -893,6 +893,23 @@
To compile this as a module, choose M here: the module will be called
ramfs.
+config POSIX_MQUEUE_FS
+ bool "POSIX Message Queues"
+ ---help---
+ POSIX variant of message queues is a part of IPC. In POSIX message
+ queues every message has a priority which decides about succession
+ of receiving it by a process. If you want to compile and run
+ programs written e.g. for Solaris with use of its POSIX message
+ queues (functions mq_*) say Y here. To use this feature you will
+ also need mqueue library, available from
+ <http://www.mat.uni.torun.pl/~wrona/posix_ipc/>
+
+ POSIX message queues are visible as a filesystem called 'mqueue'
+ and can be mounted somewhere if you want to do filesystem
+ operations on message queues.
+
+ If unsure, say N.
+
endmenu
menu "Miscellaneous filesystems"
diff -urN 2.6.0-test10-orig_2/include/asm-generic/siginfo.h 2.6.0-test10-patched_2/include/asm-generic/siginfo.h
--- 2.6.0-test10-orig_2/include/asm-generic/siginfo.h 2003-11-07 17:07:13.000000000 +0100
+++ 2.6.0-test10-patched_2/include/asm-generic/siginfo.h 2003-11-21 16:48:21.000000000 +0100
@@ -118,6 +118,7 @@
#define __SI_FAULT (3 << 16)
#define __SI_CHLD (4 << 16)
#define __SI_RT (5 << 16)
+#define __SI_MESGQ (6 << 16)
#define __SI_CODE(T,N) ((T) | ((N) & 0xffff))
#else
#define __SI_KILL 0
@@ -126,6 +127,7 @@
#define __SI_FAULT 0
#define __SI_CHLD 0
#define __SI_RT 0
+#define __SI_MESGQ 0
#define __SI_CODE(T,N) (N)
#endif
@@ -137,7 +139,7 @@
#define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
#define SI_QUEUE -1 /* sent by sigqueue */
#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
-#define SI_MESGQ -3 /* sent by real time mesq state change */
+#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */
#define SI_ASYNCIO -4 /* sent by AIO completion */
#define SI_SIGIO -5 /* sent by queued SIGIO */
#define SI_TKILL -6 /* sent by tkill system call */
diff -urN 2.6.0-test10-orig_2/include/asm-i386/unistd.h 2.6.0-test10-patched_2/include/asm-i386/unistd.h
--- 2.6.0-test10-orig_2/include/asm-i386/unistd.h 2003-11-07 17:07:13.000000000 +0100
+++ 2.6.0-test10-patched_2/include/asm-i386/unistd.h 2003-11-21 16:48:21.000000000 +0100
@@ -279,8 +279,15 @@
#define __NR_utimes 271
#define __NR_fadvise64_64 272
#define __NR_vserver 273
-
-#define NR_syscalls 274
+#define __NR_mq_open 274
+#define __NR_mq_unlink (__NR_mq_open+1)
+#define __NR_mq_timedsend (__NR_mq_open+2)
+#define __NR_mq_timedreceive (__NR_mq_open+3)
+#define __NR_mq_notify (__NR_mq_open+4)
+#define __NR_mq_getattr (__NR_mq_open+5)
+#define __NR_mq_setattr (__NR_mq_open+6)
+
+#define NR_syscalls 281
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
diff -urN 2.6.0-test10-orig_2/include/linux/mqueue.h 2.6.0-test10-patched_2/include/linux/mqueue.h
--- 2.6.0-test10-orig_2/include/linux/mqueue.h 1970-01-01 01:00:00.000000000 +0100
+++ 2.6.0-test10-patched_2/include/linux/mqueue.h 2003-11-24 16:37:46.000000000 +0100
@@ -0,0 +1,64 @@
+/* Copyright (C) 2003 Krzysztof Benedyczak & Michal Wronski
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ It is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this software; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _LINUX_MQUEUE_H
+#define _LINUX_MQUEUE_H
+
+#define MQ_PRIO_MAX 32768
+
+typedef int mqd_t;
+
+struct mq_attr {
+ long mq_flags; /* message queue flags */
+ long mq_maxmsg; /* maximum number of messages */
+ long mq_msgsize; /* maximum message size */
+ long mq_curmsgs; /* number of messages currently queued */
+};
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/signal.h>
+#include <linux/linkage.h>
+
+asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr);
+asmlinkage long sys_mq_unlink(const char __user *name);
+asmlinkage long mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout);
+asmlinkage ssize_t mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout);
+asmlinkage long mq_notify(mqd_t mqdes, const struct sigevent __user *notification);
+asmlinkage long mq_getattr(mqd_t mqdes, struct mq_attr __user *mqstat);
+asmlinkage long mq_setattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat);
+
+#else
+
+#include <signal.h>
+#include <fcntl.h>
+#include <time.h>
+
+mqd_t mq_open(const char *name, int oflag, /* mode_t mode, struct mq_attr *attr */ ...);
+int mq_close(mqd_t mqdes);
+int mq_unlink(const char *name);
+int mq_send(mqd_t mqdes, const char *msg_ptr, size_t msg_len, unsigned int msg_prio);
+int mq_timedsend(mqd_t mqdes, const char *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
+ssize_t mq_receive(mqd_t mqdes, char *msg_ptr, size_t msg_len, unsigned int *msg_prio);
+ssize_t mq_timedreceive(mqd_t mqdes, char *__restrict msg_ptr, size_t msg_len, unsigned int *__restrict msg_prio, const struct timespec *__restrict abs_timeout);
+int mq_notify(mqd_t mqdes, const struct sigevent *notification);
+int mq_getattr(mqd_t mqdes, struct mq_attr *mqstat);
+int mq_setattr(mqd_t mqdes, const struct mq_attr *__restrict mqstat, struct mq_attr *__restrict omqstat);
+#endif
+
+#endif
diff -urN 2.6.0-test10-orig_2/ipc/Makefile 2.6.0-test10-patched_2/ipc/Makefile
--- 2.6.0-test10-orig_2/ipc/Makefile 2003-11-07 17:07:13.000000000 +0100
+++ 2.6.0-test10-patched_2/ipc/Makefile 2003-11-21 16:48:21.000000000 +0100
@@ -5,3 +5,4 @@
obj-y := util.o
obj-$(CONFIG_SYSVIPC) += msg.o sem.o shm.o
+obj-$(CONFIG_POSIX_MQUEUE_FS) += mqueue.o
diff -urN 2.6.0-test10-orig_2/ipc/mqueue.c 2.6.0-test10-patched_2/ipc/mqueue.c
--- 2.6.0-test10-orig_2/ipc/mqueue.c 1970-01-01 01:00:00.000000000 +0100
+++ 2.6.0-test10-patched_2/ipc/mqueue.c 2003-11-24 16:37:46.000000000 +0100
@@ -0,0 +1,1260 @@
+/*
+ * POSIX message queues filesystem for Linux.
+ *
+ * Copyright (C) 2003 Krzysztof Benedyczak (golbi@mat.uni.torun.pl)
+ * Michal Wronski (wrona@mat.uni.torun.pl)
+ *
+ * Spinlocks: Mohamed Abbas (abbas.mohamed@intel.com)
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/mqueue.h>
+#include <linux/msg.h>
+#include <linux/list.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/sysctl.h>
+#include "util.h"
+
+#define MQUEUE_MAGIC 0x19800202
+#define DIRENT_SIZE 20
+#define FILENT_SIZE 60
+#define SEND 0
+#define RECV 1
+
+#define ERRNO_OK_SIGNAL 0
+#define ERRNO_OK_THREAD 1
+#define ERRNO_REMOVE_THREAD 2
+
+/* used by sysctl */
+#define FS_MQUEUE 1
+#define CTL_QUEUESMAX 2
+#define CTL_MSGMAX 3
+#define CTL_MSGSIZEMAX 4
+
+/* default values */
+#define DFLT_QUEUESMAX 64 /* max number of message queues */
+#define DFLT_MSGMAX 40 /* max number of messages in each queue */
+#define DFLT_MSGSIZEMAX 16384 /* max message size */
+
+struct ext_wait_queue { /* queue of sleeping tasks */
+ struct task_struct *task;
+ struct list_head list;
+};
+
+struct mqueue_inode_info {
+ struct mq_attr attr;
+ struct msg_msg **messages;
+
+ struct sigevent notify;
+ pid_t notify_task;
+ pid_t notify_owner; /* == tgid of notify_task */
+
+ /* for tasks waiting for free space or message (respectively) */
+ /* this is left mainly because of poll */
+ wait_queue_head_t wait_q[2];
+ /* avoids extra invocations of wake_up */
+ wait_queue_head_t wait_q2[2];
+ struct ext_wait_queue e_wait_q[2]; /* 0=free space 1=message */
+
+ __u32 qsize; /* size of queue in memory (msgs & struct) */
+ spinlock_t lock;
+ struct inode vfs_inode;
+};
+
+static struct inode_operations mqueue_dir_inode_operations;
+static struct file_operations mqueue_file_operations;
+static struct super_operations mqueue_super_ops;
+static inline void remove_notification(struct mqueue_inode_info *info);
+
+static spinlock_t mq_lock;
+static kmem_cache_t *mqueue_inode_cachep;
+static struct vfsmount *mqueue_mnt;
+
+static unsigned int queues_count;
+static unsigned int queues_max = DFLT_QUEUESMAX;
+static unsigned int msg_max = DFLT_MSGMAX;
+static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
+
+static struct ctl_table_header * mq_sysctl_table;
+
+
+static inline struct mqueue_inode_info *MQUEUE_I(struct inode *ino)
+{
+ return list_entry(ino, struct mqueue_inode_info, vfs_inode);
+}
+
+static struct inode *mqueue_get_inode(struct super_block *sb, int mode)
+{
+ struct inode *inode;
+ struct mqueue_inode_info *ino_extra;
+ struct msg_msg **msgs = NULL;
+ int size = msg_max;
+
+ if ((mode & S_IFMT) == S_IFREG) {
+ msgs = (struct msg_msg **)kmalloc(size * sizeof(struct msg_msg *), GFP_KERNEL);
+ if (!msgs)
+ return NULL;
+ }
+
+ inode = new_inode(sb);
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = 0;
+ inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME;
+
+ if ((mode & S_IFMT) == S_IFREG) {
+ inode->i_fop = &mqueue_file_operations;
+ inode->i_size = FILENT_SIZE;
+ /* mqueue specific info */
+ ino_extra = MQUEUE_I(inode);
+ spin_lock_init(&(ino_extra->lock));
+ init_waitqueue_head((&(ino_extra->wait_q[0])));
+ init_waitqueue_head((&(ino_extra->wait_q[1])));
+ init_waitqueue_head((&(ino_extra->wait_q2[0])));
+ init_waitqueue_head((&(ino_extra->wait_q2[1])));
+ INIT_LIST_HEAD(&(ino_extra->e_wait_q[0].list));
+ INIT_LIST_HEAD(&(ino_extra->e_wait_q[1].list));
+ ino_extra->notify_task = 0;
+ ino_extra->notify_owner = 0;
+ ino_extra->notify.sigev_signo = 0;
+ ino_extra->notify.sigev_notify = SIGEV_NONE;
+ ino_extra->qsize = sizeof(struct mqueue_inode_info);
+ ino_extra->attr.mq_curmsgs = 0;
+ /* fill up with defaults */
+ ino_extra->attr.mq_maxmsg = size;
+ ino_extra->attr.mq_msgsize = msgsize_max;
+ ino_extra->messages = msgs;
+ } else if ((mode & S_IFMT) == S_IFDIR) {
+ inode->i_nlink++;
+ /* Some things misbehave if size == 0 on a directory */
+ inode->i_size = 2 * DIRENT_SIZE;
+ inode->i_op = &mqueue_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ }
+ } else if (msgs)
+ kfree(msgs);
+ return inode;
+}
+
+
+static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *inode;
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = MQUEUE_MAGIC;
+ sb->s_op = &mqueue_super_ops;
+
+ inode = mqueue_get_inode(sb, S_IFDIR | S_IRWXUGO);
+ if (!inode)
+ return -ENOMEM;
+
+ sb->s_root = d_alloc_root(inode);
+ if (!sb->s_root) {
+ iput(inode);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static struct super_block *mqueue_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return get_sb_single(fs_type, flags, data, mqueue_fill_super);
+}
+
+static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+{
+ struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
+
+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
+ inode_init_once(&p->vfs_inode);
+}
+
+static struct inode *mqueue_alloc_inode(struct super_block *sb)
+{
+ struct mqueue_inode_info *ei;
+
+ ei = (struct mqueue_inode_info *)kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
+ if (!ei)
+ return NULL;
+ return &ei->vfs_inode;
+}
+
+static void mqueue_destroy_inode(struct inode *inode)
+{
+ kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
+}
+
+static void mqueue_delete_inode(struct inode *ino)
+{
+ struct mqueue_inode_info *info;
+ int i;
+
+ if ((ino->i_mode & S_IFMT) == S_IFDIR) {
+ clear_inode(ino);
+ return;
+ }
+ info = MQUEUE_I(ino);
+ spin_lock(&info->lock);
+ for (i = 0; i < info->attr.mq_curmsgs; i++)
+ free_msg(info->messages[i]);
+ kfree(info->messages);
+ spin_unlock(&info->lock);
+
+ clear_inode(ino);
+
+ spin_lock(&mq_lock);
+ queues_count--;
+ spin_unlock(&mq_lock);
+}
+
+static int mqueue_unlink(struct inode *dir, struct dentry *dent)
+{
+ struct inode *inode = dent->d_inode;
+ dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
+ dir->i_size -= DIRENT_SIZE;
+ inode->i_nlink--;
+ dput(dent);
+ return 0;
+}
+
+static struct dentry *mqueue_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+{
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ d_add(dentry, NULL);
+ return NULL;
+}
+
+static int mqueue_create(struct inode *dir, struct dentry *dent, int mode, struct nameidata *nd)
+{
+ struct inode *ino;
+ int error;
+
+ spin_lock(&mq_lock);
+ if (queues_count >= queues_max) {
+ error = -ENOSPC;
+ goto out_lock;
+ }
+ queues_count++;
+ spin_unlock(&mq_lock);
+
+ ino = mqueue_get_inode(dir->i_sb, mode);
+ if (!ino) {
+ error = -ENOMEM;
+ spin_lock(&mq_lock);
+ queues_count--;
+ goto out_lock;
+ }
+
+ dir->i_size += DIRENT_SIZE;
+ dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
+
+ d_instantiate(dent, ino);
+ dget(dent);
+ return 0;
+out_lock:
+ spin_unlock(&mq_lock);
+ return error;
+}
+
+/*
+* This is routine for system read from queue file.
+* To avoid mess with doing here some sort of mq_receive we allow
+* to read only queue size & notification info (the only values
+* that are interesting from user point of view and aren't accessible
+* through std routines)
+*/
+static ssize_t mqueue_read_file(struct file *filp, char __user *data,
+ size_t count, loff_t * off)
+{
+ size_t pos;
+ ssize_t retval;
+ char buffer[FILENT_SIZE + 1];
+ struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
+
+ pos = *off;
+ if ((ssize_t) count < 0)
+ return -EINVAL;
+ if (!count)
+ return 0;
+ if (pos >= FILENT_SIZE)
+ return 0;
+ if (pos + count >= FILENT_SIZE)
+ count = FILENT_SIZE - pos - 1;
+
+ if (!access_ok(VERIFY_WRITE, data, count))
+ return -EFAULT;
+
+ snprintf(buffer, FILENT_SIZE + 1,
+ "QSIZE:%-10u NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
+ info->qsize, info->notify.sigev_notify,
+ info->notify.sigev_signo, info->notify_owner);
+
+ retval = FILENT_SIZE - *off;
+ if (copy_to_user(data, buffer + pos, retval)) {
+ retval = (ssize_t)-EFAULT;
+ goto out;
+ }
+ *off += retval;
+ filp->f_dentry->d_inode->i_atime = filp->f_dentry->d_inode->i_ctime = CURRENT_TIME;
+out:
+ return retval;
+}
+
+
+static int mqueue_release_file(struct inode *ino, struct file *filp)
+{
+ struct mqueue_inode_info *info = MQUEUE_I(ino);
+
+ spin_lock(&info->lock);
+ if (current->tgid == info->notify_owner)
+ remove_notification(info);
+
+ spin_unlock(&info->lock);
+ return 0;
+}
+
+
+static unsigned int mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
+{
+ struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
+ int retval = 0;
+
+ poll_wait(filp, &info->wait_q[0], poll_tab);
+ poll_wait(filp, &info->wait_q[1], poll_tab);
+
+ spin_lock(&info->lock);
+ if (info->attr.mq_curmsgs)
+ retval = POLLIN | POLLRDNORM;
+
+ if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
+ retval |= POLLOUT | POLLWRNORM;
+ spin_unlock(&info->lock);
+
+ return retval;
+}
+
+/*
+* This cut&paste version of wait_event() without event checking & with
+* exclusive adding to queue.
+*/
+void inline wait_exclusive(wait_queue_head_t * wq,
+ struct mqueue_inode_info *i)
+{
+ wait_queue_t wait;
+ init_waitqueue_entry(&wait, current);
+
+ add_wait_queue_exclusive(wq, &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ spin_unlock(&i->lock);
+ schedule();
+ spin_lock(&i->lock);
+
+ current->state = TASK_RUNNING;
+ remove_wait_queue(wq, &wait);
+}
+
+/* Removes from info->e_wait_q[sr] current task */
+static void wq_remove(struct mqueue_inode_info *info, int sr)
+{
+ struct ext_wait_queue *ptr;
+
+ if (!list_empty(&(info->e_wait_q[sr].list)))
+ list_for_each_entry(ptr, &(info->e_wait_q[sr].list), list) {
+ if (ptr->task == current) {
+ list_del(&(ptr->list));
+ kfree(ptr);
+ break;
+ }
+ }
+}
+
+/* adds current to info->e_wait_q[sr] before element with smaller prio */
+static inline void wq_add(struct mqueue_inode_info *info, int sr,
+ struct ext_wait_queue *tmp)
+{
+ struct ext_wait_queue *ptr;
+
+ tmp->task = current;
+
+ if (list_empty(&info->e_wait_q[sr].list))
+ list_add(&tmp->list, &info->e_wait_q[sr].list);
+ else {
+ list_for_each_entry(ptr, &info->e_wait_q[sr].list, list)
+ if (ptr->task->static_prio <= current->static_prio) {
+ /* we add before ptr element */
+ __list_add(&tmp->list, ptr->list.prev, &ptr->list);
+ return;
+ }
+ /* we add on tail */
+ list_add_tail(&tmp->list, &info->e_wait_q[sr].list);
+ }
+ return;
+}
+
+/* removes from info->e_wait_q[sr] current task.
+ * Only for wq_sleep(): as we are here current must be one
+ * before-first (last) (meaning first in order as our 'queue' is inversed) */
+static inline void wq_remove_last(struct mqueue_inode_info *info, int sr)
+{
+ struct ext_wait_queue *tmp = list_entry(info->e_wait_q[sr].list.prev, struct ext_wait_queue, list);
+ list_del(&(tmp->list));
+ kfree(tmp);
+}
+
+/*
+ * puts current task to sleep
+ * sr: SEND or RECV
+ */
+static int wq_sleep(struct mqueue_inode_info *info, int sr,
+ signed long timeout, struct ext_wait_queue *wq_ptr)
+{
+ wait_queue_t __wait;
+ long error;
+
+ wq_add(info, sr, wq_ptr);
+
+ init_waitqueue_entry(&__wait, current);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ((current == (list_entry(info->e_wait_q[sr].list.prev, struct ext_wait_queue, list))->task)
+ && ((info->attr.mq_curmsgs > 0 && sr == RECV)
+ || (info->attr.mq_curmsgs < info->attr.mq_maxmsg && sr == SEND)))
+ break;
+
+ if (signal_pending(current)) {
+ current->state = TASK_RUNNING;
+ wq_remove(info, sr);
+ return -EINTR;
+ }
+
+ spin_unlock(&info->lock);
+ error = schedule_timeout(timeout);
+ spin_lock(&info->lock);
+
+ if ((!error) && (!signal_pending(current))) {
+ wq_remove(info, sr);
+ return -ETIMEDOUT;
+ }
+ }
+ current->state = TASK_RUNNING;
+ wq_remove_last(info, sr);
+
+ return 0;
+}
+
+/* wakes up sleeping task */
+static void wq_wakeup(struct mqueue_inode_info *info, int sr)
+{
+ if (sr == SEND) {
+ /* We can't invoke wake_up for tasks waiting for free space
+ * if there is less then MAXMSG-1 messages - then wake_up was
+ * invoked previously (and finished) but mq_sleep() of proper
+ * (only one) task didn't start to continue running yet,
+ * thus we must wait until this task receives IT'S message
+ */
+ if ((info->attr.mq_curmsgs < info->attr.mq_maxmsg - 1)
+ && (!list_empty(&info->e_wait_q[sr].list)))
+ wait_exclusive(&(info->wait_q2[sr]), info);
+ } else {
+ /* As above but for tasks waiting for new message */
+ if ((info->attr.mq_curmsgs > 1) && (!list_empty(&info->e_wait_q[sr].list)))
+ wait_exclusive(&(info->wait_q2[sr]), info);
+ }
+ /* We can wake up now - either all are sleeping or queue is empty. */
+ if (!list_empty(&info->e_wait_q[sr].list))
+ wake_up_process((list_entry(info->e_wait_q[sr].list.prev, struct ext_wait_queue, list))->task);
+ /* for poll */
+ wake_up_interruptible(&(info->wait_q[sr]));
+}
+
+
+/* Auxiliary functions to manipulate messages' list */
+static inline void msg_insert(struct msg_msg *ptr, struct mqueue_inode_info *info)
+{
+ int k;
+
+ k = (info->attr.mq_curmsgs)-1;
+ while ((k >= 0) && (info->messages[k]->m_type >= ptr->m_type)) {
+ info->messages[k + 1] = info->messages[k];
+ k--;
+ }
+ (info->attr.mq_curmsgs)++;
+ info->messages[k + 1] = ptr;
+}
+
+static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
+{
+ return info->messages[--(info->attr.mq_curmsgs)];
+}
+
+/*
+ * The next function is only to split too long sys_mq_timedsend
+ */
+static inline void __do_notify(struct mqueue_inode_info *info)
+{
+ struct siginfo sig_i;
+ struct task_struct *p;
+
+ /* notification
+ * invoked when there is registered process and there isn't process
+ * waiting synchronously for message AND state of queue changed from
+ * empty to not empty */
+ if ((info->notify.sigev_notify != SIGEV_NONE) && list_empty(&info->e_wait_q[RECV].list)
+ && info->attr.mq_curmsgs == 1) {
+
+ sig_i.si_signo = info->notify.sigev_signo;
+ sig_i.si_errno = ERRNO_OK_SIGNAL;
+ sig_i.si_code = SI_MESGQ;
+ sig_i.si_value = info->notify.sigev_value;
+ sig_i.si_pid = current->tgid;
+ sig_i.si_uid = current->uid;
+
+ /* sends signal */
+ if (info->notify.sigev_notify == SIGEV_SIGNAL) {
+ kill_proc_info(info->notify.sigev_signo,
+ &sig_i, info->notify_task);
+ } else if (info->notify.sigev_notify == SIGEV_THREAD ||
+ info->notify.sigev_notify == SIGEV_THREAD_ID) {
+ sig_i.si_errno = ERRNO_OK_THREAD;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(info->notify_task);
+ if (p && (p->tgid == info->notify_owner))
+ send_sig_info(info->notify.sigev_signo, &sig_i, p);
+ read_unlock(&tasklist_lock);
+ }
+ /* after notification unregisters process */
+ info->notify_task = 0;
+ info->notify_owner = 0;
+ info->notify.sigev_signo = 0;
+ info->notify.sigev_notify = SIGEV_NONE;
+ }
+}
+
+static inline long prepare_timeout(const struct timespec __user *arg)
+{
+ struct timespec ts, nowts;
+ long timeout;
+
+ if (arg) {
+ if (copy_from_user(&ts, arg, sizeof(struct timespec)))
+ return -EFAULT;
+
+ if (ts.tv_nsec < 0 || ts.tv_sec < 0
+ || ts.tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+ nowts = CURRENT_TIME;
+ /* first subtract as jiffies can't be too big */
+ ts.tv_sec -= nowts.tv_sec;
+ if (ts.tv_nsec < nowts.tv_nsec) {
+ ts.tv_nsec += NSEC_PER_SEC;
+ ts.tv_sec--;
+ }
+ ts.tv_nsec -= nowts.tv_nsec;
+ if (ts.tv_sec < 0)
+ return 0;
+
+ timeout = timespec_to_jiffies(&ts) + 1;
+ } else
+ return MAX_SCHEDULE_TIMEOUT;
+
+ return timeout;
+}
+
+
+static inline void remove_notification(struct mqueue_inode_info *info)
+{
+ struct siginfo sig_i;
+ struct task_struct *p;
+
+ if (info->notify.sigev_notify == SIGEV_THREAD) {
+ /* cancel waiting thread */
+ sig_i.si_signo = info->notify.sigev_signo;
+ sig_i.si_errno = ERRNO_REMOVE_THREAD;
+ sig_i.si_code = SI_MESGQ;
+ sig_i.si_value = info->notify.sigev_value;
+ sig_i.si_pid = current->tgid;
+ sig_i.si_uid = current->uid;
+
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(info->notify_task);
+
+ if (p && (p->tgid == info->notify_owner))
+ send_sig_info(info->notify.sigev_signo, &sig_i, p);
+ read_unlock(&tasklist_lock);
+ }
+ info->notify_task = 0;
+ info->notify_owner = 0;
+ info->notify.sigev_signo = 0;
+ info->notify.sigev_notify = SIGEV_NONE;
+}
+
+/*
+ * Invoked when creating a new queue via sys_mq_open
+ */
+static struct file *do_create(struct dentry *dir, struct dentry *dentry,
+ int oflag, mode_t mode, struct mq_attr __user *u_attr)
+{
+ struct file *filp;
+ struct inode *ino;
+ struct mqueue_inode_info *info;
+ struct msg_msg **msgs = NULL;
+ struct mq_attr attr;
+ int ret;
+
+ if (u_attr != NULL) {
+ if (copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
+ return ERR_PTR(-EFAULT);
+
+ if (attr.mq_maxmsg <= 0 || attr.mq_msgsize <= 0
+ || attr.mq_maxmsg > msg_max || attr.mq_msgsize > msgsize_max)
+ return ERR_PTR(-EINVAL);
+ msgs = (struct msg_msg **)kmalloc(attr.mq_maxmsg * sizeof(struct msg_msg *), GFP_KERNEL);
+ if (!msgs)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = vfs_create(dir->d_inode, dentry, mode, NULL);
+ if (ret) {
+ if (msgs)
+ kfree(msgs);
+ return ERR_PTR(ret);
+ }
+
+ ino = dentry->d_inode;
+ info = MQUEUE_I(ino);
+ if (u_attr != NULL) {
+ info->attr.mq_maxmsg = attr.mq_maxmsg;
+ info->attr.mq_msgsize = attr.mq_msgsize;
+ kfree(info->messages);
+ info->messages = msgs;
+ }
+
+ filp = dentry_open(dentry, mqueue_mnt, oflag);
+ if (!IS_ERR(filp))
+ dget(dentry);
+
+ return filp;
+}
+
+/* opens existing queue */
+static struct file *do_open(struct dentry *dentry, int oflag)
+{
+ struct file *filp;
+ static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE };
+
+ if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
+ return ERR_PTR(-EINVAL);
+
+ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
+ return ERR_PTR(-EACCES);
+
+ filp = dentry_open(dentry, mqueue_mnt, oflag);
+
+ if (!IS_ERR(filp))
+ dget(dentry);
+
+ return filp;
+}
+
+asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
+ struct mq_attr __user *attr)
+{
+ struct dentry *dentry;
+ struct file *filp;
+ char *name;
+ int fd, error;
+
+ if (IS_ERR(name = getname(u_name)))
+ return PTR_ERR(name);
+
+ fd = get_unused_fd();
+ if (fd < 0)
+ goto out_putname;
+
+ down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto out_err;
+ }
+ mntget(mqueue_mnt);
+
+ if (oflag & O_CREAT) {
+ if (dentry->d_inode) { /* entry already exists */
+ filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) : do_open(dentry, oflag);
+ } else {
+ filp = do_create(mqueue_mnt->mnt_root, dentry, oflag, mode, attr);
+ }
+ } else
+ filp = (dentry->d_inode) ? do_open(dentry, oflag) : ERR_PTR(-ENOENT);
+
+ dput(dentry);
+
+ if (IS_ERR(filp)) {
+ error = PTR_ERR(filp);
+ goto out_putfd;
+ }
+
+ fd_install(fd, filp);
+ goto out_upsem;
+
+out_putfd:
+ mntput(mqueue_mnt);
+ put_unused_fd(fd);
+out_err:
+ fd = error;
+out_upsem:
+ up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+out_putname:
+ putname(name);
+ return fd;
+}
+
+
+asmlinkage long sys_mq_unlink(const char __user *u_name)
+{
+ int err;
+ char *name;
+ struct dentry *dentry;
+ struct inode *ino = NULL;
+
+ name = getname(u_name);
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+
+ down(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+
+ if (!dentry->d_inode) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (permission(dentry->d_inode, MAY_WRITE, NULL)) {
+ err = -EACCES;
+ goto out_err;
+ }
+ ino = dentry->d_inode;
+ if (ino)
+ atomic_inc(&ino->i_count);
+
+ err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+out_err:
+ dput(dentry);
+
+out_unlock:
+ up(&mqueue_mnt->mnt_root->d_inode->i_sem);
+ putname(name);
+ if (ino)
+ iput(ino);
+
+ return err;
+}
+
+
+static long do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
+ size_t msg_len, unsigned int msg_prio, const long timeout)
+{
+ struct file *filp;
+ struct inode *ino;
+ struct ext_wait_queue *wq_ptr;
+ struct msg_msg *msg_ptr;
+ int ret;
+ struct mqueue_inode_info *info = MQUEUE_I(ino);
+
+ ret = -EBADF;
+ filp = fget(mqdes);
+ if (!filp)
+ goto out;
+
+ ino = filp->f_dentry->d_inode;
+ if (ino->i_sb->s_magic != MQUEUE_MAGIC)
+ goto out_fput;
+ info = MQUEUE_I(ino);
+
+ if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+ goto out_fput;
+
+ /* first try to allocate memory, before doing anything with
+ * existing queues */
+ msg_ptr = load_msg((void *)u_msg_ptr, msg_len);
+ if (IS_ERR(msg_ptr)) {
+ ret = PTR_ERR(msg_ptr);
+ goto out_fput;
+ }
+
+ /* This memory may be unnecessary but we must alloc it here
+ * because of spinlock. kfree is called in wq_remove(_last) */
+ wq_ptr = kmalloc(sizeof(struct ext_wait_queue), GFP_KERNEL);
+ if (wq_ptr == NULL) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ spin_lock(&info->lock);
+
+ if ((filp->f_flags & O_NONBLOCK) && (info->attr.mq_curmsgs == info->attr.mq_maxmsg)) {
+ ret = -EAGAIN;
+ goto out_1unlock;
+ }
+
+ if (msg_len > info->attr.mq_msgsize) {
+ ret = -EMSGSIZE;
+ goto out_1unlock;
+ }
+
+ /* checks if queue is full -> I'm waiting as O_NONBLOCK isn't
+ * set then. mq_receive wakes up only 1 task */
+ if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
+ ret = wq_sleep(info, SEND, timeout, wq_ptr);
+ if (ret)
+ goto out_1unlock_nofree;
+ } else
+ kfree(wq_ptr);
+
+ /* adds message to the queue */
+ msg_ptr->m_ts = msg_len;
+ msg_ptr->m_type = msg_prio;
+
+ msg_insert(msg_ptr, info);
+
+ info->qsize += msg_len;
+ ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;
+ __do_notify(info);
+
+ /* after sending message we must wake up (ONLY 1 no matter which) */
+ /* task sleeping in wq_wakeup() */
+ wake_up(&(info->wait_q2[SEND]));
+
+ /* wakes up task waiting for message */
+ wq_wakeup(info, RECV);
+
+ spin_unlock(&info->lock);
+ ret = 0;
+ goto out_fput;
+
+ /* I hate this goto convention... */
+out_1unlock:
+ kfree(wq_ptr);
+out_1unlock_nofree:
+ spin_unlock(&info->lock);
+out_free:
+ free_msg(msg_ptr);
+out_fput:
+ fput(filp);
+out:
+ return ret;
+}
+
+asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
+ size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout)
+{
+ long timeout;
+
+ if (msg_prio >= (unsigned long) MQ_PRIO_MAX)
+ return -EINVAL;
+
+ if ((timeout = prepare_timeout(u_abs_timeout)) < 0)
+ return timeout;
+ return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, timeout);
+}
+
+
+static ssize_t do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
+ size_t msg_len, unsigned int __user *u_msg_prio, const long timeout)
+{
+ ssize_t ret;
+ struct msg_msg *msg_ptr;
+ struct file *filp;
+ struct inode *ino;
+ struct mqueue_inode_info *info;
+ struct ext_wait_queue *wq_ptr;
+
+ ret = -EBADF;
+ filp = fget(mqdes);
+ if (!filp)
+ goto out;
+
+ ino = filp->f_dentry->d_inode;
+ if (ino->i_sb->s_magic != MQUEUE_MAGIC)
+ goto out_fput;
+ info = MQUEUE_I(ino);
+
+ if ((filp->f_flags & O_ACCMODE) == O_WRONLY)
+ goto out_fput;
+
+ /* The same as in send */
+ wq_ptr = kmalloc(sizeof(struct ext_wait_queue), GFP_KERNEL);
+ if (wq_ptr == NULL) {
+ ret = -ENOMEM;
+ goto out_fput;
+ }
+
+ spin_lock(&info->lock);
+
+ /* checks if O_NONBLOCK is set and queue is empty */
+ if ((filp->f_flags & O_NONBLOCK) && (info->attr.mq_curmsgs == 0)) {
+ ret = -EAGAIN;
+ goto out_1unlock;
+ }
+
+ /* checks if buffer is big enough */
+ if (msg_len < info->attr.mq_msgsize) {
+ ret = -EMSGSIZE;
+ goto out_1unlock;
+ }
+
+ /* checks if queue is empty -> as O_NONBLOCK isn't set then
+ * we must wait */
+ if (info->attr.mq_curmsgs == 0) {
+ ret = wq_sleep(info, RECV, timeout, wq_ptr);
+ if (ret < 0)
+ goto out_unlock_only;
+ } else
+ kfree(wq_ptr);
+
+ msg_ptr = msg_get(info);
+ ret = msg_ptr->m_ts;
+
+ info->qsize -= ret;
+ ino->i_atime = ino->i_mtime = ino->i_ctime = CURRENT_TIME;
+
+ /* after receive we can wakeup 1 task waiting in wq_wakeup */
+ wake_up(&(info->wait_q2[RECV]));
+ /* wakes up task waiting for sending message */
+ wq_wakeup(info, SEND);
+
+ spin_unlock(&info->lock);
+
+ if (u_msg_prio) {
+ if (put_user(msg_ptr->m_type, u_msg_prio)) {
+ ret = -EFAULT;
+ goto out_2free;
+ }
+ }
+ if (store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts))
+ ret = -EFAULT;
+
+out_2free:
+ free_msg(msg_ptr);
+ goto out_fput;
+out_1unlock:
+ kfree(wq_ptr);
+out_unlock_only:
+ spin_unlock(&info->lock);
+out_fput:
+ fput(filp);
+out:
+ return ret;
+}
+
+asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
+ size_t msg_len, unsigned int __user *u_msg_prio,
+ const struct timespec __user *u_abs_timeout)
+{
+ long timeout;
+
+ if ((timeout = prepare_timeout(u_abs_timeout)) < 0)
+ return timeout;
+ return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, timeout);
+}
+
+
+/* Notes: the case when user wants us to deregister (with NULL as pointer or SIGEV_NONE)
+ * and he isn't currently owner of notification will be silently discarded.
+ * It isn't explicitly defined in the POSIX.
+ */
+asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+{
+ int ret;
+ struct file *filp;
+ struct inode *ino;
+ struct sigevent notification;
+ struct mqueue_inode_info *info;
+
+ if (u_notification != NULL) {
+ if (copy_from_user(¬ification, u_notification, sizeof(struct sigevent)))
+ return -EFAULT;
+
+ if (unlikely(notification.sigev_notify != SIGEV_NONE &&
+ notification.sigev_notify != SIGEV_SIGNAL &&
+ notification.sigev_notify != SIGEV_THREAD))
+ return -EINVAL;
+ }
+
+ ret = -EBADF;
+ filp = fget(mqdes);
+ if (!filp)
+ goto out;
+
+ ino = filp->f_dentry->d_inode;
+ if (ino->i_sb->s_magic != MQUEUE_MAGIC)
+ goto out_fput;
+ info = MQUEUE_I(ino);
+
+ ret = 0;
+ spin_lock(&info->lock);
+
+ if (u_notification == NULL || notification.sigev_notify == SIGEV_NONE) {
+ if (info->notify_owner == current->tgid)
+ remove_notification(info);
+ goto out_unlock;
+ }
+
+ if (info->notify_task) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ /* add notification */
+ if (notification.sigev_signo < 0 || notification.sigev_signo > _NSIG)
+ ret = -EINVAL;
+ else {
+ info->notify_task = current->pid;
+ info->notify_owner = current->tgid;
+ info->notify.sigev_signo = notification.sigev_signo;
+ info->notify.sigev_notify = notification.sigev_notify;
+ info->notify.sigev_value = notification.sigev_value;
+ }
+out_unlock:
+ ino->i_atime = ino->i_ctime = CURRENT_TIME;
+ spin_unlock(&info->lock);
+out_fput:
+ fput(filp);
+out:
+ return ret;
+}
+
+asmlinkage long sys_mq_getattr(mqd_t mqdes, struct mq_attr __user *u_mqstat)
+{
+ int ret;
+ struct mq_attr attr;
+ struct file *filp;
+ struct inode *ino;
+ struct mqueue_inode_info *info;
+
+ if (u_mqstat == NULL)
+ return -EINVAL;
+
+ ret = -EBADF;
+ filp = fget(mqdes);
+ if (!filp)
+ goto out;
+
+ ino = filp->f_dentry->d_inode;
+ if (ino->i_sb->s_magic != MQUEUE_MAGIC)
+ goto out_fput;
+ info = MQUEUE_I(ino);
+
+ spin_lock(&info->lock);
+ attr = info->attr;
+ attr.mq_flags = filp->f_flags;
+ ino->i_atime = ino->i_ctime = CURRENT_TIME;
+
+ spin_unlock(&info->lock);
+
+ ret = 0;
+ if (copy_to_user(u_mqstat, &attr, sizeof(struct mq_attr)))
+ ret = -EFAULT;
+
+out_fput:
+ fput(filp);
+out:
+ return ret;
+}
+
+asmlinkage long sys_mq_setattr(mqd_t mqdes, const struct mq_attr __user *u_mqstat,
+ struct mq_attr __user *u_omqstat)
+{
+ int ret;
+ struct mq_attr mqstat, omqstat;
+ struct file *filp;
+ struct inode *ino;
+ struct mqueue_inode_info *info;
+
+ if (u_mqstat == NULL)
+ return -EINVAL;
+
+ if (copy_from_user(&mqstat, u_mqstat, sizeof (struct mq_attr)))
+ return -EFAULT;
+
+ ret = -EBADF;
+ filp = fget(mqdes);
+ if (!filp)
+ goto out;
+
+ ino = filp->f_dentry->d_inode;
+ if (ino->i_sb->s_magic != MQUEUE_MAGIC)
+ goto out_fput;
+ info = MQUEUE_I(ino);
+
+ spin_lock(&info->lock);
+
+ omqstat = info->attr;
+ omqstat.mq_flags = filp->f_flags;
+
+ if (mqstat.mq_flags & O_NONBLOCK)
+ filp->f_flags |= O_NONBLOCK;
+ else
+ filp->f_flags &= ~O_NONBLOCK;
+
+ ino->i_atime = ino->i_ctime = CURRENT_TIME;
+
+ spin_unlock(&info->lock);
+
+ ret = 0;
+ if (u_omqstat != NULL && copy_to_user(u_omqstat, &omqstat, sizeof(struct mq_attr)))
+ ret = -EFAULT;
+
+out_fput:
+ fput(filp);
+out:
+ return ret;
+}
+
+
+static struct inode_operations mqueue_dir_inode_operations = {
+ .lookup = mqueue_lookup,
+ .create = mqueue_create,
+ .unlink = mqueue_unlink,
+};
+
+static struct file_operations mqueue_file_operations = {
+ .release = mqueue_release_file,
+ .poll = mqueue_poll_file,
+ .read = mqueue_read_file,
+};
+
+static struct super_operations mqueue_super_ops = {
+ .alloc_inode = mqueue_alloc_inode,
+ .destroy_inode = mqueue_destroy_inode,
+ .statfs = simple_statfs,
+ .delete_inode = mqueue_delete_inode,
+ .drop_inode = generic_delete_inode,
+};
+
+static struct file_system_type mqueue_fs_type = {
+ .name = "mqueue",
+ .get_sb = mqueue_get_sb,
+ .kill_sb = kill_litter_super,
+};
+
+static ctl_table mq_sysctls[] = {
+ {
+ .ctl_name = CTL_QUEUESMAX,
+ .procname = "queues_max",
+ .data = &queues_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_MSGMAX,
+ .procname = "msg_max",
+ .data = &msg_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_MSGSIZEMAX,
+ .procname = "msgsize_max",
+ .data = &msgsize_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table mq_sysctl_dir[] = {
+ {
+ .ctl_name = FS_MQUEUE,
+ .procname = "mqueue",
+ .mode = 0555,
+ .child = mq_sysctls,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table mq_sysctl_root[] = {
+ {
+ .ctl_name = CTL_FS,
+ .procname = "fs",
+ .mode = 0555,
+ .child = mq_sysctl_dir,
+ },
+ { .ctl_name = 0 }
+};
+
+
+static int __init init_mqueue_fs(void)
+{
+ int error;
+
+ mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
+ sizeof(struct mqueue_inode_info), 0, SLAB_HWCACHE_ALIGN, init_once, NULL);
+
+ if (mqueue_inode_cachep == NULL)
+ return -ENOMEM;
+
+ mq_sysctl_table = register_sysctl_table(mq_sysctl_root, 0);
+ if (!mq_sysctl_table) {
+ error = -ENOMEM;
+ goto out_inodecache;
+ }
+
+ error = register_filesystem(&mqueue_fs_type);
+ if (error)
+ goto out_inodecache;
+
+ if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
+ unregister_filesystem(&mqueue_fs_type);
+ unregister_sysctl_table(mq_sysctl_table);
+ error = PTR_ERR(mqueue_mnt);
+ goto out_inodecache;
+ }
+
+ /* internal initialization - not common for vfs */
+ queues_count = 0;
+ spin_lock_init(&mq_lock);
+
+ return 0;
+
+out_inodecache:
+ if (kmem_cache_destroy(mqueue_inode_cachep))
+ printk(KERN_INFO "mqueue_inode_cache: not all structures were freed\n");
+ return error;
+}
+
+static void __exit exit_mqueue_fs(void)
+{
+ unregister_filesystem(&mqueue_fs_type);
+ unregister_sysctl_table(mq_sysctl_table);
+
+ if (kmem_cache_destroy(mqueue_inode_cachep))
+ printk(KERN_INFO "mqueue_inode_cache: not all structures were freed\n");
+}
+
+__initcall(init_mqueue_fs);
diff -urN 2.6.0-test10-orig_2/ipc/util.c 2.6.0-test10-patched_2/ipc/util.c
--- 2.6.0-test10-orig_2/ipc/util.c 2003-11-21 17:11:17.000000000 +0100
+++ 2.6.0-test10-patched_2/ipc/util.c 2003-11-24 17:47:23.000000000 +0100
@@ -24,10 +24,13 @@
#include <linux/security.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/mqueue.h>
-#if defined(CONFIG_SYSVIPC)
-
+#if defined(CONFIG_SYSVIPC) || defined(CONFIG_POSIX_MQUEUE_FS)
#include "util.h"
+#endif
+
+#if defined(CONFIG_SYSVIPC)
/**
* ipc_init - initialise IPC subsystem
@@ -612,7 +615,7 @@
#endif /* CONFIG_SYSVIPC */
-#ifdef CONFIG_SYSVIPC
+#if defined(CONFIG_POSIX_MQUEUE_FS) || defined(CONFIG_SYSVIPC)
void free_msg(struct msg_msg* msg)
{
@@ -714,4 +717,51 @@
return 0;
}
-#endif /* CONFIG_SYSVIPC */
+#endif /* CONFIG_POSIX_MQUEUE_FS || CONFIG_SYSVIPC */
+
+#if !defined(CONFIG_POSIX_MQUEUE_FS)
+
+/*
+ * Return ENOSYS when posix mqueue filesystem is not compiled in
+ */
+
+asmlinkage long sys_mq_open(const char *name, int oflag, mode_t mode,
+ struct mq_attr *attr)
+{
+ return (mqd_t)-ENOSYS;
+}
+
+asmlinkage long sys_mq_unlink(const char *name)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr,
+ size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout)
+{
+ return -ENOSYS;
+}
+
+asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr,
+ size_t msg_len, unsigned int *msg_prio, const struct timespec *abs_timeout)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent *notification)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long sys_mq_getattr(mqd_t mqdes, struct mq_attr *mqstat)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long sys_mq_setattr(mqd_t mqdes, const struct mq_attr *mqstat,
+ struct mq_attr *omqstat)
+{
+ return -ENOSYS;
+}
+
+#endif /* ! CONFIG_POSIX_MQUEUE_FS */
diff -urN 2.6.0-test10-orig_2/kernel/signal.c 2.6.0-test10-patched_2/kernel/signal.c
--- 2.6.0-test10-orig_2/kernel/signal.c 2003-11-24 16:28:58.000000000 +0100
+++ 2.6.0-test10-patched_2/kernel/signal.c 2003-11-24 16:51:48.000000000 +0100
@@ -2044,6 +2044,7 @@
err |= __put_user(from->si_stime, &to->si_stime);
break;
case __SI_RT: /* This is not generated by the kernel as of now. */
+ case __SI_MESGQ: /* But this is */
err |= __put_user(from->si_pid, &to->si_pid);
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_int, &to->si_int);
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2003-11-25 11:43 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-10-03 15:59 [PATCH] [2/2] posix message queues Peter Wächtler
2003-10-03 18:16 ` Manfred Spraul
2003-10-05 12:42 ` Peter Wächtler
2003-10-05 14:39 ` Arjan van de Ven
2003-10-05 15:58 ` Ulrich Drepper
2003-10-03 22:22 ` Jakub Jelinek
2003-10-05 12:42 ` Peter Wächtler
2003-10-04 6:39 ` Ulrich Drepper
-- strict thread matches above, loose matches on Subject: below --
2003-11-25 11:42 [PATCH] 2/2 POSIX " Michal Wronski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox