From: Denys Vlasenko <vda.linux@googlemail.com>
To: linux-kernel@vger.kernel.org, Al Viro <viro@ftp.linux.org.uk>
Subject: [PATCH] allow send/recv(MSG_DONTWAIT) on non-sockets
Date: Sat, 25 Aug 2007 22:50:03 +0100 [thread overview]
Message-ID: <200708252250.03076.vda.linux@googlemail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1999 bytes --]
Hello list,
This patch attempts to make it possible to do a nonblocking read or
write of fd's pointing to possibly shared struct file's in a non-racy
manner, i.e. without using fcntl.
One use case is when you want to read standard input, but
don't want to switch fd 0 to O_NONBLOCK mode: if you get SIGKILLed,
O_NONBLOCK will stay and your parent (e.g. a shell) can be upset.
On Tuesday 14 August 2007 13:33, Alan Cox wrote:
> > b) Make recv(fd, buf, size, flags) and send(fd, buf, size, flags);
> > work with non-socket fds too, for flags==0 or flags==MSG_DONTWAIT.
> > (it's ok to fail with "socket op on non-socket fd" for other values
> > of flags)
>
> I think that makes a lot of sense, and to be honest other MSG_ flags make
> useful sense and have meaningful semantics that might be helpful
> elsewhere if ever coded that way.
Yes, that's my feeling too.
> If you want to do this the first job is going to be to sort out the way
> non-block is propogated to device driver read/write handlers. At the
> moment they all check filp->f_flags
...which happens in ~250 files. I'd rather not touch that much
of code, if possible.
Attached patch detects send/recv(fd, buf, size, MSG_DONTWAIT) on
non-sockets and turns them into non-blocking write/read.
Since filp->f_flags appear to be read and modified without any locking,
I cannot modify it without potentially affecting other processes
accessing the same file through shared struct file.
Therefore I simply make a temporary copy of struct file, set
O_NONBLOCK in it and pass it to vfs_read/write.
Is this heresy? ;) I see only one spinlock in struct file:
#ifdef CONFIG_EPOLL
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
Do I need to take it?
Also attached is ndelaytest.c which can be used to test that
send(MSG_DONTWAIT) indeed is failing with EAGAIN if write would block
and that other processes never see O_NONBLOCK set.
Comments?
--
vda
[-- Attachment #2: ndelaytest.c --]
[-- Type: text/x-csrc, Size: 1463 bytes --]
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>
#include <signal.h>
#define SECONDS 10
#define STR "."
//#define STR "123456789 123456789 123456789 123456789 "
/* To see send() resulting in EAGAIN:
* strace -ff -o log ndelaytest | while sleep 11; do break; done
* log.$PID:
* send(1, "123456789 123456789 123456789 12"..., 40, MSG_DONTWAIT)
* = -1 EAGAIN (Resource temporarily unavailable)
*/
int main()
{
pid_t pid;
time_t t;
int fl;
puts("starting");
t = time(0);
pid = fork();
if (pid == 0) {
/* child */
while ((time(0) - t) < SECONDS-1) {
#if 0
/* Uncomment this part and simply run the executable
* to see race detection code in action */
#define OP "write"
fcntl(1, F_SETFL, fcntl(1, F_GETFL) | O_NONBLOCK);
fl = write(1, STR, sizeof(STR) - 1);
fcntl(1, F_SETFL, fcntl(1, F_GETFL) & ~O_NONBLOCK);
#else
/* This part tests whether send(MSG_DONTWAIT)
* is racy or not */
#define OP "send"
fl = send(1, STR, sizeof(STR) - 1, MSG_DONTWAIT);
#endif
if (fl < 0) {
perror(OP);
kill(getppid(), SIGKILL);
return 1;
}
}
return 0;
}
while ((time(0) - t) < SECONDS) {
fl = fcntl(1, F_GETFL);
if (fl & O_NONBLOCK) {
fprintf(stderr, "NONBLOCK:1\n");
kill(pid, SIGKILL);
fcntl(1, F_SETFL, fl & ~O_NONBLOCK);
return 1;
}
}
fprintf(stderr, "NONBLOCK:0\n");
return 0;
}
[-- Attachment #3: nonblock_linux-2.6.22-rc6.patch --]
[-- Type: text/x-diff, Size: 2902 bytes --]
--- linux-2.6.22-rc6.src/fs/read_write.c Fri Jun 15 19:30:05 2007
+++ linux-2.6.22-rc6_ndelay/fs/read_write.c Sun Aug 19 10:43:24 2007
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include <linux/socket.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -351,6 +352,36 @@
static inline void file_pos_write(struct file *file, loff_t pos)
{
file->f_pos = pos;
+}
+
+/* Helper for send/recv on non-sockets */
+ssize_t rw_with_flags(struct file *file, int fput_needed, void __user *buf, size_t count, unsigned flags)
+{
+ int err;
+ loff_t pos;
+ struct file *file_copy;
+
+ file_copy = file;
+ if (flags & MSG_DONTWAIT) {
+ /* We make copy even if O_NONBLOCK is already set. */
+ /* We don't want it to change under our feet. */
+ file_copy = kmalloc(sizeof(*file_copy), GFP_KERNEL);
+ memcpy(file_copy, file, sizeof(*file_copy));
+ file_copy->f_flags |= O_NONBLOCK;
+ }
+
+ pos = file_pos_read(file);
+ if (flags & MSG_OOB) /* MSG_OOB is reused to mean 'write' */
+ err = vfs_write(file_copy, buf, count, &pos);
+ else
+ err = vfs_read(file_copy, buf, count, &pos);
+ file_pos_write(file, pos);
+
+ if (flags & MSG_DONTWAIT) {
+ kfree(file_copy);
+ }
+ fput_light(file, fput_needed);
+ return err;
}
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
--- linux-2.6.22-rc6.src/include/linux/fs.h Wed Jun 27 21:24:18 2007
+++ linux-2.6.22-rc6_ndelay/include/linux/fs.h Sun Aug 19 10:32:20 2007
@@ -1154,6 +1154,9 @@
extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
unsigned long, loff_t *);
+extern ssize_t rw_with_flags(struct file *, int, void __user *, size_t,
+ unsigned);
+
/*
* NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
* without the big kernel lock held in all filesystems.
--- linux-2.6.22-rc6.src/net/socket.c Fri Jun 15 19:30:08 2007
+++ linux-2.6.22-rc6_ndelay/net/socket.c Sun Aug 19 11:34:07 2007
@@ -1585,8 +1585,17 @@
goto out;
sock = sock_from_file(sock_file, &err);
- if (!sock)
- goto out_put;
+ if (!sock) {
+ if (addr)
+ goto out_put;
+ if (flags & ~MSG_DONTWAIT)
+ goto out_put;
+ /* it's not a socket, but we support a special case:
+ * send(fd, buf, count, MSG_DONTWAIT)
+ * (MSG_OOB is reused to mean 'write') */
+ return rw_with_flags(sock_file, fput_needed, buff, len, flags | MSG_OOB);
+ }
+
iov.iov_base = buff;
iov.iov_len = len;
msg.msg_name = NULL;
@@ -1646,8 +1655,15 @@
goto out;
sock = sock_from_file(sock_file, &err);
- if (!sock)
- goto out_put;
+ if (!sock) {
+ if (addr)
+ goto out_put;
+ if (flags & ~MSG_DONTWAIT)
+ goto out_put;
+ /* it's not a socket, but we support a special case:
+ * recv(fd, ubuf, size, MSG_DONTWAIT) */
+ return rw_with_flags(sock_file, fput_needed, ubuf, size, flags);
+ }
msg.msg_control = NULL;
msg.msg_controllen = 0;
reply other threads:[~2007-08-25 21:50 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200708252250.03076.vda.linux@googlemail.com \
--to=vda.linux@googlemail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=viro@ftp.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.