From: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
To: Eric Dumazet <dada1@cosmosbay.com>
Cc: David Miller <davem@davemloft.net>,
axboe@kernel.dk, netdev@vger.kernel.org
Subject: Re: Fix for the fundamental network/block layer race in sendfile().
Date: Tue, 1 Apr 2008 22:07:51 +0400 [thread overview]
Message-ID: <20080401180751.GA3263@2ka.mipt.ru> (raw)
In-Reply-To: <20080401174759.GB28217@2ka.mipt.ru>
On Tue, Apr 01, 2008 at 09:47:59PM +0400, Evgeniy Polyakov (johnpol@2ka.mipt.ru) wrote:
> > 1) Is out_file garanted to be a socket ?
> > 2) SO_PRIVATE_CALLBACK is defined to be 37 later on your patch. Is ORing 37
> > to sk_flags really working ???
> > 3) So, once a socket has been used for a sendfile(), all subsequent
> > sendmsg() will call skb_splice_destructor ? (I see no clearing of
> > SO_PRIVATE_CALLBACK)
> > This will probably crash.
> > 4) god_blessed_us name ... Oh I got it, its April the first !!!!
Addressed all above (2 and 3 actually).
The formed by using correct socket flag (set_bit/clear_bit, like in a
real life), the latter by clearing bit on sendfile() exit and to fix
exit/clear race each spliced page also setups lru.prev to itself, which
is checked in destructor.
Name was not changed, does it matter?
Patch still works ok :)
Thanks Eric for pointing that bugs.
Signed-off-as-usual.
diff --git a/fs/read_write.c b/fs/read_write.c
index 49a9871..fda55f6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -16,6 +16,7 @@
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <net/sock.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -703,6 +704,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
loff_t pos;
ssize_t retval;
int fput_needed_in, fput_needed_out, fl;
+ struct sock *sk;
+ struct socket *sock;
/*
* Get input file, and verify that it is ok..
@@ -762,6 +765,12 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
count = max - pos;
}
+ sock = out_file->private_data;
+ sk = sock->sk;
+
+ sk->sk_user_data = &skb_splice_destructor;
+ set_bit(SOCK_PRIVATE_CALLBACK, &sock->flags);
+
fl = 0;
#if 0
/*
@@ -775,6 +784,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
#endif
retval = do_splice_direct(in_file, ppos, out_file, count, fl);
+ clear_bit(SOCK_PRIVATE_CALLBACK, &sock->flags);
+
if (retval > 0) {
add_rchar(current, retval);
add_wchar(current, retval);
diff --git a/fs/splice.c b/fs/splice.c
index 0670c91..dcda32e 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -29,6 +29,7 @@
#include <linux/syscalls.h>
#include <linux/uio.h>
#include <linux/security.h>
+#include <net/sock.h>
/*
* Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -535,6 +536,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
if (!ret) {
more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+ buf->page->lru.next = (void *)pipe;
+ buf->page->lru.prev = (void *)buf->page;
ret = file->f_op->sendpage(file, buf->page, buf->offset,
sd->len, &pos, more);
}
@@ -629,6 +632,9 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
ret = 0;
do_wakeup = 0;
+ if (actor == pipe_to_sendpage)
+ atomic_set(&pipe->god_blessed_us, pipe->nrbufs);
+
for (;;) {
if (pipe->nrbufs) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
@@ -665,12 +671,20 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
do_wakeup = 1;
}
- if (!sd->total_len)
+ if (!sd->total_len) {
+ if (actor == pipe_to_sendpage)
+ wait_event_interruptible(pipe->wait,
+ !atomic_read(&pipe->god_blessed_us));
break;
+ }
}
if (pipe->nrbufs)
continue;
+
+ if (actor == pipe_to_sendpage)
+ wait_event_interruptible(pipe->wait, !atomic_read(&pipe->god_blessed_us));
+
if (!pipe->writers)
break;
if (!pipe->waiting_writers) {
diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h
diff --git a/include/linux/net.h b/include/linux/net.h
index c414d90..5977a5e 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -65,6 +65,7 @@ typedef enum {
#define SOCK_NOSPACE 2
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
+#define SOCK_PRIVATE_CALLBACK 5
#ifndef ARCH_HAS_SOCKET_TYPES
/**
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8e41202..465405a 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -51,6 +51,7 @@ struct pipe_inode_info {
unsigned int waiting_writers;
unsigned int r_counter;
unsigned int w_counter;
+ atomic_t god_blessed_us;
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct inode *inode;
diff --git a/include/net/sock.h b/include/net/sock.h
index fd98760..441dcd2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -862,6 +862,8 @@ extern struct sk_buff *sock_rmalloc(struct sock *sk,
extern void sock_wfree(struct sk_buff *skb);
extern void sock_rfree(struct sk_buff *skb);
+extern void skb_splice_destructor(struct sk_buff *skb);
+
extern int sock_setsockopt(struct socket *sock, int level,
int op, char __user *optval,
int optlen);
@@ -1168,6 +1170,8 @@ static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
sock_hold(sk);
skb->sk = sk;
skb->destructor = sock_wfree;
+ if (sk->sk_socket && test_bit(SOCK_PRIVATE_CALLBACK, &sk->sk_socket->flags))
+ skb->destructor = sk->sk_user_data;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 2654c14..64aa36a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -112,6 +112,7 @@
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -1116,6 +1117,26 @@ void sock_wfree(struct sk_buff *skb)
sock_put(sk);
}
+void skb_splice_destructor(struct sk_buff *skb)
+{
+ if (skb_shinfo(skb)->nr_frags) {
+ int i;
+ struct pipe_inode_info *pipe;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ struct page *page = skb_shinfo(skb)->frags[i].page;
+
+ if (page->lru.prev == (struct list_head *)page) {
+ pipe = (struct pipe_inode_info *)page->lru.next;
+ if (atomic_dec_return(&pipe->god_blessed_us) == 0)
+ wake_up(&pipe->wait);
+ }
+ }
+ }
+
+ sock_wfree(skb);
+}
+
/*
* Read buffer destructor automatically called from kfree_skb.
*/
@@ -2164,6 +2185,7 @@ EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
+EXPORT_SYMBOL(skb_splice_destructor);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
--
Evgeniy Polyakov
next prev parent reply other threads:[~2008-04-01 18:08 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-28 9:20 Network/block layer race Evgeniy Polyakov
2008-03-28 20:40 ` David Miller
2008-03-28 20:56 ` Evgeniy Polyakov
2008-03-28 21:07 ` David Miller
2008-03-28 21:51 ` Evgeniy Polyakov
2008-04-01 16:49 ` Fix for the fundamental network/block layer race in sendfile() Evgeniy Polyakov
2008-04-01 17:14 ` Mika Penttilä
2008-04-01 17:36 ` Evgeniy Polyakov
2008-04-01 17:19 ` Eric Dumazet
2008-04-01 17:47 ` Evgeniy Polyakov
2008-04-01 18:07 ` Evgeniy Polyakov [this message]
2008-04-01 19:21 ` Eric Dumazet
2008-04-01 19:45 ` Evgeniy Polyakov
2008-04-01 20:59 ` Eric Dumazet
2008-04-01 21:14 ` Evgeniy Polyakov
2008-04-08 12:25 ` [take 2] " Evgeniy Polyakov
2008-04-08 12:58 ` Eric Dumazet
2008-04-08 17:26 ` Evgeniy Polyakov
2008-04-08 21:30 ` Evgeniy Polyakov
2008-04-09 11:33 ` Jens Axboe
2011-06-06 16:29 ` IPv6 DNSSL (rfc6106): please include the patch to pass it to user space Carlos Carvalho
2011-06-06 19:40 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080401180751.GA3263@2ka.mipt.ru \
--to=johnpol@2ka.mipt.ru \
--cc=axboe@kernel.dk \
--cc=dada1@cosmosbay.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.