From: Jens Axboe <axboe@suse.de>
To: "Luiz Fernando N. Capitulino" <lcapitulino@mandriva.com.br>
Cc: Andrew Morton <akpm@osdl.org>,
Michael Kerrisk <mtk-manpages@gmx.net>,
linux-kernel@vger.kernel.org, michael.kerrisk@gmx.net,
vendor-sec@lst.de
Subject: Re: splice/tee bugs?
Date: Sun, 9 Jul 2006 13:16:29 +0200 [thread overview]
Message-ID: <20060709111629.GV4188@suse.de> (raw)
In-Reply-To: <20060709103606.GU4188@suse.de>
On Sun, Jul 09 2006, Jens Axboe wrote:
> On Sat, Jul 08 2006, Luiz Fernando N. Capitulino wrote:
> >
> > Hi Jens,
> >
> > On Sat, 8 Jul 2006 08:41:32 +0200
> > Jens Axboe <axboe@suse.de> wrote:
> >
> > | On Fri, Jul 07 2006, Luiz Fernando N. Capitulino wrote:
> > | > On Fri, 7 Jul 2006 04:07:49 -0700
> > | > Andrew Morton <akpm@osdl.org> wrote:
> > | >
> > | > | On Fri, 07 Jul 2006 09:07:03 +0200
> > | > | "Michael Kerrisk" <mtk-manpages@gmx.net> wrote:
> > | > |
> > | > | > c) Occasionally the command line just hangs, producing no output.
> > | > | > In this case I can't kill it with ^C or ^\. This is a
> > | > | > hard-to-reproduce behaviour on my (x86) system, but I have
> > | > | > seen it several times by now.
> > | > |
> > | > | aka local DoS. Please capture sysrq-T output next time.
> > | >
> > | > If I run lots of them in parallel, I get the following OOPs in a few
> > | > seconds:
> > |
> > | With the patch posted? You need the i vs nrbufs fix.
> >
> > Yes, it fixes the problem. I didn't try it before because I thought
> > you were going to double check it [1].
>
> Yeah the patch needs reworking, however the isolated i vs nrbufs fix is
> safe enough on its own. I'll post a full patch for inclusion, I'm afraid
> I wont be able to fully test it enough for submitting it until tomorrow
> though.
Something like this, testing would be appreciated! Michael, can you
repeat your testing as well? Thanks.
diff --git a/fs/splice.c b/fs/splice.c
index 05fd278..ecf72bc 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1307,6 +1307,69 @@ asmlinkage long sys_splice(int fd_in, lo
}
/*
+ * Make sure there's data to read. Wait for input if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+ int ret = 0;
+
+ mutex_lock(&pipe->inode->i_mutex);
+
+ while (!pipe->nrbufs) {
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ if (!pipe->writers)
+ break;
+ if (!pipe->waiting_writers) {
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+ }
+ pipe_wait(pipe);
+ }
+
+ mutex_unlock(&pipe->inode->i_mutex);
+ return ret;
+}
+
+/*
+ * Make sure there's writeable room. Wait for room if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+ int ret = 0;
+
+ mutex_lock(&pipe->inode->i_mutex);
+
+ while (pipe->nrbufs >= PIPE_BUFFERS) {
+ if (!pipe->readers) {
+ send_sig(SIGPIPE, current, 0);
+ ret = -EPIPE;
+ break;
+ }
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ pipe->waiting_writers++;
+ pipe_wait(pipe);
+ pipe->waiting_writers--;
+ }
+
+ mutex_unlock(&pipe->inode->i_mutex);
+ return ret;
+}
+
+/*
* Link contents of ipipe to opipe.
*/
static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1314,9 +1377,9 @@ static int link_pipe(struct pipe_inode_i
size_t len, unsigned int flags)
{
struct pipe_buffer *ibuf, *obuf;
- int ret, do_wakeup, i, ipipe_first;
+ int ret, i, nbuf;
- ret = do_wakeup = ipipe_first = 0;
+ i = ret = 0;
/*
* Potential ABBA deadlock, work around it by ordering lock
@@ -1324,131 +1387,65 @@ static int link_pipe(struct pipe_inode_i
* could deadlock (one doing tee from A -> B, the other from B -> A).
*/
if (ipipe->inode < opipe->inode) {
- ipipe_first = 1;
- mutex_lock(&ipipe->inode->i_mutex);
- mutex_lock(&opipe->inode->i_mutex);
+ mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
} else {
- mutex_lock(&opipe->inode->i_mutex);
- mutex_lock(&ipipe->inode->i_mutex);
+ mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
}
- for (i = 0;; i++) {
+ do {
if (!opipe->readers) {
send_sig(SIGPIPE, current, 0);
if (!ret)
ret = -EPIPE;
break;
}
- if (ipipe->nrbufs - i) {
- ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
-
- /*
- * If we have room, fill this buffer
- */
- if (opipe->nrbufs < PIPE_BUFFERS) {
- int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
- /*
- * Get a reference to this pipe buffer,
- * so we can copy the contents over.
- */
- ibuf->ops->get(ipipe, ibuf);
-
- obuf = opipe->bufs + nbuf;
- *obuf = *ibuf;
-
- /*
- * Don't inherit the gift flag, we need to
- * prevent multiple steals of this page.
- */
- obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
-
- if (obuf->len > len)
- obuf->len = len;
-
- opipe->nrbufs++;
- do_wakeup = 1;
- ret += obuf->len;
- len -= obuf->len;
-
- if (!len)
- break;
- if (opipe->nrbufs < PIPE_BUFFERS)
- continue;
- }
-
- /*
- * We have input available, but no output room.
- * If we already copied data, return that. If we
- * need to drop the opipe lock, it must be ordered
- * last to avoid deadlocks.
- */
- if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
- break;
- }
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&opipe->wait))
- wake_up_interruptible(&opipe->wait);
- kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
- do_wakeup = 0;
- }
+ /*
+ * If we have iterated all input buffers or ran out of
+ * output room, break.
+ */
+ if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+ break;
- opipe->waiting_writers++;
- pipe_wait(opipe);
- opipe->waiting_writers--;
- continue;
- }
+ ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
+ nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
/*
- * No input buffers, do the usual checks for available
- * writers and blocking and wait if necessary
+ * Get a reference to this pipe buffer,
+ * so we can copy the contents over.
*/
- if (!ipipe->writers)
- break;
- if (!ipipe->waiting_writers) {
- if (ret)
- break;
- }
+ ibuf->ops->get(ipipe, ibuf);
+
+ obuf = opipe->bufs + nbuf;
+ *obuf = *ibuf;
+
/*
- * pipe_wait() drops the ipipe mutex. To avoid deadlocks
- * with another process, we can only safely do that if
- * the ipipe lock is ordered last.
+ * Don't inherit the gift flag, we need to
+ * prevent multiple steals of this page.
*/
- if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
- break;
- }
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
- if (waitqueue_active(&ipipe->wait))
- wake_up_interruptible_sync(&ipipe->wait);
- kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
+ if (obuf->len > len)
+ obuf->len = len;
- pipe_wait(ipipe);
- }
+ opipe->nrbufs++;
+ ret += obuf->len;
+ len -= obuf->len;
+ i++;
+ } while (len);
mutex_unlock(&ipipe->inode->i_mutex);
mutex_unlock(&opipe->inode->i_mutex);
- if (do_wakeup) {
+ if (ret) {
smp_mb();
if (waitqueue_active(&opipe->wait))
wake_up_interruptible(&opipe->wait);
kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
- }
+ } else if (flags & SPLICE_F_NONBLOCK)
+ ret = -EAGAIN;
return ret;
}
@@ -1464,12 +1461,23 @@ static long do_tee(struct file *in, stru
{
struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+ int ret;
/*
- * Link ipipe to the two output pipes, consuming as we go along.
+ * Duplicate the contents of ipipe to opipe without actually
+ * copying the data.
*/
- if (ipipe && opipe)
+ if (ipipe && opipe && ipipe != opipe) {
+ ret = link_ipipe_prep(ipipe, flags);
+ if (unlikely(ret))
+ return ret;
+
+ ret = link_opipe_prep(opipe, flags);
+ if (unlikely(ret))
+ return ret;
+
return link_pipe(ipipe, opipe, len, flags);
+ }
return -EINVAL;
}
--
Jens Axboe
next prev parent reply other threads:[~2006-07-09 11:14 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-07-07 7:07 splice/tee bugs? Michael Kerrisk
2006-07-07 11:07 ` Andrew Morton
2006-07-07 11:42 ` Michael Kerrisk
2006-07-07 12:03 ` Jens Axboe
2006-07-07 12:28 ` Jens Axboe
2006-07-07 12:31 ` Michael Kerrisk
2006-07-07 12:41 ` Jens Axboe
2006-07-07 13:12 ` Jens Axboe
2006-07-07 13:14 ` Jens Axboe
2006-07-07 13:21 ` Arjan van de Ven
2006-07-07 13:26 ` Jens Axboe
2006-07-07 13:54 ` Paulo Marques
2006-07-07 14:02 ` Jens Axboe
2006-07-07 14:05 ` Michael Kerrisk
2006-07-07 14:08 ` Jens Axboe
2006-07-07 16:13 ` Luiz Fernando N. Capitulino
2006-07-07 21:43 ` Luiz Fernando N. Capitulino
2006-07-08 6:41 ` Jens Axboe
2006-07-08 21:09 ` Luiz Fernando N. Capitulino
2006-07-09 10:36 ` Jens Axboe
2006-07-09 11:16 ` Jens Axboe [this message]
2006-07-09 16:47 ` Luiz Fernando N. Capitulino
2006-07-09 17:57 ` Jens Axboe
2006-07-10 6:25 ` Michael Kerrisk
2006-07-10 6:43 ` Jens Axboe
2006-07-10 8:09 ` Michael Kerrisk
2006-07-10 8:24 ` Jens Axboe
2006-07-10 8:40 ` Michael Kerrisk
2006-07-10 8:46 ` Jens Axboe
2006-07-10 8:50 ` Michael Kerrisk
2006-07-10 9:06 ` Jens Axboe
2006-07-10 9:08 ` Michael Kerrisk
2006-07-10 8:50 ` Jens Axboe
-- strict thread matches above, loose matches on Subject: below --
2006-07-08 5:33 Chuck Ebbert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060709111629.GV4188@suse.de \
--to=axboe@suse.de \
--cc=akpm@osdl.org \
--cc=lcapitulino@mandriva.com.br \
--cc=linux-kernel@vger.kernel.org \
--cc=michael.kerrisk@gmx.net \
--cc=mtk-manpages@gmx.net \
--cc=vendor-sec@lst.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.