public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@suse.de>
To: "Luiz Fernando N. Capitulino" <lcapitulino@mandriva.com.br>
Cc: Andrew Morton <akpm@osdl.org>,
	Michael Kerrisk <mtk-manpages@gmx.net>,
	linux-kernel@vger.kernel.org, michael.kerrisk@gmx.net,
	vendor-sec@lst.de
Subject: Re: splice/tee bugs?
Date: Sun, 9 Jul 2006 13:16:29 +0200	[thread overview]
Message-ID: <20060709111629.GV4188@suse.de> (raw)
In-Reply-To: <20060709103606.GU4188@suse.de>

On Sun, Jul 09 2006, Jens Axboe wrote:
> On Sat, Jul 08 2006, Luiz Fernando N. Capitulino wrote:
> > 
> >  Hi Jens,
> > 
> > On Sat, 8 Jul 2006 08:41:32 +0200
> > Jens Axboe <axboe@suse.de> wrote:
> > 
> > | On Fri, Jul 07 2006, Luiz Fernando N. Capitulino wrote:
> > | > On Fri, 7 Jul 2006 04:07:49 -0700
> > | > Andrew Morton <akpm@osdl.org> wrote:
> > | > 
> > | > | On Fri, 07 Jul 2006 09:07:03 +0200
> > | > | "Michael Kerrisk" <mtk-manpages@gmx.net> wrote:
> > | > | 
> > | > | > c) Occasionally the command line just hangs, producing no output.
> > | > | >    In this case I can't kill it with ^C or ^\.  This is a 
> > | > | >    hard-to-reproduce behaviour on my (x86) system, but I have 
> > | > | >    seen it several times by now.
> > | > | 
> > | > | aka local DoS.  Please capture sysrq-T output next time.
> > | > 
> > | >  If I run lots of them in parallel, I get the following OOPs in a few
> > | > seconds:
> > | 
> > | With the patch posted? You need the i vs nrbufs fix.
> > 
> >  Yes, it fixes the problem. I didn't try it before because I thought
> > you were going to double check it [1].
> 
> Yeah the patch needs reworking, however the isolated i vs nrbufs fix is
> safe enough on its own. I'll post a full patch for inclusion, I'm afraid
> I wont be able to fully test it enough for submitting it until tomorrow
> though.

Something like this, testing would be appreciated! Michael, can you
repeat your testing as well? Thanks.

diff --git a/fs/splice.c b/fs/splice.c
index 05fd278..ecf72bc 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1307,6 +1307,69 @@ asmlinkage long sys_splice(int fd_in, lo
 }
 
 /*
+ * Make sure there's data to read. Wait for input if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+	int ret = 0;
+
+	mutex_lock(&pipe->inode->i_mutex);
+
+	while (!pipe->nrbufs) {
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		if (!pipe->writers)
+			break;
+		if (!pipe->waiting_writers) {
+			if (flags & SPLICE_F_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
+		}
+		pipe_wait(pipe);
+	}
+	
+	mutex_unlock(&pipe->inode->i_mutex);
+	return ret;
+}
+
+/*
+ * Make sure there's writeable room. Wait for room if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+	int ret = 0;
+
+	mutex_lock(&pipe->inode->i_mutex);
+
+	while (pipe->nrbufs >= PIPE_BUFFERS) {
+		if (!pipe->readers) {
+			send_sig(SIGPIPE, current, 0);
+			ret = -EPIPE;
+			break;
+		}
+		if (flags & SPLICE_F_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		pipe->waiting_writers++;
+		pipe_wait(pipe);
+		pipe->waiting_writers--;
+	}
+
+	mutex_unlock(&pipe->inode->i_mutex);
+	return ret;
+}
+
+/*
  * Link contents of ipipe to opipe.
  */
 static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1314,9 +1377,9 @@ static int link_pipe(struct pipe_inode_i
 		     size_t len, unsigned int flags)
 {
 	struct pipe_buffer *ibuf, *obuf;
-	int ret, do_wakeup, i, ipipe_first;
+	int ret, i, nbuf;
 
-	ret = do_wakeup = ipipe_first = 0;
+	i = ret = 0;
 
 	/*
 	 * Potential ABBA deadlock, work around it by ordering lock
@@ -1324,131 +1387,65 @@ static int link_pipe(struct pipe_inode_i
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
 	if (ipipe->inode < opipe->inode) {
-		ipipe_first = 1;
-		mutex_lock(&ipipe->inode->i_mutex);
-		mutex_lock(&opipe->inode->i_mutex);
+		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
 	} else {
-		mutex_lock(&opipe->inode->i_mutex);
-		mutex_lock(&ipipe->inode->i_mutex);
+		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
 	}
 
-	for (i = 0;; i++) {
+	do {
 		if (!opipe->readers) {
 			send_sig(SIGPIPE, current, 0);
 			if (!ret)
 				ret = -EPIPE;
 			break;
 		}
-		if (ipipe->nrbufs - i) {
-			ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
-
-			/*
-			 * If we have room, fill this buffer
-			 */
-			if (opipe->nrbufs < PIPE_BUFFERS) {
-				int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
 
-				/*
-				 * Get a reference to this pipe buffer,
-				 * so we can copy the contents over.
-				 */
-				ibuf->ops->get(ipipe, ibuf);
-
-				obuf = opipe->bufs + nbuf;
-				*obuf = *ibuf;
-
-				/*
-				 * Don't inherit the gift flag, we need to
-				 * prevent multiple steals of this page.
-				 */
-				obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
-
-				if (obuf->len > len)
-					obuf->len = len;
-
-				opipe->nrbufs++;
-				do_wakeup = 1;
-				ret += obuf->len;
-				len -= obuf->len;
-
-				if (!len)
-					break;
-				if (opipe->nrbufs < PIPE_BUFFERS)
-					continue;
-			}
-
-			/*
-			 * We have input available, but no output room.
-			 * If we already copied data, return that. If we
-			 * need to drop the opipe lock, it must be ordered
-			 * last to avoid deadlocks.
-			 */
-			if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
-				if (!ret)
-					ret = -EAGAIN;
-				break;
-			}
-			if (signal_pending(current)) {
-				if (!ret)
-					ret = -ERESTARTSYS;
-				break;
-			}
-			if (do_wakeup) {
-				smp_mb();
-				if (waitqueue_active(&opipe->wait))
-					wake_up_interruptible(&opipe->wait);
-				kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-				do_wakeup = 0;
-			}
+		/*
+		 * If we have iterated all input buffers or ran out of
+		 * output room, break.
+		 */
+		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+			break;
 
-			opipe->waiting_writers++;
-			pipe_wait(opipe);
-			opipe->waiting_writers--;
-			continue;
-		}
+		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
+		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
 
 		/*
-		 * No input buffers, do the usual checks for available
-		 * writers and blocking and wait if necessary
+		 * Get a reference to this pipe buffer,
+		 * so we can copy the contents over.
 		 */
-		if (!ipipe->writers)
-			break;
-		if (!ipipe->waiting_writers) {
-			if (ret)
-				break;
-		}
+		ibuf->ops->get(ipipe, ibuf);
+
+		obuf = opipe->bufs + nbuf;
+		*obuf = *ibuf;
+
 		/*
-		 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
-		 * with another process, we can only safely do that if
-		 * the ipipe lock is ordered last.
+		 * Don't inherit the gift flag, we need to
+		 * prevent multiple steals of this page.
 		 */
-		if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
-			if (!ret)
-				ret = -EAGAIN;
-			break;
-		}
-		if (signal_pending(current)) {
-			if (!ret)
-				ret = -ERESTARTSYS;
-			break;
-		}
+		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 
-		if (waitqueue_active(&ipipe->wait))
-			wake_up_interruptible_sync(&ipipe->wait);
-		kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
+		if (obuf->len > len)
+			obuf->len = len;
 
-		pipe_wait(ipipe);
-	}
+		opipe->nrbufs++;
+		ret += obuf->len;
+		len -= obuf->len;
+		i++;
+	} while (len);
 
 	mutex_unlock(&ipipe->inode->i_mutex);
 	mutex_unlock(&opipe->inode->i_mutex);
 
-	if (do_wakeup) {
+	if (ret) {
 		smp_mb();
 		if (waitqueue_active(&opipe->wait))
 			wake_up_interruptible(&opipe->wait);
 		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-	}
+	} else if (flags & SPLICE_F_NONBLOCK)
+		ret = -EAGAIN;
 
 	return ret;
 }
@@ -1464,12 +1461,23 @@ static long do_tee(struct file *in, stru
 {
 	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
 	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	int ret;
 
 	/*
-	 * Link ipipe to the two output pipes, consuming as we go along.
+	 * Duplicate the contents of ipipe to opipe without actually
+	 * copying the data.
 	 */
-	if (ipipe && opipe)
+	if (ipipe && opipe && ipipe != opipe) {
+		ret = link_ipipe_prep(ipipe, flags);
+		if (unlikely(ret))
+			return ret;
+
+		ret = link_opipe_prep(opipe, flags);
+		if (unlikely(ret))
+			return ret;
+
 		return link_pipe(ipipe, opipe, len, flags);
+	}
 
 	return -EINVAL;
 }

-- 
Jens Axboe


  reply	other threads:[~2006-07-09 11:14 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-07  7:07 splice/tee bugs? Michael Kerrisk
2006-07-07 11:07 ` Andrew Morton
2006-07-07 11:42   ` Michael Kerrisk
2006-07-07 12:03     ` Jens Axboe
2006-07-07 12:28       ` Jens Axboe
2006-07-07 12:31         ` Michael Kerrisk
2006-07-07 12:41           ` Jens Axboe
2006-07-07 13:12             ` Jens Axboe
2006-07-07 13:14               ` Jens Axboe
2006-07-07 13:21                 ` Arjan van de Ven
2006-07-07 13:26                   ` Jens Axboe
2006-07-07 13:54                     ` Paulo Marques
2006-07-07 14:02                       ` Jens Axboe
2006-07-07 14:05               ` Michael Kerrisk
2006-07-07 14:08                 ` Jens Axboe
2006-07-07 16:13   ` Luiz Fernando N. Capitulino
2006-07-07 21:43     ` Luiz Fernando N. Capitulino
2006-07-08  6:41     ` Jens Axboe
2006-07-08 21:09       ` Luiz Fernando N. Capitulino
2006-07-09 10:36         ` Jens Axboe
2006-07-09 11:16           ` Jens Axboe [this message]
2006-07-09 16:47             ` Luiz Fernando N. Capitulino
2006-07-09 17:57               ` Jens Axboe
2006-07-10  6:25                 ` Michael Kerrisk
2006-07-10  6:43                   ` Jens Axboe
2006-07-10  8:09                     ` Michael Kerrisk
2006-07-10  8:24                       ` Jens Axboe
2006-07-10  8:40                         ` Michael Kerrisk
2006-07-10  8:46                           ` Jens Axboe
2006-07-10  8:50                             ` Michael Kerrisk
2006-07-10  9:06                               ` Jens Axboe
2006-07-10  9:08                                 ` Michael Kerrisk
2006-07-10  8:50                             ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2006-07-08  5:33 Chuck Ebbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060709111629.GV4188@suse.de \
    --to=axboe@suse.de \
    --cc=akpm@osdl.org \
    --cc=lcapitulino@mandriva.com.br \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael.kerrisk@gmx.net \
    --cc=mtk-manpages@gmx.net \
    --cc=vendor-sec@lst.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox