linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: subhra mazumdar <subhra.mazumdar@oracle.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, tglx@linutronix.de,
	dhaval.giani@oracle.com, steven.sistare@oracle.com
Subject: [RFC PATCH v2 1/1] pipe: busy wait for pipe
Date: Tue, 25 Sep 2018 16:32:40 -0700	[thread overview]
Message-ID: <20180925233240.24451-2-subhra.mazumdar@oracle.com> (raw)
In-Reply-To: <20180925233240.24451-1-subhra.mazumdar@oracle.com>

Introduce a pipe_ll_usec field for pipes that indicates the number of
microseconds a thread should spin if the pipe is empty or full before
sleeping. This is similar to busy polling on network sockets. Workloads
like hackbench in pipe mode benefit significantly from this by avoiding the
sleep and wakeup overhead. Other similar use cases can benefit as well. A
tunable, pipe_busy_poll, is introduced to enable or disable busy waiting via
/proc. Its value specifies the spin duration in microseconds. The default
value is 0, indicating no spin.

Signed-off-by: subhra mazumdar <subhra.mazumdar@oracle.com>
---
 fs/pipe.c                 | 12 ++++++++++++
 include/linux/pipe_fs_i.h |  2 ++
 kernel/sysctl.c           |  7 +++++++
 3 files changed, 21 insertions(+)

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c..35d805b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,6 +26,7 @@
 
 #include <linux/uaccess.h>
 #include <asm/ioctls.h>
+#include <linux/sched/clock.h>
 
 #include "internal.h"
 
@@ -40,6 +41,7 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned long pipe_user_pages_hard;
 unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+unsigned int pipe_busy_poll;
 
 /*
  * We use a start+len construction, which provides full use of the 
@@ -106,6 +108,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
 void pipe_wait(struct pipe_inode_info *pipe)
 {
 	DEFINE_WAIT(wait);
+	u64 start;
 
 	/*
 	 * Pipes are system-local resources, so sleeping on them
@@ -113,6 +116,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	 */
 	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
 	pipe_unlock(pipe);
+	start = local_clock();
+	while (current->state != TASK_RUNNING &&
+	       ((local_clock() - start) >> 10) < pipe->pipe_ll_usec)
+		cpu_relax();
 	schedule();
 	finish_wait(&pipe->wait, &wait);
 	pipe_lock(pipe);
@@ -825,6 +832,7 @@ static int do_pipe2(int __user *fildes, int flags)
 	struct file *files[2];
 	int fd[2];
 	int error;
+	struct pipe_inode_info *pipe;
 
 	error = __do_pipe_flags(fd, files, flags);
 	if (!error) {
@@ -838,6 +846,10 @@ static int do_pipe2(int __user *fildes, int flags)
 			fd_install(fd[0], files[0]);
 			fd_install(fd[1], files[1]);
 		}
+		pipe = files[0]->private_data;
+		pipe->pipe_ll_usec = pipe_busy_poll;
+		pipe = files[1]->private_data;
+		pipe->pipe_ll_usec = pipe_busy_poll;
 	}
 	return error;
 }
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5a3bb3b..73267d2 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -55,6 +55,7 @@ struct pipe_inode_info {
 	unsigned int waiting_writers;
 	unsigned int r_counter;
 	unsigned int w_counter;
+	unsigned int pipe_ll_usec;
 	struct page *tmp_page;
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
@@ -170,6 +171,7 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
 extern unsigned int pipe_max_size;
 extern unsigned long pipe_user_pages_hard;
 extern unsigned long pipe_user_pages_soft;
+extern unsigned int pipe_busy_poll;
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050..0e9ce0c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1863,6 +1863,13 @@ static struct ctl_table fs_table[] = {
 		.proc_handler	= proc_doulongvec_minmax,
 	},
 	{
+		.procname       = "pipe-busy-poll",
+		.data           = &pipe_busy_poll,
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,
+	},
+	{
 		.procname	= "mount-max",
 		.data		= &sysctl_mount_max,
 		.maxlen		= sizeof(unsigned int),
-- 
2.9.3


  reply	other threads:[~2018-09-25 23:33 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-25 23:32 [RFC PATCH v2 0/1] Pipe busy wait subhra mazumdar
2018-09-25 23:32 ` subhra mazumdar [this message]
2018-11-05 10:08   ` [RFC PATCH v2 1/1] pipe: busy wait for pipe Mel Gorman
2018-11-05 23:40     ` Subhra Mazumdar
2018-11-06  8:41       ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180925233240.24451-2-subhra.mazumdar@oracle.com \
    --to=subhra.mazumdar@oracle.com \
    --cc=dhaval.giani@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=steven.sistare@oracle.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).