public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <fweisbec@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	Christoph Lameter <cl@linux.com>,
	Kevin Hilman <khilman@linaro.org>,
	Mike Galbraith <bitbucket@online.de>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Tejun Heo <tj@kernel.org>, Viresh Kumar <viresh.kumar@linaro.org>
Subject: [PATCH 3/3] workqueue: Add anon workqueue sysfs hierarchy
Date: Fri, 14 Mar 2014 17:38:51 +0100	[thread overview]
Message-ID: <1394815131-17271-4-git-send-email-fweisbec@gmail.com> (raw)
In-Reply-To: <1394815131-17271-1-git-send-email-fweisbec@gmail.com>

We call "anon workqueues" the set of unbound workqueues that don't
carry the WQ_SYSFS flag.

They are a problem nowaday because people who work on CPU isolation
(HPC, Real time, etc...) want to be able to migrate all the unbound
workqueues away to a single CPU. This control is possible through sysfs
but only with WQ_SYSFS workqueues.

Now we need to deal with the other unbound workqueues. There is two
possible solutions:

1) Implement a sysfs directory for each unbound !WQ_SYSFS. This could
be done with a specific Kconfig to make sure that these workqueue
won't be considered as a stable ABI. But we all know that all distros
will enable this Kconfig symbol and that a warning in the Kconfig help
text won't protect against anything.

2) Implement a single sysfs directory containing only the cpumask file
to the control the affinity of all the !WQ_SYSFS workqueues.

This patch implements the second solution.

Two issues I have seen though:

* This triggers the following warning in apply_workqueue_attrs():

	/* creating multiple pwqs breaks ordering guarantee */
	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
		return -EINVAL;

  I haven't yet checked into the details.

* wq_calc_node_cpumask() tells that if NUMA affinity is not enabled,
  cpumask is always used. Which suggest that if NUMA affinity is enabled
  the cpumask may be ignored?

Cc: Christoph Lameter <cl@linux.com>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Not-Yet-Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/workqueue.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ad8f727..aabee1f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -289,7 +289,8 @@ static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
-static DEFINE_MUTEX(wq_unbound_mutex);	/* protects list of unbound workqueues */
+/* protects list of unbound workqueues and wq_anon_cpumask*/
+static DEFINE_MUTEX(wq_unbound_mutex);
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 
 static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
@@ -3311,13 +3312,122 @@ static struct device_attribute wq_sysfs_unbound_attrs[] = {
 	__ATTR_NULL,
 };
 
+/* Protected by wq_unbound_mutex */
+static cpumask_t wq_anon_cpumask;
+static ssize_t wq_anon_cpumask_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	int written;
+
+	mutex_lock(&wq_unbound_mutex);
+	written = cpumask_scnprintf(buf, PAGE_SIZE, &wq_anon_cpumask);
+	mutex_unlock(&wq_unbound_mutex);
+
+	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
+
+	return written;
+}
+
+/* Must be called with wq_unbound_mutex held */
+static int wq_anon_cpumask_set(cpumask_var_t cpumask)
+{
+	struct workqueue_attrs *attrs;
+	struct workqueue_struct *wq;
+	int ret;
+
+	list_for_each_entry(wq, &workqueues_unbound, unbound_list) {
+		if (wq->flags & WQ_SYSFS)
+			continue;
+		attrs = wq_sysfs_prep_attrs(wq);
+		if (!attrs)
+			return -ENOMEM;
+
+		cpumask_copy(attrs->cpumask, cpumask);
+		ret = apply_workqueue_attrs(wq, attrs);
+		free_workqueue_attrs(attrs);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static ssize_t wq_anon_cpumask_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	cpumask_var_t cpumask;
+	int ret = -EINVAL;
+
+	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpumask_parse(buf, cpumask);
+	if (ret)
+		goto out;
+
+	get_online_cpus();
+	if (cpumask_intersects(cpumask, cpu_online_mask)) {
+		mutex_lock(&wq_unbound_mutex);
+		ret = wq_anon_cpumask_set(cpumask);
+		if (!ret)
+			cpumask_copy(&wq_anon_cpumask, cpumask);
+		mutex_unlock(&wq_unbound_mutex);
+	}
+	put_online_cpus();
+out:
+	free_cpumask_var(cpumask);
+	return ret ? ret : count;
+}
+
+static void device_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static struct device_attribute wq_sysfs_anon_attr =
+	__ATTR(cpumask, 0644, wq_anon_cpumask_show, wq_anon_cpumask_store);
+
 static struct bus_type wq_subsys = {
 	.name				= "workqueue",
 };
 
 static int __init wq_sysfs_init(void)
 {
-	return subsys_virtual_register(&wq_subsys, NULL);
+	struct device *anon_dev;
+	int ret;
+
+	ret = subsys_virtual_register(&wq_subsys, NULL);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&wq_unbound_mutex);
+	cpumask_copy(&wq_anon_cpumask, cpu_possible_mask);
+	mutex_unlock(&wq_unbound_mutex);
+
+	anon_dev = kzalloc(sizeof(*anon_dev), GFP_KERNEL);
+	if (!anon_dev)
+		return -ENOMEM;
+
+	anon_dev->bus = &wq_subsys;
+	anon_dev->init_name = "anon_wqs";
+	anon_dev->release = device_release;
+
+	ret = device_register(anon_dev);
+	if (ret) {
+		kfree(anon_dev);
+		return ret;
+	}
+
+	ret = device_create_file(anon_dev, &wq_sysfs_anon_attr);
+	if (ret) {
+		device_unregister(anon_dev);
+		return ret;
+	}
+
+	kobject_uevent(&anon_dev->kobj, KOBJ_ADD);
+
+	return 0;
 }
 core_initcall(wq_sysfs_init);
 
-- 
1.8.3.1


  parent reply	other threads:[~2014-03-14 16:39 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-14 16:38 [RFC PATCH 0/3] workqueue: Control cpu affinity of !WQ_SYSFS unbound workqueues Frederic Weisbecker
2014-03-14 16:38 ` [PATCH 1/3] workqueue: Move workqueue bus attr to device attribute Frederic Weisbecker
2014-03-14 16:38 ` [PATCH 2/3] workqueues: Account unbound workqueue in a seperate list Frederic Weisbecker
2014-03-14 18:17   ` Kevin Hilman
2014-03-15 12:40     ` Frederic Weisbecker
2014-03-14 16:38 ` Frederic Weisbecker [this message]
2014-03-14 19:08   ` [PATCH 3/3] workqueue: Add anon workqueue sysfs hierarchy Kevin Hilman
2014-03-17 14:02     ` Frederic Weisbecker
2014-03-22 17:01     ` Frederic Weisbecker
2014-03-22 18:55       ` Tejun Heo
2014-03-22 22:04         ` Frederic Weisbecker
2014-03-14 18:06 ` [RFC PATCH 0/3] workqueue: Control cpu affinity of !WQ_SYSFS unbound workqueues Kevin Hilman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1394815131-17271-4-git-send-email-fweisbec@gmail.com \
    --to=fweisbec@gmail.com \
    --cc=bitbucket@online.de \
    --cc=cl@linux.com \
    --cc=khilman@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=tj@kernel.org \
    --cc=viresh.kumar@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox