From: Bandan Das <bsd@redhat.com>
To: kvm@vger.kernel.org
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
mst@redhat.com, Eyal Moscovici <EYALMO@il.ibm.com>,
Razya Ladelsky <RAZYA@il.ibm.com>,
cgroups@vger.kernel.org, jasowang@redhat.com
Subject: [RFC PATCH 1/4] vhost: Introduce a universal thread to serve all users
Date: Mon, 13 Jul 2015 00:07:32 -0400 [thread overview]
Message-ID: <1436760455-5686-2-git-send-email-bsd@redhat.com> (raw)
In-Reply-To: <1436760455-5686-1-git-send-email-bsd@redhat.com>
vhost threads are per-device, but in most cases a single thread
is enough. This change creates a single thread that is used to
serve all guests.
However, this complicates cgroups associations. The current policy
is to attach the per-device thread to all cgroups of the parent process
that the device is associated with. This is no longer possible if we
have a single thread. So, we end up moving the thread around to
cgroups of whichever device needs servicing. This is a very
inefficient protocol but seems to be the only way to integrate
cgroups support.
Signed-off-by: Razya Ladelsky <razya@il.ibm.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
drivers/vhost/scsi.c | 15 +++--
drivers/vhost/vhost.c | 150 ++++++++++++++++++++++++--------------------------
drivers/vhost/vhost.h | 19 +++++--
3 files changed, 97 insertions(+), 87 deletions(-)
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index ea32b38..6c42936 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -535,7 +535,7 @@ static void vhost_scsi_complete_cmd(struct vhost_scsi_cmd *cmd)
llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list);
- vhost_work_queue(&vs->dev, &vs->vs_completion_work);
+ vhost_work_queue(vs->dev.worker, &vs->vs_completion_work);
}
static int vhost_scsi_queue_data_in(struct se_cmd *se_cmd)
@@ -1282,7 +1282,7 @@ vhost_scsi_send_evt(struct vhost_scsi *vs,
}
llist_add(&evt->list, &vs->vs_event_list);
- vhost_work_queue(&vs->dev, &vs->vs_event_work);
+ vhost_work_queue(vs->dev.worker, &vs->vs_event_work);
}
static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
@@ -1335,8 +1335,8 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
/* Flush both the vhost poll and vhost work */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
vhost_scsi_flush_vq(vs, i);
- vhost_work_flush(&vs->dev, &vs->vs_completion_work);
- vhost_work_flush(&vs->dev, &vs->vs_event_work);
+ vhost_work_flush(vs->dev.worker, &vs->vs_completion_work);
+ vhost_work_flush(vs->dev.worker, &vs->vs_event_work);
/* Wait for all reqs issued before the flush to be finished */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
@@ -1584,8 +1584,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
if (!vqs)
goto err_vqs;
- vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
- vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
+ vhost_work_init(&vs->dev, &vs->vs_completion_work,
+ vhost_scsi_complete_cmd_work);
+
+ vhost_work_init(&vs->dev, &vs->vs_event_work,
+ vhost_scsi_evt_work);
vs->vs_events_nr = 0;
vs->vs_events_missed = false;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2ee2826..951c96b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -11,6 +11,8 @@
* Generic code for virtio server in host kernel.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/uio.h>
@@ -28,6 +30,9 @@
#include "vhost.h"
+/* Just one worker thread to service all devices */
+static struct vhost_worker *worker;
+
enum {
VHOST_MEMORY_MAX_NREGIONS = 64,
VHOST_MEMORY_F_LOG = 0x1,
@@ -58,13 +63,15 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
return 0;
}
-void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
+void vhost_work_init(struct vhost_dev *dev,
+ struct vhost_work *work, vhost_work_fn_t fn)
{
INIT_LIST_HEAD(&work->node);
work->fn = fn;
init_waitqueue_head(&work->done);
work->flushing = 0;
work->queue_seq = work->done_seq = 0;
+ work->dev = dev;
}
EXPORT_SYMBOL_GPL(vhost_work_init);
@@ -78,7 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
poll->dev = dev;
poll->wqh = NULL;
- vhost_work_init(&poll->work, fn);
+ vhost_work_init(dev, &poll->work, fn);
}
EXPORT_SYMBOL_GPL(vhost_poll_init);
@@ -116,30 +123,30 @@ void vhost_poll_stop(struct vhost_poll *poll)
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);
-static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
- unsigned seq)
+static bool vhost_work_seq_done(struct vhost_worker *worker,
+ struct vhost_work *work, unsigned seq)
{
int left;
- spin_lock_irq(&dev->work_lock);
+ spin_lock_irq(&worker->work_lock);
left = seq - work->done_seq;
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(&worker->work_lock);
return left <= 0;
}
-void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+void vhost_work_flush(struct vhost_worker *worker, struct vhost_work *work)
{
unsigned seq;
int flushing;
- spin_lock_irq(&dev->work_lock);
+ spin_lock_irq(&worker->work_lock);
seq = work->queue_seq;
work->flushing++;
- spin_unlock_irq(&dev->work_lock);
- wait_event(work->done, vhost_work_seq_done(dev, work, seq));
- spin_lock_irq(&dev->work_lock);
+ spin_unlock_irq(&worker->work_lock);
+ wait_event(work->done, vhost_work_seq_done(worker, work, seq));
+ spin_lock_irq(&worker->work_lock);
flushing = --work->flushing;
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(&worker->work_lock);
BUG_ON(flushing < 0);
}
EXPORT_SYMBOL_GPL(vhost_work_flush);
@@ -148,29 +155,30 @@ EXPORT_SYMBOL_GPL(vhost_work_flush);
* locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
- vhost_work_flush(poll->dev, &poll->work);
+ vhost_work_flush(poll->dev->worker, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_flush);
-void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
+void vhost_work_queue(struct vhost_worker *worker,
+ struct vhost_work *work)
{
unsigned long flags;
- spin_lock_irqsave(&dev->work_lock, flags);
+ spin_lock_irqsave(&worker->work_lock, flags);
if (list_empty(&work->node)) {
- list_add_tail(&work->node, &dev->work_list);
+ list_add_tail(&work->node, &worker->work_list);
work->queue_seq++;
- spin_unlock_irqrestore(&dev->work_lock, flags);
- wake_up_process(dev->worker);
+ spin_unlock_irqrestore(&worker->work_lock, flags);
+ wake_up_process(worker->thread);
} else {
- spin_unlock_irqrestore(&dev->work_lock, flags);
+ spin_unlock_irqrestore(&worker->work_lock, flags);
}
}
EXPORT_SYMBOL_GPL(vhost_work_queue);
void vhost_poll_queue(struct vhost_poll *poll)
{
- vhost_work_queue(poll->dev, &poll->work);
+ vhost_work_queue(poll->dev->worker, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);
@@ -203,19 +211,18 @@ static void vhost_vq_reset(struct vhost_dev *dev,
static int vhost_worker(void *data)
{
- struct vhost_dev *dev = data;
+ struct vhost_worker *worker = data;
struct vhost_work *work = NULL;
unsigned uninitialized_var(seq);
mm_segment_t oldfs = get_fs();
set_fs(USER_DS);
- use_mm(dev->mm);
for (;;) {
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
- spin_lock_irq(&dev->work_lock);
+ spin_lock_irq(&worker->work_lock);
if (work) {
work->done_seq = seq;
if (work->flushing)
@@ -223,21 +230,35 @@ static int vhost_worker(void *data)
}
if (kthread_should_stop()) {
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(&worker->work_lock);
__set_current_state(TASK_RUNNING);
break;
}
- if (!list_empty(&dev->work_list)) {
- work = list_first_entry(&dev->work_list,
+ if (!list_empty(&worker->work_list)) {
+ work = list_first_entry(&worker->work_list,
struct vhost_work, node);
list_del_init(&work->node);
seq = work->queue_seq;
} else
work = NULL;
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(&worker->work_lock);
if (work) {
+ struct vhost_dev *dev = work->dev;
+
__set_current_state(TASK_RUNNING);
+
+ if (current->mm != dev->mm) {
+ unuse_mm(current->mm);
+ use_mm(dev->mm);
+ }
+
+ /* TODO: Consider a more elegant solution */
+ if (worker->owner != dev->owner) {
+ /* Should check for return value */
+ cgroup_attach_task_all(dev->owner, current);
+ worker->owner = dev->owner;
+ }
work->fn(work);
if (need_resched())
schedule();
@@ -245,7 +266,6 @@ static int vhost_worker(void *data)
schedule();
}
- unuse_mm(dev->mm);
set_fs(oldfs);
return 0;
}
@@ -304,9 +324,8 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->log_file = NULL;
dev->memory = NULL;
dev->mm = NULL;
- spin_lock_init(&dev->work_lock);
- INIT_LIST_HEAD(&dev->work_list);
- dev->worker = NULL;
+ dev->worker = worker;
+ dev->owner = current;
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
@@ -331,31 +350,6 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
-struct vhost_attach_cgroups_struct {
- struct vhost_work work;
- struct task_struct *owner;
- int ret;
-};
-
-static void vhost_attach_cgroups_work(struct vhost_work *work)
-{
- struct vhost_attach_cgroups_struct *s;
-
- s = container_of(work, struct vhost_attach_cgroups_struct, work);
- s->ret = cgroup_attach_task_all(s->owner, current);
-}
-
-static int vhost_attach_cgroups(struct vhost_dev *dev)
-{
- struct vhost_attach_cgroups_struct attach;
-
- attach.owner = current;
- vhost_work_init(&attach.work, vhost_attach_cgroups_work);
- vhost_work_queue(dev, &attach.work);
- vhost_work_flush(dev, &attach.work);
- return attach.ret;
-}
-
/* Caller should have device mutex */
bool vhost_dev_has_owner(struct vhost_dev *dev)
{
@@ -366,7 +360,6 @@ EXPORT_SYMBOL_GPL(vhost_dev_has_owner);
/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
- struct task_struct *worker;
int err;
/* Is there an owner already? */
@@ -377,28 +370,15 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
/* No owner, become one */
dev->mm = get_task_mm(current);
- worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
- if (IS_ERR(worker)) {
- err = PTR_ERR(worker);
- goto err_worker;
- }
-
dev->worker = worker;
- wake_up_process(worker); /* avoid contributing to loadavg */
-
- err = vhost_attach_cgroups(dev);
- if (err)
- goto err_cgroup;
err = vhost_dev_alloc_iovecs(dev);
if (err)
- goto err_cgroup;
+ goto err_alloc;
return 0;
-err_cgroup:
- kthread_stop(worker);
+err_alloc:
dev->worker = NULL;
-err_worker:
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
@@ -472,11 +452,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
/* No one will access memory at this point */
kfree(dev->memory);
dev->memory = NULL;
- WARN_ON(!list_empty(&dev->work_list));
- if (dev->worker) {
- kthread_stop(dev->worker);
- dev->worker = NULL;
- }
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
@@ -1567,11 +1542,32 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
static int __init vhost_init(void)
{
+ struct vhost_worker *w =
+ kzalloc(sizeof(*w), GFP_KERNEL);
+ if (!w)
+ return -ENOMEM;
+
+ w->thread = kthread_create(vhost_worker,
+ w, "vhost-worker");
+ if (IS_ERR(w->thread))
+ return PTR_ERR(w->thread);
+
+ worker = w;
+ spin_lock_init(&worker->work_lock);
+ INIT_LIST_HEAD(&worker->work_list);
+ wake_up_process(worker->thread);
+ pr_info("Created universal thread to service requests\n");
+
return 0;
}
static void __exit vhost_exit(void)
{
+ if (worker) {
+ kthread_stop(worker->thread);
+ WARN_ON(!list_empty(&worker->work_list));
+ kfree(worker);
+ }
}
module_init(vhost_init);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 8c1c792..2f204ce 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -22,6 +22,7 @@ struct vhost_work {
int flushing;
unsigned queue_seq;
unsigned done_seq;
+ struct vhost_dev *dev;
};
/* Poll a file (eventfd or socket) */
@@ -35,8 +36,8 @@ struct vhost_poll {
struct vhost_dev *dev;
};
-void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
-void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
+void vhost_work_init(struct vhost_dev *dev,
+ struct vhost_work *work, vhost_work_fn_t fn);
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
unsigned long mask, struct vhost_dev *dev);
@@ -44,7 +45,6 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file);
void vhost_poll_stop(struct vhost_poll *poll);
void vhost_poll_flush(struct vhost_poll *poll);
void vhost_poll_queue(struct vhost_poll *poll);
-void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work);
long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
struct vhost_log {
@@ -116,11 +116,22 @@ struct vhost_dev {
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
+ /* vhost shared worker */
+ struct vhost_worker *worker;
+ /* for cgroup support */
+ struct task_struct *owner;
+};
+
+struct vhost_worker {
spinlock_t work_lock;
struct list_head work_list;
- struct task_struct *worker;
+ struct task_struct *thread;
+ struct task_struct *owner;
};
+void vhost_work_queue(struct vhost_worker *worker,
+ struct vhost_work *work);
+void vhost_work_flush(struct vhost_worker *worker, struct vhost_work *work);
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev);
--
2.4.3
next prev parent reply other threads:[~2015-07-13 4:07 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-13 4:07 [RFC PATCH 0/4] Shared vhost design Bandan Das
2015-07-13 4:07 ` Bandan Das
2015-07-13 4:07 ` Bandan Das [this message]
[not found] ` <OF8AF3E3F8.F0120188-ONC2257E8E.00740E46-C2257E90.0035BD30@il.ibm.com>
2015-08-08 22:40 ` [RFC PATCH 1/4] vhost: Introduce a universal thread to serve all users Bandan Das
2015-08-10 9:27 ` Michael S. Tsirkin
2015-08-10 20:09 ` Bandan Das
[not found] ` <jpg1tfarjly.fsf-oDDOE2N8RG3XLSnhx7PemevR1TjyzBtM@public.gmane.org>
2015-08-10 21:05 ` Bandan Das
2015-08-10 21:05 ` Bandan Das
2015-07-13 4:07 ` [RFC PATCH 2/4] vhost: Limit the number of devices served by a single worker thread Bandan Das
2015-07-13 4:07 ` [RFC PATCH 3/4] cgroup: Introduce a function to compare cgroups Bandan Das
2015-07-13 4:07 ` [RFC PATCH 4/4] vhost: Add cgroup-aware creation of worker threads Bandan Das
2015-07-27 21:12 ` Michael S. Tsirkin
[not found] ` <OF451FED84.3040AFD2-ONC2257E8C.0043F908-C2257E8C.00446592@il.ibm.com>
2015-07-27 19:48 ` [RFC PATCH 0/4] Shared vhost design Bandan Das
2015-07-27 21:07 ` Michael S. Tsirkin
[not found] ` <OFFB2CB583.341B00EF-ONC2257E94.002FF06E-C2257E94.0032BC0A@il.ibm.com>
[not found] ` <OFFB2CB583.341B00EF-ONC2257E94.002FF06E-C2257E94.0032BC0A-7z/5BgaJwgfQT0dZR+AlfA@public.gmane.org>
2015-08-01 18:48 ` Bandan Das
2015-08-01 18:48 ` Bandan Das
2015-07-27 21:02 ` Michael S. Tsirkin
[not found] ` <20150727235818-mutt-send-email-mst-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-08-08 23:06 ` Bandan Das
2015-08-08 23:06 ` Bandan Das
[not found] ` <jpgoaihs7lt.fsf-oDDOE2N8RG3XLSnhx7PemevR1TjyzBtM@public.gmane.org>
2015-08-09 12:45 ` Michael S. Tsirkin
2015-08-09 12:45 ` Michael S. Tsirkin
[not found] ` <OFC68F4730.CA40D595-ONC2257E9C.00515E83-C2257E9C.00523437@il.ibm.com>
2015-08-09 15:40 ` Michael S. Tsirkin
2015-08-10 20:00 ` Bandan Das
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1436760455-5686-2-git-send-email-bsd@redhat.com \
--to=bsd@redhat.com \
--cc=EYALMO@il.ibm.com \
--cc=RAZYA@il.ibm.com \
--cc=cgroups@vger.kernel.org \
--cc=jasowang@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.