qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Denis V. Lunev" <den@openvz.org>
Cc: Kevin Wolf <kwolf@redhat.com>, "Denis V. Lunev" <den@openvz.org>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	qemu-devel@nongnu.org,
	Raushaniya Maksudova <rmaksudova@virtuozzo.com>
Subject: [Qemu-devel] [PATCH 4/5] disk_deadlines: add control of requests time expiration
Date: Tue,  8 Sep 2015 11:00:27 +0300	[thread overview]
Message-ID: <1441699228-25767-5-git-send-email-den@openvz.org> (raw)
In-Reply-To: <1441699228-25767-1-git-send-email-den@openvz.org>

From: Raushaniya Maksudova <rmaksudova@virtuozzo.com>

If the disk-deadlines option is enabled for a drive, the completion time
of this drive's requests is monitored. The method is as follows (from here
on, assume that this option is enabled).

Every drive has its own red-black tree for keeping its requests.
The expiration time of a request is the key, and its cookie (used as the id
of the request) is the corresponding value. Every request is given 8 seconds
to complete. If a request is not completed in time for some reason (server
crash or something else), the timer of this drive fires and its callback
requests that the Virtual Machine (VM) be stopped.

The VM remains stopped until all requests from the disk which caused the
stop are completed. Furthermore, if there are other disks whose requests
are still waiting to be completed, the VM is not started: it waits for the
completion of all "late" requests from all disks.

Signed-off-by: Raushaniya Maksudova <rmaksudova@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
---
 block/accounting.c             |   8 ++
 block/disk-deadlines.c         | 167 +++++++++++++++++++++++++++++++++++++++++
 include/block/disk-deadlines.h |  11 +++
 3 files changed, 186 insertions(+)

diff --git a/block/accounting.c b/block/accounting.c
index 01d594f..7b913fd 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -34,6 +34,10 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
     cookie->bytes = bytes;
     cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     cookie->type = type;
+
+    if (stats->disk_deadlines.enabled) {
+        insert_request(&stats->disk_deadlines, cookie);
+    }
 }
 
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
@@ -44,6 +48,10 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
     stats->nr_ops[cookie->type]++;
     stats->total_time_ns[cookie->type] +=
         qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns;
+
+    if (stats->disk_deadlines.enabled) {
+        remove_request(&stats->disk_deadlines, cookie);
+    }
 }
 
 
diff --git a/block/disk-deadlines.c b/block/disk-deadlines.c
index 39dec53..acb44bc 100644
--- a/block/disk-deadlines.c
+++ b/block/disk-deadlines.c
@@ -23,8 +23,175 @@
  */
 
 #include "block/disk-deadlines.h"
+#include "block/accounting.h"
+#include "sysemu/sysemu.h"
+#include "qemu/atomic.h"
+
+/*
+ * Number of pending deadline expirations that keep the VM stopped: raised in
+ * disk_deadlines_callback(), lowered in remove_request(); at 0 the VM resumes
+ */
+uint64_t num_requests_vmstopped;
+
+/* Default time a request is given to complete: 8 seconds, in nanoseconds */
+const uint64_t EXPIRE_DEFAULT_NS = 8000000000;
+
+typedef struct RequestInfo {
+    BlockAcctCookie *cookie;    /* in: the request to look for */
+    int64_t expire_time;        /* out: its tree key, set by find_request() */
+} RequestInfo;
+
+static gint compare(gconstpointer a, gconstpointer b)
+{   /* keys are int64_t values: a plain a - b would be truncated to gint */
+    return ((int64_t)a > (int64_t)b) - ((int64_t)a < (int64_t)b);
+}
+
+static gboolean find_request(gpointer key, gpointer value, gpointer data)
+{
+    RequestInfo *wanted = data;
+    /* g_tree_foreach() callback: a true return ends the traversal */
+    if (value != wanted->cookie) {
+        return false;
+    }
+    wanted->expire_time = (int64_t)key;    /* hand back the matching key */
+    return true;
+}
+
+/* g_tree_search() callback: keeps the smallest key visited so far in *data */
+static gint search_min_key(gpointer key, gpointer data)
+{
+    int64_t *smallest = data;
+    int64_t candidate = (int64_t)key;
+
+    /* 0 is the "nothing recorded yet" start value of the accumulator */
+    if (*smallest == 0 || candidate <= *smallest) {
+        *smallest = candidate;
+    }
+
+    /* -1 => the search always proceeds among the pairs with a smaller key */
+    return -1;
+}
+
+static int64_t soonest_expire_time(GTree *requests_tree)
+{
+    /*
+     * search_min_key() never returns 0, so g_tree_search() finds no match and
+     * returns NULL; the walk only collects the smallest key (0 if empty)
+     */
+    int64_t earliest = 0;
+    g_tree_search(requests_tree, (GCompareFunc)search_min_key, &earliest);
+    return earliest;
+}
+
+/* Timer handler: stop the VM if the earliest pending request is overdue */
+static void disk_deadlines_callback(void *opaque)
+{
+    bool need_vmstop = false;
+    int64_t current_time, expire_time;
+    DiskDeadlines *disk_deadlines = opaque;
+
+    current_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    pthread_mutex_lock(&disk_deadlines->mtx_tree);
+    if (g_tree_nnodes(disk_deadlines->requests_tree) == 0) {
+        /* There are no requests in the tree, do nothing */
+        pthread_mutex_unlock(&disk_deadlines->mtx_tree);
+        return;
+    }
+    expire_time = soonest_expire_time(disk_deadlines->requests_tree);
+
+    /* The earliest deadline is still ahead: just re-arm the timer for it */
+    if (expire_time > current_time) {
+        timer_mod_ns(disk_deadlines->request_timer, expire_time);
+        pthread_mutex_unlock(&disk_deadlines->mtx_tree);
+        return;
+    }
+
+    /*
+     * insert_request() may re-arm the timer while this tree is already marked
+     * expired; count the disk once, as remove_request() decrements only once
+     */
+    if (!disk_deadlines->expired_tree) {
+        disk_deadlines->expired_tree = true;
+        need_vmstop = !atomic_fetch_inc(&num_requests_vmstopped);
+    }
+    pthread_mutex_unlock(&disk_deadlines->mtx_tree);
+
+    if (need_vmstop) {
+        qemu_system_vmstop_request_prepare();
+        qemu_system_vmstop_request(RUN_STATE_PAUSED);
+    }
+}
 
 void disk_deadlines_init(DiskDeadlines *disk_deadlines, bool enabled)
 {
     disk_deadlines->enabled = enabled;
+    if (!enabled) {
+        return;
+    }
+
+    disk_deadlines->requests_tree = g_tree_new(compare);
+    if (!disk_deadlines->requests_tree) {
+        /* No tree to keep requests in: leave the feature switched off */
+        fprintf(stderr,
+                "disk_deadlines_init: failed to allocate requests_tree\n");
+        disk_deadlines->enabled = false;
+        return;
+    }
+    disk_deadlines->expired_tree = false;
+    pthread_mutex_init(&disk_deadlines->mtx_tree, NULL);
+    disk_deadlines->request_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
+                                                 disk_deadlines_callback,
+                                                 disk_deadlines);
+}
+
+/* Start deadline tracking for @request (a BlockAcctCookie) on this disk */
+void insert_request(DiskDeadlines *disk_deadlines, void *request)
+{
+    BlockAcctCookie *cookie = request;
+    int64_t deadline = cookie->start_time_ns + EXPIRE_DEFAULT_NS;
+    int64_t now;
+
+    pthread_mutex_lock(&disk_deadlines->mtx_tree);
+    now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    /* Set the timer for this disk if timer_expired() says it is not set yet */
+    if (timer_expired(disk_deadlines->request_timer, now)) {
+        timer_mod_ns(disk_deadlines->request_timer, deadline);
+    }
+    /* The deadline is the tree key; it is stored in the pointer value itself */
+    g_tree_insert(disk_deadlines->requests_tree, (int64_t *)deadline, cookie);
+    pthread_mutex_unlock(&disk_deadlines->mtx_tree);
+}
+
+/* Stop tracking @request; resume the VM once no "late" request is left */
+void remove_request(DiskDeadlines *disk_deadlines, void *request)
+{
+    RequestInfo lookup = { .cookie = request, .expire_time = 0 };
+    bool resume_vm = false;
+
+    pthread_mutex_lock(&disk_deadlines->mtx_tree);
+
+    /*
+     * The tree is keyed by expire time, which is not passed in here:
+     * scan the values for the cookie first, then remove by the key found
+     */
+    g_tree_foreach(disk_deadlines->requests_tree, find_request, &lookup);
+    g_tree_remove(disk_deadlines->requests_tree,
+                  (int64_t *)lookup.expire_time);
+
+    /*
+     * The tree of a disk marked as expired has become empty: clear the
+     * mark and, if no other disk with "disk-deadlines" enabled is still
+     * waiting for late requests (counter reaches 0), resume the VM
+     */
+    if (disk_deadlines->expired_tree &&
+        g_tree_nnodes(disk_deadlines->requests_tree) == 0) {
+        disk_deadlines->expired_tree = false;
+        resume_vm = !atomic_dec_fetch(&num_requests_vmstopped);
+    }
+    pthread_mutex_unlock(&disk_deadlines->mtx_tree);
+
+    /* The start request is issued only after the mutex has been dropped */
+    if (resume_vm) {
+        qemu_system_vmstart_request();
+    }
+}
diff --git a/include/block/disk-deadlines.h b/include/block/disk-deadlines.h
index 2ea193b..9672aff 100644
--- a/include/block/disk-deadlines.h
+++ b/include/block/disk-deadlines.h
@@ -25,11 +25,22 @@
 #define DISK_DEADLINES_H
 
 #include <stdbool.h>
+#include <stdint.h>
+#include <glib.h>
+
+#include "qemu/typedefs.h"
+#include "qemu/timer.h"
 
 typedef struct DiskDeadlines {
     bool enabled;
+    bool expired_tree;        /* set when a request here missed its deadline */
+    pthread_mutex_t mtx_tree; /* held around requests_tree/expired_tree use */
+    GTree *requests_tree;     /* expire time (ns) -> BlockAcctCookie pointer */
+    QEMUTimer *request_timer; /* fires disk_deadlines_callback() */
 } DiskDeadlines;
 
 void disk_deadlines_init(DiskDeadlines *disk_deadlines, bool enabled);
+void insert_request(DiskDeadlines *disk_deadlines, void *request);
+void remove_request(DiskDeadlines *disk_deadlines, void *request);
 
 #endif
-- 
2.1.4

  parent reply	other threads:[~2015-09-08  8:00 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-08  8:00 [Qemu-devel] [PATCH RFC 0/5] disk deadlines Denis V. Lunev
2015-09-08  8:00 ` [Qemu-devel] [PATCH 1/5] add QEMU style defines for __sync_add_and_fetch Denis V. Lunev
2015-09-10  8:19   ` Stefan Hajnoczi
2015-09-08  8:00 ` [Qemu-devel] [PATCH 2/5] disk_deadlines: add request to resume Virtual Machine Denis V. Lunev
2015-09-10  8:51   ` Stefan Hajnoczi
2015-09-10 19:18     ` Denis V. Lunev
2015-09-14 16:46       ` Stefan Hajnoczi
2015-09-08  8:00 ` [Qemu-devel] [PATCH 3/5] disk_deadlines: add disk-deadlines option per drive Denis V. Lunev
2015-09-10  9:05   ` Stefan Hajnoczi
2015-09-08  8:00 ` Denis V. Lunev [this message]
2015-09-08  9:35   ` [Qemu-devel] [PATCH 4/5] disk_deadlines: add control of requests time expiration Fam Zheng
2015-09-08  9:42     ` Denis V. Lunev
2015-09-08 11:06   ` Kevin Wolf
2015-09-08 11:27     ` Denis V. Lunev
2015-09-08 13:05       ` Kevin Wolf
2015-09-08 14:23         ` Denis V. Lunev
2015-09-08 14:48           ` Kevin Wolf
2015-09-10 10:27             ` Stefan Hajnoczi
2015-09-10 11:39               ` Kevin Wolf
2015-09-14 16:53                 ` Stefan Hajnoczi
2015-09-25 12:34               ` Dr. David Alan Gilbert
2015-09-28 12:42                 ` Stefan Hajnoczi
2015-09-28 13:55                   ` Dr. David Alan Gilbert
2015-09-08  8:00 ` [Qemu-devel] [PATCH 5/5] disk_deadlines: add info disk-deadlines option Denis V. Lunev
2015-09-08 16:20   ` Eric Blake
2015-09-08 16:26     ` Eric Blake
2015-09-10 18:53       ` Denis V. Lunev
2015-09-10 19:13     ` Denis V. Lunev
2015-09-08  8:58 ` [Qemu-devel] [PATCH RFC 0/5] disk deadlines Vasiliy Tolstov
2015-09-08  9:20 ` Fam Zheng
2015-09-08 10:11   ` Kevin Wolf
2015-09-08 10:13     ` Denis V. Lunev
2015-09-08 10:20     ` Fam Zheng
2015-09-08 10:46       ` Denis V. Lunev
2015-09-08 10:49       ` Kevin Wolf
2015-09-08 13:20         ` Fam Zheng
2015-09-08  9:33 ` Paolo Bonzini
2015-09-08  9:41   ` Denis V. Lunev
2015-09-08  9:43     ` Paolo Bonzini
2015-09-08 10:37     ` Andrey Korolyov
2015-09-08 10:50       ` Denis V. Lunev
2015-09-08 10:07   ` Kevin Wolf
2015-09-08 10:08     ` Denis V. Lunev
2015-09-08 10:22   ` Stefan Hajnoczi
2015-09-08 10:26     ` Paolo Bonzini
2015-09-08 10:36     ` Denis V. Lunev
2015-09-08 19:11 ` John Snow
2015-09-10 19:29 ` [Qemu-devel] Summary: " Denis V. Lunev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1441699228-25767-5-git-send-email-den@openvz.org \
    --to=den@openvz.org \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rmaksudova@virtuozzo.com \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).