qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Juraj Marcin <jmarcin@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Juraj Marcin" <jmarcin@redhat.com>,
	vsementsov@yandex-team.ru,
	"Daniel P. Berrangé" <berrange@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>
Subject: [PATCH v3 5/5] utils/qemu-sockets: Introduce inet socket options controlling TCP keep-alive
Date: Tue,  8 Apr 2025 13:25:04 +0200	[thread overview]
Message-ID: <20250408112508.1638722-6-jmarcin@redhat.com> (raw)
In-Reply-To: <20250408112508.1638722-1-jmarcin@redhat.com>

From: Juraj Marcin <jmarcin@redhat.com>

With the default TCP stack configuration, it could be even 2 hours
before the connection times out due to the other side not being
reachable. However, in some cases, the application needs to be aware of
a connection issue much sooner.

This is the case, for example, for postcopy live migration. If there is
no traffic from the migration destination guest (server-side) to the
migration source guest (client-side), the destination keeps waiting for
pages indefinitely and does not switch to the postcopy-paused state.
This can happen, for example, if the destination QEMU instance is
started with the '-S' command line option and the machine is not started
yet, or if the machine is idle and produces no new page faults for
not-yet-migrated pages.

This patch introduces new inet socket parameters that control count,
idle period, and interval of TCP keep-alive packets before the
connection is considered broken. These parameters are available on
systems where the respective TCP socket options are defined
(TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL).

The default value for all is 0, which means the system configuration is
used.

Signed-off-by: Juraj Marcin <jmarcin@redhat.com>
---
 meson.build         |  6 ++++
 qapi/sockets.json   | 15 ++++++++
 util/qemu-sockets.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+)

diff --git a/meson.build b/meson.build
index 41f68d3806..680f47cf42 100644
--- a/meson.build
+++ b/meson.build
@@ -2734,6 +2734,12 @@ if linux_io_uring.found()
   config_host_data.set('HAVE_IO_URING_PREP_WRITEV2',
                        cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2'))
 endif
+config_host_data.set('HAVE_TCP_KEEPCNT',
+                     cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT'))
+config_host_data.set('HAVE_TCP_KEEPIDLE',
+                     cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE'))
+config_host_data.set('HAVE_TCP_KEEPINTVL',
+                     cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL'))
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
diff --git a/qapi/sockets.json b/qapi/sockets.json
index 62797cd027..bb9d298635 100644
--- a/qapi/sockets.json
+++ b/qapi/sockets.json
@@ -59,6 +59,18 @@
 # @keep-alive: enable keep-alive when connecting to/listening on this socket.
 #     (Since 4.2, not supported for listening sockets until 10.1)
 #
+# @keep-alive-count: number of keep-alive packets sent before the connection is
+#     closed.  Only supported for TCP sockets on systems where TCP_KEEPCNT
+#     socket option is defined.  (Since 10.1)
+#
+# @keep-alive-idle: time in seconds the connection needs to be idle before
+#     sending a keepalive packet.  Only supported for TCP sockets on systems
+#     where TCP_KEEPIDLE socket option is defined.  (Since 10.1)
+#
+# @keep-alive-interval: time in secods between keep-alive packets.  Only
+#     supported for TCP sockets on systems where TCP_KEEPINTVL is defined.
+#     (Since 10.1)
+#
 # @mptcp: enable multi-path TCP.  (Since 6.1)
 #
 # Since: 1.3
@@ -71,6 +83,9 @@
     '*ipv4': 'bool',
     '*ipv6': 'bool',
     '*keep-alive': 'bool',
+    '*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' },
+    '*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' },
+    '*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' },
     '*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } }
 
 ##
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index fed17a1ffb..8e355b097c 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -217,6 +217,45 @@ static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp)
                              "Unable to set keep-alive option on socket");
             return -1;
         }
+#ifdef HAVE_TCP_KEEPCNT
+        if (saddr->has_keep_alive_count &&
+            saddr->keep_alive_count) {
+            int keep_count = saddr->has_keep_alive_count;
+            ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count,
+                             sizeof(keep_count));
+            if (ret < 0) {
+                error_setg_errno(errp, errno,
+                                 "Unable to set TCP keep-alive count option on socket");
+                return -1;
+            }
+        }
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+        if (saddr->has_keep_alive_idle &&
+            saddr->keep_alive_idle) {
+            int keep_idle = saddr->has_keep_alive_idle;
+            ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle,
+                             sizeof(keep_idle));
+            if (ret < 0) {
+                error_setg_errno(errp, errno,
+                                 "Unable to set TCP keep-alive idle option on socket");
+                return -1;
+            }
+        }
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+        if (saddr->has_keep_alive_interval &&
+            saddr->keep_alive_interval) {
+            int keep_interval = saddr->has_keep_alive_interval;
+            ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval,
+                             sizeof(keep_interval));
+            if (ret < 0) {
+                error_setg_errno(errp, errno,
+                                 "Unable to set TCP keep-alive interval option on socket");
+                return -1;
+            }
+        }
+#endif
     }
     return 0;
 }
@@ -628,6 +667,22 @@ static int inet_parse_flag(const char *flagname, const char *optstr, bool *val,
     return 0;
 }
 
+static int inet_parse_u32(const char *optname, const char *optstr,
+                          uint32_t max, uint32_t *val, Error **errp)
+{
+    int pos;
+    if (sscanf(optstr, "%" PRIu32 "%n", val, &pos) != 1 ||
+        (optstr[pos] != '\0' && optstr[pos] != ',')) {
+        error_setg(errp, "error parsing %s argument", optname);
+        return -1;
+    }
+    if (*val > max) {
+        error_setg(errp, "%s is too large", optname);
+        return -1;
+    }
+    return 0;
+}
+
 int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
 {
     const char *optstr, *h;
@@ -700,6 +755,39 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
         }
         addr->has_keep_alive = true;
     }
+#ifdef HAVE_TCP_KEEPCNT
+    begin = strstr(optstr, ",keep-alive-count=");
+    if (begin) {
+        if (inet_parse_u32("keep-alive-count",
+                           begin + strlen(",keep-alive-count="), INT_MAX,
+                           &addr->keep_alive_count, errp)) {
+            return -1;
+        }
+        addr->has_keep_alive_count = true;
+    }
+#endif
+#ifdef HAVE_TCP_KEEPIDLE
+    begin = strstr(optstr, ",keep-alive-idle=");
+    if (begin) {
+        if (inet_parse_u32("keep-alive-idle",
+                           begin + strlen(",keep-alive-idle="), INT_MAX,
+                           &addr->keep_alive_idle, errp)) {
+            return -1;
+        }
+        addr->has_keep_alive_idle = true;
+    }
+#endif
+#ifdef HAVE_TCP_KEEPINTVL
+    begin = strstr(optstr, ",keep-alive-interval=");
+    if (begin) {
+        if (inet_parse_u32("keep-alive-interval",
+                           begin + strlen(",keep-alive-interval="), INT_MAX,
+                           &addr->keep_alive_interval, errp)) {
+            return -1;
+        }
+        addr->has_keep_alive_interval = true;
+    }
+#endif
 #ifdef HAVE_IPPROTO_MPTCP
     begin = strstr(optstr, ",mptcp");
     if (begin) {
-- 
2.48.1



  parent reply	other threads:[~2025-04-08 11:27 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-08 11:24 [PATCH v3 0/5] util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive Juraj Marcin
2025-04-08 11:25 ` [PATCH v3 1/5] io: Fix partial struct copy in qio_dns_resolver_lookup_sync_inet() Juraj Marcin
2025-04-11 10:38   ` Daniel P. Berrangé
2025-04-08 11:25 ` [PATCH v3 2/5] util/qemu-sockets: Refactor setting client sockopts into a separate function Juraj Marcin
2025-04-11 10:40   ` Daniel P. Berrangé
2025-04-08 11:25 ` [PATCH v3 3/5] util/qemu-sockets: Refactor success and failure paths in inet_listen_saddr() Juraj Marcin
2025-04-11 13:47   ` Daniel P. Berrangé
2025-04-08 11:25 ` [PATCH v3 4/5] util/qemu-sockets: Add support for keep-alive flag to passive sockets Juraj Marcin
2025-04-11 13:49   ` Daniel P. Berrangé
2025-04-08 11:25 ` Juraj Marcin [this message]
2025-04-11 13:54   ` [PATCH v3 5/5] utils/qemu-sockets: Introduce inet socket options controlling TCP keep-alive Daniel P. Berrangé
2025-04-11 15:49     ` Daniel P. Berrangé
2025-04-30 14:47       ` Juraj Marcin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250408112508.1638722-6-jmarcin@redhat.com \
    --to=jmarcin@redhat.com \
    --cc=berrange@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=vsementsov@yandex-team.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).