* [Qemu-devel] [PATCH V1 0/3] Introduce COLO-compare
@ 2016-03-25 9:02 Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 1/3] colo-compare: introduce colo compare initlization Zhang Chen
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Zhang Chen @ 2016-03-25 9:02 UTC (permalink / raw)
To: qemu devel, Jason Wang
Cc: Li Zhijian, Gui jianfeng, eddie.dong, zhanghailiang,
Dr. David Alan Gilbert, Zhang Chen, Yang Hongyang
COLO-compare is a part of the COLO project. It is used
to compare network packets to help COLO decide
whether to do a checkpoint.
v1:
- initial patch
Zhang Chen (3):
colo-compare: introduce colo compare initlization
colo-compare: track connection and enqueue packet
colo-compare: introduce packet comparison thread
net/Makefile.objs | 1 +
net/colo-compare.c | 782 +++++++++++++++++++++++++++++++++++++++++++++++++++++
vl.c | 3 +-
3 files changed, 785 insertions(+), 1 deletion(-)
create mode 100644 net/colo-compare.c
--
1.9.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH V1 1/3] colo-compare: introduce colo compare initlization
2016-03-25 9:02 [Qemu-devel] [PATCH V1 0/3] Introduce COLO-compare Zhang Chen
@ 2016-03-25 9:02 ` Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 2/3] colo-compare: track connection and enqueue packet Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 3/3] colo-compare: introduce packet comparison thread Zhang Chen
2 siblings, 0 replies; 4+ messages in thread
From: Zhang Chen @ 2016-03-25 9:02 UTC (permalink / raw)
To: qemu devel, Jason Wang
Cc: Li Zhijian, Gui jianfeng, eddie.dong, zhanghailiang,
Dr. David Alan Gilbert, Zhang Chen, Yang Hongyang
Packets coming from the primary char indev will be sent to
outdev; packets coming from the secondary char dev will be dropped.
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
net/Makefile.objs | 1 +
net/colo-compare.c | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++++
vl.c | 3 +-
3 files changed, 347 insertions(+), 1 deletion(-)
create mode 100644 net/colo-compare.c
diff --git a/net/Makefile.objs b/net/Makefile.objs
index b7c22fd..ba92f73 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -16,3 +16,4 @@ common-obj-$(CONFIG_NETMAP) += netmap.o
common-obj-y += filter.o
common-obj-y += filter-buffer.o
common-obj-y += filter-mirror.o
+common-obj-y += colo-compare.o
diff --git a/net/colo-compare.c b/net/colo-compare.c
new file mode 100644
index 0000000..62c66df
--- /dev/null
+++ b/net/colo-compare.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+
+#include "net/net.h"
+#include "net/vhost_net.h"
+#include "qom/object_interfaces.h"
+#include "qemu/iov.h"
+#include "qom/object.h"
+#include "qemu/typedefs.h"
+#include "net/queue.h"
+#include "sysemu/char.h"
+#include "qemu/sockets.h"
+
+#define TYPE_COLO_COMPARE "colo-compare"
+#define COLO_COMPARE(obj) \
+ OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
+
+#define COMPARE_READ_LEN_MAX NET_BUFSIZE
+
+static QTAILQ_HEAD(, CompareState) net_compares =
+ QTAILQ_HEAD_INITIALIZER(net_compares);
+
+typedef struct ReadState { /* reassembly state for one length-prefixed stream */
+ int state; /* 0 = getting length, 1 = getting data */
+ unsigned int index; /* bytes of the current length header or payload stored so far */
+ unsigned int packet_len; /* payload length decoded from the 4-byte header */
+ uint8_t buf[COMPARE_READ_LEN_MAX]; /* holds the length header, then the payload */
+} ReadState;
+
+typedef struct CompareState { /* per-instance state of a colo-compare object */
+ Object parent;
+
+ char *pri_indev; /* "primary_in" property: name passed to qemu_chr_find() */
+ char *sec_indev; /* "secondary_in" property: name passed to qemu_chr_find() */
+ char *outdev; /* "outdev" property: name passed to qemu_chr_find() */
+ CharDriverState *chr_pri_in; /* char device found for pri_indev */
+ CharDriverState *chr_sec_in; /* char device found for sec_indev */
+ CharDriverState *chr_out; /* char device found for outdev */
+ QTAILQ_ENTRY(CompareState) next; /* link in the net_compares list */
+ ReadState pri_rs; /* reassembly state for data read from chr_pri_in */
+ ReadState sec_rs; /* reassembly state for data read from chr_sec_in */
+} CompareState;
+
+/*
+ * Write one packet to @out, framed as a 4-byte network-order length
+ * followed by @size bytes of payload taken from @buf.
+ *
+ * Returns 0 on success (and when @size is 0, in which case nothing is
+ * written), the negative value returned by the failing write, or -EIO
+ * when a write came up short without reporting a negative value.
+ */
+static int compare_chr_send(CharDriverState *out, const uint8_t *buf, int size)
+{
+    uint32_t len = htonl(size);
+    int ret;
+
+    if (size == 0) {
+        return 0;
+    }
+
+    /* header first; only continue with the payload if it went out whole */
+    ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
+    if (ret == sizeof(len)) {
+        ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
+        if (ret == size) {
+            return 0;
+        }
+    }
+
+    return ret < 0 ? ret : -EIO;
+}
+
+/*
+ * Can-read callback installed on both input char devices.
+ * @opaque is unused; the answer is always COMPARE_READ_LEN_MAX.
+ */
+static int compare_chr_can_read(void *opaque)
+{
+    return COMPARE_READ_LEN_MAX;
+}
+
+/*
+ * Feed @size bytes from @buf into the reassembly state @rs.
+ *
+ * The stream is a sequence of frames: a 4-byte network-order length
+ * followed by that many payload bytes.  The payload is collected in
+ * rs->buf and its length is left in rs->packet_len.
+ *
+ * Returns
+ * 0: readstate is not ready
+ * 1: readstate is ready
+ * -1: the announced length does not fit in rs->buf, or @rs is corrupt
+ *
+ * NOTE(review): the function returns as soon as one packet is complete,
+ * so bytes of @buf that follow that packet are not consumed here.
+ */
+static int compare_chr_fill_rstate(ReadState *rs, const uint8_t *buf, int size)
+{
+    unsigned int l;
+    uint32_t be_len;
+
+    while (size > 0) {
+        /* reassemble a packet from the network */
+        switch (rs->state) { /* 0 = getting length, 1 = getting data */
+        case 0:
+            l = sizeof(be_len) - rs->index;
+            if (l > (unsigned int)size) {
+                l = size;
+            }
+            memcpy(rs->buf + rs->index, buf, l);
+            buf += l;
+            size -= l;
+            rs->index += l;
+            if (rs->index == sizeof(be_len)) {
+                /* got length; copy it out instead of casting the byte buffer */
+                memcpy(&be_len, rs->buf, sizeof(be_len));
+                rs->packet_len = ntohl(be_len);
+                rs->index = 0;
+                if (rs->packet_len > sizeof(rs->buf)) {
+                    /* reject up front, before any payload is copied */
+                    error_report("serious error: oversized packet received.");
+                    return -1;
+                }
+                rs->state = 1;
+            }
+            break;
+        case 1:
+            l = rs->packet_len - rs->index;
+            if (l > (unsigned int)size) {
+                l = size;
+            }
+            if (rs->index + l <= sizeof(rs->buf)) {
+                memcpy(rs->buf + rs->index, buf, l);
+            } else {
+                error_report("serious error: oversized packet received.");
+                rs->index = rs->state = 0;
+                return -1;
+            }
+
+            rs->index += l;
+            buf += l;
+            size -= l;
+            if (rs->index >= rs->packet_len) {
+                rs->index = 0;
+                rs->state = 0;
+                return 1;
+            }
+            break;
+        default:
+            /* unknown state would never consume input: reset and fail */
+            rs->index = rs->state = 0;
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Read handler for the primary input char device.
+ *
+ * @buf/@size is one chunk of the framed byte stream.  Once the chunk
+ * completes a packet, the reassembled packet is forwarded to chr_out.
+ * If the stream cannot be parsed the handlers are removed from the
+ * primary device.
+ */
+static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
+{
+    CompareState *s = COLO_COMPARE(opaque);
+    int ret;
+
+    ret = compare_chr_fill_rstate(&s->pri_rs, buf, size);
+    if (ret == 1) {
+        /*
+         * FIXME: enqueue to primary packet list
+         *
+         * Forward the reassembled payload, not @buf: @buf is a raw chunk
+         * that still contains length-header bytes and need not line up
+         * with packet boundaries.
+         */
+        if (compare_chr_send(s->chr_out, s->pri_rs.buf,
+                             s->pri_rs.packet_len) < 0) {
+            error_report("colo-compare: forwarding primary packet failed");
+        }
+    } else if (ret == -1) {
+        qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
+    }
+}
+
+/*
+ * Read handler for the secondary input char device: feeds the chunk
+ * into the secondary reassembly state.  Completed packets are not used
+ * yet; a parse error removes the handlers from the secondary device.
+ */
+static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
+{
+    CompareState *s = COLO_COMPARE(opaque);
+
+    switch (compare_chr_fill_rstate(&s->sec_rs, buf, size)) {
+    case 1:
+        /* TODO: enqueue to secondary packet list*/
+        break;
+    case -1:
+        qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
+        break;
+    default:
+        break;
+    }
+}
+
+/* Getter for "primary_in": returns a newly allocated copy of pri_indev. */
+static char *compare_get_pri_indev(Object *obj, Error **errp)
+{
+    return g_strdup(COLO_COMPARE(obj)->pri_indev);
+}
+
+/* Setter for "primary_in": frees the old string and stores a copy of @value. */
+static void compare_set_pri_indev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *cmp = COLO_COMPARE(obj);
+    char **slot = &cmp->pri_indev;
+
+    g_free(*slot);
+    *slot = g_strdup(value);
+}
+
+/* Getter for "secondary_in": returns a newly allocated copy of sec_indev. */
+static char *compare_get_sec_indev(Object *obj, Error **errp)
+{
+    return g_strdup(COLO_COMPARE(obj)->sec_indev);
+}
+
+/* Setter for "secondary_in": frees the old string and stores a copy of @value. */
+static void compare_set_sec_indev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *cmp = COLO_COMPARE(obj);
+    char **slot = &cmp->sec_indev;
+
+    g_free(*slot);
+    *slot = g_strdup(value);
+}
+
+/* Getter for "outdev": returns a newly allocated copy of outdev. */
+static char *compare_get_outdev(Object *obj, Error **errp)
+{
+    return g_strdup(COLO_COMPARE(obj)->outdev);
+}
+
+/* Setter for "outdev": frees the old string and stores a copy of @value. */
+static void compare_set_outdev(Object *obj, const char *value, Error **errp)
+{
+    CompareState *cmp = COLO_COMPARE(obj);
+    char **slot = &cmp->outdev;
+
+    g_free(*slot);
+    *slot = g_strdup(value);
+}
+
+/*
+ * UserCreatable 'complete' hook.
+ *
+ * Checks that the three device properties are set and pairwise distinct,
+ * looks each char device up, claims it and installs the read handlers
+ * on the two input devices.  On success the object is appended to
+ * net_compares.  On failure @errp is set and every input device claimed
+ * so far is released again.
+ */
+static void colo_compare_complete(UserCreatable *uc, Error **errp)
+{
+    CompareState *s = COLO_COMPARE(uc);
+
+    if (!s->pri_indev || !s->sec_indev || !s->outdev) {
+        error_setg(errp, "colo compare needs 'primary_in' ,"
+                   "'secondary_in','outdev' property set");
+        return;
+    } else if (!strcmp(s->pri_indev, s->outdev) ||
+               !strcmp(s->sec_indev, s->outdev) ||
+               !strcmp(s->pri_indev, s->sec_indev)) {
+        error_setg(errp, "'indev' and 'outdev' could not be same "
+                   "for compare module");
+        return;
+    }
+
+    s->chr_pri_in = qemu_chr_find(s->pri_indev);
+    if (s->chr_pri_in == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "IN Device '%s' not found", s->pri_indev);
+        goto out;
+    }
+
+    qemu_chr_fe_claim_no_fail(s->chr_pri_in);
+    qemu_chr_add_handlers(s->chr_pri_in, compare_chr_can_read,
+                          compare_pri_chr_in, NULL, s);
+
+    s->chr_sec_in = qemu_chr_find(s->sec_indev);
+    if (s->chr_sec_in == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "IN Device '%s' not found", s->sec_indev);
+        goto out;
+    }
+
+    qemu_chr_fe_claim_no_fail(s->chr_sec_in);
+    qemu_chr_add_handlers(s->chr_sec_in, compare_chr_can_read,
+                          compare_sec_chr_in, NULL, s);
+
+    s->chr_out = qemu_chr_find(s->outdev);
+    if (s->chr_out == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+                  "OUT Device '%s' not found", s->outdev);
+        goto out;
+    }
+    qemu_chr_fe_claim_no_fail(s->chr_out);
+
+    QTAILQ_INSERT_TAIL(&net_compares, s, next);
+
+    return;
+
+out:
+    /* undo whatever was claimed; chr_out is always NULL on this path */
+    if (s->chr_pri_in) {
+        qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
+        qemu_chr_fe_release(s->chr_pri_in);
+        s->chr_pri_in = NULL;
+    }
+    if (s->chr_sec_in) {
+        qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
+        qemu_chr_fe_release(s->chr_sec_in);
+        /* clear the secondary pointer so later teardown cannot reuse it */
+        s->chr_sec_in = NULL;
+    }
+}
+
+/* Class initializer: installs colo_compare_complete as the 'complete' hook. */
+static void colo_compare_class_init(ObjectClass *oc, void *data)
+{
+    USER_CREATABLE_CLASS(oc)->complete = colo_compare_complete;
+}
+
+/* Instance initializer: registers the three string properties. */
+static void colo_compare_init(Object *obj)
+{
+    object_property_add_str(obj, "primary_in",
+                            compare_get_pri_indev, compare_set_pri_indev,
+                            NULL);
+    object_property_add_str(obj, "secondary_in",
+                            compare_get_sec_indev, compare_set_sec_indev,
+                            NULL);
+    object_property_add_str(obj, "outdev",
+                            compare_get_outdev, compare_set_outdev,
+                            NULL);
+}
+
+/*
+ * Instance finalizer: undoes colo_compare_complete() and frees the
+ * property strings.
+ *
+ * The teardown of per-instance resources lives here rather than in a
+ * class finalizer: a class finalizer receives an ObjectClass, which must
+ * not be cast to CompareState.
+ */
+static void colo_compare_finalize(Object *obj)
+{
+    CompareState *s = COLO_COMPARE(obj);
+
+    if (s->chr_pri_in) {
+        qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
+        qemu_chr_fe_release(s->chr_pri_in);
+    }
+    if (s->chr_sec_in) {
+        qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
+        qemu_chr_fe_release(s->chr_sec_in);
+    }
+    if (s->chr_out) {
+        qemu_chr_fe_release(s->chr_out);
+        /*
+         * colo_compare_complete() links the object into net_compares
+         * only after chr_out has been found, so a non-NULL chr_out
+         * means this instance is on the list.
+         */
+        QTAILQ_REMOVE(&net_compares, s, next);
+    }
+
+    g_free(s->pri_indev);
+    g_free(s->sec_indev);
+    g_free(s->outdev);
+}
+
+/*
+ * QOM type description.  No class_size is given because the type adds
+ * no class-level fields, and no class_finalize because nothing is
+ * allocated per class.
+ */
+static const TypeInfo colo_compare_info = {
+    .name = TYPE_COLO_COMPARE,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(CompareState),
+    .instance_init = colo_compare_init,
+    .instance_finalize = colo_compare_finalize,
+    .class_init = colo_compare_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_USER_CREATABLE },
+        { }
+    }
+};
+
+/* Registers the colo-compare type; hooked up through type_init() below. */
+static void colo_compare_register_types(void)
+{
+    type_register_static(&colo_compare_info);
+}
+
+type_init(colo_compare_register_types);
diff --git a/vl.c b/vl.c
index dc6e63a..70064ad 100644
--- a/vl.c
+++ b/vl.c
@@ -2842,7 +2842,8 @@ static bool object_create_initial(const char *type)
if (g_str_equal(type, "filter-buffer") ||
g_str_equal(type, "filter-dump") ||
g_str_equal(type, "filter-mirror") ||
- g_str_equal(type, "filter-redirector")) {
+ g_str_equal(type, "filter-redirector") ||
+ g_str_equal(type, "colo-compare")) {
return false;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH V1 2/3] colo-compare: track connection and enqueue packet
2016-03-25 9:02 [Qemu-devel] [PATCH V1 0/3] Introduce COLO-compare Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 1/3] colo-compare: introduce colo compare initlization Zhang Chen
@ 2016-03-25 9:02 ` Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 3/3] colo-compare: introduce packet comparison thread Zhang Chen
2 siblings, 0 replies; 4+ messages in thread
From: Zhang Chen @ 2016-03-25 9:02 UTC (permalink / raw)
To: qemu devel, Jason Wang
Cc: Li Zhijian, Gui jianfeng, eddie.dong, zhanghailiang,
Dr. David Alan Gilbert, Zhang Chen, Yang Hongyang
In this patch we use a kernel-style jhash table to track
connections, and then enqueue network packets like this:
+ CompareState ++
| |
+---------------+ +---------------+ +---------------+
|conn list +--->conn +--------->conn |
+---------------+ +---------------+ +---------------+
| | | | | |
+---------------+ +---v----+ +---v----+ +---v----+ +---v----+
|primary | |secondary |primary | |secondary
|packet | |packet + |packet | |packet +
+--------+ +--------+ +--------+ +--------+
| | | |
+---v----+ +---v----+ +---v----+ +---v----+
|primary | |secondary |primary | |secondary
|packet | |packet + |packet | |packet +
+--------+ +--------+ +--------+ +--------+
| | | |
+---v----+ +---v----+ +---v----+ +---v----+
|primary | |secondary |primary | |secondary
|packet | |packet + |packet | |packet +
+--------+ +--------+ +--------+ +--------+
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
net/colo-compare.c | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 321 insertions(+), 3 deletions(-)
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 62c66df..0bb5a51 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -20,15 +20,22 @@
#include "net/queue.h"
#include "sysemu/char.h"
#include "qemu/sockets.h"
+#include <sys/sysinfo.h>
+#include "slirp/slirp.h"
+#include "qemu/jhash.h"
+#include <sys/sysinfo.h>
#define TYPE_COLO_COMPARE "colo-compare"
#define COLO_COMPARE(obj) \
OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
+#define PAGE_SIZE 4096
+#define ETH_HLEN 14
static QTAILQ_HEAD(, CompareState) net_compares =
QTAILQ_HEAD_INITIALIZER(net_compares);
+static ssize_t hashtable_max_size;
typedef struct ReadState {
int state; /* 0 = getting length, 1 = getting data */
@@ -37,6 +44,28 @@ typedef struct ReadState {
uint8_t buf[COMPARE_READ_LEN_MAX];
} ReadState;
+/*
+ + CompareState ++
+ | |
+ +---------------+ +---------------+ +---------------+
+ |conn list +--->conn +--------->conn |
+ +---------------+ +---------------+ +---------------+
+ | | | | | |
+ +---------------+ +---v----+ +---v----+ +---v----+ +---v----+
+ |primary | |secondary |primary | |secondary
+ |packet | |packet + |packet | |packet +
+ +--------+ +--------+ +--------+ +--------+
+ | | | |
+ +---v----+ +---v----+ +---v----+ +---v----+
+ |primary | |secondary |primary | |secondary
+ |packet | |packet + |packet | |packet +
+ +--------+ +--------+ +--------+ +--------+
+ | | | |
+ +---v----+ +---v----+ +---v----+ +---v----+
+ |primary | |secondary |primary | |secondary
+ |packet | |packet + |packet | |packet +
+ +--------+ +--------+ +--------+ +--------+
+*/
typedef struct CompareState {
Object parent;
@@ -49,8 +78,268 @@ typedef struct CompareState {
QTAILQ_ENTRY(CompareState) next;
ReadState pri_rs;
ReadState sec_rs;
+
+ /* connection list: the connections belonged to this NIC could be found
+ * in this list.
+ * element type: Connection
+ */
+ GQueue conn_list;
+ QemuMutex conn_list_lock; /* to protect conn_list */
+ /* hashtable to save connection */
+ GHashTable *connection_track_table;
+ /* to save unprocessed_connections */
+ GQueue unprocessed_connections;
+ /* proxy current hash size */
+ ssize_t hashtable_size;
} CompareState;
+typedef struct Packet { /* one received packet, queued on a Connection */
+ void *data; /* private copy of the packet bytes, freed by packet_destroy() */
+ union {
+ uint8_t *network_layer; /* set by connection_key_init() to point into data */
+ struct ip *ip; /* the same pointer, viewed as an IP header */
+ };
+ uint8_t *transport_layer; /* network_layer plus the IP header length */
+ int size; /* number of bytes in data */
+ CompareState *s; /* compare object the packet was received on */
+} Packet;
+
+typedef struct ConnectionKey { /* hash-table key identifying one connection */
+ /* (src, dst) must be grouped, in the same way as in the IP header */
+ struct in_addr src; /* copied from ip_src */
+ struct in_addr dst; /* copied from ip_dst */
+ uint16_t src_port; /* low 16 bits of the 32-bit word read from the transport header */
+ uint16_t dst_port; /* high 16 bits of that word */
+ uint8_t ip_proto; /* copied from ip_p */
+} QEMU_PACKED ConnectionKey;
+
+typedef struct Connection { /* the two packet queues of one tracked connection */
+ QemuMutex list_lock; /* taken around accesses to primary_list and secondary_list */
+ /* connection primary send queue: element type: Packet */
+ GQueue primary_list;
+ /* connection secondary send queue: element type: Packet */
+ GQueue secondary_list;
+ /* flag to enqueue unprocessed_connections */
+ bool processing; /* set once packet_enqueue() has pushed this connection on conn_list */
+ int ip_proto; /* copied from the ConnectionKey in connection_new() */
+} Connection;
+
+enum { /* 'mode' argument of packet_enqueue(): which input the packet came from */
+ PRIMARY_IN = 0,
+ SECONDARY_IN,
+};
+
+static void packet_destroy(void *opaque, void *user_data);
+static int compare_chr_send(CharDriverState *out, const uint8_t *buf, int size);
+
+/*
+ * Hash function for ConnectionKey, used for connection_track_table.
+ * Mixes both addresses, both ports and the protocol with the Jenkins
+ * hash primitives.
+ */
+static uint32_t connection_key_hash(const void *opaque)
+{
+    const ConnectionKey *key = opaque;
+    uint32_t a, b, c;
+
+    /* Jenkins hash */
+    a = b = c = JHASH_INITVAL + sizeof(*key);
+    a += key->src.s_addr;
+    b += key->dst.s_addr;
+    /*
+     * Widen before shifting: a uint16_t promotes to signed int, and
+     * shifting a value >= 0x8000 left by 16 would overflow it.
+     */
+    c += key->src_port | ((uint32_t)key->dst_port << 16);
+    __jhash_mix(a, b, c);
+
+    a += key->ip_proto;
+    __jhash_final(a, b, c);
+
+    return c;
+}
+
+/* Key equality for connection_track_table: bytewise compare of two keys. */
+static int connection_key_equal(const void *opaque1, const void *opaque2)
+{
+    int diff = memcmp(opaque1, opaque2, sizeof(ConnectionKey));
+
+    return diff == 0;
+}
+
+/*
+ * Initialize the connection key for a packet and set the packet's
+ * network_layer/transport_layer pointers.
+ *
+ * Return 0 on success.  Return 1 if the packet is not a well-formed
+ * IPv4 frame (too short, not ETH_P_IP, bad IP header length); such a
+ * packet is not tracked and will be sent later by the caller.
+ *
+ * The ports are taken from the first 32-bit word of the transport
+ * header (second word for AH).  They stay 0 for other protocols or when
+ * the packet is too short to contain that word.
+ */
+static int connection_key_init(Packet *pkt, ConnectionKey *key)
+{
+    int network_length;
+    uint8_t *data = pkt->data;
+    uint16_t l3_proto;
+    uint32_t tmp_ports;
+    ssize_t l2hdr_len;
+    ssize_t ports_offset;
+
+    key->src_port = 0;
+    key->dst_port = 0;
+
+    if (pkt->size < ETH_HLEN) {
+        return 1;
+    }
+
+    l2hdr_len = eth_get_l2_hdr_length(data);
+    if (l2hdr_len > pkt->size) {
+        return 1;
+    }
+
+    l3_proto = eth_get_l3_proto(data, l2hdr_len);
+    if (l3_proto != ETH_P_IP) {
+        return 1;
+    }
+
+    if (pkt->size - l2hdr_len < (ssize_t)sizeof(struct ip)) {
+        return 1;
+    }
+
+    /* use the real L2 header length so VLAN-tagged frames parse correctly */
+    pkt->network_layer = data + l2hdr_len;
+
+    network_length = pkt->ip->ip_hl * 4;
+    if (network_length < (int)sizeof(struct ip) ||
+        pkt->size - l2hdr_len < network_length) {
+        return 1;
+    }
+
+    pkt->transport_layer = pkt->network_layer + network_length;
+    key->ip_proto = pkt->ip->ip_p;
+    key->src = pkt->ip->ip_src;
+    key->dst = pkt->ip->ip_dst;
+
+    switch (key->ip_proto) {
+    case IPPROTO_TCP:
+    case IPPROTO_UDP:
+    case IPPROTO_DCCP:
+    case IPPROTO_ESP:
+    case IPPROTO_SCTP:
+    case IPPROTO_UDPLITE:
+        ports_offset = 0;
+        break;
+    case IPPROTO_AH:
+        ports_offset = 4;
+        break;
+    default:
+        return 0;
+    }
+
+    if (pkt->size - l2hdr_len - network_length <
+        ports_offset + (ssize_t)sizeof(tmp_ports)) {
+        /* transport header truncated: keep the ports at 0 */
+        return 0;
+    }
+
+    /* memcpy: the transport header need not be 4-byte aligned */
+    memcpy(&tmp_ports, pkt->transport_layer + ports_offset, sizeof(tmp_ports));
+    key->src_port = tmp_ports & 0xffff;
+    key->dst_port = tmp_ports >> 16;
+
+    return 0;
+}
+
+/*
+ * Allocate a Connection with empty packet queues, an initialized lock,
+ * 'processing' cleared and ip_proto taken from @key.
+ */
+static Connection *connection_new(ConnectionKey *key)
+{
+    Connection *fresh = g_slice_new(Connection);
+
+    g_queue_init(&fresh->primary_list);
+    g_queue_init(&fresh->secondary_list);
+    qemu_mutex_init(&fresh->list_lock);
+    fresh->processing = false;
+    fresh->ip_proto = key->ip_proto;
+
+    return fresh;
+}
+
+/*
+ * Empty the connection tracking table and zero its entry counter, so
+ * that the table cannot keep growing without bound.
+ */
+static void connection_hashtable_reset(CompareState *s)
+{
+    g_hash_table_remove_all(s->connection_track_table);
+    s->hashtable_size = 0;
+}
+
+/*
+ * Look up the connection for @key; if not found, create a new
+ * connection and add it to the hash table.
+ *
+ * When the table already holds hashtable_max_size entries it is cleared
+ * first.  The new connection is then always inserted, so the returned
+ * pointer is owned by the table and its key copy is not leaked.
+ */
+static Connection *connection_get(CompareState *s, ConnectionKey *key)
+{
+    /* FIXME: protect connection_track_table */
+    Connection *conn = g_hash_table_lookup(s->connection_track_table, key);
+
+    if (conn == NULL) {
+        if (s->hashtable_size >= hashtable_max_size) {
+            error_report("colo proxy connection hashtable full, clear it");
+            connection_hashtable_reset(s);
+            /*
+             * TODO:clear conn_list
+             * NOTE(review): the reset destroys connections that may still
+             * be referenced from s->conn_list.
+             */
+        }
+
+        conn = connection_new(key);
+        g_hash_table_insert(s->connection_track_table,
+                            g_memdup(key, sizeof(*key)), conn);
+        s->hashtable_size++;
+    }
+
+    return conn;
+}
+
+/*
+ * Value destructor for connection_track_table: frees every queued
+ * packet, then the connection itself.
+ */
+static void connection_destroy(void *opaque)
+{
+    Connection *conn = opaque;
+
+    qemu_mutex_lock(&conn->list_lock);
+    /*
+     * The queues are embedded in Connection and set up with
+     * g_queue_init(), so they are emptied with g_queue_clear();
+     * g_queue_free() is only for queues made by g_queue_new().
+     */
+    g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
+    g_queue_clear(&conn->primary_list);
+    g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
+    g_queue_clear(&conn->secondary_list);
+    qemu_mutex_unlock(&conn->list_lock);
+    qemu_mutex_destroy(&conn->list_lock);
+    g_slice_free(Connection, conn);
+}
+
+/*
+ * Build a Packet holding a private copy of @data (@size bytes) and fill
+ * @key from it.  Returns NULL, with nothing left allocated, when
+ * connection_key_init() rejects the packet.
+ */
+static Packet *packet_new(CompareState *s, const void *data,
+                          int size, ConnectionKey *key)
+{
+    Packet *fresh = g_slice_new(Packet);
+
+    fresh->data = g_memdup(data, size);
+    fresh->size = size;
+    fresh->s = s;
+
+    if (connection_key_init(fresh, key) == 0) {
+        return fresh;
+    }
+
+    packet_destroy(fresh, NULL);
+    return NULL;
+}
+
+/*
+ * Turn the packet just completed in the primary or secondary read state
+ * (selected by @mode) into a Packet and append it to the matching queue
+ * of its connection.  A connection seen for the first time is also
+ * pushed on s->conn_list.
+ *
+ * Returns 0 on success, -1 when no Packet could be built; that is the
+ * case for packets connection_key_init() rejects, e.g. ARP.
+ */
+static int packet_enqueue(CompareState *s, int mode)
+{
+    ConnectionKey key = {{ 0 } };
+    bool from_primary = (mode == PRIMARY_IN);
+    ReadState *rs = from_primary ? &s->pri_rs : &s->sec_rs;
+    Connection *conn;
+    Packet *pkt;
+
+    pkt = packet_new(s, rs->buf, rs->packet_len, &key);
+    if (pkt == NULL) {
+        return -1;
+    }
+
+    conn = connection_get(s, &key);
+    if (!conn->processing) {
+        qemu_mutex_lock(&s->conn_list_lock);
+        g_queue_push_tail(&s->conn_list, conn);
+        qemu_mutex_unlock(&s->conn_list_lock);
+        conn->processing = true;
+    }
+
+    qemu_mutex_lock(&conn->list_lock);
+    g_queue_push_tail(from_primary ? &conn->primary_list
+                                   : &conn->secondary_list, pkt);
+    qemu_mutex_unlock(&conn->list_lock);
+
+    return 0;
+}
+
+/*
+ * Free a Packet and its data copy.  The (opaque, user_data) signature
+ * lets it be passed to g_queue_foreach(); @user_data is unused.
+ */
+static void packet_destroy(void *opaque, void *user_data)
+{
+    Packet *victim = opaque;
+    void *payload = victim->data;
+
+    g_free(payload);
+    g_slice_free(Packet, victim);
+}
+
+/*
+ * Flush one connection: send every queued primary packet to chr_out and
+ * drop every queued secondary packet.  The (opaque, user_data)
+ * signature matches g_queue_foreach(); @user_data is unused.
+ */
+static inline void colo_flush_connection(void *opaque, void *user_data)
+{
+    Connection *conn = opaque;
+    Packet *pkt = NULL;
+
+    qemu_mutex_lock(&conn->list_lock);
+    while (!g_queue_is_empty(&conn->primary_list)) {
+        pkt = g_queue_pop_head(&conn->primary_list);
+        compare_chr_send(pkt->s->chr_out, pkt->data, pkt->size);
+        /* the packet is off the queue and nothing else holds it: free it */
+        packet_destroy(pkt, NULL);
+    }
+    while (!g_queue_is_empty(&conn->secondary_list)) {
+        pkt = g_queue_pop_head(&conn->secondary_list);
+        packet_destroy(pkt, NULL);
+    }
+    qemu_mutex_unlock(&conn->list_lock);
+}
+
static int compare_chr_send(CharDriverState *out, const uint8_t *buf, int size)
{
int ret = 0;
@@ -142,8 +431,10 @@ static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
ret = compare_chr_fill_rstate(&s->pri_rs, buf, size);
if (ret == 1) {
- /* FIXME: enqueue to primary packet list */
- compare_chr_send(s->chr_out, buf, size);
+ if (packet_enqueue(s, PRIMARY_IN)) {
+ error_report("primary: unsupported packet in");
+ compare_chr_send(s->chr_out, buf, size);
+ }
} else if (ret == -1) {
qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
}
@@ -156,7 +447,9 @@ static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
ret = compare_chr_fill_rstate(&s->sec_rs, buf, size);
if (ret == 1) {
- /* TODO: enqueue to secondary packet list*/
+ if (packet_enqueue(s, SECONDARY_IN)) {
+ error_report("secondary: unsupported packet in");
+ }
} else if (ret == -1) {
qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
}
@@ -210,6 +503,7 @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
static void colo_compare_complete(UserCreatable *uc, Error **errp)
{
CompareState *s = COLO_COMPARE(uc);
+ struct sysinfo si;
if (!s->pri_indev || !s->sec_indev || !s->outdev) {
error_setg(errp, "colo compare needs 'primary_in' ,"
@@ -255,6 +549,29 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
QTAILQ_INSERT_TAIL(&net_compares, s, next);
+ g_queue_init(&s->conn_list);
+ qemu_mutex_init(&s->conn_list_lock);
+
+ s->hashtable_size = 0;
+ /*
+ * Idea from kernel tcp.c: use 1/16384 of memory. On i386: 32MB
+ * machine has 512 buckets. >= 1GB machines have 16384 buckets.
+ */
+ sysinfo(&si);
+ hashtable_max_size = si.totalram / 16384;
+ if (si.totalram > (1024 * 1024 * 1024 / PAGE_SIZE)) {
+ hashtable_max_size = 16384;
+ }
+ if (hashtable_max_size < 32) {
+ hashtable_max_size = 32;
+ }
+ hashtable_max_size = hashtable_max_size * 8; /* default factor = 8 */
+
+ s->connection_track_table = g_hash_table_new_full(connection_key_hash,
+ connection_key_equal,
+ g_free,
+ connection_destroy);
+
return;
out:
@@ -297,6 +614,7 @@ static void colo_compare_class_finalize(ObjectClass *oc, void *data)
if (!QTAILQ_EMPTY(&net_compares)) {
QTAILQ_REMOVE(&net_compares, s, next);
}
+ qemu_mutex_destroy(&s->conn_list_lock);
}
static void colo_compare_init(Object *obj)
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH V1 3/3] colo-compare: introduce packet comparison thread
2016-03-25 9:02 [Qemu-devel] [PATCH V1 0/3] Introduce COLO-compare Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 1/3] colo-compare: introduce colo compare initlization Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 2/3] colo-compare: track connection and enqueue packet Zhang Chen
@ 2016-03-25 9:02 ` Zhang Chen
2 siblings, 0 replies; 4+ messages in thread
From: Zhang Chen @ 2016-03-25 9:02 UTC (permalink / raw)
To: qemu devel, Jason Wang
Cc: Li Zhijian, Gui jianfeng, eddie.dong, zhanghailiang,
Dr. David Alan Gilbert, Zhang Chen, Yang Hongyang
If the packets are the same, we send the primary packet and drop the secondary
packet; otherwise we notify COLO to do a checkpoint.
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
net/colo-compare.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 121 insertions(+), 1 deletion(-)
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 0bb5a51..1debc0e 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -36,6 +36,7 @@
static QTAILQ_HEAD(, CompareState) net_compares =
QTAILQ_HEAD_INITIALIZER(net_compares);
static ssize_t hashtable_max_size;
+static int colo_need_checkpoint;
typedef struct ReadState {
int state; /* 0 = getting length, 1 = getting data */
@@ -91,6 +92,13 @@ typedef struct CompareState {
GQueue unprocessed_connections;
/* proxy current hash size */
ssize_t hashtable_size;
+
+ /* notify compare thread */
+ QemuEvent event;
+ /* compare thread, a thread for each NIC */
+ QemuThread thread;
+ int thread_status;
+
} CompareState;
typedef struct Packet {
@@ -129,6 +137,15 @@ enum {
SECONDARY_IN,
};
+enum { /* values stored in CompareState.thread_status */
+ /* compare thread isn't started */
+ COMPARE_THREAD_NONE,
+ /* compare thread is running */
+ COMPARE_THREAD_RUNNING,
+ /* compare thread exit */
+ COMPARE_THREAD_EXIT,
+};
+
static void packet_destroy(void *opaque, void *user_data);
static int compare_chr_send(CharDriverState *out, const uint8_t *buf, int size);
@@ -340,6 +357,88 @@ static inline void colo_flush_connection(void *opaque, void *user_data)
qemu_mutex_unlock(&conn->list_lock);
}
+/* Record that a checkpoint is needed by raising colo_need_checkpoint. */
+static void colo_notify_checkpoint(void)
+{
+    colo_need_checkpoint = true;
+}
+
+/* TODO colo_do_checkpoint() {
+ * we flush the connections and reset 'colo_need_checkpoint'
+ * }
+ */
+
+/* Print the packet bytes to stdout as space-separated hex, then a newline. */
+static inline void colo_dump_packet(Packet *pkt)
+{
+    const uint8_t *bytes = pkt->data;
+    int pos = 0;
+
+    while (pos < pkt->size) {
+        printf("%02x ", bytes[pos]);
+        pos++;
+    }
+    printf("\n");
+}
+
+/*
+ * Compare two packets, one sent by the primary and one by the
+ * secondary.  Both packets are dumped to stdout first.
+ * TODO support ip fragment, Out-Of-Order
+ * return: 0 means the packets are the same
+ *         non-zero means they differ (-1 when the sizes differ,
+ *         otherwise the memcmp() result)
+ */
+static int colo_packet_compare(Packet *ppkt, Packet *spkt)
+{
+    colo_dump_packet(ppkt);
+    colo_dump_packet(spkt);
+
+    if (ppkt->size != spkt->size) {
+        return -1;
+    }
+
+    return memcmp(ppkt->data, spkt->data, spkt->size);
+}
+
+/*
+ * Compare the queued packets of one connection.
+ *
+ * Primary packets are taken from the head of primary_list.  If an
+ * identical packet is found in secondary_list, the primary packet is
+ * sent to chr_out and both packets are removed and freed.  Otherwise the
+ * primary packet is put back at the head, a checkpoint is requested and
+ * processing of this connection stops.
+ *
+ * The (opaque, user_data) signature matches g_queue_foreach();
+ * @user_data is unused.
+ */
+static void colo_compare_connection(void *opaque, void *user_data)
+{
+    Connection *conn = opaque;
+    Packet *pkt = NULL;
+    Packet *spkt = NULL;
+    GList *result = NULL;
+    int ret;
+
+    qemu_mutex_lock(&conn->list_lock);
+    while (!g_queue_is_empty(&conn->primary_list) &&
+           !g_queue_is_empty(&conn->secondary_list)) {
+        pkt = g_queue_pop_head(&conn->primary_list);
+        result = g_queue_find_custom(&conn->secondary_list,
+                                     pkt, (GCompareFunc)colo_packet_compare);
+
+        if (result) {
+            ret = compare_chr_send(pkt->s->chr_out, pkt->data, pkt->size);
+            if (ret < 0) {
+                error_report("colo_send_primary_packet failed");
+            }
+            /*
+             * 'result' is the list link, not the element data: unlink it
+             * with g_queue_delete_link() and free the packet it carried.
+             */
+            spkt = result->data;
+            g_queue_delete_link(&conn->secondary_list, result);
+            packet_destroy(spkt, NULL);
+            packet_destroy(pkt, NULL);
+        } else {
+            g_queue_push_head(&conn->primary_list, pkt);
+            colo_notify_checkpoint();
+            break;
+        }
+    }
+    qemu_mutex_unlock(&conn->list_lock);
+}
+
+/*
+ * Body of the compare thread.  While thread_status is
+ * COMPARE_THREAD_RUNNING it waits for s->event, resets it, and runs
+ * colo_compare_connection() over conn_list under conn_list_lock.
+ * Always returns NULL.
+ */
+static void *colo_compare_thread(void *opaque)
+{
+    CompareState *s = opaque;
+
+    for (;;) {
+        if (s->thread_status != COMPARE_THREAD_RUNNING) {
+            break;
+        }
+
+        qemu_event_wait(&s->event);
+        qemu_event_reset(&s->event);
+
+        qemu_mutex_lock(&s->conn_list_lock);
+        g_queue_foreach(&s->conn_list, colo_compare_connection, NULL);
+        qemu_mutex_unlock(&s->conn_list_lock);
+    }
+
+    return NULL;
+}
+
static int compare_chr_send(CharDriverState *out, const uint8_t *buf, int size)
{
int ret = 0;
@@ -433,7 +532,9 @@ static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
if (ret == 1) {
if (packet_enqueue(s, PRIMARY_IN)) {
error_report("primary: unsupported packet in");
- compare_chr_send(s->chr_out, buf, size);
+ compare_chr_send(s->chr_out, s->pri_rs.buf, s->pri_rs.packet_len);
+ } else {
+ qemu_event_set(&s->event);
}
} else if (ret == -1) {
qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
@@ -449,6 +550,8 @@ static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
if (ret == 1) {
if (packet_enqueue(s, SECONDARY_IN)) {
error_report("secondary: unsupported packet in");
+ } else {
+ qemu_event_set(&s->event);
}
} else if (ret == -1) {
qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
@@ -504,6 +607,8 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
{
CompareState *s = COLO_COMPARE(uc);
struct sysinfo si;
+ char thread_name[64];
+ static int compare_id;
if (!s->pri_indev || !s->sec_indev || !s->outdev) {
error_setg(errp, "colo compare needs 'primary_in' ,"
@@ -552,6 +657,7 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
g_queue_init(&s->conn_list);
qemu_mutex_init(&s->conn_list_lock);
+ colo_need_checkpoint = false;
s->hashtable_size = 0;
/*
* Idea from kernel tcp.c: use 1/16384 of memory. On i386: 32MB
@@ -572,6 +678,13 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
g_free,
connection_destroy);
+ s->thread_status = COMPARE_THREAD_RUNNING;
+ sprintf(thread_name, "proxy compare %d", compare_id);
+ qemu_thread_create(&s->thread, thread_name,
+ colo_compare_thread, s,
+ QEMU_THREAD_JOINABLE);
+ compare_id++;
+
return;
out:
@@ -615,6 +728,13 @@ static void colo_compare_class_finalize(ObjectClass *oc, void *data)
QTAILQ_REMOVE(&net_compares, s, next);
}
qemu_mutex_destroy(&s->conn_list_lock);
+
+ if (s->thread.thread) {
+ s->thread_status = COMPARE_THREAD_EXIT;
+ qemu_event_set(&s->event);
+ qemu_thread_join(&s->thread);
+ }
+ qemu_event_destroy(&s->event);
}
static void colo_compare_init(Object *obj)
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-03-25 9:02 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-03-25 9:02 [Qemu-devel] [PATCH V1 0/3] Introduce COLO-compare Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 1/3] colo-compare: introduce colo compare initlization Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 2/3] colo-compare: track connection and enqueue packet Zhang Chen
2016-03-25 9:02 ` [Qemu-devel] [PATCH V1 3/3] colo-compare: introduce packet comparison thread Zhang Chen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).