From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:54379) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bMVS6-0003zL-AI for qemu-devel@nongnu.org; Mon, 11 Jul 2016 03:16:28 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bMVS2-0007nz-6S for qemu-devel@nongnu.org; Mon, 11 Jul 2016 03:16:26 -0400 Received: from [59.151.112.132] (port=59346 helo=heian.cn.fujitsu.com) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bMVS0-0007nj-Ez for qemu-devel@nongnu.org; Mon, 11 Jul 2016 03:16:22 -0400 References: <1466681677-30487-1-git-send-email-zhangchen.fnst@cn.fujitsu.com> <1466681677-30487-4-git-send-email-zhangchen.fnst@cn.fujitsu.com> <577F2AD6.40004@redhat.com> From: Zhang Chen Message-ID: <578347FB.9020806@cn.fujitsu.com> Date: Mon, 11 Jul 2016 15:17:15 +0800 MIME-Version: 1.0 In-Reply-To: <577F2AD6.40004@redhat.com> Content-Type: text/plain; charset="utf-8"; format=flowed Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [RFC PATCH V5 3/4] colo-compare: introduce packet comparison thread List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Jason Wang , qemu devel Cc: Li Zhijian , Wen Congyang , zhanghailiang , "eddie . dong" , "Dr . David Alan Gilbert" On 07/08/2016 12:23 PM, Jason Wang wrote: > > > On 2016年06月23日 19:34, Zhang Chen wrote: >> if packets are same, we send primary packet and drop secondary >> packet, otherwise notify COLO do checkpoint. > > More verbose please, e.g how to handle each case of exception (or > maybe comment in the code). > OK. 
>> >> Signed-off-by: Zhang Chen >> Signed-off-by: Li Zhijian >> Signed-off-by: Wen Congyang >> --- >> net/colo-base.c | 1 + >> net/colo-base.h | 3 + >> net/colo-compare.c | 214 >> +++++++++++++++++++++++++++++++++++++++++++++++++++++ >> trace-events | 2 + >> 4 files changed, 220 insertions(+) >> >> diff --git a/net/colo-base.c b/net/colo-base.c >> index 7e263e8..9673661 100644 >> --- a/net/colo-base.c >> +++ b/net/colo-base.c >> @@ -146,6 +146,7 @@ Packet *packet_new(const void *data, int size) >> pkt->data = g_memdup(data, size); >> pkt->size = size; >> + pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); >> return pkt; >> } >> diff --git a/net/colo-base.h b/net/colo-base.h >> index 01c1a5d..8bb1043 100644 >> --- a/net/colo-base.h >> +++ b/net/colo-base.h >> @@ -18,6 +18,7 @@ >> #include "slirp/slirp.h" >> #include "qemu/jhash.h" >> #include "qemu/rcu.h" >> +#include "qemu/timer.h" >> #define HASHTABLE_MAX_SIZE 16384 >> @@ -47,6 +48,8 @@ typedef struct Packet { >> }; >> uint8_t *transport_layer; >> int size; >> + /* Time of packet creation, in wall clock ms */ >> + int64_t creation_ms; >> } Packet; >> typedef struct ConnectionKey { >> diff --git a/net/colo-compare.c b/net/colo-compare.c >> index 4231fe7..928d729 100644 >> --- a/net/colo-compare.c >> +++ b/net/colo-compare.c >> @@ -35,6 +35,8 @@ >> OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) >> #define COMPARE_READ_LEN_MAX NET_BUFSIZE >> +/* TODO: Should be configurable */ >> +#define REGULAR_CHECK_MS 400 > > "REGULAR" seems to generic, need a better name. Like "REGULAR_PACKET_CHECK_MS" ? 
> >> static QTAILQ_HEAD(, CompareState) net_compares = >> QTAILQ_HEAD_INITIALIZER(net_compares); >> @@ -86,6 +88,11 @@ typedef struct CompareState { >> GQueue unprocessed_connections; >> /* proxy current hash size */ >> uint32_t hashtable_size; >> + /* compare thread, a thread for each NIC */ >> + QemuThread thread; >> + int thread_status; >> + /* Timer used on the primary to find packets that are never >> matched */ >> + QEMUTimer *timer; >> } CompareState; >> typedef struct CompareClass { >> @@ -97,6 +104,15 @@ enum { >> SECONDARY_IN, >> }; >> +enum { >> + /* compare thread isn't started */ >> + COMPARE_THREAD_NONE, >> + /* compare thread is running */ >> + COMPARE_THREAD_RUNNING, >> + /* compare thread exit */ >> + COMPARE_THREAD_EXIT, >> +}; >> + >> static int compare_chr_send(CharDriverState *out, >> const uint8_t *buf, >> uint32_t size); >> @@ -143,6 +159,98 @@ static int packet_enqueue(CompareState *s, int >> mode) >> return 0; >> } >> +/* >> + * The IP packets sent by primary and secondary >> + * will be compared in here >> + * TODO support ip fragment, Out-Of-Order >> + * return: 0 means packet same >> + * > 0 || < 0 means packet different >> + */ >> +static int colo_packet_compare(Packet *ppkt, Packet *spkt) >> +{ >> + trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), >> + inet_ntoa(ppkt->ip->ip_dst), spkt->size, >> + inet_ntoa(spkt->ip->ip_src), >> + inet_ntoa(spkt->ip->ip_dst)); >> + >> + if (ppkt->size == spkt->size) { >> + return memcmp(ppkt->data, spkt->data, spkt->size); >> + } else { >> + return -1; >> + } >> +} >> + >> +static int colo_packet_compare_all(Packet *spkt, Packet *ppkt) >> +{ >> + trace_colo_compare_main("compare all"); >> + return colo_packet_compare(ppkt, spkt); >> +} >> + >> +static void colo_old_packet_check(void *opaque_packet, void >> *opaque_found) >> +{ >> + int64_t now; >> + bool *found_old = (bool *)opaque_found; >> + Packet *ppkt = (Packet *)opaque_packet; >> + >> + if (*found_old) { >> + /* Someone found 
an old packet earlier in the queue */ >> + return; >> + } >> + >> + now = qemu_clock_get_ms(QEMU_CLOCK_HOST); >> + if ((ppkt->creation_ms < now) && > > Any case that ppkt->creation_ms >= now? No, will remove it. > >> + ((now - ppkt->creation_ms) > REGULAR_CHECK_MS)) { >> + trace_colo_old_packet_check_found(ppkt->creation_ms); >> + *found_old = true; >> + } >> +} >> + >> +/* >> + * called from the compare thread on the primary >> + * for compare connection >> + */ >> +static void colo_compare_connection(void *opaque, void *user_data) >> +{ >> + CompareState *s = user_data; >> + Connection *conn = opaque; >> + Packet *pkt = NULL; >> + GList *result = NULL; >> + bool found_old; >> + int ret; >> + >> + while (!g_queue_is_empty(&conn->primary_list) && >> + !g_queue_is_empty(&conn->secondary_list)) { >> + pkt = g_queue_pop_tail(&conn->primary_list); >> + result = g_queue_find_custom(&conn->secondary_list, >> + pkt, >> (GCompareFunc)colo_packet_compare_all); >> + >> + if (result) { >> + ret = compare_chr_send(s->chr_out, pkt->data, pkt->size); >> + if (ret < 0) { >> + error_report("colo_send_primary_packet failed"); >> + } >> + trace_colo_compare_main("packet same and release packet"); >> + g_queue_remove(&conn->secondary_list, result->data); >> + } else { > > A question I forget the answer, so may ask again. What if secondary > packet comes late? If the secondary packet comes late, the primary queue will still hold some primary packets. We use a timer to call colo_compare_connection() regularly. There, we iterate over conn->primary_list, and if it contains an old primary packet (i.e. the secondary packet came late), we call colo_notify_checkpoint() to do a checkpoint, which makes the primary and secondary the same again. 
> >> + trace_colo_compare_main("packet different"); >> + g_queue_push_tail(&conn->primary_list, pkt); >> + /* TODO: colo_notify_checkpoint();*/ >> + break; >> + } >> + } >> + >> + /* >> + * Look for old packets that the secondary hasn't matched, >> + * if we have some then we have to checkpoint to wake >> + * the secondary up. >> + */ >> + found_old = false; >> + g_queue_foreach(&conn->primary_list, colo_old_packet_check, >> &found_old); >> + if (found_old) { >> + /* TODO: colo_notify_checkpoint();*/ > > Shouldn't we need to remove all "old" packets here? Yes, I will add a function to remove them. > >> + } >> +} >> + >> static int compare_chr_send(CharDriverState *out, >> const uint8_t *buf, >> uint32_t size) >> @@ -170,6 +278,69 @@ err: >> return ret < 0 ? ret : -EIO; >> } >> +static int compare_chr_can_read(void *opaque) >> +{ >> + return COMPARE_READ_LEN_MAX; >> +} >> + >> +/* >> + * called from the main thread on the primary for packets >> + * arriving over the socket from the primary. >> + */ >> +static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int >> size) >> +{ >> + CompareState *s = COLO_COMPARE(opaque); >> + int ret; >> + >> + ret = net_fill_rstate(&s->pri_rs, buf, size); >> + if (ret == -1) { >> + qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL); >> + error_report("colo-compare primary_in error"); >> + } >> +} >> + >> +/* >> + * called from the main thread on the primary for packets >> + * arriving over the socket from the secondary. 
>> + */ >> +static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int >> size) >> +{ >> + CompareState *s = COLO_COMPARE(opaque); >> + int ret; >> + >> + ret = net_fill_rstate(&s->sec_rs, buf, size); >> + if (ret == -1) { >> + qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL); >> + error_report("colo-compare secondary_in error"); >> + } >> +} >> + >> +static void *colo_compare_thread(void *opaque) >> +{ >> + GMainContext *worker_context; >> + GMainLoop *compare_loop; >> + CompareState *s = opaque; >> + >> + worker_context = g_main_context_new(); >> + g_assert(g_main_context_get_thread_default() == NULL); >> + g_main_context_push_thread_default(worker_context); >> + g_assert(g_main_context_get_thread_default() == worker_context); >> + >> + qemu_chr_add_handlers(s->chr_pri_in, compare_chr_can_read, >> + compare_pri_chr_in, NULL, s); >> + qemu_chr_add_handlers(s->chr_sec_in, compare_chr_can_read, >> + compare_sec_chr_in, NULL, s); >> + >> + compare_loop = g_main_loop_new(worker_context, FALSE); >> + >> + g_main_loop_run(compare_loop); >> + >> + g_main_loop_unref(compare_loop); >> + g_main_context_pop_thread_default(worker_context); >> + g_main_context_unref(worker_context); >> + return NULL; >> +} >> + >> static char *compare_get_pri_indev(Object *obj, Error **errp) >> { >> CompareState *s = COLO_COMPARE(obj); >> @@ -222,6 +393,9 @@ static void >> compare_pri_rs_finalize(SocketReadState *pri_rs) >> if (packet_enqueue(s, PRIMARY_IN)) { >> trace_colo_compare_main("primary: unsupported packet in"); >> compare_chr_send(s->chr_out, pri_rs->buf, pri_rs->packet_len); >> + } else { >> + /* compare connection */ >> + g_queue_foreach(&s->conn_list, colo_compare_connection, s); >> } >> } >> @@ -231,16 +405,35 @@ static void >> compare_sec_rs_finalize(SocketReadState *sec_rs) >> if (packet_enqueue(s, SECONDARY_IN)) { >> trace_colo_compare_main("secondary: unsupported packet in"); >> + } else { >> + /* compare connection */ >> + g_queue_foreach(&s->conn_list, 
colo_compare_connection, s); >> } >> } >> /* >> + * Prod the compare thread regularly so it can watch for any packets >> + * that the secondary hasn't produced equivalents of. >> + */ >> +static void colo_compare_regular(void *opaque) >> +{ >> + CompareState *s = opaque; >> + >> + timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + >> + REGULAR_CHECK_MS); >> + /* compare connection */ >> + g_queue_foreach(&s->conn_list, colo_compare_connection, s); >> +} > > We need make sure this function was called from colo thread, but it > looks not? Yes. In the next version I will make the old_packet_check related code independent of colo_compare_connection(). > >> + >> +/* >> * called from the main thread on the primary >> * to setup colo-compare. >> */ >> static void colo_compare_complete(UserCreatable *uc, Error **errp) >> { >> CompareState *s = COLO_COMPARE(uc); >> + char thread_name[64]; >> + static int compare_id; >> if (!s->pri_indev || !s->sec_indev || !s->outdev) { >> error_setg(errp, "colo compare needs 'primary_in' ," >> @@ -293,6 +486,19 @@ static void colo_compare_complete(UserCreatable >> *uc, Error **errp) >> g_free, >> connection_destroy); >> + s->thread_status = COMPARE_THREAD_RUNNING; >> + sprintf(thread_name, "compare %d", compare_id); >> + qemu_thread_create(&s->thread, thread_name, >> + colo_compare_thread, s, >> + QEMU_THREAD_JOINABLE); >> + compare_id++; >> + >> + /* A regular timer to kick any packets that the secondary >> doesn't match */ >> + s->timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, /* Only when guest >> runs */ >> + colo_compare_regular, s); >> + timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + >> + REGULAR_CHECK_MS); >> + >> return; >> } >> @@ -338,6 +544,14 @@ static void colo_compare_finalize(Object *obj) >> qemu_mutex_destroy(&s->conn_list_lock); >> g_queue_free(&s->conn_list); >> + if (s->thread.thread) { >> + s->thread_status = COMPARE_THREAD_EXIT; > > Looks like there's not any code that depends on the status, so why > need to 
this> Currently we don't need this; I will move it to the "work with colo-frame" related patch. Thanks Zhang Chen > >> + /* compare connection */ >> + g_queue_foreach(&s->conn_list, colo_compare_connection, s); >> + qemu_thread_join(&s->thread); >> + } >> + timer_del(s->timer); >> + >> g_free(s->pri_indev); >> g_free(s->sec_indev); >> g_free(s->outdev); >> diff --git a/trace-events b/trace-events >> index 703de1a..1537e91 100644 >> --- a/trace-events >> +++ b/trace-events >> @@ -1919,3 +1919,5 @@ aspeed_vic_write(uint64_t offset, unsigned >> size, uint32_t data) "To 0x%" PRIx64 >> # net/colo-compare.c >> colo_compare_main(const char *chr) ": %s" >> +colo_compare_ip_info(int psize, const char *sta, const char *stb, >> int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src >> = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" >> +colo_old_packet_check_found(int64_t old_time) "%" PRId64 > > > > . > -- Thanks zhangchen