All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>,
	Gui jianfeng <guijianfeng@cn.fujitsu.com>,
	Jason Wang <jasowang@redhat.com>,
	"eddie.dong" <eddie.dong@intel.com>,
	qemu devel <qemu-devel@nongnu.org>,
	Huang peng <peter.huangpeng@huawei.com>,
	Gong lei <arei.gonglei@huawei.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	jan.kiszka@siemens.com, hongyang.yang@easystack.cn,
	zhanghailiang <zhang.zhanghailiang@huawei.com>
Subject: Re: [Qemu-devel] [RFC PATCH 7/9] net/colo-proxy: add packet enqueue and handle function
Date: Fri, 4 Dec 2015 09:14:27 +0000	[thread overview]
Message-ID: <20151204091418.GA3424@work-vm> (raw)
In-Reply-To: <5661069C.7090800@cn.fujitsu.com>

* Zhang Chen (zhangchen.fnst@cn.fujitsu.com) wrote:
> Hi,Dave
> 
> 
> On 12/03/2015 05:09 PM, Dr. David Alan Gilbert wrote:
> >* Zhang Chen (zhangchen.fnst@cn.fujitsu.com) wrote:
> >>Hi,Dave
> >>
> >>On 12/02/2015 12:12 AM, Dr. David Alan Gilbert wrote:
> >>>* Zhang Chen (zhangchen.fnst@cn.fujitsu.com) wrote:
> >>>>From: zhangchen <zhangchen.fnst@cn.fujitsu.com>
> >>>>
> >>>>Add common packet handle function and enqueue
> >>>>packet distinguished connection,then we can
> >>>>lookup one connection packet to compare
> >>>>
> >>>>Signed-off-by: zhangchen <zhangchen.fnst@cn.fujitsu.com>
> >>>>---
> >>>>  net/colo-proxy.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>>  1 file changed, 166 insertions(+), 1 deletion(-)
> >>>>
> >>>>diff --git a/net/colo-proxy.c b/net/colo-proxy.c
> >>>>index 08a852f..a664e6d 100644
> >>>>--- a/net/colo-proxy.c
> >>>>+++ b/net/colo-proxy.c
> >>>>@@ -24,6 +24,170 @@
> >>>>  static char *mode;
> >>>>  static bool colo_do_checkpoint;
> >>>>+static void packet_destroy(void *opaque, void *user_data);
> >>>>+
> >>>>+static uint32_t connection_key_hash(const void *opaque)
> >>>>+{
> >>>>+    const Connection_key *key = opaque;
> >>>>+    uint32_t a, b, c;
> >>>>+
> >>>>+    /* Jenkins hash */
> >>>>+    a = b = c = JHASH_INITVAL + sizeof(*key);
> >>>>+    a += key->src;
> >>>>+    b += key->dst;
> >>>>+    c += key->ports;
> >>>>+    __jhash_mix(a, b, c);
> >>>>+
> >>>>+    a += key->ip_proto;
> >>>>+    __jhash_final(a, b, c);
> >>>>+
> >>>>+    return c;
> >>>>+}
> >>>>+
> >>>>+static int connection_key_equal(const void *opaque1, const void *opaque2)
> >>>>+{
> >>>>+    return memcmp(opaque1, opaque2, sizeof(Connection_key)) == 0;
> >>>>+}
> >>>>+
> >>>>+static void connection_destroy(void *opaque)
> >>>>+{
> >>>>+    Connection *connection = opaque;
> >>>>+    g_queue_foreach(&connection->primary_list, packet_destroy, NULL);
> >>>>+    g_queue_free(&connection->primary_list);
> >>>>+    g_queue_foreach(&connection->secondary_list, packet_destroy, NULL);
> >>>>+    g_queue_free(&connection->secondary_list);
> >>>>+    g_slice_free(Connection, connection);
> >>>>+}
> >>>>+
> >>>>+static Connection *connection_new(void)
> >>>>+{
> >>>>+    Connection *connection = g_slice_new(Connection);
> >>>>+
> >>>>+    g_queue_init(&connection->primary_list);
> >>>>+    g_queue_init(&connection->secondary_list);
> >>>>+    connection->processing = false;
> >>>>+
> >>>>+    return connection;
> >>>>+}
> >>>>+
> >>>>+/* Return 0 on success, or return -1 if the pkt is corrpted */
> >>>>+static int parse_packet_early(Packet *pkt, Connection_key *key)
> >>>>+{
> >>>>+    int network_length;
> >>>>+    uint8_t *data = pkt->data;
> >>>>+
> >>>>+    pkt->network_layer = data + ETH_HLEN;
> >>>>+    if (ntohs(*(uint16_t *)(data + 12)) != ETH_P_IP) {
> >>>>+        if (ntohs(*(uint16_t *)(data + 12)) == ETH_P_ARP) {
> >>>>+            return -1;
> >>>>+        }
> >>>>+        return 0;
> >>>>+    }
> >>>Can you use some of the functions/macros in include/net/eth.h to
> >>>make this easier? Maybe eth_get_l3_proto ?
> >>>Do you plan to do IPv6 at some point?
> >>I will use include/net/eth.h in next version
> >>
> >>IPv6 currently not support, still colo framework be merged
> >>
> >>>>+    network_length = pkt->ip->ip_hl * 4;
> >>>>+    pkt->transport_layer = pkt->network_layer + network_length;
> >>>>+    key->ip_proto = pkt->ip->ip_p;
> >>>>+    key->src = pkt->ip->ip_src;
> >>>>+    key->dst = pkt->ip->ip_dst;
> >>>>+
> >>>>+    switch (key->ip_proto) {
> >>>>+    case IPPROTO_TCP:
> >>>>+    case IPPROTO_UDP:
> >>>>+    case IPPROTO_DCCP:
> >>>>+    case IPPROTO_ESP:
> >>>>+    case IPPROTO_SCTP:
> >>>>+    case IPPROTO_UDPLITE:
> >>>>+        key->ports = *(uint32_t *)(pkt->transport_layer);
> >>>>+        break;
> >>>>+    case IPPROTO_AH:
> >>>>+        key->ports = *(uint32_t *)(pkt->transport_layer + 4);
> >>>Interesting; I don't see any other code in QEMU to handle AH,
> >>>and I don't know much about it.
> >>>
> >>>>+        break;
> >>>>+    default:
> >>>>+        break;
> >>>>+    }
> >>>>+
> >>>>+    return 0;
> >>>>+}
> >>>>+
> >>>>+static Packet *packet_new(ColoProxyState *s, const void *data,
> >>>>+                          int size, Connection_key *key, NetClientState *sender)
> >>>>+{
> >>>>+    Packet *pkt = g_slice_new(Packet);
> >>>>+
> >>>>+    pkt->data = g_malloc(size);
> >>>>+    memcpy(pkt->data, data, size);
> >>>g_memdup might be useful for these:
> >>>https://developer.gnome.org/glib/stable/glib-Memory-Allocation.html#g-memdup
> >>I will fix it in next version
> >>
> >>>>+    pkt->size = size;
> >>>>+    pkt->s = s;
> >>>>+    pkt->sender = sender;
> >>>>+    pkt->should_be_sent = false;
> >>>>+
> >>>>+    if (parse_packet_early(pkt, key)) {
> >>>>+        packet_destroy(pkt, NULL);
> >>>>+        pkt = NULL;
> >>>>+    }
> >>>>+
> >>>>+    return pkt;
> >>>>+}
> >>>>+
> >>>>+static void packet_destroy(void *opaque, void *user_data)
> >>>>+{
> >>>>+    Packet *pkt = opaque;
> >>>>+    g_free(pkt->data);
> >>>>+    g_slice_free(Packet, pkt);
> >>>>+}
> >>>>+
> >>>>+static Connection *colo_proxy_enqueue_packet(GHashTable *unprocessed_packets,
> >>>>+                                          Connection_key *key,
> >>>>+                                          Packet *pkt, packet_type type)
> >>>>+{
> >>>>+    Connection *connection;
> >>>>+    Packet *tmppkt;
> >>>>+    connection = g_hash_table_lookup(unprocessed_packets, key);
> >>>>+    if (connection == NULL) {
> >>>>+        Connection_key *new_key = g_malloc(sizeof(*key));
> >>>>+
> >>>>+        connection = connection_new();
> >>>>+        memcpy(new_key, key, sizeof(*key));
> >>>>+        key = new_key;
> >>>>+
> >>>>+        g_hash_table_insert(unprocessed_packets, key, connection);
> >>>Is 'unprocessed_packets' a good name for this hashtable? I'm not quite
> >>>sure I understand, but it looks to me like it's your connection-tracking equivalent,
> >>>which then has a queue for each connection with unprocessed packets?
> >>i will change hashtable name to connection_track_table,is it ok?
> >Yes, thank you.
> >
> >>>Also, do we do anything to stop this hash growing really huge? If there
> >>>are lots-and-lots of connections can we limit it somehow? (what does Linux do?)
> >>when we find PVM's packet different to SVM's packet,colo will do checkpoint.
> >>that's means we will flush all connection's packets,even though all packets
> >>are
> >>same,colo will alse do checkpoint periodically. so hashtable can't growing
> >>really huge.
> >I see the flush clears all the packets, but does it also clear the hash?
> >
> 
> I read the kernel code,TCP conntrack will clear hash one time every five
> days periodicity.
> and the hashtable size
>     /* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
>      * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
> I will follow kernel's done to fix colo-proxy in next version.

I think it's OK if you just set a size and make it limit to that size;
lets keep it simple for now.   I think you'll also have to free entries
when you see the TCP connection closed.


Dave

> 
> Thanks for review
> zhangchen
> 
> >>>>+    }
> >>>>+    switch (type) {
> >>>>+    case PRIMARY_OUTPUT:
> >>>>+        if (g_queue_get_length(&connection->secondary_list) > 0) {
> >>>Please add some more comments; I think this is when a packet comes in
> >>>on the primary, and then we find we've already got a packet from the secondary
> >>>waiting?
> >>yes,you are right
> >>
> >>I will add more comments in next version
> >Thank you.
> >
> >Dave
> >
> >>>>+            tmppkt = g_queue_pop_head(&connection->secondary_list);
> >>>>+            DEBUG("g_queue_get_length(&connection->primary_list)=%d\n",
> >>>>+                        g_queue_get_length(&connection->primary_list));
> >>>>+            DEBUG("g_queue_get_length(&connection->secondary_list)=%d\n",
> >>>>+                        g_queue_get_length(&connection->secondary_list));
> >>>>+            if (colo_packet_compare(pkt, tmppkt)) {
> >>>>+                DEBUG("packet same and release packet\n");
> >>>>+                pkt->should_be_sent = true;
> >>>>+                break;
> >>>>+            } else {
> >>>>+                DEBUG("packet different\n");
> >>>>+                colo_proxy_notify_checkpoint();
> >>>>+                pkt->should_be_sent = false;
> >>>>+                break;
> >>>>+            }
> >>>>+        } else {
> >>>>+            g_queue_push_tail(&connection->primary_list, pkt);
> >>>>+            pkt->should_be_sent = false;
> >>>>+        }
> >>>>+
> >>>>+        break;
> >>>>+    case SECONDARY_OUTPUT:
> >>>>+        g_queue_push_tail(&connection->secondary_list, pkt);
> >>>>+        DEBUG("secondary pkt data=%s,  pkt->ip->ipsrc=%x,pkt->ip->ipdst=%x\n",
> >>>>+                    (char *)pkt->data, pkt->ip->ip_src, pkt->ip->ip_dst);
> >>>>+        break;
> >>>>+    default:
> >>>>+        abort();
> >>>>+    }
> >>>>+
> >>>>+    return connection;
> >>>>+}
> >>>>+
> >>>>  /*
> >>>>   * Packets to be sent by colo forward to
> >>>>@@ -165,7 +329,8 @@ static ssize_t colo_proxy_primary_handler(NetFilterState *nf,
> >>>>      }
> >>>>      if (direction == NET_FILTER_DIRECTION_RX) {
> >>>>-        /* TODO: enqueue_primary_packet */
> >>>>+        ret = colo_enqueue_primary_packet(nf, sender, flags, iov,
> >>>>+                    iovcnt, sent_cb);
> >>>The routine above is 'colo_enqueue_packet' rather than colo_enqueue_primary_packet?
> >>yes,colo_enqueue_packet is enqueue packet common
> >>
> >>Thanks for review
> >>zhangchen
> >>
> >>>>      } else {
> >>>>          ret = colo_forward2another(nf, sender, flags, iov, iovcnt,
> >>>>                      sent_cb, COLO_PRIMARY_MODE);
> >>>>-- 
> >>>>1.9.1
> >>>Dave
> >>>
> >>>>
> >>>--
> >>>Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >>>
> >>>
> >>>.
> >>>
> >>
> >>
> >--
> >Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >
> >
> >.
> >
> 
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

  reply	other threads:[~2015-12-04  9:14 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-27 12:27 [Qemu-devel] [RFC PATCH 0/9] Add colo-proxy based on netfilter Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 1/9] Init colo-proxy object " Zhang Chen
2015-11-30  2:50   ` Wen Congyang
2015-11-30  5:38     ` Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 2/9] jhash: add linux kernel jhashtable in qemu Zhang Chen
2015-12-01 11:23   ` Dr. David Alan Gilbert
2015-12-03  3:40     ` Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 3/9] colo-proxy: add colo-proxy framework Zhang Chen
2015-11-28  2:46   ` Hailiang Zhang
2015-11-30  2:25     ` Zhang Chen
2015-11-30  3:10   ` Wen Congyang
2015-11-30  5:44     ` Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 4/9] colo-proxy: add colo-proxy setup work Zhang Chen
2015-11-28  3:02   ` Hailiang Zhang
2015-11-30  2:35     ` Zhang Chen
2015-12-01 15:35   ` Dr. David Alan Gilbert
2015-12-03  3:49     ` Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 5/9] net/colo-proxy: add colo packet handler Zhang Chen
2015-11-28  3:17   ` Hailiang Zhang
2015-11-30  5:37     ` Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 6/9] net/colo-proxy: add packet forward function Zhang Chen
2015-12-01 15:50   ` Dr. David Alan Gilbert
2015-12-03  6:17     ` Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 7/9] net/colo-proxy: add packet enqueue and handle function Zhang Chen
2015-12-01 16:12   ` Dr. David Alan Gilbert
2015-12-03  6:35     ` Zhang Chen
2015-12-03  9:09       ` Dr. David Alan Gilbert
2015-12-04  3:21         ` Zhang Chen
2015-12-04  9:14           ` Dr. David Alan Gilbert [this message]
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 8/9] net/colo-proxy: enqueue primary and secondary packet Zhang Chen
2015-11-27 12:27 ` [Qemu-devel] [RFC PATCH 9/9] net/colo-proxy: add packet compare and notify checkpoint Zhang Chen
2015-12-01 16:37   ` Dr. David Alan Gilbert
2015-12-03  7:10     ` Zhang Chen
2015-12-01 16:44 ` [Qemu-devel] [RFC PATCH 0/9] Add colo-proxy based on netfilter Dr. David Alan Gilbert
2015-12-03  7:33   ` Zhang Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151204091418.GA3424@work-vm \
    --to=dgilbert@redhat.com \
    --cc=arei.gonglei@huawei.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=jan.kiszka@siemens.com \
    --cc=jasowang@redhat.com \
    --cc=lizhijian@cn.fujitsu.com \
    --cc=peter.huangpeng@huawei.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=zhang.zhanghailiang@huawei.com \
    --cc=zhangchen.fnst@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.