qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: anton.ivanov@cambridgegreys.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 3/3] Unified Datagram Socket Transport - raw support
Date: Wed, 19 Jul 2017 13:58:48 +0800	[thread overview]
Message-ID: <e31eb033-a770-e5fd-97b0-0b50f268c22c@redhat.com> (raw)
In-Reply-To: <20170718170819.28494-4-anton.ivanov@cambridgegreys.com>



On 2017年07月19日 01:08, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> This adds raw socket support to the unified socket driver.

Interesting, in fact, I've finished a tpacket backend. Let me post it 
sometime after hardfreeze.

> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   net/Makefile.objs |   2 +-
>   net/clients.h     |   3 ++
>   net/net.c         |   5 +++
>   net/raw.c         | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   qapi-schema.json  |  25 +++++++++--
>   qemu-options.hx   |  33 +++++++++++++++
>   6 files changed, 186 insertions(+), 5 deletions(-)
>   create mode 100644 net/raw.c
>
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index 128164e39b..54cf7dd194 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
>   common-obj-y += socket.o
>   common-obj-y += dump.o
>   common-obj-y += eth.o
> -common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o gre.o
> +common-obj-$(CONFIG_UNIFIED) += l2tpv3.o unified.o gre.o raw.o
>   common-obj-$(CONFIG_POSIX) += vhost-user.o
>   common-obj-$(CONFIG_SLIRP) += slirp.o
>   common-obj-$(CONFIG_VDE) += vde.o
> diff --git a/net/clients.h b/net/clients.h
> index 8f8a59aee3..98d8ae59b7 100644
> --- a/net/clients.h
> +++ b/net/clients.h
> @@ -53,6 +53,9 @@ int net_init_l2tpv3(const Netdev *netdev, const char *name,
>   int net_init_gre(const Netdev *netdev, const char *name,
>                       NetClientState *peer, Error **errp);
>   
> +int net_init_raw(const Netdev *netdev, const char *name,
> +                    NetClientState *peer, Error **errp);
> +
>   #ifdef CONFIG_VDE
>   int net_init_vde(const Netdev *netdev, const char *name,
>                    NetClientState *peer, Error **errp);
> diff --git a/net/net.c b/net/net.c
> index b75b6e8154..2d988a120c 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -962,6 +962,7 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
>   #ifdef CONFIG_UNIFIED
>           [NET_CLIENT_DRIVER_L2TPV3] = net_init_l2tpv3,
>           [NET_CLIENT_DRIVER_GRE] = net_init_gre,
> +        [NET_CLIENT_DRIVER_RAW] = net_init_raw,
>   #endif
>   };
>   
> @@ -1017,6 +1018,10 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
>               legacy.type = NET_CLIENT_DRIVER_GRE;
>               legacy.u.gre = opts->u.gre;
>               break;
> +        case NET_LEGACY_OPTIONS_TYPE_RAW:
> +            legacy.type = NET_CLIENT_DRIVER_RAW;
> +            legacy.u.raw = opts->u.raw;
> +            break;
>           case NET_LEGACY_OPTIONS_TYPE_SOCKET:
>               legacy.type = NET_CLIENT_DRIVER_SOCKET;
>               legacy.u.socket = opts->u.socket;
> diff --git a/net/raw.c b/net/raw.c
> new file mode 100644
> index 0000000000..73e2fd9fe3
> --- /dev/null
> +++ b/net/raw.c
> @@ -0,0 +1,123 @@
> +/*
> + * QEMU System Emulator
> + *
> + * Copyright (c) 2015-2017 Cambridge Greys Limited
> + * Copyright (c) 2003-2008 Fabrice Bellard
> + * Copyright (c) 2012-2014 Cisco Systems
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include <linux/ip.h>
> +#include <netdb.h>
> +#include <sys/ioctl.h>
> +#include <net/if.h>
> +#include "net/net.h"
> + #include <sys/socket.h>
> +#include <linux/if_packet.h>
> +#include <net/ethernet.h>
> +#include "clients.h"
> +#include "qemu-common.h"
> +#include "qemu/error-report.h"
> +#include "qemu/option.h"
> +#include "qemu/sockets.h"
> +#include "qemu/iov.h"
> +#include "qemu/main-loop.h"
> +#include "unified.h"
> +
> +static int noop(void *us, uint8_t *buf)
> +{
> +    return 0;
> +}
> +
> +int net_init_raw(const Netdev *netdev,
> +                    const char *name,
> +                    NetClientState *peer, Error **errp)
> +{
> +
> +    const NetdevRawOptions *raw;
> +    NetUnifiedState *s;
> +    NetClientState *nc;
> +
> +    int fd = -1;
> +    int err;
> +
> +    struct ifreq ifr;
> +    struct sockaddr_ll sock;
> +
> +
> +    nc = qemu_new_unified_net_client(name, peer);
> +
> +    s = DO_UPCAST(NetUnifiedState, nc, nc);
> +
> +    fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
> +    if (fd == -1) {
> +        err = -errno;
> +        error_report("raw_open : raw socket creation failed, errno = %d", -err);
> +        goto outerr;
> +    }
> +
> +
> +    s->form_header = NULL;
> +    s->verify_header = &noop;
> +    s->queue_head = 0;
> +    s->queue_tail = 0;
> +    s->header_mismatch = false;
> +    s->dgram_dst = NULL;
> +    s->dst_size = 0;
> +
> +    assert(netdev->type == NET_CLIENT_DRIVER_RAW);
> +    raw = &netdev->u.raw;
> +
> +    memset(&ifr, 0, sizeof(struct ifreq));
> +    strncpy((char *) &ifr.ifr_name, raw->ifname, sizeof(ifr.ifr_name) - 1);
> +
> +    if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
> +        err = -errno;
> +        error_report("SIOCGIFINDEX, failed to get raw interface index for %s",
> +            raw->ifname);
> +        goto outerr;
> +    }
> +
> +    sock.sll_family = AF_PACKET;
> +    sock.sll_protocol = htons(ETH_P_ALL);
> +    sock.sll_ifindex = ifr.ifr_ifindex;
> +
> +    if (bind(fd, (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
> +        error_report("raw: failed to bind raw socket");
> +        err = -errno;
> +        goto outerr;
> +    }
> +
> +    s->offset = 0;
> +
> +    qemu_net_finalize_unified_init(s, fd);
> +
> +    snprintf(s->nc.info_str, sizeof(s->nc.info_str),
> +             "raw: connected");
> +    return 0;
> +outerr:
> +    qemu_del_net_client(nc);
> +    if (fd >= 0) {
> +        close(fd);
> +    }
> +    return -1;
> +}
> +
> diff --git a/qapi-schema.json b/qapi-schema.json
> index aec303a14e..cde78ce3a1 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -3883,6 +3883,21 @@
>       '*txkey':    'uint32',
>       '*rxkey':    'uint32' } }
>   ##
> +# @NetdevRawOptions:
> +#
> +# Connect the VLAN to a network interface using raw sockets
> +#
> +# @ifname: network interface name
> +#
> +
> +# Since 2.9

2.11.

> +##
> +{ 'struct': 'NetdevRawOptions',
> +  'data': {
> +    'ifname':          'str'
> +} }
> +
> +##
>   # @NetdevVdeOptions:
>   #
>   # Connect the VLAN to a vde switch running on the host.
> @@ -4000,7 +4015,7 @@
>   ##
>   { 'enum': 'NetClientDriver',
>     'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', 'dump',
> -            'bridge', 'hubport', 'netmap', 'vhost-user', 'gre' ] }
> +            'bridge', 'hubport', 'netmap', 'vhost-user', 'gre', 'raw' ] }
>   
>   ##
>   # @Netdev:
> @@ -4031,7 +4046,8 @@
>       'hubport':  'NetdevHubPortOptions',
>       'netmap':   'NetdevNetmapOptions',
>       'vhost-user': 'NetdevVhostUserOptions',
> -    'gre':      'NetdevGREOptions' } }
> +    'gre':      'NetdevGREOptions',
> +    'raw':      'NetdevRawOptions' } }
>   
>   ##
>   # @NetLegacy:
> @@ -4062,7 +4078,7 @@
>   ##
>   { 'enum': 'NetLegacyOptionsType',
>     'data': ['none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
> -           'dump', 'bridge', 'netmap', 'vhost-user', 'gre'] }
> +           'dump', 'bridge', 'netmap', 'vhost-user', 'gre', 'raw'] }
>   
>   ##
>   # @NetLegacyOptions:
> @@ -4086,7 +4102,8 @@
>       'bridge':   'NetdevBridgeOptions',
>       'netmap':   'NetdevNetmapOptions',
>       'vhost-user': 'NetdevVhostUserOptions',
> -    'gre':      'NetdevGREOptions' } }
> +    'gre':      'NetdevGREOptions',
> +    'raw':      'NetdevRawOptions' } }
>   
>   ##
>   # @NetFilterDirection:
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 6f8d5cbe21..d9db8b576b 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1988,6 +1988,13 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>       "                use 'txkey=0x01234' to specify a txkey\n"
>       "                use 'sequence=on' to add frame sequence to each packet\n"
>       "                use 'pinsequence=on' to work around broken sequence handling in peer\n"
> +    "-netdev raw,id=str,ifname=ifname\n"
> +    "                configure a network backend with ID 'str' connected to\n"
> +    "                an Ethernet interface named ifname via raw socket.\n"
> +    "                This backend does not change the interface settings.\n"
> +    "                Most interfaces will require being set into promisc mode,\n"
> +    "                as well having most offloads (TSO, etc) turned off.\n"
> +    "                Some virtual interfaces like tap support only RX.\n"

Note that QEMU supports the vnet header, so is there any reason to turn off 
e.g. TSO here?

>   #endif
>       "-netdev socket,id=str[,fd=h][,listen=[host]:port][,connect=host:port]\n"
>       "                configure a network backend to connect to another network\n"
> @@ -2463,6 +2470,32 @@ qemu-system-i386 linux.img -net nic -net gre,src=4.2.3.1,dst=1.2.3.4
>   
>   @end example
>   
> +@item -netdev raw,id=@var{id},ifname=@var{ifname}
> +@itemx -net raw[,vlan=@var{n}][,name=@var{name}],ifname=@var{ifname}
> +Connect VLAN @var{n} directly to an Ethernet interface using a raw socket.
> +
> +This transport allows a VM to bypass most of the network stack which is
> +extremely useful for tapping.
> +
> +@item ifname=@var{ifname}
> +    interface name (mandatory)
> +
> +@example
> +# set up the interface - put it in promiscuous mode and turn off offloads
> +ifconfig eth0 up
> +ifconfig eth0 promisc
> +
> +/sbin/ethtool -K eth0 gro off
> +/sbin/ethtool -K eth0 tso off
> +/sbin/ethtool -K eth0 gso off
> +/sbin/ethtool -K eth0 tx off

Any reason to turn off tx here?

> +
> +# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter
> +
> +qemu-system-i386 linux.img -net nic -net raw,ifname=eth0

Can we switch to using -netdev here?

Thanks

> +
> +@end example
> +
>   @item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
>   @itemx -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
>   Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and

  reply	other threads:[~2017-07-19  5:59 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-18 17:08 [Qemu-devel] Unified Socket Driver anton.ivanov
2017-07-18 17:08 ` [Qemu-devel] [PATCH 1/3] Unified Datagram Socket Transport anton.ivanov
2017-07-19  5:39   ` Jason Wang
2017-07-19  5:48     ` Anton Ivanov
2017-07-19  6:07       ` Jason Wang
2017-07-19  6:48         ` Anton Ivanov
2017-07-21 17:50     ` Anton Ivanov
2017-07-24  3:51       ` Jason Wang
2017-07-18 17:08 ` [Qemu-devel] [PATCH 2/3] Unified Datagram Socket Transport - GRE support anton.ivanov
2017-07-19  5:48   ` Jason Wang
2017-07-19  5:50     ` Anton Ivanov
2017-07-19 14:40   ` Eric Blake
2017-07-19 14:46     ` Anton Ivanov
2017-07-19 17:32     ` Anton Ivanov
2017-07-21 19:14       ` Eric Blake
2017-07-22  7:52         ` Anton Ivanov
2017-07-18 17:08 ` [Qemu-devel] [PATCH 3/3] Unified Datagram Socket Transport - raw support anton.ivanov
2017-07-19  5:58   ` Jason Wang [this message]
2017-07-19  6:02     ` Anton Ivanov
2017-07-21 18:50     ` Anton Ivanov
2017-07-24  4:03       ` Jason Wang
2017-09-08 17:22         ` Anton Ivanov
2017-07-19 14:42   ` Eric Blake

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e31eb033-a770-e5fd-97b0-0b50f268c22c@redhat.com \
    --to=jasowang@redhat.com \
    --cc=anton.ivanov@cambridgegreys.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).