All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Or Gerlitz <ogerlitz@voltaire.com>
Cc: Herbert Xu <herbert.xu@redhat.com>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] net: add raw backend
Date: Thu, 2 Jul 2009 18:43:54 +0300	[thread overview]
Message-ID: <20090702154354.GA27022@redhat.com> (raw)
In-Reply-To: <Pine.LNX.4.64.0907011844470.32248@zuben.voltaire.com>

On Wed, Jul 01, 2009 at 06:46:43PM +0300, Or Gerlitz wrote:
> Add raw network backend option which uses a packet socket to provide
> raw networking access. Once the socket is opened it's bound to a
> provided host interface, such that packets received on the interface
> are delivered to the VM and packets sent by the VM are sent to the
> interface.
> 
> Signed-off-by: Or Gerlitz<ogerlitz@voltaire.com>

Looks good to me overall. A couple of comments:

> diff --git a/net.c b/net.c
> index 55f70f2..f7ff381 100644
> --- a/net.c
> +++ b/net.c
> @@ -93,6 +93,9 @@
>  #endif
>  #endif
> 
> +#include <netpacket/packet.h>
> +#include <net/ethernet.h>
> +
>  #if defined(__OpenBSD__)
>  #include <util.h>
>  #endif
> @@ -1476,6 +1479,155 @@ static TAPState *net_tap_init(VLANState *vlan, const char *model,
> 
>  #endif /* !_WIN32 */
> 
> +typedef struct RAWState {
> +    VLANClientState *vc;
> +    int fd;
> +    uint8_t buf[4096];
> +    int promisc;
> +} RAWState;
> +
> +static int net_raw_fd_init(Monitor *mon, const char *ifname, int promisc)
> +{
> +	int fd, ret;
> +	struct ifreq req;
> +	struct sockaddr_ll lladdr;
> +
> +	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
> +	if (fd < 0)
> +		config_error(mon, "packet socket failed\n");
> +
> +	memset(&req, 0, sizeof(req));
> +	strncpy(req.ifr_name, ifname, IFNAMSIZ-1);
> +	ret = ioctl(fd, SIOCGIFINDEX, &req);
> +	if (ret < 0)
> +		config_error(mon, "SIOCGIFINDEX failed\n");
> +
> +	memset(&lladdr, 0, sizeof(lladdr));
> +	lladdr.sll_family   = AF_PACKET;
> +	lladdr.sll_protocol = htons(ETH_P_ALL);
> +	lladdr.sll_ifindex  = req.ifr_ifindex;
> +	ret = bind(fd, (const struct sockaddr *)&lladdr, sizeof(lladdr));
> +	if (ret < 0)
> +		config_error(mon, "bind failed\n");
> +
> +	/* set iface to promiscuous mode (packets sent to the VM MAC) */
> +	if (promisc) {
> +		ret = ioctl(fd, SIOCGIFFLAGS, &req);
> +		if (ret < 0)
> +			perror("SIOCGIFFLAGS failed\n");
> +		req.ifr_flags |= IFF_PROMISC;
> +		ret = ioctl(fd, SIOCSIFFLAGS, &req);
> +		if (ret < 0)
> +			config_error(mon, "SIOCSIFFLAGS to promiscous failed\n");
> +	}
> +
> +	ret = fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
> +	if (ret < 0)
> +		config_error(mon, "O_NONBLOCK set failed\n");
> +
> +	return fd;
> +}
> +
> +static void raw_cleanup(VLANClientState *vc)
> +{
> +	struct ifreq req;
> +	RAWState *s = vc->opaque;
> +
> +	qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
> +	if (s->promisc) {
> +		ioctl(s->fd, SIOCGIFFLAGS, &req);
> +		req.ifr_flags &= ~IFF_PROMISC;
> +		ioctl(s->fd, SIOCSIFFLAGS, &req);
> +	}
> +	close(s->fd);
> +	qemu_free(s);
> +}
> +
> +static void raw_send(void *opaque);
> +
> +static int raw_can_send(void *opaque)
> +{
> +	RAWState *s = opaque;
> +
> +	return qemu_can_send_packet(s->vc);
> +}
> +
> +static void raw_send_completed(VLANClientState *vc, ssize_t len)
> +{
> +	RAWState *s = vc->opaque;
> +
> +	qemu_set_fd_handler2(s->fd, raw_can_send, raw_send, NULL, s);
> +}
> +
> +static void raw_send(void *opaque)
> +{
> +	RAWState *s = opaque;
> +	int size;
> +
> +	do {
> +		size = recv(s->fd, s->buf, sizeof(s->buf), MSG_TRUNC);
> +		if (size <= 0)
> +			break;

A couple of improvement suggestions here:
- Because recv() is called with MSG_TRUNC, it returns the real packet
  length, so you might get size > sizeof(s->buf).
  Should not happen, but you might want to check for this condition,
  report it, and discard the packet.

- It might be a good idea to request aux data and verify that the checksum
  is set, and calculate it if not. This will make it possible to bind to
  a local device as well.


> +
> +		size = qemu_send_packet_async(s->vc, s->buf, size,
> +						raw_send_completed);
> +		if (size == 0)
> +			qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
> +
> +	} while (size > 0);
> +}
> +
> +static ssize_t raw_receive_iov(VLANClientState *vc, const struct iovec *iov,
> +				int iovcnt)
> +{
> +	ssize_t len;
> +	RAWState *s = vc->opaque;
> +
> +	do {
> +		len = writev(s->fd, iov, iovcnt);
> +	} while (len == -1 && (errno == EINTR || errno == EAGAIN));
> +
> +	return len;
> +}
> +
> +static ssize_t raw_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
> +{
> +	struct iovec iov[1];
> +
> +	iov[0].iov_base = (char *)buf;
> +	iov[0].iov_len  = size;
> +
> +	return raw_receive_iov(vc, iov, 1);
> +}
> +
> +static int net_raw_init(Monitor *mon, VLANState *vlan, const char *model,
> +			const char *name, const char *ifname,
> +			int promisc, int fd)
> +{
> +	RAWState *s;
> +
> +	s = qemu_mallocz(sizeof(RAWState));
> +
> +	if (fd == -1) {
> +		s->fd = net_raw_fd_init(mon, ifname, promisc);
> +		s->promisc = promisc;
> +	} else
> +		s->fd = fd;
> +
> +	s->vc = qemu_new_vlan_client(vlan, model, name, NULL, raw_receive,
> +					raw_receive_iov, raw_cleanup, s);
> +	qemu_set_fd_handler2(s->fd, raw_can_send, raw_send, NULL, s);
> +
> +	if (fd == -1)
> +		snprintf(s->vc->info_str, sizeof(s->vc->info_str),
> +			"raw: ifname=%s, promisc=%d", ifname, promisc);
> +	else
> +		snprintf(s->vc->info_str, sizeof(s->vc->info_str),
> +			"raw: fd=%d", fd);
> +
> +	return 0;
> +}
> +
>  #if defined(CONFIG_VDE)
>  typedef struct VDEState {
>      VLANClientState *vc;
> @@ -2348,6 +2500,41 @@ int net_client_init(Monitor *mon, const char *device, const char *p)
>          }
>      } else
>  #endif
> +    if (!strcmp(device, "raw")) {
> +	char chkbuf[64], ifname[64];
> +        int raw_fd = -1;
> +        int promisc = 1;

promisc = 0 might be a safer default.

> +        if (get_param_value(buf, sizeof(buf), "fd", p) > 0) {
> +            static const char * const fd_params[] = {
> +                "vlan", "name", "fd", NULL
> +            };
> +            if (check_params(chkbuf, sizeof(chkbuf), fd_params, p) < 0) {
> +                config_error(mon, "invalid parameter '%s' in '%s'\n", chkbuf, p);
> +                ret = -1;
> +                goto out;
> +            }
> +	    raw_fd = strtol(buf, NULL, 0);
> +	    fcntl(raw_fd, F_SETFL, fcntl(raw_fd, F_GETFL | O_NONBLOCK));
> +        } else {
> +            static const char * const tap_params[] = {
> +                "vlan", "name", "ifname", "promisc", NULL
> +            };
> +            if (check_params(chkbuf, sizeof(chkbuf), tap_params, p) < 0) {
> +                config_error(mon, "invalid parameter '%s' in '%s'\n", chkbuf, p);
> +                ret = -1;
> +                goto out;
> +            }
> +            if (get_param_value(ifname, sizeof(ifname), "ifname", p) <= 0) {
> +            	config_error(mon, "raw: no interface name\n");
> +            	ret = -1;
> +            	goto out;
> +            }
> +            if (get_param_value(buf, sizeof(buf), "promisc", p))
> +                promisc = atoi(buf);
> +	}
> +	vlan->nb_host_devs++;
> +	ret = net_raw_init(mon, vlan, device, name, ifname, promisc, raw_fd);
> +    } else
>      if (!strcmp(device, "socket")) {
>          char chkbuf[64];
>          if (get_param_value(buf, sizeof(buf), "fd", p) > 0) {
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 503da33..0a3c807 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -761,6 +761,10 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
>      "                use 'sndbuf=nbytes' to limit the size of the send buffer\n"
>  #endif
>  #endif
> +    "-net raw[,vlan=n][,name=str],ifname=name[,promisc=m]\n"
> +    "                bound the host network interface to VLAN 'n' in a raw manner:\n"

in a raw manner -> using a raw packet socket

> +    "                packets received on the interface are delivered to the vlan and\n"
> +    "                packets delivered on the vlan are sent to the interface\n"

Document the promisc option?

>      "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
>      "                connect the vlan 'n' to another VLAN using a socket connection\n"
>      "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port]\n"
> 

  parent reply	other threads:[~2009-07-02 15:44 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-01 15:46 [Qemu-devel] [PATCH] net: add raw backend Or Gerlitz
2009-07-01 16:21 ` Jamie Lokier
2009-07-02 12:25   ` Or Gerlitz
2009-07-03  2:39     ` Jamie Lokier
2009-07-07 13:33       ` Or Gerlitz
2009-07-07 14:57         ` Jamie Lokier
2009-07-08 14:45           ` Or Gerlitz
2009-07-14 13:54             ` Or Gerlitz
2009-07-15 20:38             ` Jamie Lokier
2009-07-15 21:06               ` Jan Kiszka
2009-07-15 21:52                 ` Jamie Lokier
2009-07-16  8:29               ` Or Gerlitz
2009-07-20 14:13               ` [Qemu-devel] [PATCH] net: add raw backend - some performance measurements Or Gerlitz
2009-07-20 15:53                 ` Herbert Xu
2009-07-20 18:20                   ` Michael S. Tsirkin
2009-07-21  1:46                     ` Herbert Xu
2009-07-21  7:03                   ` Or Gerlitz
2009-07-21  7:25                     ` Herbert Xu
2009-07-21  7:25                       ` Herbert Xu
2009-07-21 10:17                       ` Or Gerlitz
2009-07-21 10:17                         ` Or Gerlitz
2009-07-21 10:27                       ` Michael S. Tsirkin
2009-07-21 10:27                         ` Michael S. Tsirkin
2009-07-21 11:05                         ` Or Gerlitz
2009-07-21 11:05                           ` Or Gerlitz
2009-07-21 12:01                           ` Michael S. Tsirkin
2009-07-21 12:01                             ` Michael S. Tsirkin
2009-07-21 12:14                             ` Herbert Xu
2009-07-21 12:14                               ` Herbert Xu
2009-07-21 13:41                               ` Or Gerlitz
2009-07-21 13:41                                 ` Or Gerlitz
     [not found] ` <5b31733c0907011250i7afcdbcdnb844290de4ad64f2@mail.gmail.com>
2009-07-02 12:08   ` [Qemu-devel] [PATCH] net: add raw backend Or Gerlitz
2009-07-02 15:43 ` Michael S. Tsirkin [this message]
2009-07-07 14:45   ` Or Gerlitz
2009-07-07 14:49     ` Michael S. Tsirkin
2009-07-08 14:46       ` Or Gerlitz
2009-07-08 15:06       ` Or Gerlitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090702154354.GA27022@redhat.com \
    --to=mst@redhat.com \
    --cc=herbert.xu@redhat.com \
    --cc=ogerlitz@voltaire.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.