From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1MMOSi-00032w-4K for qemu-devel@nongnu.org; Thu, 02 Jul 2009 11:44:36 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1MMOSd-00030P-Gv for qemu-devel@nongnu.org; Thu, 02 Jul 2009 11:44:35 -0400 Received: from [199.232.76.173] (port=44188 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1MMOSd-00030L-A7 for qemu-devel@nongnu.org; Thu, 02 Jul 2009 11:44:31 -0400 Received: from mx2.redhat.com ([66.187.237.31]:34394) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1MMOSc-000171-MJ for qemu-devel@nongnu.org; Thu, 02 Jul 2009 11:44:31 -0400 Date: Thu, 2 Jul 2009 18:43:54 +0300 From: "Michael S. Tsirkin" Subject: Re: [Qemu-devel] [PATCH] net: add raw backend Message-ID: <20090702154354.GA27022@redhat.com> References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Or Gerlitz Cc: Herbert Xu , qemu-devel@nongnu.org On Wed, Jul 01, 2009 at 06:46:43PM +0300, Or Gerlitz wrote: > Add raw network backend option which uses a packet socket to provide > raw networking access. Once the socket is opened its bouned to a > provided host interface, such that packets received on the interface > are delivered to the VM and packets sent by the VM are sent to the > interface. > > Signed-off-by: Or Gerlitz Looks good to me overall. 
A couple of comments: > diff --git a/net.c b/net.c > index 55f70f2..f7ff381 100644 > --- a/net.c > +++ b/net.c > @@ -93,6 +93,9 @@ > #endif > #endif > > +#include > +#include > + > #if defined(__OpenBSD__) > #include > #endif > @@ -1476,6 +1479,155 @@ static TAPState *net_tap_init(VLANState *vlan, const char *model, > > #endif /* !_WIN32 */ > > +typedef struct RAWState { > + VLANClientState *vc; > + int fd; > + uint8_t buf[4096]; > + int promisc; > +} RAWState; > + > +static int net_raw_fd_init(Monitor *mon, const char *ifname, int promisc) > +{ > + int fd, ret; > + struct ifreq req; > + struct sockaddr_ll lladdr; > + > + fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); > + if (fd < 0) > + config_error(mon, "packet socket failed\n"); > + > + memset(&req, 0, sizeof(req)); > + strncpy(req.ifr_name, ifname, IFNAMSIZ-1); > + ret = ioctl(fd, SIOCGIFINDEX, &req); > + if (ret < 0) > + config_error(mon, "SIOCGIFINDEX failed\n"); > + > + memset(&lladdr, 0, sizeof(lladdr)); > + lladdr.sll_family = AF_PACKET; > + lladdr.sll_protocol = htons(ETH_P_ALL); > + lladdr.sll_ifindex = req.ifr_ifindex; > + ret = bind(fd, (const struct sockaddr *)&lladdr, sizeof(lladdr)); > + if (ret < 0) > + config_error(mon, "bind failed\n"); > + > + /* set iface to promiscuous mode (packets sent to the VM MAC) */ > + if (promisc) { > + ret = ioctl(fd, SIOCGIFFLAGS, &req); > + if (ret < 0) > + perror("SIOCGIFFLAGS failed\n"); > + req.ifr_flags |= IFF_PROMISC; > + ret = ioctl(fd, SIOCSIFFLAGS, &req); > + if (ret < 0) > + config_error(mon, "SIOCSIFFLAGS to promiscous failed\n"); > + } > + > + ret = fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK); > + if (ret < 0) > + config_error(mon, "O_NONBLOCK set failed\n"); > + > + return fd; > +} > + > +static void raw_cleanup(VLANClientState *vc) > +{ > + struct ifreq req; > + RAWState *s = vc->opaque; > + > + qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > + if (s->promisc) { > + ioctl(s->fd, SIOCGIFFLAGS, &req); > + req.ifr_flags &= ~IFF_PROMISC; 
> + ioctl(s->fd, SIOCSIFFLAGS, &req); > + } > + close(s->fd); > + qemu_free(s); > +} > + > +static void raw_send(void *opaque); > + > +static int raw_can_send(void *opaque) > +{ > + RAWState *s = opaque; > + > + return qemu_can_send_packet(s->vc); > +} > + > +static void raw_send_completed(VLANClientState *vc, ssize_t len) > +{ > + RAWState *s = vc->opaque; > + > + qemu_set_fd_handler2(s->fd, raw_can_send, raw_send, NULL, s); > +} > + > +static void raw_send(void *opaque) > +{ > + RAWState *s = opaque; > + int size; > + > + do { > + size = recv(s->fd, s->buf, sizeof(s->buf), MSG_TRUNC); > + if (size <= 0) > + break; A couple of improvement suggestions here: - Because MSG_TRUNC makes recv() return the real packet length, you might get size > sizeof(s->buf). This should not happen, but you might want to check for this condition, report it, and discard the packet. - It might be a good idea to request aux data and verify that the checksum is set, calculating it if not. This will make it possible to bind to a local device as well. > + > + size = qemu_send_packet_async(s->vc, s->buf, size, > + raw_send_completed); > + if (size == 0) > + qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > + > + } while (size > 0); > +} > + > +static ssize_t raw_receive_iov(VLANClientState *vc, const struct iovec *iov, > + int iovcnt) > +{ > + ssize_t len; > + RAWState *s = vc->opaque; > + > + do { > + len = writev(s->fd, iov, iovcnt); > + } while (len == -1 && (errno == EINTR || errno == EAGAIN)); > + > + return len; > +} > + > +static ssize_t raw_receive(VLANClientState *vc, const uint8_t *buf, size_t size) > +{ > + struct iovec iov[1]; > + > + iov[0].iov_base = (char *)buf; > + iov[0].iov_len = size; > + > + return raw_receive_iov(vc, iov, 1); > +} > + > +static int net_raw_init(Monitor *mon, VLANState *vlan, const char *model, > + const char *name, const char *ifname, > + int promisc, int fd) > +{ > + RAWState *s; > + > + s = qemu_mallocz(sizeof(RAWState)); > + > + if (fd == -1) { > + s->fd = net_raw_fd_init(mon, ifname, promisc); > + s->promisc = 
promisc; > + } else > + s->fd = fd; > + > + s->vc = qemu_new_vlan_client(vlan, model, name, NULL, raw_receive, > + raw_receive_iov, raw_cleanup, s); > + qemu_set_fd_handler2(s->fd, raw_can_send, raw_send, NULL, s); > + > + if (fd == -1) > + snprintf(s->vc->info_str, sizeof(s->vc->info_str), > + "raw: ifname=%s, promisc=%d", ifname, promisc); > + else > + snprintf(s->vc->info_str, sizeof(s->vc->info_str), > + "raw: fd=%d", fd); > + > + return 0; > +} > + > #if defined(CONFIG_VDE) > typedef struct VDEState { > VLANClientState *vc; > @@ -2348,6 +2500,41 @@ int net_client_init(Monitor *mon, const char *device, const char *p) > } > } else > #endif > + if (!strcmp(device, "raw")) { > + char chkbuf[64], ifname[64]; > + int raw_fd = -1; > + int promisc = 1; promisc = 0 might be a safer default. > + if (get_param_value(buf, sizeof(buf), "fd", p) > 0) { > + static const char * const fd_params[] = { > + "vlan", "name", "fd", NULL > + }; > + if (check_params(chkbuf, sizeof(chkbuf), fd_params, p) < 0) { > + config_error(mon, "invalid parameter '%s' in '%s'\n", chkbuf, p); > + ret = -1; > + goto out; > + } > + raw_fd = strtol(buf, NULL, 0); > + fcntl(raw_fd, F_SETFL, fcntl(raw_fd, F_GETFL | O_NONBLOCK)); > + } else { > + static const char * const tap_params[] = { > + "vlan", "name", "ifname", "promisc", NULL > + }; > + if (check_params(chkbuf, sizeof(chkbuf), tap_params, p) < 0) { > + config_error(mon, "invalid parameter '%s' in '%s'\n", chkbuf, p); > + ret = -1; > + goto out; > + } > + if (get_param_value(ifname, sizeof(ifname), "ifname", p) <= 0) { > + config_error(mon, "raw: no interface name\n"); > + ret = -1; > + goto out; > + } > + if (get_param_value(buf, sizeof(buf), "promisc", p)) > + promisc = atoi(buf); > + } > + vlan->nb_host_devs++; > + ret = net_raw_init(mon, vlan, device, name, ifname, promisc, raw_fd); > + } else > if (!strcmp(device, "socket")) { > char chkbuf[64]; > if (get_param_value(buf, sizeof(buf), "fd", p) > 0) { > diff --git a/qemu-options.hx 
b/qemu-options.hx > index 503da33..0a3c807 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -761,6 +761,10 @@ DEF("net", HAS_ARG, QEMU_OPTION_net, > " use 'sndbuf=nbytes' to limit the size of the send buffer\n" > #endif > #endif > + "-net raw[,vlan=n][,name=str],ifname=name[,promisc=m]\n" > + " bound the host network interface to VLAN 'n' in a raw manner:\n" Suggested rewording: "in a raw manner" -> "using a raw packet socket". > + " packets received on the interface are delivered to the vlan and\n" > + " packets delivered on the vlan are sent to the interface\n" Could you also document the promisc option here? > "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n" > " connect the vlan 'n' to another VLAN using a socket connection\n" > "-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port]\n" >