From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([140.186.70.92]:55983) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RBsQd-0006lS-Se for qemu-devel@nongnu.org; Thu, 06 Oct 2011 14:12:21 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1RBsQc-0001Hf-8a for qemu-devel@nongnu.org; Thu, 06 Oct 2011 14:12:19 -0400 Received: from e38.co.us.ibm.com ([32.97.110.159]:56227) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1RBsQb-0001HZ-Vg for qemu-devel@nongnu.org; Thu, 06 Oct 2011 14:12:18 -0400 Received: from d03relay05.boulder.ibm.com (d03relay05.boulder.ibm.com [9.17.195.107]) by e38.co.us.ibm.com (8.14.4/8.13.1) with ESMTP id p96I41Oc006897 for ; Thu, 6 Oct 2011 12:04:01 -0600 Received: from d03av04.boulder.ibm.com (d03av04.boulder.ibm.com [9.17.195.170]) by d03relay05.boulder.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id p96IAhhR140790 for ; Thu, 6 Oct 2011 12:10:50 -0600 Received: from d03av04.boulder.ibm.com (loopback [127.0.0.1]) by d03av04.boulder.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id p96IAceq015088 for ; Thu, 6 Oct 2011 12:10:38 -0600 Message-ID: <4E8DEF18.40904@linux.vnet.ibm.com> Date: Thu, 06 Oct 2011 14:10:32 -0400 From: Corey Bryant MIME-Version: 1.0 References: <1317915508-15491-1-git-send-email-rmarwah@linux.vnet.ibm.com> <1317915508-15491-2-git-send-email-rmarwah@linux.vnet.ibm.com> <4E8DE900.5060105@us.ibm.com> In-Reply-To: <4E8DE900.5060105@us.ibm.com> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH 1/4] Add basic version of bridge helper List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Anthony Liguori Cc: Richa Marwaha , qemu-devel@nongnu.org On 10/06/2011 01:44 PM, Anthony Liguori wrote: > On 10/06/2011 10:38 AM, Richa Marwaha wrote: >> This patch adds a helper that can be used to create a tap device >> attached to >> a bridge device. Since this helper is minimal in what it does, it can be >> given CAP_NET_ADMIN which allows qemu to avoid running as root while >> still >> satisfying the majority of what users tend to want to do with tap >> devices. >> >> The way this all works is that qemu launches this helper passing a bridge >> name and the name of an inherited file descriptor. The descriptor is one >> end of a socketpair() of domain sockets. This domain socket is used to >> transmit a file descriptor of the opened tap device from the helper to >> qemu. >> >> The helper can then exit and let qemu use the tap device. >> >> Signed-off-by: Richa Marwaha >> --- >> Makefile | 12 +++- >> configure | 1 + >> qemu-bridge-helper.c | 205 >> ++++++++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 216 insertions(+), 2 deletions(-) >> create mode 100644 qemu-bridge-helper.c >> >> diff --git a/Makefile b/Makefile >> index 6ed3194..f2caedc 100644 >> --- a/Makefile >> +++ b/Makefile >> @@ -34,6 +34,8 @@ $(call set-vpath, $(SRC_PATH):$(SRC_PATH)/hw) >> >> LIBS+=-lz $(LIBS_TOOLS) >> >> +HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF) >> + >> ifdef BUILD_DOCS >> DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 >> QMP/qmp-commands.txt >> else >> @@ -74,7 +76,7 @@ defconfig: >> >> -include config-all-devices.mak >> >> -build-all: $(DOCS) $(TOOLS) recurse-all >> +build-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all >> >> config-host.h: config-host.h-timestamp >> config-host.h-timestamp: config-host.mak >> @@ -151,6 +153,8 @@ qemu-nbd$(EXESUF): qemu-nbd.o qemu-tool.o >> qemu-error.o $(oslib-obj-y) $(trace-ob >> >> qemu-io$(EXESUF): qemu-io.o cmd.o qemu-tool.o qemu-error.o >> $(oslib-obj-y) $(trace-obj-y) $(block-obj-y) $(qobject-obj-y) >> $(version-obj-y) qemu-timer-common.o >> >> +qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o >> + >> qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx >> $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h< $< > $@," GEN $@") >> >> @@ -208,7 +212,7 @@ clean: >> # avoid old build problems by removing potentially incorrect old files >> rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h >> gen-op-arm.h >> rm -f qemu-options.def >> - rm -f *.o *.d *.a *.lo $(TOOLS) qemu-ga TAGS cscope.* *.pod *~ */*~ >> + rm -f *.o *.d *.a *.lo $(TOOLS) $(HELPERS-y) qemu-ga TAGS cscope.* >> *.pod *~ */*~ >> rm -Rf .libs >> rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d block/*.o block/*.d >> net/*.o net/*.d fsdev/*.o fsdev/*.d ui/*.o ui/*.d qapi/*.o qapi/*.d >> qga/*.o qga/*.d >> rm -f qemu-img-cmds.h >> @@ -275,6 +279,10 @@ install: all $(if $(BUILD_DOCS),install-doc) >> install-sysconfig >> ifneq ($(TOOLS),) >> $(INSTALL_PROG) $(STRIP_OPT) $(TOOLS) "$(DESTDIR)$(bindir)" >> endif >> +ifneq ($(HELPERS-y),) >> + $(INSTALL_DIR) "$(DESTDIR)$(libexecdir)" >> + $(INSTALL_PROG) $(STRIP_OPT) $(HELPERS-y) "$(DESTDIR)$(libexecdir)" >> +endif >> ifneq ($(BLOBS),) >> $(INSTALL_DIR) "$(DESTDIR)$(datadir)" >> set -e; for x in $(BLOBS); do \ >> diff --git a/configure b/configure >> index 59b1494..3e32834 100755 >> --- a/configure >> +++ b/configure >> @@ -2742,6 +2742,7 @@ echo "mandir=$mandir">> $config_host_mak >> echo "datadir=$datadir">> $config_host_mak >> echo "sysconfdir=$sysconfdir">> $config_host_mak >> echo "docdir=$docdir">> $config_host_mak >> +echo "libexecdir=\${prefix}/libexec">> $config_host_mak >> echo "confdir=$confdir">> $config_host_mak >> >> case "$cpu" in >> diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c >> new file mode 100644 >> index 0000000..4ac7b36 >> --- /dev/null >> +++ b/qemu-bridge-helper.c >> @@ -0,0 +1,205 @@ >> +/* >> + * QEMU Bridge Helper >> + * >> + * Copyright IBM, Corp. 2011 >> + * >> + * Authors: >> + * Anthony Liguori > > Heh, fairly sure that's not my email address ;-) > I thought that was a secret identity. :) We'll update that. >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2. See >> + * the COPYING file in the top-level directory. >> + * >> + */ >> + >> +#include "config-host.h" >> + >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#include >> + >> +#include >> + >> +#include "net/tap-linux.h" >> + >> +static int has_vnet_hdr(int fd) >> +{ >> + unsigned int features = 0; >> + struct ifreq ifreq; >> + >> + if (ioctl(fd, TUNGETFEATURES,&features) == -1) { >> + return -errno; >> + } >> + >> + if (!(features& IFF_VNET_HDR)) { >> + return -ENOTSUP; >> + } >> + >> + if (ioctl(fd, TUNGETIFF,&ifreq) != -1 || errno != EBADFD) { >> + return -ENOTSUP; >> + } >> + >> + return 1; >> +} >> + >> +static void prep_ifreq(struct ifreq *ifr, const char *ifname) >> +{ >> + memset(ifr, 0, sizeof(*ifr)); >> + snprintf(ifr->ifr_name, IFNAMSIZ, "%s", ifname); >> +} >> + >> +static int send_fd(int c, int fd) >> +{ >> + char msgbuf[CMSG_SPACE(sizeof(fd))]; >> + struct msghdr msg = { >> + .msg_control = msgbuf, >> + .msg_controllen = sizeof(msgbuf), >> + }; >> + struct cmsghdr *cmsg; >> + struct iovec iov; >> + char req[1] = { 0x00 }; >> + >> + cmsg = CMSG_FIRSTHDR(&msg); >> + cmsg->cmsg_level = SOL_SOCKET; >> + cmsg->cmsg_type = SCM_RIGHTS; >> + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); >> + msg.msg_controllen = cmsg->cmsg_len; >> + >> + iov.iov_base = req; >> + iov.iov_len = sizeof(req); >> + >> + msg.msg_iov =&iov; >> + msg.msg_iovlen = 1; >> + memcpy(CMSG_DATA(cmsg),&fd, sizeof(fd)); >> + >> + return sendmsg(c,&msg, 0); >> +} >> + >> +int main(int argc, char **argv) >> +{ >> + struct ifreq ifr; >> + int fd, ctlfd, unixfd; >> + int use_vnet = 0; >> + int mtu; >> + const char *bridge; >> + char iface[IFNAMSIZ]; >> + int index; >> + >> + /* parse arguments */ >> + if (argc< 3 || argc> 4) { >> + fprintf(stderr, "Usage: %s [--use-vnet] BRIDGE FD\n", argv[0]); >> + return 1; >> + } >> + >> + index = 1; >> + if (strcmp(argv[index], "--use-vnet") == 0) { >> + use_vnet = 1; >> + index++; >> + if (argc == 3) { >> + fprintf(stderr, "invalid number of arguments\n"); >> + return -1; >> + } >> + } >> + >> + bridge = argv[index++]; >> + unixfd = atoi(argv[index++]); >> + >> + /* open a socket to use to control the network interfaces */ >> + ctlfd = socket(AF_INET, SOCK_STREAM, 0); >> + if (ctlfd == -1) { >> + fprintf(stderr, "failed to open control socket\n"); >> + return -errno; >> + } >> + >> + /* open the tap device */ >> + fd = open("/dev/net/tun", O_RDWR); >> + if (fd == -1) { >> + fprintf(stderr, "failed to open /dev/net/tun\n"); >> + return -errno; >> + } >> + >> + /* request a tap device, disable PI, and add vnet header support if >> + * requested and it's available. */ >> + prep_ifreq(&ifr, "tap%d"); >> + ifr.ifr_flags = IFF_TAP|IFF_NO_PI; >> + if (use_vnet&& has_vnet_hdr(fd)) { >> + ifr.ifr_flags |= IFF_VNET_HDR; >> + } >> + >> + if (ioctl(fd, TUNSETIFF,&ifr) == -1) { >> + fprintf(stderr, "failed to create tun device\n"); >> + return -errno; >> + } >> + >> + /* save tap device name */ >> + snprintf(iface, sizeof(iface), "%s", ifr.ifr_name); >> + >> + /* get the mtu of the bridge */ >> + prep_ifreq(&ifr, bridge); >> + if (ioctl(ctlfd, SIOCGIFMTU,&ifr) == -1) { >> + fprintf(stderr, "failed to get mtu of bridge `%s'\n", bridge); >> + return -errno; >> + } >> + >> + /* save mtu */ >> + mtu = ifr.ifr_mtu; >> + >> + /* set the mtu of the interface based on the bridge */ >> + prep_ifreq(&ifr, iface); >> + ifr.ifr_mtu = mtu; >> + if (ioctl(ctlfd, SIOCSIFMTU,&ifr) == -1) { >> + fprintf(stderr, "failed to set mtu of device `%s' to %d\n", >> + iface, mtu); >> + return -errno; >> + } >> + >> + /* add the interface to the bridge */ >> + prep_ifreq(&ifr, bridge); >> + ifr.ifr_ifindex = if_nametoindex(iface); >> + >> + if (ioctl(ctlfd, SIOCBRADDIF,&ifr) == -1) { >> + fprintf(stderr, "failed to add interface `%s' to bridge `%s'\n", >> + iface, bridge); >> + return -errno; >> + } >> + >> + /* bring the interface up */ >> + prep_ifreq(&ifr, iface); >> + if (ioctl(ctlfd, SIOCGIFFLAGS,&ifr) == -1) { >> + fprintf(stderr, "failed to get interface flags for `%s'\n", iface); >> + return -errno; >> + } >> + >> + ifr.ifr_flags |= IFF_UP; >> + if (ioctl(ctlfd, SIOCSIFFLAGS,&ifr) == -1) { >> + fprintf(stderr, "failed to set bring up interface `%s'\n", iface); >> + return -errno; >> + } >> + >> + /* write fd to the domain socket */ >> + if (send_fd(unixfd, fd) == -1) { >> + fprintf(stderr, "failed to write fd to unix socket\n"); >> + return -errno; >> + } >> + >> + /* ... */ >> + >> + /* profit! */ > > Sold! > > Signed-off-by: Anthony Liguori > > Please put my SoB before yours in the next submission. > > Regards, > > Anthony Liguori > Will do. >> + >> + close(fd); >> + >> + close(ctlfd); >> + >> + return 0; >> +} > > -- Regards, Corey