From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:56967) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vslwg-0003hN-2X for qemu-devel@nongnu.org; Mon, 16 Dec 2013 23:07:51 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Vslwa-0004aQ-Vy for qemu-devel@nongnu.org; Mon, 16 Dec 2013 23:07:46 -0500 Received: from mx1.redhat.com ([209.132.183.28]:47380) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vslwa-0004Yz-LP for qemu-devel@nongnu.org; Mon, 16 Dec 2013 23:07:40 -0500 Message-ID: <52AFCDF4.9020804@redhat.com> Date: Tue, 17 Dec 2013 12:07:16 +0800 From: Fam Zheng MIME-Version: 1.0 References: <1387208069-9302-1-git-send-email-pl@kamp.de> In-Reply-To: <1387208069-9302-1-git-send-email-pl@kamp.de> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH] block: add native support for NFS List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Peter Lieven , qemu-devel@nongnu.org Cc: kwolf@redhat.com, pbonzini@redhat.com, stefanha@redhat.com, ronniesahlberg@gmail.com On 2013=E5=B9=B412=E6=9C=8816=E6=97=A5 23:34, Peter Lieven wrote: > This patch adds native support for accessing images on NFS shares witho= ut > the requirement to actually mount the entire NFS share on the host. > > NFS Images can simply be specified by an url of the form: > nfs://// > > For example: > qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2 > > You need libnfs from Ronnie Sahlberg available at: > git://github.com/sahlberg/libnfs.git > for this to work. > > During configure it is automatically probed for libnfs and support > is enabled on-the-fly. You can forbid or enforce libnfs support > with --disable-libnfs or --enable-libnfs respectively. 
> > Due to NFS restrictions you might need to execute your binaries > as root, allow them to open priviledged ports (<1024) or specify > insecure option on the NFS server. > > Signed-off-by: Peter Lieven Looks nice! Thanks for the work! > --- > MAINTAINERS | 5 + > block/Makefile.objs | 1 + > block/nfs.c | 420 ++++++++++++++++++++++++++++++++++++++++++= +++++++++ > configure | 38 +++++ > 4 files changed, 464 insertions(+) > create mode 100644 block/nfs.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index c19133f..f53d184 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -899,6 +899,11 @@ M: Peter Lieven > S: Supported > F: block/iscsi.c > > +NFS > +M: Peter Lieven > +S: Maintained > +F: block/nfs.c > + > SSH > M: Richard W.M. Jones > S: Supported > diff --git a/block/Makefile.objs b/block/Makefile.objs > index f43ecbc..1bac94e 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) +=3D linux-aio.o > ifeq ($(CONFIG_POSIX),y) > block-obj-y +=3D nbd.o sheepdog.o > block-obj-$(CONFIG_LIBISCSI) +=3D iscsi.o > +block-obj-$(CONFIG_LIBISCSI) +=3D nfs.o > block-obj-$(CONFIG_CURL) +=3D curl.o > block-obj-$(CONFIG_RBD) +=3D rbd.o > block-obj-$(CONFIG_GLUSTERFS) +=3D gluster.o > diff --git a/block/nfs.c b/block/nfs.c > new file mode 100644 > index 0000000..d6cb4c0 > --- /dev/null > +++ b/block/nfs.c > @@ -0,0 +1,420 @@ > +/* > + * QEMU Block driver for native access to files on NFS shares > + * > + * Copyright (c) 2013 Peter Lieven > + * > + * Permission is hereby granted, free of charge, to any person obtaini= ng a copy > + * of this software and associated documentation files (the "Software"= ), to deal > + * in the Software without restriction, including without limitation t= he rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/o= r sell > + * copies of the Software, and to permit persons to whom the Software = is > + * furnished to do so, subject to the following conditions: > + * > + * 
The above copyright notice and this permission notice shall be incl= uded in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXP= RESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABI= LITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT S= HALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES O= R OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARI= SING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALI= NGS IN > + * THE SOFTWARE. > + */ > + > +#include "config-host.h" > + > +#include > +#include > +#include "qemu-common.h" > +#include "qemu/config-file.h" > +#include "qemu/error-report.h" > +#include "block/block_int.h" > +#include "trace.h" > +#include "block/scsi.h" > +#include "qemu/iov.h" > +#include "sysemu/sysemu.h" > +#include "qmp-commands.h" Copied from block/iscsi.c? SCSI and QMP are not necessary for this file.=20 And maybe also arpa/inet.h, I'm not sure about that though. > + > +#include > +#include > +#include > +#include > + > +typedef struct nfsclient { > + struct nfs_context *context; > + struct nfsfh *fh; > + int events; > + bool has_zero_init; > + int64_t allocated_file_size; > + QEMUBH *close_bh; This is unused. > +} nfsclient; Please use CamelCase for type names... > + > +typedef struct NFSTask { > + int status; > + int complete; > + QEMUIOVector *iov; > + Coroutine *co; > + QEMUBH *bh; > +} NFSTask; as you do with this. > + > +static void nfs_process_read(void *arg); > +static void nfs_process_write(void *arg); > + > +static void nfs_set_events(nfsclient *client) > +{ > + int ev; > + /* We always register a read handler. */ > + ev =3D POLLIN; > + ev |=3D nfs_which_events(client->context); > + if (ev !=3D client->events) { > + qemu_aio_set_fd_handler(nfs_get_fd(client->context), > + nfs_process_read, > + (ev & POLLOUT) ? 
nfs_process_write : NULL, > + client); > + > + } > + client->events =3D ev; > +} > + > +static void nfs_process_read(void *arg) > +{ > + nfsclient *client =3D arg; > + nfs_service(client->context, POLLIN); > + nfs_set_events(client); > +} > + > +static void nfs_process_write(void *arg) > +{ > + nfsclient *client =3D arg; > + nfs_service(client->context, POLLOUT); > + nfs_set_events(client); > +} > + > +static void nfs_co_init_task(nfsclient *client, NFSTask *Task) > +{ > + *Task =3D (NFSTask) { Please use lower case for variable names. > + .co =3D qemu_coroutine_self(), > + }; > +} > + > +static void nfs_co_generic_bh_cb(void *opaque) > +{ > + NFSTask *Task =3D opaque; > + qemu_bh_delete(Task->bh); > + qemu_coroutine_enter(Task->co, NULL); > +} > + > +static void nfs_co_generic_cb(int status, struct nfs_context *nfs, voi= d *data, void *private_data) This line is too long. Please use scripts/checkpatch.pl to check the=20 coding style. (Some other lines have trailing whitespaces) > +{ > + NFSTask *Task =3D private_data; > + Task->complete =3D 1; > + Task->status =3D status; > + if (Task->status > 0 && Task->iov) { > + if (Task->status =3D=3D Task->iov->size) { > + qemu_iovec_from_buf(Task->iov, 0, data, status); > + } else { > + Task->status =3D -1; > + } > + } > + if (Task->co) { > + Task->bh =3D qemu_bh_new(nfs_co_generic_bh_cb, Task); > + qemu_bh_schedule(Task->bh); > + } > +} > + > +static int coroutine_fn nfs_co_readv(BlockDriverState *bs, > + int64_t sector_num, int nb_sector= s, > + QEMUIOVector *iov) > +{ > + nfsclient *client =3D bs->opaque; > + struct NFSTask Task; > + > + nfs_co_init_task(client, &Task); > + Task.iov =3D iov; > + > + if (nfs_pread_async(client->context, client->fh, > + sector_num * BDRV_SECTOR_SIZE, > + nb_sectors * BDRV_SECTOR_SIZE, > + nfs_co_generic_cb, &Task) !=3D 0) { > + return -EIO; > + } > + > + while (!Task.complete) { > + nfs_set_events(client); > + qemu_coroutine_yield(); > + } > + > + if (Task.status !=3D nb_sectors * 
BDRV_SECTOR_SIZE) { > + return -EIO; In the error case, can Task.status contain an error number other than -EIO? If so, would it be useful to return that value? > + } > + > + return 0; > +} > + > +static int coroutine_fn nfs_co_writev(BlockDriverState *bs, > + int64_t sector_num, int nb_sec= tors, > + QEMUIOVector *iov) > +{ > + nfsclient *client =3D bs->opaque; > + struct NFSTask Task; > + char *buf =3D NULL; > + > + nfs_co_init_task(client, &Task); > + > + buf =3D g_malloc(nb_sectors * BDRV_SECTOR_SIZE); > + qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE); > + > + if (nfs_pwrite_async(client->context, client->fh, > + sector_num * BDRV_SECTOR_SIZE, > + nb_sectors * BDRV_SECTOR_SIZE, > + buf, nfs_co_generic_cb, &Task) !=3D 0) { > + g_free(buf); > + return -EIO; > + } > + > + while (!Task.complete) { > + nfs_set_events(client); > + qemu_coroutine_yield(); > + } > + > + g_free(buf); > + > + if (Task.status !=3D nb_sectors * BDRV_SECTOR_SIZE) { > + return -EIO; > + } > + > + bs->total_sectors =3D MAX(bs->total_sectors, sector_num + nb_secto= rs); > + client->allocated_file_size =3D -ENOTSUP; Why does allocated_file_size become not supported after a write? 
> + return 0; > +} > + > +static int coroutine_fn nfs_co_flush(BlockDriverState *bs) > +{ > + nfsclient *client =3D bs->opaque; > + struct NFSTask Task; > + > + nfs_co_init_task(client, &Task); > + > + if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb= , &Task) !=3D 0) { > + return -EIO; > + } > + > + while (!Task.complete) { > + nfs_set_events(client); > + qemu_coroutine_yield(); > + } > + > + if (Task.status !=3D 0) { > + return -EIO; > + } > + > + return 0; > +} > + > +static QemuOptsList runtime_opts =3D { > + .name =3D "nfs", > + .head =3D QTAILQ_HEAD_INITIALIZER(runtime_opts.head), > + .desc =3D { > + { > + .name =3D "filename", > + .type =3D QEMU_OPT_STRING, > + .help =3D "URL to the NFS file", > + }, > + { /* end of list */ } > + }, > +}; > + > +static void nfs_file_close(BlockDriverState *bs) > +{ > + nfsclient *client =3D bs->opaque; > + if (client->context) { > + if (client->fh) { > + nfs_close(client->context, client->fh); > + } > + qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NUL= L, NULL); > + nfs_destroy_context(client->context); > + } > + memset(client, 0, sizeof(nfsclient)); > +} > + > + > +static int nfs_file_open_common(BlockDriverState *bs, QDict *options, = int flags, > + int open_flags, Error **errp) > +{ > + nfsclient *client =3D bs->opaque; > + const char *filename; > + int ret =3D 0; > + QemuOpts *opts; > + Error *local_err =3D NULL; > + char *server =3D NULL, *path =3D NULL, *file =3D NULL, *strp; > + struct stat st; > + > + opts =3D qemu_opts_create_nofail(&runtime_opts); > + qemu_opts_absorb_qdict(opts, options, &local_err); > + if (error_is_set(&local_err)) { > + qerror_report_err(local_err); > + error_free(local_err); > + ret =3D -EINVAL; > + goto fail; > + } > + > + filename =3D qemu_opt_get(opts, "filename"); > + > + client->context =3D nfs_init_context(); > + > + if (client->context =3D=3D NULL) { > + error_setg(errp, "Failed to init NFS context"); > + ret =3D -EINVAL; > + goto fail; > + } > + > + 
server =3D g_strdup(filename + 6); Please check that the length of filename is longer than 6 before accessing filename[6]. > + if (server[0] =3D=3D '/' || server[0] =3D=3D '\0') { > + error_setg(errp, "Invalid server in URL"); > + ret =3D -EINVAL; > + goto fail; > + } > + strp =3D strchr(server, '/'); > + if (strp =3D=3D NULL) { > + error_setg(errp, "Invalid URL specified.\n"); > + ret =3D -EINVAL; > + goto fail; > + } > + path =3D g_strdup(strp); > + *strp =3D 0; > + strp =3D strrchr(path, '/'); > + if (strp =3D=3D NULL) { > + error_setg(errp, "Invalid URL specified.\n"); > + ret =3D -EINVAL; > + goto fail; > + } > + file =3D g_strdup(strp); > + *strp =3D 0; > + > + if (nfs_mount(client->context, server, path) !=3D 0) { > + error_setg(errp, "Failed to mount nfs share: %s", > + nfs_get_error(client->context)); > + ret =3D -EINVAL; > + goto fail; > + } > + > + if (open_flags & O_CREAT) { > + if (nfs_creat(client->context, file, 0600, &client->fh) !=3D 0= ) { > + error_setg(errp, "Failed to create file: %s", > + nfs_get_error(client->context)); > + ret =3D -EINVAL; > + goto fail; > + } > + } else { > + open_flags =3D (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY; > + if (nfs_open(client->context, file, open_flags, &client->fh) != =3D 0) { > + error_setg(errp, "Failed to open file : %s", > + nfs_get_error(client->context)); > + ret =3D -EINVAL; > + goto fail; > + } > + } > + > + if (nfs_fstat(client->context, client->fh, &st) !=3D 0) { > + error_setg(errp, "Failed to fstat file: %s", > + nfs_get_error(client->context)); > + ret =3D -EIO; > + goto fail; > + } > + > + bs->total_sectors =3D st.st_size / BDRV_SECTOR_SIZE; Please use DIV_ROUND_UP(). Otherwise the remainder in the last sector can't be read. 
> + client->has_zero_init =3D S_ISREG(st.st_mode); > + client->allocated_file_size =3D st.st_blocks * st.st_blksize; > + goto out; > +fail: > + nfs_file_close(bs); > +out: > + g_free(server); > + g_free(path); > + g_free(file); > + return ret; > +} > + > +static int nfs_file_open(BlockDriverState *bs, QDict *options, int fla= gs, > + Error **errp) { > + return nfs_file_open_common(bs, options, flags, 0, errp); > +} > + > +static int nfs_file_create(const char *filename, QEMUOptionParameter *= options, > + Error **errp) > +{ > + int ret =3D 0; > + int64_t total_size =3D 0; > + BlockDriverState *bs; > + nfsclient *client =3D NULL; > + QDict *bs_options; > + > + bs =3D bdrv_new(""); > + > + /* Read out options */ > + while (options && options->name) { > + if (!strcmp(options->name, "size")) { > + total_size =3D options->value.n / BDRV_SECTOR_SIZE; Why divide by BDRV_SECTOR_SIZE only to ... > + } > + options++; > + } > + > + bs->opaque =3D g_malloc0(sizeof(struct nfsclient)); > + client =3D bs->opaque; > + > + bs_options =3D qdict_new(); > + qdict_put(bs_options, "filename", qstring_from_str(filename)); > + ret =3D nfs_file_open_common(bs, bs_options, 0, O_CREAT, NULL); > + QDECREF(bs_options); > + if (ret !=3D 0) { > + goto out; > + } > + ret =3D nfs_ftruncate(client->context, client->fh, total_size * BD= RV_SECTOR_SIZE); ... multiply it back later? > + if (ret !=3D 0) { > + ret =3D -ENOSPC;; There is an extra semicolon. And is it right to hard code ENOSPC here,=20 without checking value of ret? > + } > +out: > + nfs_file_close(bs); > + g_free(bs->opaque); > + bs->opaque =3D NULL; > + bdrv_unref(bs); > + return ret; > +} > + Fam