From: Pavel Borzenkov <pborzenkov@virtuozzo.com>
To: Eric Blake <eblake@redhat.com>
Cc: qemu-devel@nongnu.org, Kevin Wolf <kwolf@redhat.com>,
Paolo Bonzini <pbonzini@redhat.com>,
"open list:Block layer core" <qemu-block@nongnu.org>,
alex@alex.org.uk
Subject: Re: [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server
Date: Sat, 9 Apr 2016 12:39:21 +0300 [thread overview]
Message-ID: <20160409093921.GB21526@phobos> (raw)
In-Reply-To: <1460153158-21612-18-git-send-email-eblake@redhat.com>
On Fri, Apr 08, 2016 at 04:05:57PM -0600, Eric Blake wrote:
> RFC because there is still discussion on the NBD list about
> adding an NBD_OPT_ to let the client suggest server defaults
> related to scanning for zeroes during NBD_CMD_WRITE, which may
> tweak this patch.
>
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
> include/block/nbd.h | 5 ++++-
> nbd/server.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 64 insertions(+), 4 deletions(-)
>
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 4c57754..a1d955c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */
> #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
> #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
> #define NBD_FLAG_SEND_CLOSE (1 << 8) /* Send CLOSE */
>
> /* New-style handshake (global) flags, sent from server to client, and
> @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown */
>
> /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA (1 << 0)
> +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */
> +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */
>
> /* Supported request types */
> enum {
> @@ -101,6 +103,7 @@ enum {
> NBD_CMD_DISC = 2,
> NBD_CMD_FLUSH = 3,
> NBD_CMD_TRIM = 4,
> + NBD_CMD_WRITE_ZEROES = 5,
> NBD_CMD_CLOSE = 7,
> };
>
> diff --git a/nbd/server.c b/nbd/server.c
> index 2a6eaf2..09af915 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
> int rc;
> const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
> NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> - NBD_FLAG_SEND_CLOSE);
> + NBD_FLAG_SEND_CLOSE |
> + NBD_FLAG_SEND_WRITE_ZEROES);
> bool oldStyle;
> size_t len;
>
> @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
> goto out;
> }
>
> - if (request->flags & ~NBD_CMD_FLAG_FUA) {
> + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
> LOG("unsupported flags (got 0x%x)", request->flags);
> return -EINVAL;
> }
> @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque
> TRACE("Decoding type");
>
> command = request->type;
> - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
> + if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
> + !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
> + LOG("NO_HOLE flag valid only with write operation");
> + return -EINVAL;
> + }
> + if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
> + command == NBD_CMD_WRITE_ZEROES) {
> if (request->len > NBD_MAX_BUFFER_SIZE) {
> LOG("len (%" PRIu32" ) is larger than max len (%u)",
> request->len, NBD_MAX_BUFFER_SIZE);
> @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
> struct nbd_reply reply;
> ssize_t ret;
> uint32_t command;
> + int flags;
>
> TRACE("Reading request.");
> if (client->closing) {
> @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)
>
> TRACE("Writing to device");
>
> + /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
> + * make that override a server that is set to look for
> + * holes? */
> ret = blk_write(exp->blk,
> (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
> req->data, request.len / BDRV_SECTOR_SIZE);
> @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
> goto out;
> }
> break;
> + case NBD_CMD_WRITE_ZEROES:
> + TRACE("Request type is WRITE_ZEROES");
> +
> + if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> + TRACE("Server is read-only, return error");
> + reply.error = EROFS;
> + goto error_reply;
> + }
> +
> + TRACE("Writing to device");
> +
> + flags = 0;
> + if (request.flags & NBD_CMD_FLAG_FUA) {
> + flags |= BDRV_REQ_FUA;
> + }
> + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
> + /* FIXME: should this depend on whether the server is set to
> + look for holes? */
> + flags |= BDRV_REQ_MAY_UNMAP;
> + }
> + ret = blk_write_zeroes(exp->blk,
> + ((request.from + exp->dev_offset) /
> + BDRV_SECTOR_SIZE),
> + request.len / BDRV_SECTOR_SIZE,
> + flags);
blk_write_zeroes() ignores flags right now. Something like this is
required for this code to work:
diff --git a/block/block-backend.c b/block/block-backend.c
index f0470bc..3ae4157 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -827,7 +827,7 @@ int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
{
return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
- BDRV_REQ_ZERO_WRITE);
+ BDRV_REQ_ZERO_WRITE | flags);
}
static void error_callback_bh(void *opaque)
> + if (ret < 0) {
> + LOG("writing to file failed");
> + reply.error = -ret;
> + goto error_reply;
> + }
> +
> + /* FIXME: do we need FUA flush here, if we also passed it to
> + * blk_write_zeroes? */
> + if (request.flags & NBD_CMD_FLAG_FUA) {
> + ret = blk_co_flush(exp->blk);
> + if (ret < 0) {
> + LOG("flush failed");
> + reply.error = -ret;
> + goto error_reply;
> + }
> + }
> +
> + if (nbd_co_send_reply(req, &reply, 0) < 0) {
> + goto out;
> + }
> + break;
> case NBD_CMD_DISC:
> TRACE("Request type is DISCONNECT");
> goto out;
> --
> 2.5.5
>
>
next prev parent reply other threads:[~2016-04-09 9:39 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-04-08 22:05 [Qemu-devel] [RFC PATCH 00/18] NBD protocol additions Eric Blake
2016-04-08 22:05 ` [Qemu-devel] [PATCH 01/18] nbd: Don't kill server on client that doesn't request TLS Eric Blake
2016-04-09 10:28 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 02/18] nbd: Don't fail handshake on NBD_OPT_LIST descriptions Eric Blake
2016-04-09 10:30 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 03/18] nbd: More debug typo fixes, use correct formats Eric Blake
2016-04-09 10:30 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 04/18] nbd: Detect servers that send unexpected error values Eric Blake
2016-04-09 10:31 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 05/18] nbd: Reject unknown request flags Eric Blake
2016-04-09 10:32 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 06/18] nbd: Avoid magic number for NBD max name size Eric Blake
2016-04-09 10:35 ` Alex Bligh
2016-04-09 22:07 ` Eric Blake
2016-04-08 22:05 ` [Qemu-devel] [PATCH 07/18] nbd: Treat flags vs. command type as separate fields Eric Blake
2016-04-09 10:37 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 08/18] nbd: Limit nbdflags to 16 bits Eric Blake
2016-04-09 10:37 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 09/18] nbd: Share common reply-sending code in server Eric Blake
2016-04-09 10:38 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 10/18] nbd: Share common option-sending code in client Eric Blake
2016-04-09 10:38 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 11/18] nbd: Let client skip portions of server reply Eric Blake
2016-04-09 10:39 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 12/18] nbd: Less allocation during NBD_OPT_LIST Eric Blake
2016-04-09 10:41 ` Alex Bligh
2016-04-09 22:24 ` Eric Blake
2016-04-08 22:05 ` [Qemu-devel] [PATCH 13/18] nbd: Support shorter handshake Eric Blake
2016-04-09 10:42 ` Alex Bligh
2016-04-09 22:27 ` Eric Blake
2016-04-08 22:05 ` [Qemu-devel] [PATCH 14/18] nbd: Implement NBD_OPT_GO on client Eric Blake
2016-04-09 10:47 ` Alex Bligh
2016-04-09 22:38 ` Eric Blake
2016-04-08 22:05 ` [Qemu-devel] [PATCH 15/18] nbd: Implement NBD_OPT_GO on server Eric Blake
2016-04-09 10:48 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [PATCH 16/18] nbd: Support NBD_CMD_CLOSE Eric Blake
2016-04-09 10:50 ` Alex Bligh
2016-04-09 23:12 ` Eric Blake
2016-04-10 5:28 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server Eric Blake
2016-04-09 9:39 ` Pavel Borzenkov [this message]
2016-04-09 10:54 ` Alex Bligh
2016-04-08 22:05 ` [Qemu-devel] [RFC PATCH 18/18] nbd: Implement NBD_CMD_WRITE_ZEROES on client Eric Blake
2016-04-09 10:57 ` Alex Bligh
2016-04-09 11:52 ` Pavel Borzenkov
2016-04-09 23:17 ` Eric Blake
2016-04-10 5:27 ` Alex Bligh
2016-04-09 10:21 ` [Qemu-devel] [Nbd] [RFC PATCH 00/18] NBD protocol additions Wouter Verhelst
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160409093921.GB21526@phobos \
--to=pborzenkov@virtuozzo.com \
--cc=alex@alex.org.uk \
--cc=eblake@redhat.com \
--cc=kwolf@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.