All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tiwei Bie <tiwei.bie@intel.com>
To: JinYu <jin.yu@intel.com>
Cc: dev@dpdk.org, changpeng.liu@intel.com, zhihong.wang@intel.com,
	maxime.coquelin@redhat.com, Lin Li <lilin24@baidu.com>,
	Xun Ni <nixun@baidu.com>, Yu Zhang <zhangyu31@baidu.com>
Subject: Re: [dpdk-dev] [PATCH v5 1/2] vhost: support inflight share memory protocol feature
Date: Mon, 26 Aug 2019 16:28:11 +0800	[thread overview]
Message-ID: <20190826082811.GA30331@___> (raw)
In-Reply-To: <20190806182500.22320-2-jin.yu@intel.com>

On Wed, Aug 07, 2019 at 02:24:59AM +0800, JinYu wrote:
> This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
> buffer between qemu and backend.
> 
> Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
> shared buffer from backend. Then qemu should send it back
> through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user.
> 
> This shared buffer is used to process inflight I/O when backend
> reconnect.
> 
> Signed-off-by: Lin Li <lilin24@baidu.com>
> Signed-off-by: Xun Ni <nixun@baidu.com>
> Signed-off-by: Yu Zhang <zhangyu31@baidu.com>
> Signed-off-by: JinYu <jin.yu@intel.com>

s/JinYu/Jin Yu/

> ---
> v1 - specify the APIs are split-ring only
> v2 - fix APIs and judge split or packed
> v3 - Add rte_vhost_ prefix and fix one issue.
> v4 - add the packed ring support
> v5 - revise get_vring_base func depend on Tiwei's suggestion
> ---
>  lib/librte_vhost/rte_vhost.h           | 255 ++++++++++++++-
>  lib/librte_vhost/rte_vhost_version.map |  12 +
>  lib/librte_vhost/vhost.c               | 396 +++++++++++++++++++++-
>  lib/librte_vhost/vhost.h               |  61 ++--
>  lib/librte_vhost/vhost_user.c          | 437 ++++++++++++++++++++++++-
>  lib/librte_vhost/vhost_user.h          |  13 +-
>  6 files changed, 1128 insertions(+), 46 deletions(-)

This patch is too big, please divide it into small patches.
E.g. rte_vhost_vq_is_packed() can be introduced in a separate patch.

> 
> diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
> index 0226b3eff..3f01429b1 100644
> --- a/lib/librte_vhost/rte_vhost.h
> +++ b/lib/librte_vhost/rte_vhost.h
> @@ -11,6 +11,7 @@
>   */
>  
>  #include <stdint.h>
> +#include <stdbool.h>
>  #include <sys/eventfd.h>
>  
>  #include <rte_memory.h>
> @@ -71,6 +72,10 @@ extern "C" {
>  #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
>  #endif
>  
> +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
> +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
> +#endif
> +
>  /** Indicate whether protocol features negotiation is supported. */
>  #ifndef VHOST_USER_F_PROTOCOL_FEATURES
>  #define VHOST_USER_F_PROTOCOL_FEATURES	30
> @@ -98,10 +103,92 @@ struct rte_vhost_memory {
>  	struct rte_vhost_mem_region regions[];
>  };
>  
> +struct inflight_desc_packed {
> +	uint8_t inflight;
> +	uint8_t padding;
> +	uint16_t next;
> +	uint16_t last;
> +	uint16_t num;
> +	uint64_t counter;
> +	uint16_t id;
> +	uint16_t flags;
> +	uint32_t len;
> +	uint64_t addr;
> +};

Why struct inflight_desc_split doesn't have to be part of
vhost API but struct inflight_desc_packed has to be?

> +
> +struct inflight_info_packed {
> +	uint64_t features;
> +	uint16_t version;
> +	uint16_t desc_num;
> +	uint16_t free_head;
> +	uint16_t old_free_head;
> +	uint16_t used_idx;
> +	uint16_t old_used_idx;
> +	uint8_t used_wrap_counter;
> +	uint8_t old_used_wrap_counter;
> +	uint8_t padding[7];
> +	struct inflight_desc_packed desc[0];
> +};
> +
> +struct rte_vhost_resubmit_desc {
> +	uint16_t index;
> +	uint64_t counter;
> +};
> +
> +struct rte_vhost_resubmit_info {
> +	struct rte_vhost_resubmit_desc	*resubmit_list;
> +	uint16_t resubmit_num;
> +};
> +
> +struct rte_vhost_ring_inflight {
> +	union {
> +		struct inflight_info_split *inflight_split;

struct inflight_info_split is used but not declared.

> +		struct inflight_info_packed *inflight_packed;
> +	};
> +
> +	struct rte_vhost_resubmit_info *resubmit_inflight;
> +};
> +
> +/*
> + * Declare below packed ring defines unconditionally
> + * as Kernel header might use different names.
> + */
> +#ifndef VIRTIO_F_RING_PACKED
> +#define VIRTIO_F_RING_PACKED 34
> +
> +#define VRING_DESC_F_AVAIL	(1ULL << 7)
> +#define VRING_DESC_F_USED	(1ULL << 15)

You shouldn't put above macros under VIRTIO_F_RING_PACKED as
kernel doesn't define them. And the build will be broken when
kernel defines VIRTIO_F_RING_PACKED.

Besides, it seems not a good idea to make them parts of vhost
API.

> +
> +struct vring_packed_desc {
> +	uint64_t addr;
> +	uint32_t len;
> +	uint16_t id;
> +	uint16_t flags;
> +};
> +
> +#define VRING_EVENT_F_ENABLE 0x0
> +#define VRING_EVENT_F_DISABLE 0x1
> +#define VRING_EVENT_F_DESC 0x2
> +struct vring_packed_desc_event {
> +	uint16_t off_wrap;
> +	uint16_t flags;
> +};

You just need to declare instead of defining
vring_packed_desc/vring_packed_desc_event in vhost header.

> +#endif
> +
>  struct rte_vhost_vring {
> -	struct vring_desc	*desc;
> -	struct vring_avail	*avail;
> -	struct vring_used	*used;
> +	union {
> +		struct vring_desc	*desc;
> +		struct vring_packed_desc *desc_packed;
> +	};
> +	union {
> +		struct vring_avail	*avail;
> +		struct vring_packed_desc_event *driver_event;
> +	};
> +	union {
> +		struct vring_used	*used;
> +		struct vring_packed_desc_event *device_event;
> +	};
> +
>  	uint64_t		log_guest_addr;
>  
>  	/** Deprecated, use rte_vhost_vring_call() instead. */
> @@ -603,6 +690,33 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>   */
>  int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
>  
> +/**
> + * Get vq is packed
> + *
> + * @param vid
> + *  vhost device ID
> + * @return
> + *  0 on success, -1 on failure

Return value should be true or false.

> + */
> +int __rte_experimental
> +rte_vhost_vq_is_packed(int vid);
> +
> +/**
> + * Get guest inflight vring info, including inflight ring and resubmit list.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param vring
> + *  the structure to hold the requested inflight vring info
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental

You should put the `__rte_experimental` tag at the beginning.
See this for more details:
http://git.dpdk.org/dpdk/commit/lib/librte_vhost?id=18218713bf4248c4c6b97a12231e7d59b8a86865

> +rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
> +	struct rte_vhost_ring_inflight *vring);
> +
>  /**
>   * Get guest vring info, including the vring address, vring size, etc.
>   *
> @@ -616,7 +730,7 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
>   *  0 on success, -1 on failure
>   */
>  int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
> -			      struct rte_vhost_vring *vring);
> +	struct rte_vhost_vring *vring);
>  
>  /**
>   * Notify the guest that used descriptors have been added to the vring.  This
> @@ -631,6 +745,112 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
>   */
>  int rte_vhost_vring_call(int vid, uint16_t vring_idx);
>  
> +/**
> + * set split inflight descriptor.

Please capitalize the first letter consistently.

> + *
> + * This function save descriptors that has been comsumed in available
> + * ring
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
> +	uint16_t idx);
> +
> +/**
> + * set packed inflight descriptor and get corresponding inflight entry
> + *
> + * This function save descriptors that has been comsumed
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
> +	uint16_t head, uint16_t last, uint16_t *inflight_entry);
> +
> +/**
> + * save the head of list that the last batch of used descriptors.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  descriptor entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_last_inflight_io_split(int vid,
> +	uint16_t vring_idx, uint16_t idx);
> +
> +/**
> + * update the inflight free_head, used_idx and used_wrap_counter.
> + *
> + * This function will update status first before updating descriptors
> + * to used
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_last_inflight_io_packed(int vid,
> +	uint16_t vring_idx, uint16_t head);
> +
> +/**
> + * clear the split inflight status.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param last_used_idx
> + *  last used idx of used ring
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
> +	uint16_t last_used_idx, uint16_t idx);
> +
> +/**
> + * clear the packed inflight status.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param head
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
> +	uint16_t head);
> +
>  /**
>   * Get vhost RX queue avail count.
>   *
> @@ -656,7 +876,8 @@ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
>   *  0 on success, -1 on failure
>   */
>  int __rte_experimental
> -rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
> +rte_vhost_get_log_base(int vid, uint64_t *log_base,
> +	uint64_t *log_size);

This change isn't necessary.

>  
>  /**
>   * Get last_avail/used_idx of the vhost virtqueue
> @@ -676,6 +897,28 @@ int __rte_experimental
>  rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>  		uint16_t *last_avail_idx, uint16_t *last_used_idx);
>  
> +/**
> + * Get last_avail/last_used of the vhost virtqueue
> + *
> + * This function is designed for the reconnection and it's specific for
> + * the packed ring as we can get the two parameters from the inflight
> + * queueregion
> + *
> + * @param vid
> + *  vhost device ID
> + * @param queue_id
> + *  vhost queue index
> + * @param last_avail_idx
> + *  vhost last_avail_idx to get
> + * @param last_used_idx
> + *  vhost last_used_idx to get
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_get_vring_base_from_inflight(int vid,
> +	uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx);
> +
>  /**
>   * Set last_avail/used_idx of the vhost virtqueue
>   *
> @@ -692,7 +935,7 @@ rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>   */
>  int __rte_experimental
>  rte_vhost_set_vring_base(int vid, uint16_t queue_id,
> -		uint16_t last_avail_idx, uint16_t last_used_idx);
> +	uint16_t last_avail_idx, uint16_t last_used_idx);
>  
>  /**
>   * Register external message handling callbacks
> diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
> index 5f1d4a75c..99f1134ea 100644
> --- a/lib/librte_vhost/rte_vhost_version.map
> +++ b/lib/librte_vhost/rte_vhost_version.map
> @@ -87,4 +87,16 @@ EXPERIMENTAL {
>  	rte_vdpa_relay_vring_used;
>  	rte_vhost_extern_callback_register;
>  	rte_vhost_driver_set_protocol_features;
> +	rte_vhost_set_inflight_desc_split;
> +	rte_vhost_clr_inflight_desc_split;
> +	rte_vhost_set_last_inflight_io_split;
> +	rte_vhost_get_vhost_ring_inflight;
> +	rte_vhost_vq_is_packed;
> +	rte_vhost_set_inflight_desc_packed;
> +	rte_vhost_clr_inflight_desc_packed;
> +	rte_vhost_set_last_inflight_io_packed;
> +	rte_vhost_get_vring_base_counter;
> +	rte_vhost_get_vring_base_from_inflight;
> +	rte_vhost_get_vring_base_counter_from_inflight;
> +	rte_vhost_set_vring_base_counter;
>  };



> @@ -939,13 +1270,48 @@ int rte_vhost_get_log_base(int vid, uint64_t *log_base,
>  int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>  		uint16_t *last_avail_idx, uint16_t *last_used_idx)
>  {
> +	struct vhost_virtqueue *vq;
>  	struct virtio_net *dev = get_device(vid);
>  
>  	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
>  		return -1;
>  
> -	*last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx;
> -	*last_used_idx = dev->virtqueue[queue_id]->last_used_idx;
> +	vq = dev->virtqueue[queue_id];
> +	if (!vq)
> +		return -1;
> +
> +	if (vq_is_packed(dev)) {
> +		*last_avail_idx = (vq->avail_wrap_counter << 15) |
> +					vq->last_avail_idx;
> +		*last_used_idx = (vq->used_wrap_counter << 15) |
> +					vq->last_used_idx;
> +	} else {
> +		*last_avail_idx = vq->last_avail_idx;
> +		*last_used_idx = vq->last_used_idx;
> +	}

This should be a fix. Without this change, the user of this API
can't use packed ring properly.

Please do this in a separate patch, add a fixes line and Cc stable.

> +
> +	return 0;
> +}
> +
> +int rte_vhost_get_vring_base_from_inflight(int vid,
> +	uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx)
> +{
> +	struct inflight_info_packed *inflight_info;
> +	struct virtio_net *dev = get_device(vid);
> +
> +	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
> +		return -1;
> +
> +	if (!vq_is_packed(dev))
> +		return -1;
> +
> +	inflight_info = dev->virtqueue[queue_id]->inflight_packed;
> +	if (!inflight_info)
> +		return -1;
> +
> +	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
> +				inflight_info->old_used_idx;
> +	*last_used_idx = *last_avail_idx;
>  
>  	return 0;
>  }
> @@ -953,13 +1319,25 @@ int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>  int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
>  		uint16_t last_avail_idx, uint16_t last_used_idx)
>  {
> +	struct vhost_virtqueue *vq;
>  	struct virtio_net *dev = get_device(vid);
>  
>  	if (!dev)
>  		return -1;
>  
> -	dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx;
> -	dev->virtqueue[queue_id]->last_used_idx = last_used_idx;
> +	vq = dev->virtqueue[queue_id];
> +	if (!vq)
> +		return -1;
> +
> +	if (vq_is_packed(dev)) {
> +		vq->last_avail_idx = last_avail_idx & 0x7fff;
> +		vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
> +		vq->last_used_idx = last_used_idx & 0x7fff;
> +		vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
> +	} else {
> +		vq->last_avail_idx = last_avail_idx;
> +		vq->last_used_idx = last_used_idx;
> +	}

Ditto.

>  
>  	return 0;
>  }
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 884befa85..e9d0b983d 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -88,6 +88,22 @@ struct vring_used_elem_packed {
>  	uint32_t count;
>  };
>  
> +struct inflight_desc_split {
> +	uint8_t		inflight;
> +	uint8_t		padding[5];
> +	uint16_t	next;
> +	uint64_t	counter;
> +};
> +
> +struct inflight_info_split {
> +	uint64_t		features;
> +	uint16_t		version;
> +	uint16_t		desc_num;
> +	uint16_t		last_inflight_io;
> +	uint16_t		used_idx;
> +	struct inflight_desc_split desc[0];
> +};
> +

  reply	other threads:[~2019-08-26  8:30 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20190731204050.40633>
2019-08-06 18:24 ` [dpdk-dev] [PATCH v5 0/2] vhost: support inflight share memory protocol feature JinYu
2019-08-06 18:24   ` [dpdk-dev] [PATCH v5 1/2] " JinYu
2019-08-26  8:28     ` Tiwei Bie [this message]
2019-08-06 18:25   ` [dpdk-dev] [PATCH v5 2/2] vhost: add vhost-user-blk example which support inflight JinYu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190826082811.GA30331@___ \
    --to=tiwei.bie@intel.com \
    --cc=changpeng.liu@intel.com \
    --cc=dev@dpdk.org \
    --cc=jin.yu@intel.com \
    --cc=lilin24@baidu.com \
    --cc=maxime.coquelin@redhat.com \
    --cc=nixun@baidu.com \
    --cc=zhangyu31@baidu.com \
    --cc=zhihong.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.