All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tiwei Bie <tiwei.bie@intel.com>
To: mst@redhat.com, jasowang@redhat.com,
	virtualization@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	virtio-dev@lists.oasis-open.org
Cc: wexu@redhat.com, jfreimann@redhat.com,
	maxime.coquelin@redhat.com, tiwei.bie@intel.com
Subject: [virtio-dev] [PATCH net-next v3 10/13] virtio_ring: introduce packed ring support
Date: Wed, 21 Nov 2018 18:03:27 +0800	[thread overview]
Message-ID: <20181121100330.24846-11-tiwei.bie@intel.com> (raw)
In-Reply-To: <20181121100330.24846-1-tiwei.bie@intel.com>

Introduce packed ring support. A packed ring can only be
created by vring_create_virtqueue(), and each chunk of the
packed ring is allocated individually. A packed ring cannot
currently be created on preallocated memory by
vring_new_virtqueue() or the like.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/virtio/virtio_ring.c | 900 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 870 insertions(+), 30 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index aafe1969b45e..b63eee2034e7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -83,9 +83,26 @@ struct vring_desc_state_split {
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
 };
 
+/*
+ * Driver-private bookkeeping for the packed ring, one entry per buffer id.
+ * Entries that are not in use are chained through @next, starting at
+ * vq->free_head.
+ */
+struct vring_desc_state_packed {
+	void *data;			/* Caller's token; NULL while the entry is free. */
+	struct vring_packed_desc *indir_desc; /* Indirect table, or the caller's ctx when indirect is off. */
+	u16 num;			/* Number of ring descriptors the buffer occupies. */
+	u16 next;			/* Id of the next desc state in the list. */
+	u16 last;			/* Id of the last desc state used by this buffer. */
+};
+
+/*
+ * Driver-private copy of the fields needed to unmap a descriptor later.
+ * It is only filled in when the queue uses the DMA API.
+ */
+struct vring_desc_extra_packed {
+	dma_addr_t addr;		/* Buffer DMA addr. */
+	u32 len;			/* Buffer length. */
+	u16 flags;			/* Descriptor flags, in CPU byte order. */
+};
+
 struct vring_virtqueue {
 	struct virtqueue vq;
 
+	/* Is this a packed ring? */
+	bool packed_ring;
+
 	/* Is DMA API used? */
 	bool use_dma_api;
 
@@ -109,23 +126,64 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
-	struct {
-		/* Actual memory layout for this queue */
-		struct vring vring;
+	union {
+		/* Available for split ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring vring;
 
-		/* Last written value to avail->flags */
-		u16 avail_flags_shadow;
+			/* Last written value to avail->flags */
+			u16 avail_flags_shadow;
 
-		/* Last written value to avail->idx in guest byte order */
-		u16 avail_idx_shadow;
+			/*
+			 * Last written value to avail->idx in
+			 * guest byte order.
+			 */
+			u16 avail_idx_shadow;
 
-		/* Per-descriptor state. */
-		struct vring_desc_state_split *desc_state;
+			/* Per-descriptor state. */
+			struct vring_desc_state_split *desc_state;
 
-		/* DMA, allocation, and size information */
-		size_t queue_size_in_bytes;
-		dma_addr_t queue_dma_addr;
-	} split;
+			/* DMA address and size information */
+			dma_addr_t queue_dma_addr;
+			size_t queue_size_in_bytes;
+		} split;
+
+		/* Available for packed ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring_packed vring;
+
+			/* Driver ring wrap counter. */
+			bool avail_wrap_counter;
+
+			/* Device ring wrap counter. */
+			bool used_wrap_counter;
+
+			/* Avail used flags. */
+			u16 avail_used_flags;
+
+			/* Index of the next avail descriptor. */
+			u16 next_avail_idx;
+
+			/*
+			 * Last written value to driver->flags in
+			 * guest byte order.
+			 */
+			u16 event_flags_shadow;
+
+			/* Per-descriptor state. */
+			struct vring_desc_state_packed *desc_state;
+			struct vring_desc_extra_packed *desc_extra;
+
+			/* DMA address and size information */
+			dma_addr_t ring_dma_addr;
+			dma_addr_t driver_event_dma_addr;
+			dma_addr_t device_event_dma_addr;
+			size_t ring_size_in_bytes;
+			size_t event_size_in_bytes;
+		} packed;
+	};
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
@@ -840,6 +898,717 @@ static struct virtqueue *vring_create_virtqueue_split(
 }
 
 
+/*
+ * Packed ring specific functions - *_packed().
+ */
+
+/*
+ * Undo the DMA mapping recorded in a driver-private desc_extra entry.
+ * A no-op when this queue does not go through the DMA API.
+ */
+static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
+				     struct vring_desc_extra_packed *state)
+{
+	enum dma_data_direction dir;
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		return;
+
+	flags = state->flags;
+	dir = (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	/* An indirect entry is unmapped as a single buffer, others as a page. */
+	if (flags & VRING_DESC_F_INDIRECT)
+		dma_unmap_single(vring_dma_dev(vq), state->addr, state->len,
+				 dir);
+	else
+		dma_unmap_page(vring_dma_dev(vq), state->addr, state->len,
+			       dir);
+}
+
+/*
+ * Undo the DMA mapping described by a little-endian packed descriptor.
+ * A no-op when this queue does not go through the DMA API.
+ */
+static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
+				   struct vring_packed_desc *desc)
+{
+	enum dma_data_direction dir;
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		return;
+
+	/* Read the flags once; both the direction and the kind derive from them. */
+	flags = le16_to_cpu(desc->flags);
+	dir = (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	if (flags & VRING_DESC_F_INDIRECT)
+		dma_unmap_single(vring_dma_dev(vq), le64_to_cpu(desc->addr),
+				 le32_to_cpu(desc->len), dir);
+	else
+		dma_unmap_page(vring_dma_dev(vq), le64_to_cpu(desc->addr),
+			       le32_to_cpu(desc->len), dir);
+}
+
+/*
+ * Allocate an indirect descriptor table with room for @total_sg entries.
+ * Returns NULL when the allocation fails.
+ */
+static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
+						       gfp_t gfp)
+{
+	/*
+	 * The table has to come from lowmem: with a highmem page
+	 * virt_to_phys would hand the virtqueue a bogus address.
+	 */
+	return kmalloc_array(total_sg, sizeof(struct vring_packed_desc),
+			     gfp & ~__GFP_HIGHMEM);
+}
+
+/*
+ * Add a buffer to the packed ring through a separately allocated
+ * indirect descriptor table, so that it occupies one ring slot only.
+ *
+ * @vq: the virtqueue; the caller has already done START_USE() on it
+ * @sgs: array of scatterlists, the first @out_sgs device-readable and
+ *       the following @in_sgs device-writable
+ * @total_sg: total number of scatterlist entries, i.e. table length
+ * @data: token stored for the buffer
+ * @gfp: allocation flags for the indirect table
+ *
+ * Every exit path calls END_USE().  Returns 0 on success, -ENOSPC when
+ * the ring has no free slot, -ENOMEM when the table cannot be allocated
+ * and -EIO when a DMA mapping fails.
+ */
+static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       gfp_t gfp)
+{
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, err_idx;
+	u16 head, id;
+	dma_addr_t addr;
+
+	/* Check for room first so that nothing has to be freed on -ENOSPC. */
+	if (unlikely(vq->vq.num_free < 1)) {
+		pr_debug("Can't add buf len 1 - avail = 0\n");
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	desc = alloc_indirect_packed(total_sg, gfp);
+	if (unlikely(!desc)) {
+		END_USE(vq);
+		return -ENOMEM;
+	}
+
+	head = vq->packed.next_avail_idx;
+
+	i = 0;
+	id = vq->free_head;
+	BUG_ON(id == vq->packed.vring.num);
+
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			desc[i].flags = cpu_to_le16(n < out_sgs ?
+						0 : VRING_DESC_F_WRITE);
+			desc[i].addr = cpu_to_le64(addr);
+			desc[i].len = cpu_to_le32(sg->length);
+			i++;
+		}
+	}
+
+	/* Now that the indirect table is filled in, map it. */
+	addr = vring_map_single(vq, desc,
+			total_sg * sizeof(struct vring_packed_desc),
+			DMA_TO_DEVICE);
+	if (vring_mapping_error(vq, addr))
+		goto unmap_release;
+
+	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
+	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
+				sizeof(struct vring_packed_desc));
+	vq->packed.vring.desc[head].id = cpu_to_le16(id);
+
+	/* Remember how the table was mapped so it can be unmapped on detach. */
+	if (vq->use_dma_api) {
+		vq->packed.desc_extra[id].addr = addr;
+		vq->packed.desc_extra[id].len = total_sg *
+				sizeof(struct vring_packed_desc);
+		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
+						  vq->packed.avail_used_flags;
+	}
+
+	/*
+	 * A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available.
+	 */
+	virtio_wmb(vq->weak_barriers);
+	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
+						vq->packed.avail_used_flags);
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= 1;
+
+	/* Update free pointer */
+	n = head + 1;
+	if (n >= vq->packed.vring.num) {
+		/* Wrapped around the ring: flip the wrap counter and flag bits. */
+		n = 0;
+		vq->packed.avail_wrap_counter ^= 1;
+		vq->packed.avail_used_flags ^=
+				1 << VRING_PACKED_DESC_F_AVAIL |
+				1 << VRING_PACKED_DESC_F_USED;
+	}
+	vq->packed.next_avail_idx = n;
+	vq->free_head = vq->packed.desc_state[id].next;
+
+	/* Store token and indirect buffer state. */
+	vq->packed.desc_state[id].num = 1;
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].indir_desc = desc;
+	vq->packed.desc_state[id].last = id;
+
+	vq->num_added += 1;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	/* Entries [0, i) of the table were mapped before the failure. */
+	err_idx = i;
+
+	for (i = 0; i < err_idx; i++)
+		vring_unmap_desc_packed(vq, &desc[i]);
+
+	kfree(desc);
+
+	END_USE(vq);
+	return -EIO;
+}
+
+/*
+ * Expose a buffer to the device on a packed ring.
+ *
+ * @_vq: the virtqueue
+ * @sgs: array of scatterlists, the first @out_sgs device-readable and
+ *       the following @in_sgs device-writable
+ * @total_sg: total number of scatterlist entries; must not be zero
+ * @data: token identifying the buffer; must not be NULL
+ * @ctx: extra context stored with the buffer; must be NULL when the
+ *       queue uses indirect descriptors
+ * @gfp: allocation flags, only used for an indirect table
+ *
+ * Hands over to virtqueue_add_indirect_packed() when
+ * virtqueue_use_indirect() says so; otherwise one ring descriptor is
+ * written per scatterlist entry.  Returns 0 on success, -ENOSPC when
+ * there are not enough free descriptors and -EIO when the queue is
+ * broken or a DMA mapping fails.
+ */
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, c, descs_used;
+	__le16 uninitialized_var(head_flags), flags;
+	u16 head, id, uninitialized_var(prev), curr, avail_used_flags;
+
+	START_USE(vq);
+
+	BUG_ON(data == NULL);
+	BUG_ON(ctx && vq->indirect);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	LAST_ADD_TIME_UPDATE(vq);
+
+	BUG_ON(total_sg == 0);
+
+	/* The indirect path does its own END_USE() on every exit. */
+	if (virtqueue_use_indirect(_vq, total_sg))
+		return virtqueue_add_indirect_packed(vq, sgs, total_sg,
+				out_sgs, in_sgs, data, gfp);
+
+	head = vq->packed.next_avail_idx;
+	/* Saved so that a failed add can undo wrap-induced flag flips. */
+	avail_used_flags = vq->packed.avail_used_flags;
+
+	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
+
+	desc = vq->packed.vring.desc;
+	i = head;
+	descs_used = total_sg;
+
+	if (unlikely(vq->vq.num_free < descs_used)) {
+		pr_debug("Can't add buf len %i - avail = %i\n",
+			 descs_used, vq->vq.num_free);
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	id = vq->free_head;
+	BUG_ON(id == vq->packed.vring.num);
+
+	/*
+	 * i walks the ring slots, curr walks the desc_state free list and
+	 * c counts the scatterlist entries mapped so far.
+	 */
+	curr = id;
+	c = 0;
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			flags = cpu_to_le16(vq->packed.avail_used_flags |
+				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
+				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
+			/* The head's flags are held back until after the barrier. */
+			if (i == head)
+				head_flags = flags;
+			else
+				desc[i].flags = flags;
+
+			desc[i].addr = cpu_to_le64(addr);
+			desc[i].len = cpu_to_le32(sg->length);
+			desc[i].id = cpu_to_le16(id);
+
+			if (unlikely(vq->use_dma_api)) {
+				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].len = sg->length;
+				vq->packed.desc_extra[curr].flags =
+					le16_to_cpu(flags);
+			}
+			prev = curr;
+			curr = vq->packed.desc_state[curr].next;
+
+			if ((unlikely(++i >= vq->packed.vring.num))) {
+				i = 0;
+				vq->packed.avail_used_flags ^=
+					1 << VRING_PACKED_DESC_F_AVAIL |
+					1 << VRING_PACKED_DESC_F_USED;
+			}
+		}
+	}
+
+	/* Ending up below the start index means the ring wrapped. */
+	if (i < head)
+		vq->packed.avail_wrap_counter ^= 1;
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
+	/* Update free pointer */
+	vq->packed.next_avail_idx = i;
+	vq->free_head = curr;
+
+	/* Store token. */
+	vq->packed.desc_state[id].num = descs_used;
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].indir_desc = ctx;
+	vq->packed.desc_state[id].last = prev;
+
+	/*
+	 * A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available.
+	 */
+	virtio_wmb(vq->weak_barriers);
+	vq->packed.vring.desc[head].flags = head_flags;
+	vq->num_added += descs_used;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	vq->packed.avail_used_flags = avail_used_flags;
+
+	/*
+	 * Unmap the c entries that were mapped before the failure.  Go
+	 * through desc_extra rather than the ring: the head slot's flags
+	 * have not been written yet, so the ring still holds whatever was
+	 * left there, which would give a wrong direction or INDIRECT bit.
+	 * desc_extra is only filled in with the DMA API in use, which is
+	 * also the only case in which vring_unmap_state_packed() acts.
+	 */
+	curr = id;
+	for (n = 0; n < c; n++) {
+		vring_unmap_state_packed(vq, &vq->packed.desc_extra[curr]);
+		curr = vq->packed.desc_state[curr].next;
+	}
+
+	END_USE(vq);
+	return -EIO;
+}
+
+/*
+ * Decide whether the device has to be notified about added buffers.
+ *
+ * Resets vq->num_added and returns true unless the flags field of the
+ * device event suppression structure reads
+ * VRING_PACKED_EVENT_FLAG_DISABLE.
+ */
+static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 flags;
+	bool needs_kick;
+	/* Lets off_wrap and flags be fetched together with one 32-bit load. */
+	union {
+		struct {
+			__le16 off_wrap;
+			__le16 flags;
+		};
+		u32 u32;
+	} snapshot;
+
+	START_USE(vq);
+
+	/*
+	 * We need to expose the new flags value before checking notification
+	 * suppressions.
+	 */
+	virtio_mb(vq->weak_barriers);
+
+	vq->num_added = 0;
+
+	/* Only the flags half of the snapshot is consumed here. */
+	snapshot.u32 = *(u32 *)vq->packed.vring.device;
+	flags = le16_to_cpu(snapshot.flags);
+
+	LAST_ADD_TIME_CHECK(vq);
+	LAST_ADD_TIME_INVALID(vq);
+
+	needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
+	END_USE(vq);
+	return needs_kick;
+}
+
+/*
+ * Release buffer @id: put its desc_state entries back on the free list,
+ * undo its DMA mappings, and either free its indirect table (queues
+ * with indirect descriptors) or hand the stored context back via @ctx.
+ *
+ * @vq: the virtqueue
+ * @id: buffer id, i.e. index of the buffer's first desc_state entry
+ * @ctx: where to store the context pointer; may be NULL
+ */
+static void detach_buf_packed(struct vring_virtqueue *vq,
+			      unsigned int id, void **ctx)
+{
+	struct vring_desc_state_packed *state = NULL;
+	struct vring_packed_desc *desc;
+	unsigned int i, curr;
+
+	state = &vq->packed.desc_state[id];
+
+	/* Clear data ptr. */
+	state->data = NULL;
+
+	/* Splice the chain id..last in front of the current free list. */
+	vq->packed.desc_state[state->last].next = vq->free_head;
+	vq->free_head = id;
+	vq->vq.num_free += state->num;
+
+	/* Unmap each descriptor of the buffer from its desc_extra record. */
+	if (unlikely(vq->use_dma_api)) {
+		curr = id;
+		for (i = 0; i < state->num; i++) {
+			vring_unmap_state_packed(vq,
+				&vq->packed.desc_extra[curr]);
+			curr = vq->packed.desc_state[curr].next;
+		}
+	}
+
+	if (vq->indirect) {
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		desc = state->indir_desc;
+		if (!desc)
+			return;
+
+		if (vq->use_dma_api) {
+			/* desc_extra[id].len holds the table size in bytes. */
+			len = vq->packed.desc_extra[id].len;
+			for (i = 0; i < len / sizeof(struct vring_packed_desc);
+					i++)
+				vring_unmap_desc_packed(vq, &desc[i]);
+		}
+		kfree(desc);
+		state->indir_desc = NULL;
+	} else if (ctx) {
+		/* Without indirect descriptors the field carries the ctx. */
+		*ctx = state->indir_desc;
+	}
+}
+
+/*
+ * Report whether ring slot @idx is marked used for the given wrap
+ * counter value: its AVAIL and USED bits must be equal to each other
+ * and to @used_wrap_counter.
+ */
+static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
+				       u16 idx, bool used_wrap_counter)
+{
+	const u16 flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
+	const bool avail = flags & (1 << VRING_PACKED_DESC_F_AVAIL);
+	const bool used = flags & (1 << VRING_PACKED_DESC_F_USED);
+
+	if (avail != used)
+		return false;
+
+	return used == used_wrap_counter;
+}
+
+/* Is the descriptor at the driver's current used position marked used? */
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
+{
+	const u16 idx = vq->last_used_idx;
+	const bool wrap = vq->packed.used_wrap_counter;
+
+	return is_used_desc_packed(vq, idx, wrap);
+}
+
+/*
+ * Fetch the next used buffer from a packed ring.
+ *
+ * @_vq: the virtqueue
+ * @len: set to the length read from the used descriptor
+ * @ctx: passed on to detach_buf_packed(); may be NULL
+ *
+ * Returns the token stored when the buffer was added, or NULL when the
+ * queue is broken, nothing is used yet, or the used descriptor carries
+ * an id that is out of range or not currently in use.
+ */
+static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
+					  unsigned int *len,
+					  void **ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used, id;
+	void *ret;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
+	if (!more_used_packed(vq)) {
+		pr_debug("No more buffers in queue\n");
+		END_USE(vq);
+		return NULL;
+	}
+
+	/* Only get used elements after they have been exposed by host. */
+	virtio_rmb(vq->weak_barriers);
+
+	last_used = vq->last_used_idx;
+	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
+	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
+
+	/* The id comes from the device, so validate it before indexing. */
+	if (unlikely(id >= vq->packed.vring.num)) {
+		BAD_RING(vq, "id %u out of range\n", id);
+		return NULL;
+	}
+	if (unlikely(!vq->packed.desc_state[id].data)) {
+		BAD_RING(vq, "id %u is not a head!\n", id);
+		return NULL;
+	}
+
+	/* detach_buf_packed clears data, so grab it now. */
+	ret = vq->packed.desc_state[id].data;
+	detach_buf_packed(vq, id, ctx);
+
+	/* Skip every ring slot of the buffer; flip the counter on wrap. */
+	vq->last_used_idx += vq->packed.desc_state[id].num;
+	if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
+		vq->last_used_idx -= vq->packed.vring.num;
+		vq->packed.used_wrap_counter ^= 1;
+	}
+
+	LAST_ADD_TIME_INVALID(vq);
+
+	END_USE(vq);
+	return ret;
+}
+
+/*
+ * Write VRING_PACKED_EVENT_FLAG_DISABLE to the driver event suppression
+ * flags, unless the shadow copy shows it is already set.
+ */
+static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* The shadow copy saves touching the shared area for nothing. */
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE)
+		return;
+
+	vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+	vq->packed.vring.driver->flags =
+		cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE);
+}
+
+/*
+ * Re-enable the event flags if they are currently disabled and return
+ * a value for virtqueue_poll_packed(): the current used index with the
+ * used wrap counter placed at bit VRING_PACKED_EVENT_F_WRAP_CTR.
+ */
+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int wrap_bit;
+
+	START_USE(vq);
+
+	/*
+	 * Interrupts are turned back on optimistically; the caller checks
+	 * afterwards whether more work arrived in the meantime.
+	 */
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.vring.driver->flags =
+				cpu_to_le16(VRING_PACKED_EVENT_FLAG_ENABLE);
+	}
+
+	END_USE(vq);
+
+	wrap_bit = vq->packed.used_wrap_counter ?
+			1U << VRING_PACKED_EVENT_F_WRAP_CTR : 0;
+
+	return vq->last_used_idx | wrap_bit;
+}
+
+/*
+ * Check whether the descriptor encoded in @off_wrap is marked used.
+ * @off_wrap holds the wrap counter at bit VRING_PACKED_EVENT_F_WRAP_CTR
+ * and the ring index in the remaining bits.
+ */
+static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
+{
+	const u16 wrap_mask = 1 << VRING_PACKED_EVENT_F_WRAP_CTR;
+	const bool wrap = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
+	const u16 idx = off_wrap & ~wrap_mask;
+
+	return is_used_desc_packed(to_vvq(_vq), idx, wrap);
+}
+
+/*
+ * Re-enable the event flags if they are disabled, then re-check the
+ * ring.  Returns false when a used buffer is already pending at the
+ * position sampled on entry, true otherwise.
+ */
+static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 pos, wrap;
+	bool pending;
+
+	START_USE(vq);
+
+	/*
+	 * Interrupts are turned back on optimistically, then the ring is
+	 * checked for more work.
+	 */
+	pos = vq->last_used_idx;
+	wrap = vq->packed.used_wrap_counter;
+
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.vring.driver->flags =
+				cpu_to_le16(VRING_PACKED_EVENT_FLAG_ENABLE);
+	}
+
+	/*
+	 * The event suppression structure has to be updated before
+	 * used buffers are looked for again.
+	 */
+	virtio_mb(vq->weak_barriers);
+
+	pending = is_used_desc_packed(vq, pos, wrap);
+
+	END_USE(vq);
+	return !pending;
+}
+
+/*
+ * Detach the first buffer that is still held by the queue and return
+ * its token; returns NULL once no buffer is left.
+ */
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	void *buf = NULL;
+	unsigned int i;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->packed.vring.num; i++) {
+		/* detach_buf_packed clears data, so it is read beforehand. */
+		buf = vq->packed.desc_state[i].data;
+		if (buf) {
+			detach_buf_packed(vq, i, NULL);
+			break;
+		}
+	}
+
+	/* With nothing left to detach, every descriptor must be free. */
+	if (!buf)
+		BUG_ON(vq->vq.num_free != vq->packed.vring.num);
+
+	END_USE(vq);
+	return buf;
+}
+
+/*
+ * Allocate and initialise a packed virtqueue.
+ *
+ * The descriptor ring and the driver and device event suppression
+ * areas are three separate vring_alloc_queue() allocations, each with
+ * its own DMA address.  @vring_align and @may_reduce_num are accepted
+ * for signature parity with the split variant but are not used here.
+ *
+ * @index: queue index stored in the virtqueue
+ * @num: number of descriptors in the ring
+ * @vdev: the owning virtio device
+ * @weak_barriers: passed through to the virtio barrier helpers
+ * @context: when true, indirect descriptors are not used
+ * @notify: callback used to notify the other side
+ * @callback: buffer-used callback; NULL disables device events
+ * @name: queue name
+ *
+ * Returns the new virtqueue, or NULL if any allocation fails.  The
+ * queue is linked into vdev->vqs only once nothing can fail any more.
+ */
+static struct virtqueue *vring_create_virtqueue_packed(
+	unsigned int index,
+	unsigned int num,
+	unsigned int vring_align,
+	struct virtio_device *vdev,
+	bool weak_barriers,
+	bool may_reduce_num,
+	bool context,
+	bool (*notify)(struct virtqueue *),
+	void (*callback)(struct virtqueue *),
+	const char *name)
+{
+	struct vring_virtqueue *vq;
+	struct vring_packed_desc *ring;
+	struct vring_packed_desc_event *driver, *device;
+	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
+	size_t ring_size_in_bytes, event_size_in_bytes;
+	unsigned int i;
+
+	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
+
+	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
+				 &ring_dma_addr,
+				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!ring)
+		goto err_ring;
+
+	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
+
+	driver = vring_alloc_queue(vdev, event_size_in_bytes,
+				   &driver_event_dma_addr,
+				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!driver)
+		goto err_driver;
+
+	device = vring_alloc_queue(vdev, event_size_in_bytes,
+				   &device_event_dma_addr,
+				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!device)
+		goto err_device;
+
+	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
+	if (!vq)
+		goto err_vq;
+
+	vq->vq.callback = callback;
+	vq->vq.vdev = vdev;
+	vq->vq.name = name;
+	vq->vq.num_free = num;
+	vq->vq.index = index;
+	vq->we_own_ring = true;
+	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
+	vq->broken = false;
+	vq->last_used_idx = 0;
+	vq->num_added = 0;
+	vq->packed_ring = true;
+	vq->use_dma_api = vring_use_dma_api(vdev);
+#ifdef DEBUG
+	vq->in_use = false;
+	vq->last_add_time_valid = false;
+#endif
+
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
+		!context;
+	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+
+	vq->packed.ring_dma_addr = ring_dma_addr;
+	vq->packed.driver_event_dma_addr = driver_event_dma_addr;
+	vq->packed.device_event_dma_addr = device_event_dma_addr;
+
+	vq->packed.ring_size_in_bytes = ring_size_in_bytes;
+	vq->packed.event_size_in_bytes = event_size_in_bytes;
+
+	vq->packed.vring.num = num;
+	vq->packed.vring.desc = ring;
+	vq->packed.vring.driver = driver;
+	vq->packed.vring.device = device;
+
+	/* Both wrap counters start at 1, so new descriptors carry AVAIL only. */
+	vq->packed.next_avail_idx = 0;
+	vq->packed.avail_wrap_counter = 1;
+	vq->packed.used_wrap_counter = 1;
+	vq->packed.event_flags_shadow = 0;
+	vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
+
+	vq->packed.desc_state = kmalloc_array(num,
+			sizeof(struct vring_desc_state_packed),
+			GFP_KERNEL);
+	if (!vq->packed.desc_state)
+		goto err_desc_state;
+
+	memset(vq->packed.desc_state, 0,
+		num * sizeof(struct vring_desc_state_packed));
+
+	/* Put everything in free lists. */
+	vq->free_head = 0;
+	for (i = 0; i < num-1; i++)
+		vq->packed.desc_state[i].next = i + 1;
+
+	vq->packed.desc_extra = kmalloc_array(num,
+			sizeof(struct vring_desc_extra_packed),
+			GFP_KERNEL);
+	if (!vq->packed.desc_extra)
+		goto err_desc_extra;
+
+	memset(vq->packed.desc_extra, 0,
+		num * sizeof(struct vring_desc_extra_packed));
+
+	/* No callback?  Tell other side not to bother us. */
+	if (!callback) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+		vq->packed.vring.driver->flags =
+			cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	/*
+	 * Publish the queue only now: the error paths below free vq and
+	 * must never leave it linked into the device's list.
+	 */
+	list_add_tail(&vq->vq.list, &vdev->vqs);
+
+	return &vq->vq;
+
+	/* Unwind in reverse order, each area with its own DMA address. */
+err_desc_extra:
+	kfree(vq->packed.desc_state);
+err_desc_state:
+	kfree(vq);
+err_vq:
+	vring_free_queue(vdev, event_size_in_bytes, device,
+			 device_event_dma_addr);
+err_device:
+	vring_free_queue(vdev, event_size_in_bytes, driver,
+			 driver_event_dma_addr);
+err_driver:
+	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
+err_ring:
+	return NULL;
+}
+
+
 /*
  * Generic functions and exported symbols.
  */
@@ -853,8 +1622,12 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 				void *ctx,
 				gfp_t gfp)
 {
-	return virtqueue_add_split(_vq, sgs, total_sg,
-				   out_sgs, in_sgs, data, ctx, gfp);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, gfp) :
+				 virtqueue_add_split(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, gfp);
 }
 
 /**
@@ -973,7 +1746,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
  */
 bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
-	return virtqueue_kick_prepare_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
+				 virtqueue_kick_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
 
@@ -1040,7 +1816,10 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
 			    void **ctx)
 {
-	return virtqueue_get_buf_ctx_split(_vq, len, ctx);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
+				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
@@ -1049,7 +1828,6 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	return virtqueue_get_buf_ctx(_vq, len, NULL);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
-
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -1061,7 +1839,12 @@ EXPORT_SYMBOL_GPL(virtqueue_get_buf);
  */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
-	virtqueue_disable_cb_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->packed_ring)
+		virtqueue_disable_cb_packed(_vq);
+	else
+		virtqueue_disable_cb_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
@@ -1079,7 +1862,10 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
  */
 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 {
-	return virtqueue_enable_cb_prepare_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
+				 virtqueue_enable_cb_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
 
@@ -1097,7 +1883,8 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	virtio_mb(vq->weak_barriers);
-	return virtqueue_poll_split(_vq, last_used_idx);
+	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
+				 virtqueue_poll_split(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_poll);
 
@@ -1135,7 +1922,10 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
  */
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
-	return virtqueue_enable_cb_delayed_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
+				 virtqueue_enable_cb_delayed_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
@@ -1149,13 +1939,16 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
  */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
-	return virtqueue_detach_unused_buf_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
+				 virtqueue_detach_unused_buf_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
-	return more_used_split(vq);
+	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
 }
 
 irqreturn_t vring_interrupt(int irq, void *_vq)
@@ -1178,6 +1971,7 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 }
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
+/* Only available for split ring */
 struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
@@ -1190,10 +1984,14 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	unsigned int i;
 	struct vring_virtqueue *vq;
 
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return NULL;
+
 	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
+	vq->packed_ring = false;
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
@@ -1261,12 +2059,19 @@ struct virtqueue *vring_create_virtqueue(
 	void (*callback)(struct virtqueue *),
 	const char *name)
 {
+
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return vring_create_virtqueue_packed(index, num, vring_align,
+				vdev, weak_barriers, may_reduce_num,
+				context, notify, callback, name);
+
 	return vring_create_virtqueue_split(index, num, vring_align,
 			vdev, weak_barriers, may_reduce_num,
 			context, notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
 
+/* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
 				      unsigned int vring_align,
@@ -1279,6 +2084,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      const char *name)
 {
 	struct vring vring;
+
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return NULL;
+
 	vring_init(&vring, num, pages, vring_align);
 	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				     notify, callback, name);
@@ -1290,11 +2099,32 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	if (vq->we_own_ring) {
-		vring_free_queue(vq->vq.vdev,
-				 vq->split.queue_size_in_bytes,
-				 vq->split.vring.desc,
-				 vq->split.queue_dma_addr);
-		kfree(vq->split.desc_state);
+		if (vq->packed_ring) {
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.ring_size_in_bytes,
+					 vq->packed.vring.desc,
+					 vq->packed.ring_dma_addr);
+
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.event_size_in_bytes,
+					 vq->packed.vring.driver,
+					 vq->packed.driver_event_dma_addr);
+
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.event_size_in_bytes,
+					 vq->packed.vring.device,
+					 vq->packed.device_event_dma_addr);
+
+			kfree(vq->packed.desc_state);
+			kfree(vq->packed.desc_extra);
+		} else {
+			vring_free_queue(vq->vq.vdev,
+					 vq->split.queue_size_in_bytes,
+					 vq->split.vring.desc,
+					 vq->split.queue_dma_addr);
+
+			kfree(vq->split.desc_state);
+		}
 	}
 	list_del(&_vq->list);
 	kfree(vq);
@@ -1336,7 +2166,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->split.vring.num;
+	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
@@ -1369,6 +2199,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed_ring)
+		return vq->packed.ring_dma_addr;
+
 	return vq->split.queue_dma_addr;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
@@ -1379,6 +2212,9 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed_ring)
+		return vq->packed.driver_event_dma_addr;
+
 	return vq->split.queue_dma_addr +
 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
 }
@@ -1390,11 +2226,15 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed_ring)
+		return vq->packed.device_event_dma_addr;
+
 	return vq->split.queue_dma_addr +
 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
 
+/* Only available for split ring */
 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 {
 	return &to_vvq(vq)->split.vring;
-- 
2.14.5


---------------------------------------------------------------------
To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org


  parent reply	other threads:[~2018-11-21 10:05 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-21 10:03 [virtio-dev] [PATCH net-next v3 00/13] virtio: support packed ring Tiwei Bie
2018-11-21 10:03 ` [PATCH net-next v3 01/13] virtio: add packed ring types and macros Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] " Tiwei Bie
2018-11-30  8:10   ` Jason Wang
2018-11-30  8:10   ` [virtio-dev] " Jason Wang
2018-11-30  9:53     ` Tiwei Bie
2018-11-30  9:53     ` [virtio-dev] " Tiwei Bie
2018-11-30 12:47       ` Michael S. Tsirkin
2018-11-30 12:47       ` [virtio-dev] " Michael S. Tsirkin
2018-11-30 13:01         ` Maxime Coquelin
2018-11-30 13:52           ` Michael S. Tsirkin
2018-11-30 13:52           ` [virtio-dev] " Michael S. Tsirkin
2018-11-30 15:37             ` Tiwei Bie
2018-11-30 15:37             ` [virtio-dev] " Tiwei Bie
2018-11-30 15:53               ` Michael S. Tsirkin
2018-11-30 16:24                 ` Tiwei Bie
2018-11-30 16:24                 ` [virtio-dev] " Tiwei Bie
2018-11-30 16:46                   ` Michael S. Tsirkin
2018-11-30 16:46                   ` [virtio-dev] " Michael S. Tsirkin
2018-12-01  2:03                     ` Tiwei Bie
2018-12-01  2:03                     ` [virtio-dev] " Tiwei Bie
2018-11-30 15:53               ` Michael S. Tsirkin
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 02/13] virtio_ring: add _split suffix for split ring functions Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 03/13] virtio_ring: put split ring functions together Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [PATCH net-next v3 04/13] virtio_ring: put split ring fields in a sub struct Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] " Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 05/13] virtio_ring: introduce debug helpers Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 06/13] virtio_ring: introduce helper for indirect feature Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 07/13] virtio_ring: allocate desc state for split ring separately Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 08/13] virtio_ring: extract split ring handling from ring creation Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [PATCH net-next v3 09/13] virtio_ring: cache whether we will use DMA API Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] " Tiwei Bie
2018-11-21 10:03 ` [PATCH net-next v3 10/13] virtio_ring: introduce packed ring support Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie [this message]
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 11/13] virtio_ring: leverage event idx in packed ring Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] [PATCH net-next v3 12/13] virtio_ring: disable packed ring on unsupported transports Tiwei Bie
2018-11-21 10:03 ` Tiwei Bie
2018-11-21 10:03 ` [PATCH net-next v3 13/13] virtio_ring: advertize packed ring layout Tiwei Bie
2018-11-21 10:03 ` [virtio-dev] " Tiwei Bie
2018-11-21 12:20 ` [PATCH net-next v3 00/13] virtio: support packed ring Michael S. Tsirkin
2018-11-21 12:20 ` [virtio-dev] " Michael S. Tsirkin
2018-11-21 12:42   ` Tiwei Bie
2018-11-21 12:42   ` [virtio-dev] " Tiwei Bie
2018-11-21 13:46     ` Jason Wang
2018-11-21 13:46     ` [virtio-dev] " Jason Wang
2018-11-21 17:37   ` David Miller
2018-11-27  6:08 ` [virtio-dev] " Michael S. Tsirkin
2018-11-27  6:18   ` David Miller
2018-11-27  6:08 ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181121100330.24846-11-tiwei.bie@intel.com \
    --to=tiwei.bie@intel.com \
    --cc=jasowang@redhat.com \
    --cc=jfreimann@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=virtio-dev@lists.oasis-open.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=wexu@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.