Netdev List
 help / color / mirror / Atom feed
* [RFC v3 5/5] virtio_ring: enable packed ring
From: Tiwei Bie @ 2018-04-25  5:15 UTC (permalink / raw)
  To: mst, jasowang, virtualization, linux-kernel, netdev; +Cc: wexu
In-Reply-To: <20180425051550.24342-1-tiwei.bie@intel.com>

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/virtio/virtio_ring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b1039c2985b9..9a3d13e1e2ba 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1873,6 +1873,8 @@ void vring_transport_features(struct virtio_device *vdev)
 			break;
 		case VIRTIO_F_IOMMU_PLATFORM:
 			break;
+		case VIRTIO_F_RING_PACKED:
+			break;
 		default:
 			/* We don't understand this bit. */
 			__virtio_clear_bit(vdev, i);
-- 
2.11.0

^ permalink raw reply related

* [RFC v3 4/5] virtio_ring: add event idx support in packed ring
From: Tiwei Bie @ 2018-04-25  5:15 UTC (permalink / raw)
  To: mst, jasowang, virtualization, linux-kernel, netdev
  Cc: wexu, jfreimann, tiwei.bie
In-Reply-To: <20180425051550.24342-1-tiwei.bie@intel.com>

This commit introduces the event idx support in packed
ring. This feature is temporarily disabled, because the
implementation in this patch may not work as expected,
and some further discussions on the implementation are
needed, e.g. do we have to check the wrap counter when
checking whether a kick is needed?

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/virtio/virtio_ring.c | 53 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0181e93897be..b1039c2985b9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -986,7 +986,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 flags;
+	u16 new, old, off_wrap, flags;
 	bool needs_kick;
 	u32 snapshot;
 
@@ -995,7 +995,12 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 	 * suppressions. */
 	virtio_mb(vq->weak_barriers);
 
+	old = vq->next_avail_idx - vq->num_added;
+	new = vq->next_avail_idx;
+	vq->num_added = 0;
+
 	snapshot = *(u32 *)vq->vring_packed.device;
+	off_wrap = virtio16_to_cpu(_vq->vdev, snapshot & 0xffff);
 	flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
 
 #ifdef DEBUG
@@ -1006,7 +1011,10 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 	vq->last_add_time_valid = false;
 #endif
 
-	needs_kick = (flags != VRING_EVENT_F_DISABLE);
+	if (flags == VRING_EVENT_F_DESC)
+		needs_kick = vring_need_event(off_wrap & ~(1<<15), new, old);
+	else
+		needs_kick = (flags != VRING_EVENT_F_DISABLE);
 	END_USE(vq);
 	return needs_kick;
 }
@@ -1116,6 +1124,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 	if (vq->last_used_idx >= vq->vring_packed.num)
 		vq->last_used_idx -= vq->vring_packed.num;
 
+	/* If we expect an interrupt for the next entry, tell host
+	 * by writing event index and flush out the write before
+	 * the read in the next get_buf call. */
+	if (vq->event_flags_shadow == VRING_EVENT_F_DESC)
+		virtio_store_mb(vq->weak_barriers,
+				&vq->vring_packed.driver->off_wrap,
+				cpu_to_virtio16(_vq->vdev, vq->last_used_idx |
+						(vq->wrap_counter << 15)));
+
 #ifdef DEBUG
 	vq->last_add_time_valid = false;
 #endif
@@ -1143,10 +1160,17 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
 
 	/* We optimistically turn back on interrupts, then check if there was
 	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always update the event index to keep code simple. */
+
+	vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
+			vq->last_used_idx | (vq->wrap_counter << 15));
 
 	if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
 		virtio_wmb(vq->weak_barriers);
-		vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+		vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
+						     VRING_EVENT_F_ENABLE;
 		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
 							vq->event_flags_shadow);
 	}
@@ -1172,15 +1196,34 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 bufs, used_idx, wrap_counter;
 
 	START_USE(vq);
 
 	/* We optimistically turn back on interrupts, then check if there was
 	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always update the event index to keep code simple. */
+
+	/* TODO: tune this threshold */
+	bufs = (u16)(vq->next_avail_idx - vq->last_used_idx) * 3 / 4;
+
+	used_idx = vq->last_used_idx + bufs;
+	wrap_counter = vq->wrap_counter;
+
+	if (used_idx >= vq->vring_packed.num) {
+		used_idx -= vq->vring_packed.num;
+		wrap_counter ^= 1;
+	}
+
+	vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev,
+			used_idx | (wrap_counter << 15));
 
 	if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
 		virtio_wmb(vq->weak_barriers);
-		vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+		vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC :
+						     VRING_EVENT_F_ENABLE;
 		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
 							vq->event_flags_shadow);
 	}
@@ -1822,8 +1865,10 @@ void vring_transport_features(struct virtio_device *vdev)
 		switch (i) {
 		case VIRTIO_RING_F_INDIRECT_DESC:
 			break;
+#if 0
 		case VIRTIO_RING_F_EVENT_IDX:
 			break;
+#endif
 		case VIRTIO_F_VERSION_1:
 			break;
 		case VIRTIO_F_IOMMU_PLATFORM:
-- 
2.11.0

^ permalink raw reply related

* [RFC v3 2/5] virtio_ring: support creating packed ring
From: Tiwei Bie @ 2018-04-25  5:15 UTC (permalink / raw)
  To: mst, jasowang, virtualization, linux-kernel, netdev; +Cc: wexu
In-Reply-To: <20180425051550.24342-1-tiwei.bie@intel.com>

This commit introduces the support for creating packed ring.
All split ring specific functions are added _split suffix.
Some necessary stubs for packed ring are also added.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/virtio/virtio_ring.c | 764 ++++++++++++++++++++++++++++---------------
 include/linux/virtio_ring.h  |   8 +-
 2 files changed, 513 insertions(+), 259 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..e164822ca66e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -64,8 +64,8 @@ struct vring_desc_state {
 struct vring_virtqueue {
 	struct virtqueue vq;
 
-	/* Actual memory layout for this queue */
-	struct vring vring;
+	/* Is this a packed ring? */
+	bool packed;
 
 	/* Can we use weak barriers? */
 	bool weak_barriers;
@@ -79,19 +79,45 @@ struct vring_virtqueue {
 	/* Host publishes avail event idx */
 	bool event;
 
-	/* Head of free buffer list. */
-	unsigned int free_head;
 	/* Number we've added since last sync. */
 	unsigned int num_added;
 
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
-	/* Last written value to avail->flags */
-	u16 avail_flags_shadow;
+	union {
+		/* Available for split ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring vring;
 
-	/* Last written value to avail->idx in guest byte order */
-	u16 avail_idx_shadow;
+			/* Head of free buffer list. */
+			unsigned int free_head;
+
+			/* Last written value to avail->flags */
+			u16 avail_flags_shadow;
+
+			/* Last written value to avail->idx in
+			 * guest byte order. */
+			u16 avail_idx_shadow;
+		};
+
+		/* Available for packed ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring_packed vring_packed;
+
+			/* Driver ring wrap counter. */
+			u8 wrap_counter;
+
+			/* Index of the next avail descriptor. */
+			unsigned int next_avail_idx;
+
+			/* Last written value to driver->flags in
+			 * guest byte order. */
+			u16 event_flags_shadow;
+		};
+	};
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
@@ -201,8 +227,17 @@ static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 			      cpu_addr, size, direction);
 }
 
-static void vring_unmap_one(const struct vring_virtqueue *vq,
-			    struct vring_desc *desc)
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return 0;
+
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
+				  struct vring_desc *desc)
 {
 	u16 flags;
 
@@ -226,17 +261,9 @@ static void vring_unmap_one(const struct vring_virtqueue *vq,
 	}
 }
 
-static int vring_mapping_error(const struct vring_virtqueue *vq,
-			       dma_addr_t addr)
-{
-	if (!vring_use_dma_api(vq->vq.vdev))
-		return 0;
-
-	return dma_mapping_error(vring_dma_dev(vq), addr);
-}
-
-static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
-					 unsigned int total_sg, gfp_t gfp)
+static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
+					       unsigned int total_sg,
+					       gfp_t gfp)
 {
 	struct vring_desc *desc;
 	unsigned int i;
@@ -257,14 +284,14 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 	return desc;
 }
 
-static inline int virtqueue_add(struct virtqueue *_vq,
-				struct scatterlist *sgs[],
-				unsigned int total_sg,
-				unsigned int out_sgs,
-				unsigned int in_sgs,
-				void *data,
-				void *ctx,
-				gfp_t gfp)
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+				      struct scatterlist *sgs[],
+				      unsigned int total_sg,
+				      unsigned int out_sgs,
+				      unsigned int in_sgs,
+				      void *data,
+				      void *ctx,
+				      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
@@ -303,7 +330,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
 	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
-		desc = alloc_indirect(_vq, total_sg, gfp);
+		desc = alloc_indirect_split(_vq, total_sg, gfp);
 	else {
 		desc = NULL;
 		WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
@@ -424,7 +451,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	for (n = 0; n < total_sg; n++) {
 		if (i == err_idx)
 			break;
-		vring_unmap_one(vq, &desc[i]);
+		vring_unmap_one_split(vq, &desc[i]);
 		i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
 	}
 
@@ -435,6 +462,355 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	return -EIO;
 }
 
+static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 new, old;
+	bool needs_kick;
+
+	START_USE(vq);
+	/* We need to expose available array entries before checking avail
+	 * event. */
+	virtio_mb(vq->weak_barriers);
+
+	old = vq->avail_idx_shadow - vq->num_added;
+	new = vq->avail_idx_shadow;
+	vq->num_added = 0;
+
+#ifdef DEBUG
+	if (vq->last_add_time_valid) {
+		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
+					      vq->last_add_time)) > 100);
+	}
+	vq->last_add_time_valid = false;
+#endif
+
+	if (vq->event) {
+		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
+					      new, old);
+	} else {
+		needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
+	}
+	END_USE(vq);
+	return needs_kick;
+}
+
+static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
+			     void **ctx)
+{
+	unsigned int i, j;
+	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+
+	/* Clear data ptr. */
+	vq->desc_state[head].data = NULL;
+
+	/* Put back on free list: unmap first-level descriptors and find end */
+	i = head;
+
+	while (vq->vring.desc[i].flags & nextflag) {
+		vring_unmap_one_split(vq, &vq->vring.desc[i]);
+		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
+		vq->vq.num_free++;
+	}
+
+	vring_unmap_one_split(vq, &vq->vring.desc[i]);
+	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
+	vq->free_head = head;
+
+	/* Plus final descriptor */
+	vq->vq.num_free++;
+
+	if (vq->indirect) {
+		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		if (!indir_desc)
+			return;
+
+		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+
+		BUG_ON(!(vq->vring.desc[head].flags &
+			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+		for (j = 0; j < len / sizeof(struct vring_desc); j++)
+			vring_unmap_one_split(vq, &indir_desc[j]);
+
+		kfree(indir_desc);
+		vq->desc_state[head].indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = vq->desc_state[head].indir_desc;
+	}
+}
+
+static inline bool more_used_split(const struct vring_virtqueue *vq)
+{
+	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
+}
+
+static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
+					 unsigned int *len,
+					 void **ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	void *ret;
+	unsigned int i;
+	u16 last_used;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
+	if (!more_used_split(vq)) {
+		pr_debug("No more buffers in queue\n");
+		END_USE(vq);
+		return NULL;
+	}
+
+	/* Only get used array entries after they have been exposed by host. */
+	virtio_rmb(vq->weak_barriers);
+
+	last_used = (vq->last_used_idx & (vq->vring.num - 1));
+	i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
+	*len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
+
+	if (unlikely(i >= vq->vring.num)) {
+		BAD_RING(vq, "id %u out of range\n", i);
+		return NULL;
+	}
+	if (unlikely(!vq->desc_state[i].data)) {
+		BAD_RING(vq, "id %u is not a head!\n", i);
+		return NULL;
+	}
+
+	/* detach_buf_split clears data, so grab it now. */
+	ret = vq->desc_state[i].data;
+	detach_buf_split(vq, i, ctx);
+	vq->last_used_idx++;
+	/* If we expect an interrupt for the next entry, tell host
+	 * by writing event index and flush out the write before
+	 * the read in the next get_buf call. */
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
+		virtio_store_mb(vq->weak_barriers,
+				&vring_used_event(&vq->vring),
+				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
+
+#ifdef DEBUG
+	vq->last_add_time_valid = false;
+#endif
+
+	END_USE(vq);
+	return ret;
+}
+
+static void virtqueue_disable_cb_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
+}
+
+static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used_idx;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
+	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
+	END_USE(vq);
+	return last_used_idx;
+}
+
+static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	virtio_mb(vq->weak_barriers);
+	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
+}
+
+static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 bufs;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always update the event index to keep code simple. */
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
+	/* TODO: tune this threshold */
+	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
+
+	virtio_store_mb(vq->weak_barriers,
+			&vring_used_event(&vq->vring),
+			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
+
+	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
+
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->vring.num; i++) {
+		if (!vq->desc_state[i].data)
+			continue;
+		/* detach_buf clears data, so grab it now. */
+		buf = vq->desc_state[i].data;
+		detach_buf_split(vq, i, NULL);
+		vq->avail_idx_shadow--;
+		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->vq.num_free != vq->vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
+/*
+ * The layout for the packed ring is a continuous chunk of memory
+ * which looks like this.
+ *
+ * struct vring_packed {
+ *	// The actual descriptors (16 bytes each)
+ *	struct vring_packed_desc desc[num];
+ *
+ *	// Padding to the next align boundary.
+ *	char pad[];
+ *
+ *	// Driver Event Suppression
+ *	struct vring_packed_desc_event driver;
+ *
+ *	// Device Event Suppression
+ *	struct vring_packed_desc_event device;
+ * };
+ */
+static inline void vring_init_packed(struct vring_packed *vr, unsigned int num,
+				     void *p, unsigned long align)
+{
+	vr->num = num;
+	vr->desc = p;
+	vr->driver = (void *)(((uintptr_t)p + sizeof(struct vring_packed_desc)
+		* num + align - 1) & ~(align - 1));
+	vr->device = vr->driver + 1;
+}
+
+static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
+{
+	return ((sizeof(struct vring_packed_desc) * num + align - 1)
+		& ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
+}
+
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+	return -EIO;
+}
+
+static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
+{
+	return false;
+}
+
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
+{
+	return false;
+}
+
+static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
+					  unsigned int *len,
+					  void **ctx)
+{
+	return NULL;
+}
+
+static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
+{
+}
+
+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
+{
+	return 0;
+}
+
+static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
+{
+	return false;
+}
+
+static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
+{
+	return false;
+}
+
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
+{
+	return NULL;
+}
+
+static inline int virtqueue_add(struct virtqueue *_vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs,
+				void *data,
+				void *ctx,
+				gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed ? virtqueue_add_packed(_vq, sgs, total_sg, out_sgs,
+						 in_sgs, data, ctx, gfp) :
+			    virtqueue_add_split(_vq, sgs, total_sg, out_sgs,
+						in_sgs, data, ctx, gfp);
+}
+
 /**
  * virtqueue_add_sgs - expose buffers to other end
  * @vq: the struct virtqueue we're talking about.
@@ -551,34 +927,9 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 new, old;
-	bool needs_kick;
 
-	START_USE(vq);
-	/* We need to expose available array entries before checking avail
-	 * event. */
-	virtio_mb(vq->weak_barriers);
-
-	old = vq->avail_idx_shadow - vq->num_added;
-	new = vq->avail_idx_shadow;
-	vq->num_added = 0;
-
-#ifdef DEBUG
-	if (vq->last_add_time_valid) {
-		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
-					      vq->last_add_time)) > 100);
-	}
-	vq->last_add_time_valid = false;
-#endif
-
-	if (vq->event) {
-		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
-					      new, old);
-	} else {
-		needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
-	}
-	END_USE(vq);
-	return needs_kick;
+	return vq->packed ? virtqueue_kick_prepare_packed(_vq) :
+			    virtqueue_kick_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
 
@@ -626,58 +977,9 @@ bool virtqueue_kick(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
-		       void **ctx)
-{
-	unsigned int i, j;
-	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
-
-	/* Clear data ptr. */
-	vq->desc_state[head].data = NULL;
-
-	/* Put back on free list: unmap first-level descriptors and find end */
-	i = head;
-
-	while (vq->vring.desc[i].flags & nextflag) {
-		vring_unmap_one(vq, &vq->vring.desc[i]);
-		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
-		vq->vq.num_free++;
-	}
-
-	vring_unmap_one(vq, &vq->vring.desc[i]);
-	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
-	vq->free_head = head;
-
-	/* Plus final descriptor */
-	vq->vq.num_free++;
-
-	if (vq->indirect) {
-		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
-		u32 len;
-
-		/* Free the indirect table, if any, now that it's unmapped. */
-		if (!indir_desc)
-			return;
-
-		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
-
-		BUG_ON(!(vq->vring.desc[head].flags &
-			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
-		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
-
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one(vq, &indir_desc[j]);
-
-		kfree(indir_desc);
-		vq->desc_state[head].indir_desc = NULL;
-	} else if (ctx) {
-		*ctx = vq->desc_state[head].indir_desc;
-	}
-}
-
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
-	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
+	return vq->packed ? more_used_packed(vq) : more_used_split(vq);
 }
 
 /**
@@ -700,57 +1002,9 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
 			    void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	void *ret;
-	unsigned int i;
-	u16 last_used;
 
-	START_USE(vq);
-
-	if (unlikely(vq->broken)) {
-		END_USE(vq);
-		return NULL;
-	}
-
-	if (!more_used(vq)) {
-		pr_debug("No more buffers in queue\n");
-		END_USE(vq);
-		return NULL;
-	}
-
-	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb(vq->weak_barriers);
-
-	last_used = (vq->last_used_idx & (vq->vring.num - 1));
-	i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
-	*len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
-
-	if (unlikely(i >= vq->vring.num)) {
-		BAD_RING(vq, "id %u out of range\n", i);
-		return NULL;
-	}
-	if (unlikely(!vq->desc_state[i].data)) {
-		BAD_RING(vq, "id %u is not a head!\n", i);
-		return NULL;
-	}
-
-	/* detach_buf clears data, so grab it now. */
-	ret = vq->desc_state[i].data;
-	detach_buf(vq, i, ctx);
-	vq->last_used_idx++;
-	/* If we expect an interrupt for the next entry, tell host
-	 * by writing event index and flush out the write before
-	 * the read in the next get_buf call. */
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
-		virtio_store_mb(vq->weak_barriers,
-				&vring_used_event(&vq->vring),
-				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
-
-#ifdef DEBUG
-	vq->last_add_time_valid = false;
-#endif
-
-	END_USE(vq);
-	return ret;
+	return vq->packed ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
+			    virtqueue_get_buf_ctx_split(_vq, len, ctx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
@@ -772,12 +1026,10 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
-	}
-
+	if (vq->packed)
+		virtqueue_disable_cb_packed(_vq);
+	else
+		virtqueue_disable_cb_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
@@ -796,23 +1048,9 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 last_used_idx;
 
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always do both to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
-	}
-	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
-	END_USE(vq);
-	return last_used_idx;
+	return vq->packed ? virtqueue_enable_cb_prepare_packed(_vq) :
+			    virtqueue_enable_cb_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
 
@@ -829,8 +1067,8 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	virtio_mb(vq->weak_barriers);
-	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
+	return vq->packed ? virtqueue_poll_packed(_vq, last_used_idx) :
+			    virtqueue_poll_split(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_poll);
 
@@ -868,34 +1106,9 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 bufs;
 
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always update the event index to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
-	}
-	/* TODO: tune this threshold */
-	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
-
-	virtio_store_mb(vq->weak_barriers,
-			&vring_used_event(&vq->vring),
-			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
-
-	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
-		END_USE(vq);
-		return false;
-	}
-
-	END_USE(vq);
-	return true;
+	return vq->packed ? virtqueue_enable_cb_delayed_packed(_vq) :
+			    virtqueue_enable_cb_delayed_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
@@ -910,27 +1123,9 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	unsigned int i;
-	void *buf;
 
-	START_USE(vq);
-
-	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->desc_state[i].data)
-			continue;
-		/* detach_buf clears data, so grab it now. */
-		buf = vq->desc_state[i].data;
-		detach_buf(vq, i, NULL);
-		vq->avail_idx_shadow--;
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
-		END_USE(vq);
-		return buf;
-	}
-	/* That should have freed everything. */
-	BUG_ON(vq->vq.num_free != vq->vring.num);
-
-	END_USE(vq);
-	return NULL;
+	return vq->packed ? virtqueue_detach_unused_buf_packed(_vq) :
+			    virtqueue_detach_unused_buf_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 
@@ -955,7 +1150,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
 struct virtqueue *__vring_new_virtqueue(unsigned int index,
-					struct vring vring,
+					union vring_union vring,
+					bool packed,
 					struct virtio_device *vdev,
 					bool weak_barriers,
 					bool context,
@@ -963,19 +1159,20 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					void (*callback)(struct virtqueue *),
 					const char *name)
 {
-	unsigned int i;
+	unsigned int num, i;
 	struct vring_virtqueue *vq;
 
-	vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
+	num = packed ? vring.vring_packed.num : vring.vring_split.num;
+
+	vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
 		     GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
-	vq->vring = vring;
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
-	vq->vq.num_free = vring.num;
+	vq->vq.num_free = num;
 	vq->vq.index = index;
 	vq->we_own_ring = false;
 	vq->queue_dma_addr = 0;
@@ -984,9 +1181,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
-	vq->avail_flags_shadow = 0;
-	vq->avail_idx_shadow = 0;
 	vq->num_added = 0;
+	vq->packed = packed;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
@@ -997,18 +1193,37 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 		!context;
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
+	if (vq->packed) {
+		vq->vring_packed = vring.vring_packed;
+		vq->next_avail_idx = 0;
+		vq->wrap_counter = 1;
+		vq->event_flags_shadow = 0;
+	} else {
+		vq->vring = vring.vring_split;
+		vq->avail_flags_shadow = 0;
+		vq->avail_idx_shadow = 0;
+
+		/* Put everything in free lists. */
+		vq->free_head = 0;
+		for (i = 0; i < num-1; i++)
+			vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
+	}
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+		if (packed) {
+			vq->event_flags_shadow = VRING_EVENT_F_DISABLE;
+			vq->vring_packed.driver->flags = cpu_to_virtio16(vdev,
+						vq->event_flags_shadow);
+		} else {
+			vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+			if (!vq->event)
+				vq->vring.avail->flags = cpu_to_virtio16(vdev,
+						vq->avail_flags_shadow);
+		}
 	}
 
-	/* Put everything in free lists. */
-	vq->free_head = 0;
-	for (i = 0; i < vring.num-1; i++)
-		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-	memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
+	memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
 
 	return &vq->vq;
 }
@@ -1056,6 +1271,12 @@ static void vring_free_queue(struct virtio_device *vdev, size_t size,
 	}
 }
 
+static inline int
+__vring_size(unsigned int num, unsigned long align, bool packed)
+{
+	return packed ? vring_size_packed(num, align) : vring_size(num, align);
+}
+
 struct virtqueue *vring_create_virtqueue(
 	unsigned int index,
 	unsigned int num,
@@ -1072,7 +1293,8 @@ struct virtqueue *vring_create_virtqueue(
 	void *queue = NULL;
 	dma_addr_t dma_addr;
 	size_t queue_size_in_bytes;
-	struct vring vring;
+	union vring_union vring;
+	bool packed;
 
 	/* We assume num is a power of 2. */
 	if (num & (num - 1)) {
@@ -1080,9 +1302,13 @@ struct virtqueue *vring_create_virtqueue(
 		return NULL;
 	}
 
+	packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
+
 	/* TODO: allocate each queue chunk individually */
-	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
-		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+	for (; num && __vring_size(num, vring_align, packed) > PAGE_SIZE;
+			num /= 2) {
+		queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
+							     packed),
 					  &dma_addr,
 					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 		if (queue)
@@ -1094,17 +1320,21 @@ struct virtqueue *vring_create_virtqueue(
 
 	if (!queue) {
 		/* Try to get a single page. You are my only hope! */
-		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+		queue = vring_alloc_queue(vdev, __vring_size(num, vring_align,
+							     packed),
 					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
 	}
 	if (!queue)
 		return NULL;
 
-	queue_size_in_bytes = vring_size(num, vring_align);
-	vring_init(&vring, num, queue, vring_align);
+	queue_size_in_bytes = __vring_size(num, vring_align, packed);
+	if (packed)
+		vring_init_packed(&vring.vring_packed, num, queue, vring_align);
+	else
+		vring_init(&vring.vring_split, num, queue, vring_align);
 
-	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-				   notify, callback, name);
+	vq = __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
+				   context, notify, callback, name);
 	if (!vq) {
 		vring_free_queue(vdev, queue_size_in_bytes, queue,
 				 dma_addr);
@@ -1130,10 +1360,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      void (*callback)(struct virtqueue *vq),
 				      const char *name)
 {
-	struct vring vring;
-	vring_init(&vring, num, pages, vring_align);
-	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-				     notify, callback, name);
+	union vring_union vring;
+	bool packed;
+
+	packed = virtio_has_feature(vdev, VIRTIO_F_RING_PACKED);
+	if (packed)
+		vring_init_packed(&vring.vring_packed, num, pages, vring_align);
+	else
+		vring_init(&vring.vring_split, num, pages, vring_align);
+
+	return __vring_new_virtqueue(index, vring, packed, vdev, weak_barriers,
+				     context, notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 
@@ -1143,7 +1380,9 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 
 	if (vq->we_own_ring) {
 		vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
-				 vq->vring.desc, vq->queue_dma_addr);
+				 vq->packed ? (void *)vq->vring_packed.desc :
+					      (void *)vq->vring.desc,
+				 vq->queue_dma_addr);
 	}
 	list_del(&_vq->list);
 	kfree(vq);
@@ -1185,7 +1424,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->vring.num;
+	return vq->packed ? vq->vring_packed.num : vq->vring.num;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
@@ -1228,6 +1467,10 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed)
+		return vq->queue_dma_addr + ((char *)vq->vring_packed.driver -
+				(char *)vq->vring_packed.desc);
+
 	return vq->queue_dma_addr +
 		((char *)vq->vring.avail - (char *)vq->vring.desc);
 }
@@ -1239,11 +1482,16 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed)
+		return vq->queue_dma_addr + ((char *)vq->vring_packed.device -
+				(char *)vq->vring_packed.desc);
+
 	return vq->queue_dma_addr +
 		((char *)vq->vring.used - (char *)vq->vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
 
+/* Only available for split ring */
 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 {
 	return &to_vvq(vq)->vring;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index bbf32524ab27..a0075894ad16 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -60,6 +60,11 @@ static inline void virtio_store_mb(bool weak_barriers,
 struct virtio_device;
 struct virtqueue;
 
+union vring_union {
+	struct vring vring_split;
+	struct vring_packed vring_packed;
+};
+
 /*
  * Creates a virtqueue and allocates the descriptor ring.  If
  * may_reduce_num is set, then this may allocate a smaller ring than
@@ -79,7 +84,8 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
 
 /* Creates a virtqueue with a custom layout. */
 struct virtqueue *__vring_new_virtqueue(unsigned int index,
-					struct vring vring,
+					union vring_union vring,
+					bool packed,
 					struct virtio_device *vdev,
 					bool weak_barriers,
 					bool ctx,
-- 
2.11.0

^ permalink raw reply related

* [RFC v3 1/5] virtio: add packed ring definitions
From: Tiwei Bie @ 2018-04-25  5:15 UTC (permalink / raw)
  To: mst, jasowang, virtualization, linux-kernel, netdev; +Cc: wexu
In-Reply-To: <20180425051550.24342-1-tiwei.bie@intel.com>

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 include/uapi/linux/virtio_config.h | 12 +++++++++++-
 include/uapi/linux/virtio_ring.h   | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 308e2096291f..a6e392325e3a 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -49,7 +49,7 @@
  * transport being used (eg. virtio_ring), the rest are per-device feature
  * bits. */
 #define VIRTIO_TRANSPORT_F_START	28
-#define VIRTIO_TRANSPORT_F_END		34
+#define VIRTIO_TRANSPORT_F_END		36
 
 #ifndef VIRTIO_CONFIG_NO_LEGACY
 /* Do we get callbacks when the ring is completely used, even if we've
@@ -71,4 +71,14 @@
  * this is for compatibility with legacy systems.
  */
 #define VIRTIO_F_IOMMU_PLATFORM		33
+
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED		34
+
+/*
+ * This feature indicates that all buffers are used by the device
+ * in the same order in which they have been made available.
+ */
+#define VIRTIO_F_IN_ORDER		35
+
 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index 6d5d5faa989b..3932cb80c347 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -44,6 +44,9 @@
 /* This means the buffer contains a list of buffer descriptors. */
 #define VRING_DESC_F_INDIRECT	4
 
+#define VRING_DESC_F_AVAIL(b)	((b) << 7)
+#define VRING_DESC_F_USED(b)	((b) << 15)
+
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
  * will still kick if it's out of buffers. */
@@ -53,6 +56,10 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
+#define VRING_EVENT_F_ENABLE	0x0
+#define VRING_EVENT_F_DISABLE	0x1
+#define VRING_EVENT_F_DESC	0x2
+
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC	28
 
@@ -171,4 +178,33 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
 	return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
 }
 
+struct vring_packed_desc_event {
+	/* __virtio16 off  : 15; // Descriptor Event Offset
+	 * __virtio16 wrap : 1;  // Descriptor Event Wrap Counter */
+	__virtio16 off_wrap;
+	/* __virtio16 flags : 2; // Descriptor Event Flags */
+	__virtio16 flags;
+};
+
+struct vring_packed_desc {
+	/* Buffer Address. */
+	__virtio64 addr;
+	/* Buffer Length. */
+	__virtio32 len;
+	/* Buffer ID. */
+	__virtio16 id;
+	/* The flags depending on descriptor type. */
+	__virtio16 flags;
+};
+
+struct vring_packed {
+	unsigned int num;
+
+	struct vring_packed_desc *desc;
+
+	struct vring_packed_desc_event *driver;
+
+	struct vring_packed_desc_event *device;
+};
+
 #endif /* _UAPI_LINUX_VIRTIO_RING_H */
-- 
2.11.0

^ permalink raw reply related

* [RFC v3 0/5] virtio: support packed ring
From: Tiwei Bie @ 2018-04-25  5:15 UTC (permalink / raw)
  To: mst, jasowang, virtualization, linux-kernel, netdev
  Cc: wexu, jfreimann, tiwei.bie

Hello everyone,

This RFC implements packed ring support in virtio driver.

Some simple functional tests have been done with Jason's
packed ring implementation in vhost:

https://lkml.org/lkml/2018/4/23/12

Both of ping and netperf worked as expected (with EVENT_IDX
disabled). But there are below known issues:

1. Reloading the guest driver will break the Tx/Rx;
2. Zeroing the flags when detaching a used desc will
   break the guest -> host path.

Some simple functional tests have also been done with
Wei's packed ring implementation in QEMU:

http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg00342.html

Both of ping and netperf worked as expected (with EVENT_IDX
disabled). Reloading the guest driver also worked as expected.

TODO:
- Refinements (for code and commit log) and bug fixes;
- Discuss/fix/test EVENT_IDX support;
- Test devices other than net;

RFC v2 -> RFC v3:
- Split into small patches (Jason);
- Add helper virtqueue_use_indirect() (Jason);
- Just set id for the last descriptor of a list (Jason);
- Calculate the prev in virtqueue_add_packed() (Jason);
- Fix/improve desc suppression code (Jason/MST);
- Refine the code layout for XXX_split/packed and wrappers (MST);
- Fix the comments and API in uapi (MST);
- Remove the BUG_ON() for indirect (Jason);
- Some other refinements and bug fixes;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!

Tiwei Bie (5):
  virtio: add packed ring definitions
  virtio_ring: support creating packed ring
  virtio_ring: add packed ring support
  virtio_ring: add event idx support in packed ring
  virtio_ring: enable packed ring

 drivers/virtio/virtio_ring.c       | 1271 ++++++++++++++++++++++++++++--------
 include/linux/virtio_ring.h        |    8 +-
 include/uapi/linux/virtio_config.h |   12 +-
 include/uapi/linux/virtio_ring.h   |   36 +
 4 files changed, 1049 insertions(+), 278 deletions(-)

-- 
2.11.0

^ permalink raw reply

* [RFC v3 3/5] virtio_ring: add packed ring support
From: Tiwei Bie @ 2018-04-25  5:15 UTC (permalink / raw)
  To: mst, jasowang, virtualization, linux-kernel, netdev
  Cc: wexu, jfreimann, tiwei.bie
In-Reply-To: <20180425051550.24342-1-tiwei.bie@intel.com>

This commit introduces the basic support (without EVENT_IDX)
for packed ring.

Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
---
 drivers/virtio/virtio_ring.c | 444 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 434 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e164822ca66e..0181e93897be 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,7 +58,8 @@
 
 struct vring_desc_state {
 	void *data;			/* Data for callback. */
-	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+	void *indir_desc;		/* Indirect descriptor, if any. */
+	int num;			/* Descriptor list length. */
 };
 
 struct vring_virtqueue {
@@ -142,6 +143,16 @@ struct vring_virtqueue {
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
+					  unsigned int total_sg)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold */
+	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
+}
+
 /*
  * Modern virtio devices have feature bits to specify whether they need a
  * quirk and bypass the IOMMU. If not there, just use the DMA API.
@@ -327,9 +338,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 
 	head = vq->free_head;
 
-	/* If the host supports indirect descriptor tables, and we have multiple
-	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+	if (virtqueue_use_indirect(_vq, total_sg))
 		desc = alloc_indirect_split(_vq, total_sg, gfp);
 	else {
 		desc = NULL;
@@ -741,6 +750,49 @@ static inline unsigned vring_size_packed(unsigned int num, unsigned long align)
 		& ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
 }
 
+static void vring_unmap_one_packed(const struct vring_virtqueue *vq,
+				   struct vring_packed_desc *desc)
+{
+	u16 flags;
+
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return;
+
+	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
+				 virtio32_to_cpu(vq->vq.vdev, desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
+			       virtio32_to_cpu(vq->vq.vdev, desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
+						       unsigned int total_sg,
+						       gfp_t gfp)
+{
+	struct vring_packed_desc *desc;
+
+	/*
+	 * We require lowmem mappings for the descriptors because
+	 * otherwise virt_to_phys will give us bogus addresses in the
+	 * virtqueue.
+	 */
+	gfp &= ~__GFP_HIGHMEM;
+
+	desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
+
+	return desc;
+}
+
 static inline int virtqueue_add_packed(struct virtqueue *_vq,
 				       struct scatterlist *sgs[],
 				       unsigned int total_sg,
@@ -750,47 +802,419 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 				       void *ctx,
 				       gfp_t gfp)
 {
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
+	__virtio16 uninitialized_var(head_flags), flags;
+	int head, wrap_counter;
+	bool indirect;
+
+	START_USE(vq);
+
+	BUG_ON(data == NULL);
+	BUG_ON(ctx && vq->indirect);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+#ifdef DEBUG
+	{
+		ktime_t now = ktime_get();
+
+		/* No kick or get, with .1 second between?  Warn. */
+		if (vq->last_add_time_valid)
+			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+					    > 100);
+		vq->last_add_time = now;
+		vq->last_add_time_valid = true;
+	}
+#endif
+
+	BUG_ON(total_sg == 0);
+
+	head = vq->next_avail_idx;
+	wrap_counter = vq->wrap_counter;
+
+	if (virtqueue_use_indirect(_vq, total_sg))
+		desc = alloc_indirect_packed(_vq, total_sg, gfp);
+	else {
+		desc = NULL;
+		WARN_ON_ONCE(total_sg > vq->vring_packed.num && !vq->indirect);
+	}
+
+	if (desc) {
+		/* Use a single buffer which doesn't continue */
+		indirect = true;
+		/* Set up rest to use this indirect table. */
+		i = 0;
+		descs_used = 1;
+	} else {
+		indirect = false;
+		desc = vq->vring_packed.desc;
+		i = head;
+		descs_used = total_sg;
+	}
+
+	if (vq->vq.num_free < descs_used) {
+		pr_debug("Can't add buf len %i - avail = %i\n",
+			 descs_used, vq->vq.num_free);
+		/* FIXME: for historical reasons, we force a notify here if
+		 * there are outgoing parts to the buffer.  Presumably the
+		 * host should service the ring ASAP. */
+		if (out_sgs)
+			vq->notify(&vq->vq);
+		if (indirect)
+			kfree(desc);
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
+					(n < out_sgs ? 0 : VRING_DESC_F_WRITE) |
+					VRING_DESC_F_AVAIL(vq->wrap_counter) |
+					VRING_DESC_F_USED(!vq->wrap_counter));
+			if (!indirect && i == head)
+				head_flags = flags;
+			else
+				desc[i].flags = flags;
+
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
+			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
+			i++;
+			if (!indirect && i >= vq->vring_packed.num) {
+				i = 0;
+				vq->wrap_counter ^= 1;
+			}
+		}
+	}
+
+	prev = (i > 0 ? i : vq->vring_packed.num) - 1;
+	desc[prev].id = cpu_to_virtio32(_vq->vdev, head);
+
+	/* Last one doesn't continue. */
+	if (total_sg == 1)
+		head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
+	else
+		desc[prev].flags &= cpu_to_virtio16(_vq->vdev,
+						~VRING_DESC_F_NEXT);
+
+	if (indirect) {
+		/* Now that the indirect table is filled in, map it. */
+		dma_addr_t addr = vring_map_single(
+			vq, desc, total_sg * sizeof(struct vring_packed_desc),
+			DMA_TO_DEVICE);
+		if (vring_mapping_error(vq, addr))
+			goto unmap_release;
+
+		head_flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT |
+					     VRING_DESC_F_AVAIL(wrap_counter) |
+					     VRING_DESC_F_USED(!wrap_counter));
+		vq->vring_packed.desc[head].addr = cpu_to_virtio64(_vq->vdev,
+								   addr);
+		vq->vring_packed.desc[head].len = cpu_to_virtio32(_vq->vdev,
+				total_sg * sizeof(struct vring_packed_desc));
+		vq->vring_packed.desc[head].id = cpu_to_virtio32(_vq->vdev,
+								 head);
+	}
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
+	/* Update free pointer */
+	if (indirect) {
+		n = head + 1;
+		if (n >= vq->vring_packed.num) {
+			n = 0;
+			vq->wrap_counter ^= 1;
+		}
+		vq->next_avail_idx = n;
+	} else
+		vq->next_avail_idx = i;
+
+	/* Store token and indirect buffer state. */
+	vq->desc_state[head].num = descs_used;
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = desc;
+	else
+		vq->desc_state[head].indir_desc = ctx;
+
+	/* A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available. */
+	virtio_wmb(vq->weak_barriers);
+	vq->vring_packed.desc[head].flags = head_flags;
+	vq->num_added += descs_used;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_one_packed(vq, &desc[i]);
+		i++;
+		if (!indirect && i >= vq->vring_packed.num)
+			i = 0;
+	}
+
+	vq->wrap_counter = wrap_counter;
+
+	if (indirect)
+		kfree(desc);
+
+	END_USE(vq);
 	return -EIO;
 }
 
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 {
-	return false;
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 flags;
+	bool needs_kick;
+	u32 snapshot;
+
+	START_USE(vq);
+	/* We need to expose the new flags value before checking notification
+	 * suppressions. */
+	virtio_mb(vq->weak_barriers);
+
+	snapshot = *(u32 *)vq->vring_packed.device;
+	flags = cpu_to_virtio16(_vq->vdev, snapshot >> 16) & 0x3;
+
+#ifdef DEBUG
+	if (vq->last_add_time_valid) {
+		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
+					      vq->last_add_time)) > 100);
+	}
+	vq->last_add_time_valid = false;
+#endif
+
+	needs_kick = (flags != VRING_EVENT_F_DISABLE);
+	END_USE(vq);
+	return needs_kick;
+}
+
+static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
+			      void **ctx)
+{
+	struct vring_packed_desc *desc;
+	unsigned int i, j;
+
+	/* Clear data ptr. */
+	vq->desc_state[head].data = NULL;
+
+	i = head;
+
+	for (j = 0; j < vq->desc_state[head].num; j++) {
+		desc = &vq->vring_packed.desc[i];
+		vring_unmap_one_packed(vq, desc);
+		i++;
+		if (i >= vq->vring_packed.num)
+			i = 0;
+	}
+
+	vq->vq.num_free += vq->desc_state[head].num;
+
+	if (vq->indirect) {
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		desc = vq->desc_state[head].indir_desc;
+		if (!desc)
+			return;
+
+		len = virtio32_to_cpu(vq->vq.vdev,
+				      vq->vring_packed.desc[head].len);
+
+		for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
+			vring_unmap_one_packed(vq, &desc[j]);
+
+		kfree(desc);
+		vq->desc_state[head].indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = vq->desc_state[head].indir_desc;
+	}
 }
 
 static inline bool more_used_packed(const struct vring_virtqueue *vq)
 {
-	return false;
+	u16 last_used, flags;
+	bool avail, used;
+
+	if (vq->vq.num_free == vq->vring_packed.num)
+		return false;
+
+	last_used = vq->last_used_idx;
+	flags = virtio16_to_cpu(vq->vq.vdev,
+				vq->vring_packed.desc[last_used].flags);
+	avail = flags & VRING_DESC_F_AVAIL(1);
+	used = flags & VRING_DESC_F_USED(1);
+
+	return avail == used;
 }
 
 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 					  unsigned int *len,
 					  void **ctx)
 {
-	return NULL;
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	void *ret;
+	unsigned int i;
+	u16 last_used;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
+	if (!more_used_packed(vq)) {
+		pr_debug("No more buffers in queue\n");
+		END_USE(vq);
+		return NULL;
+	}
+
+	/* Only get used elements after they have been exposed by host. */
+	virtio_rmb(vq->weak_barriers);
+
+	last_used = vq->last_used_idx;
+	i = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].id);
+	*len = virtio32_to_cpu(_vq->vdev, vq->vring_packed.desc[last_used].len);
+
+	if (unlikely(i >= vq->vring_packed.num)) {
+		BAD_RING(vq, "id %u out of range\n", i);
+		return NULL;
+	}
+	if (unlikely(!vq->desc_state[i].data)) {
+		BAD_RING(vq, "id %u is not a head!\n", i);
+		return NULL;
+	}
+
+	/* detach_buf_packed clears data, so grab it now. */
+	ret = vq->desc_state[i].data;
+	detach_buf_packed(vq, i, ctx);
+
+	vq->last_used_idx += vq->desc_state[i].num;
+	if (vq->last_used_idx >= vq->vring_packed.num)
+		vq->last_used_idx -= vq->vring_packed.num;
+
+#ifdef DEBUG
+	vq->last_add_time_valid = false;
+#endif
+
+	END_USE(vq);
+	return ret;
 }
 
 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
 {
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->event_flags_shadow != VRING_EVENT_F_DISABLE) {
+		vq->event_flags_shadow = VRING_EVENT_F_DISABLE;
+		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+							vq->event_flags_shadow);
+	}
 }
 
 static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
 {
-	return 0;
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+
+	if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
+		virtio_wmb(vq->weak_barriers);
+		vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+							vq->event_flags_shadow);
+	}
+
+	END_USE(vq);
+	return vq->last_used_idx;
 }
 
 static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned last_used_idx)
 {
-	return false;
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	bool avail, used;
+	u16 flags;
+
+	virtio_mb(vq->weak_barriers);
+	flags = virtio16_to_cpu(vq->vq.vdev,
+			vq->vring_packed.desc[last_used_idx].flags);
+	avail = flags & VRING_DESC_F_AVAIL(1);
+	used = flags & VRING_DESC_F_USED(1);
+	return avail == used;
 }
 
 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
 {
-	return false;
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+
+	if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
+		virtio_wmb(vq->weak_barriers);
+		vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
+		vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+							vq->event_flags_shadow);
+	}
+
+	if (more_used_packed(vq)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
 }
 
 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
 {
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->vring_packed.num; i++) {
+		if (!vq->desc_state[i].data)
+			continue;
+		/* detach_buf clears data, so grab it now. */
+		buf = vq->desc_state[i].data;
+		detach_buf_packed(vq, i, NULL);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->vq.num_free != vq->vring_packed.num);
+
+	END_USE(vq);
 	return NULL;
 }
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH bpf-next 4/4] nfp: bpf: optimize comparisons to negative constants
From: Jakub Kicinski @ 2018-04-25  4:22 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180425042239.27869-1-jakub.kicinski@netronome.com>

Comparison instruction requires a subtraction.  If the constant
is negative we are more likely to fit it into a NFP instruction
directly if we change the sign and use addition.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 42 +++++++++++++++++++--------
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  6 +++-
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 5b8da7a67df4..65f0791cae0c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1247,6 +1247,7 @@ static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
 	const struct jmp_code_map *code;
+	enum alu_op alu_op, carry_op;
 	u8 reg = insn->dst_reg * 2;
 	swreg tmp_reg;
 
@@ -1254,19 +1255,22 @@ static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	if (!code)
 		return -EINVAL;
 
+	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
+	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
+
 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
 	if (!code->swap)
-		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
 	else
-		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
 
 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
 	if (!code->swap)
 		emit_alu(nfp_prog, reg_none(),
-			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+			 reg_a(reg + 1), carry_op, tmp_reg);
 	else
 		emit_alu(nfp_prog, reg_none(),
-			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+			 tmp_reg, carry_op, reg_a(reg + 1));
 
 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
@@ -2745,21 +2749,35 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
 			continue;
 
 		if (BPF_CLASS(insn.code) != BPF_ALU &&
-		    BPF_CLASS(insn.code) != BPF_ALU64)
+		    BPF_CLASS(insn.code) != BPF_ALU64 &&
+		    BPF_CLASS(insn.code) != BPF_JMP)
 			continue;
 		if (BPF_SRC(insn.code) != BPF_K)
 			continue;
 		if (insn.imm >= 0)
 			continue;
 
-		if (BPF_OP(insn.code) == BPF_ADD)
-			insn.code = BPF_CLASS(insn.code) | BPF_SUB;
-		else if (BPF_OP(insn.code) == BPF_SUB)
-			insn.code = BPF_CLASS(insn.code) | BPF_ADD;
-		else
-			continue;
+		if (BPF_CLASS(insn.code) == BPF_JMP) {
+			switch (BPF_OP(insn.code)) {
+			case BPF_JGE:
+			case BPF_JSGE:
+			case BPF_JLT:
+			case BPF_JSLT:
+				meta->jump_neg_op = true;
+				break;
+			default:
+				continue;
+			}
+		} else {
+			if (BPF_OP(insn.code) == BPF_ADD)
+				insn.code = BPF_CLASS(insn.code) | BPF_SUB;
+			else if (BPF_OP(insn.code) == BPF_SUB)
+				insn.code = BPF_CLASS(insn.code) | BPF_ADD;
+			else
+				continue;
 
-		meta->insn.code = insn.code | BPF_K;
+			meta->insn.code = insn.code | BPF_K;
+		}
 
 		meta->insn.imm = -insn.imm;
 	}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 4981c8944ca3..68b5d326483d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -236,6 +236,7 @@ struct nfp_bpf_reg_state {
  * @xadd_over_16bit: 16bit immediate is not guaranteed
  * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
+ * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
@@ -264,7 +265,10 @@ struct nfp_insn_meta {
 			bool xadd_maybe_16bit;
 		};
 		/* jump */
-		struct nfp_insn_meta *jmp_dst;
+		struct {
+			struct nfp_insn_meta *jmp_dst;
+			bool jump_neg_op;
+		};
 		/* function calls */
 		struct {
 			u32 func_id;
-- 
2.16.2

^ permalink raw reply related

* [PATCH bpf-next 3/4] nfp: bpf: tabularize generations of compare operations
From: Jakub Kicinski @ 2018-04-25  4:22 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180425042239.27869-1-jakub.kicinski@netronome.com>

There are quite a few compare instructions now, use a table
to translate BPF instruction code to NFP instruction parameters
instead of parameterizing helpers.  This saves LOC and makes
future extensions easier.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 168 ++++++++++-----------------
 1 file changed, 61 insertions(+), 107 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index a5590988fc69..5b8da7a67df4 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1214,45 +1214,79 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	return 0;
 }
 
-static int
-wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-	    enum br_mask br_mask, bool swap)
+static const struct jmp_code_map {
+	enum br_mask br_mask;
+	bool swap;
+} jmp_code_map[] = {
+	[BPF_JGT >> 4]	= { BR_BLO, true },
+	[BPF_JGE >> 4]	= { BR_BHS, false },
+	[BPF_JLT >> 4]	= { BR_BLO, false },
+	[BPF_JLE >> 4]	= { BR_BHS, true },
+	[BPF_JSGT >> 4]	= { BR_BLT, true },
+	[BPF_JSGE >> 4]	= { BR_BGE, false },
+	[BPF_JSLT >> 4]	= { BR_BLT, false },
+	[BPF_JSLE >> 4]	= { BR_BGE, true },
+};
+
+static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
+{
+	unsigned int op;
+
+	op = BPF_OP(meta->insn.code) >> 4;
+	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
+	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
+		      !jmp_code_map[op].br_mask,
+		      "no code found for jump instruction"))
+		return NULL;
+
+	return &jmp_code_map[op];
+}
+
+static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
+	const struct jmp_code_map *code;
 	u8 reg = insn->dst_reg * 2;
 	swreg tmp_reg;
 
+	code = nfp_jmp_code_get(meta);
+	if (!code)
+		return -EINVAL;
+
 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
-	if (!swap)
+	if (!code->swap)
 		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
 	else
 		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
 
 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
-	if (!swap)
+	if (!code->swap)
 		emit_alu(nfp_prog, reg_none(),
 			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
 	else
 		emit_alu(nfp_prog, reg_none(),
 			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
 
-	emit_br(nfp_prog, br_mask, insn->off, 0);
+	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
 	return 0;
 }
 
-static int
-wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-	    enum br_mask br_mask, bool swap)
+static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
+	const struct jmp_code_map *code;
 	u8 areg, breg;
 
+	code = nfp_jmp_code_get(meta);
+	if (!code)
+		return -EINVAL;
+
 	areg = insn->dst_reg * 2;
 	breg = insn->src_reg * 2;
 
-	if (swap) {
+	if (code->swap) {
 		areg ^= breg;
 		breg ^= areg;
 		areg ^= breg;
@@ -1261,7 +1295,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
 	emit_alu(nfp_prog, reg_none(),
 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
-	emit_br(nfp_prog, br_mask, insn->off, 0);
+	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
 	return 0;
 }
@@ -2283,46 +2317,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
-static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
-}
-
 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -2392,46 +2386,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
-static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
-}
-
 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
@@ -2520,25 +2474,25 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
-	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
-	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
-	[BPF_JMP | BPF_JLT | BPF_K] =	jlt_imm,
-	[BPF_JMP | BPF_JLE | BPF_K] =	jle_imm,
-	[BPF_JMP | BPF_JSGT | BPF_K] =  jsgt_imm,
-	[BPF_JMP | BPF_JSGE | BPF_K] =  jsge_imm,
-	[BPF_JMP | BPF_JSLT | BPF_K] =  jslt_imm,
-	[BPF_JMP | BPF_JSLE | BPF_K] =  jsle_imm,
+	[BPF_JMP | BPF_JGT | BPF_K] =	cmp_imm,
+	[BPF_JMP | BPF_JGE | BPF_K] =	cmp_imm,
+	[BPF_JMP | BPF_JLT | BPF_K] =	cmp_imm,
+	[BPF_JMP | BPF_JLE | BPF_K] =	cmp_imm,
+	[BPF_JMP | BPF_JSGT | BPF_K] =  cmp_imm,
+	[BPF_JMP | BPF_JSGE | BPF_K] =  cmp_imm,
+	[BPF_JMP | BPF_JSLT | BPF_K] =  cmp_imm,
+	[BPF_JMP | BPF_JSLE | BPF_K] =  cmp_imm,
 	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
 	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
 	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
-	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
-	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
-	[BPF_JMP | BPF_JLT | BPF_X] =	jlt_reg,
-	[BPF_JMP | BPF_JLE | BPF_X] =	jle_reg,
-	[BPF_JMP | BPF_JSGT | BPF_X] =  jsgt_reg,
-	[BPF_JMP | BPF_JSGE | BPF_X] =  jsge_reg,
-	[BPF_JMP | BPF_JSLT | BPF_X] =  jslt_reg,
-	[BPF_JMP | BPF_JSLE | BPF_X] =  jsle_reg,
+	[BPF_JMP | BPF_JGT | BPF_X] =	cmp_reg,
+	[BPF_JMP | BPF_JGE | BPF_X] =	cmp_reg,
+	[BPF_JMP | BPF_JLT | BPF_X] =	cmp_reg,
+	[BPF_JMP | BPF_JLE | BPF_X] =	cmp_reg,
+	[BPF_JMP | BPF_JSGT | BPF_X] =  cmp_reg,
+	[BPF_JMP | BPF_JSGE | BPF_X] =  cmp_reg,
+	[BPF_JMP | BPF_JSLT | BPF_X] =  cmp_reg,
+	[BPF_JMP | BPF_JSLE | BPF_X] =  cmp_reg,
 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
 	[BPF_JMP | BPF_CALL] =		call,
-- 
2.16.2

^ permalink raw reply related

* [PATCH bpf-next 2/4] nfp: bpf: optimize add/sub of a negative constant
From: Jakub Kicinski @ 2018-04-25  4:22 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180425042239.27869-1-jakub.kicinski@netronome.com>

NFP instruction set can fit small immediates into the instruction.
Negative integers, however, will never fit because they will have
highest bit set.  If we swap the ALU op between ADD and SUB and
negate the constant we have a better chance of fitting small negative
integers into the instruction itself and saving one or two cycles.

immed[gprB_21, 0xfffffffc]
alu[gprA_4, gprA_4, +, gprB_21], gpr_wrboth
immed[gprB_21, 0xffffffff]
alu[gprA_5, gprA_5, +carry, gprB_21], gpr_wrboth

now becomes:

alu[gprA_4, gprA_4, -, 4], gpr_wrboth
alu[gprA_5, gprA_5, -carry, 0], gpr_wrboth

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 35 ++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 9cc638718272..a5590988fc69 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2777,6 +2777,40 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
 	}
 }
 
+/* abs(insn.imm) will fit better into unrestricted reg immediate -
+ * convert add/sub of a negative number into a sub/add of a positive one.
+ */
+static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		struct bpf_insn insn = meta->insn;
+
+		if (meta->skip)
+			continue;
+
+		if (BPF_CLASS(insn.code) != BPF_ALU &&
+		    BPF_CLASS(insn.code) != BPF_ALU64)
+			continue;
+		if (BPF_SRC(insn.code) != BPF_K)
+			continue;
+		if (insn.imm >= 0)
+			continue;
+
+		if (BPF_OP(insn.code) == BPF_ADD)
+			insn.code = BPF_CLASS(insn.code) | BPF_SUB;
+		else if (BPF_OP(insn.code) == BPF_SUB)
+			insn.code = BPF_CLASS(insn.code) | BPF_ADD;
+		else
+			continue;
+
+		meta->insn.code = insn.code | BPF_K;
+
+		meta->insn.imm = -insn.imm;
+	}
+}
+
 /* Remove masking after load since our load guarantees this is not needed */
 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
 {
@@ -3212,6 +3246,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
 
+	nfp_bpf_opt_neg_add_sub(nfp_prog);
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
 	nfp_bpf_opt_ldst_gather(nfp_prog);
-- 
2.16.2

^ permalink raw reply related

* [PATCH bpf-next 1/4] nfp: bpf: remove double space
From: Jakub Kicinski @ 2018-04-25  4:22 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski
In-Reply-To: <20180425042239.27869-1-jakub.kicinski@netronome.com>

Whitespace cleanup - remove double space.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 29b4e5f8c102..9cc638718272 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1400,7 +1400,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	if (!load_lm_ptr)
 		return 0;
 
-	emit_csr_wr(nfp_prog, stack_reg(nfp_prog),  NFP_CSR_ACT_LM_ADDR0);
+	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
 	wrp_nops(nfp_prog, 3);
 
 	return 0;
-- 
2.16.2

^ permalink raw reply related

* [PATCH bpf-next 0/4] nfp: bpf: optimize negative sums
From: Jakub Kicinski @ 2018-04-25  4:22 UTC (permalink / raw)
  To: alexei.starovoitov, daniel; +Cc: oss-drivers, netdev, Jakub Kicinski

Hi!

This set adds an optimization run to the NFP jit to turn ADD and SUB
instructions with negative immediate into the opposite operation with
a positive immediate.  NFP can fit small immediates into the instructions
but it can't ever fit negative immediates.  Addition of small negative
immediates is quite common in BPF programs for stack address calculations,
therefore this optimization gives us non-negligible savings in instruction
count (up to 4%).


Jakub Kicinski (4):
  nfp: bpf: remove double space
  nfp: bpf: optimize add/sub of a negative constant
  nfp: bpf: tabularize generations of compare operations
  nfp: bpf: optimize comparisons to negative constants

 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 231 +++++++++++++-------------
 drivers/net/ethernet/netronome/nfp/bpf/main.h |   6 +-
 2 files changed, 124 insertions(+), 113 deletions(-)

-- 
2.16.2

^ permalink raw reply

* [net-next:master 42/70] drivers/net/usb/lan78xx.c:1651:37-38: WARNING: Use ARRAY_SIZE
From: kbuild test robot @ 2018-04-25  4:16 UTC (permalink / raw)
  To: Raghuram Chary J; +Cc: kbuild-all, netdev

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master
head:   c749fa181bd5848be78691d23168ec61ce691b95
commit: 496218656f9857d801512efdec1d609ebbe8a83b [42/70] lan78xx: Add support to dump lan78xx registers


coccinelle warnings: (new ones prefixed by >>)

>> drivers/net/usb/lan78xx.c:1651:37-38: WARNING: Use ARRAY_SIZE

vim +1651 drivers/net/usb/lan78xx.c

  1641	
  1642	static void
  1643	lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
  1644			 void *buf)
  1645	{
  1646		u32 *data = buf;
  1647		int i, j;
  1648		struct lan78xx_net *dev = netdev_priv(netdev);
  1649	
  1650		/* Read Device/MAC registers */
> 1651		for (i = 0; i < (sizeof(lan78xx_regs) / sizeof(u32)); i++)
  1652			lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]);
  1653	
  1654		if (!netdev->phydev)
  1655			return;
  1656	
  1657		/* Read PHY registers */
  1658		for (j = 0; j < 32; i++, j++)
  1659			data[i] = phy_read(netdev->phydev, j);
  1660	}
  1661	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* [PATCH net-next 4/4] nfp: flower: ignore duplicate cb requests for same rule
From: Jakub Kicinski @ 2018-04-25  4:17 UTC (permalink / raw)
  To: davem; +Cc: netdev, oss-drivers, John Hurley
In-Reply-To: <20180425041704.26882-1-jakub.kicinski@netronome.com>

From: John Hurley <john.hurley@netronome.com>

If a flower rule has a repr both as ingress and egress port then 2
callbacks may be generated for the same rule request.

Add an indicator to each flow as to whether or not it was added from an
ingress registered cb. If so then ignore add/del/stat requests to it from
an egress cb.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  1 +
 .../net/ethernet/netronome/nfp/flower/offload.c    | 23 +++++++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 9e6804bc9b40..733ff53cc601 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -194,6 +194,7 @@ struct nfp_fl_payload {
 	char *unmasked_data;
 	char *mask_data;
 	char *action_data;
+	bool ingress_offload;
 };
 
 struct nfp_fl_stats_frame {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index bdc82e11a31e..70ec9d821b91 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -345,7 +345,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 }
 
 static struct nfp_fl_payload *
-nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress)
 {
 	struct nfp_fl_payload *flow_pay;
 
@@ -371,6 +371,8 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
 	flow_pay->meta.flags = 0;
 	spin_lock_init(&flow_pay->lock);
 
+	flow_pay->ingress_offload = !egress;
+
 	return flow_pay;
 
 err_free_mask:
@@ -402,8 +404,20 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_fl_payload *flow_pay;
 	struct nfp_fl_key_ls *key_layer;
+	struct net_device *ingr_dev;
 	int err;
 
+	ingr_dev = egress ? NULL : netdev;
+	flow_pay = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+					      NFP_FL_STATS_CTX_DONT_CARE);
+	if (flow_pay) {
+		/* Ignore as duplicate if it has been added by different cb. */
+		if (flow_pay->ingress_offload && egress)
+			return 0;
+		else
+			return -EOPNOTSUPP;
+	}
+
 	key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
 	if (!key_layer)
 		return -ENOMEM;
@@ -413,7 +427,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	if (err)
 		goto err_free_key_ls;
 
-	flow_pay = nfp_flower_allocate_new(key_layer);
+	flow_pay = nfp_flower_allocate_new(key_layer, egress);
 	if (!flow_pay) {
 		err = -ENOMEM;
 		goto err_free_key_ls;
@@ -485,7 +499,7 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
 					      NFP_FL_STATS_CTX_DONT_CARE);
 	if (!nfp_flow)
-		return -ENOENT;
+		return egress ? 0 : -ENOENT;
 
 	err = nfp_modify_flow_metadata(app, nfp_flow);
 	if (err)
@@ -534,6 +548,9 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
 	if (!nfp_flow)
 		return -EINVAL;
 
+	if (nfp_flow->ingress_offload && egress)
+		return 0;
+
 	spin_lock_bh(&nfp_flow->lock);
 	tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes,
 			      nfp_flow->stats.pkts, nfp_flow->stats.used);
-- 
2.16.2

^ permalink raw reply related

* [PATCH net-next 3/4] nfp: flower: support offloading multiple rules with same cookie
From: Jakub Kicinski @ 2018-04-25  4:17 UTC (permalink / raw)
  To: davem; +Cc: netdev, oss-drivers, John Hurley
In-Reply-To: <20180425041704.26882-1-jakub.kicinski@netronome.com>

From: John Hurley <john.hurley@netronome.com>

When multiple netdevs are attached to a tc offload block and register for
callbacks, a rule added to the block will be propogated to all netdevs.
Previously these were detected as duplicates (based on cookie) and
rejected. Modify the rule nfp lookup function to optionally include an
ingress netdev and a host context along with the cookie value when
searching for a rule. When a new rule is passed to the driver, the netdev
the rule is to be attached to is considered when searching for dublicates.
When a stats update is received from HW, the host context is used
alongside the cookie to map to the correct host rule.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  8 +++++--
 .../net/ethernet/netronome/nfp/flower/metadata.c   | 20 +++++++++-------
 .../net/ethernet/netronome/nfp/flower/offload.c    | 27 ++++++++++++++++------
 3 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c67e1b54c614..9e6804bc9b40 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -47,6 +47,7 @@
 struct net_device;
 struct nfp_app;
 
+#define NFP_FL_STATS_CTX_DONT_CARE	cpu_to_be32(0xffffffff)
 #define NFP_FL_STATS_ENTRY_RS		BIT(20)
 #define NFP_FL_STATS_ELEM_RS		4
 #define NFP_FL_REPEATED_HASH_MAX	BIT(17)
@@ -189,6 +190,7 @@ struct nfp_fl_payload {
 	spinlock_t lock; /* lock stats */
 	struct nfp_fl_stats stats;
 	__be32 nfp_tun_ipv4_addr;
+	struct net_device *ingress_dev;
 	char *unmasked_data;
 	char *mask_data;
 	char *action_data;
@@ -216,12 +218,14 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
 			      struct nfp_fl_payload *nfp_flow);
 int nfp_compile_flow_metadata(struct nfp_app *app,
 			      struct tc_cls_flower_offload *flow,
-			      struct nfp_fl_payload *nfp_flow);
+			      struct nfp_fl_payload *nfp_flow,
+			      struct net_device *netdev);
 int nfp_modify_flow_metadata(struct nfp_app *app,
 			     struct nfp_fl_payload *nfp_flow);
 
 struct nfp_fl_payload *
-nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
+			   struct net_device *netdev, __be32 host_ctx);
 struct nfp_fl_payload *
 nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index db977cf8e933..21668aa435e8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -99,14 +99,18 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
 
 /* Must be called with either RTNL or rcu_read_lock */
 struct nfp_fl_payload *
-nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie)
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
+			   struct net_device *netdev, __be32 host_ctx)
 {
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_fl_payload *flower_entry;
 
 	hash_for_each_possible_rcu(priv->flow_table, flower_entry, link,
 				   tc_flower_cookie)
-		if (flower_entry->tc_flower_cookie == tc_flower_cookie)
+		if (flower_entry->tc_flower_cookie == tc_flower_cookie &&
+		    (!netdev || flower_entry->ingress_dev == netdev) &&
+		    (host_ctx == NFP_FL_STATS_CTX_DONT_CARE ||
+		     flower_entry->meta.host_ctx_id == host_ctx))
 			return flower_entry;
 
 	return NULL;
@@ -121,13 +125,11 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
 	flower_cookie = be64_to_cpu(stats->stats_cookie);
 
 	rcu_read_lock();
-	nfp_flow = nfp_flower_search_fl_table(app, flower_cookie);
+	nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL,
+					      stats->stats_con_id);
 	if (!nfp_flow)
 		goto exit_rcu_unlock;
 
-	if (nfp_flow->meta.host_ctx_id != stats->stats_con_id)
-		goto exit_rcu_unlock;
-
 	spin_lock(&nfp_flow->lock);
 	nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count);
 	nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count);
@@ -317,7 +319,8 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
 
 int nfp_compile_flow_metadata(struct nfp_app *app,
 			      struct tc_cls_flower_offload *flow,
-			      struct nfp_fl_payload *nfp_flow)
+			      struct nfp_fl_payload *nfp_flow,
+			      struct net_device *netdev)
 {
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_fl_payload *check_entry;
@@ -348,7 +351,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
 	nfp_flow->stats.bytes = 0;
 	nfp_flow->stats.used = jiffies;
 
-	check_entry = nfp_flower_search_fl_table(app, flow->cookie);
+	check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev,
+						 NFP_FL_STATS_CTX_DONT_CARE);
 	if (check_entry) {
 		if (nfp_release_stats_entry(app, stats_cxt))
 			return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 114d2ab02a38..bdc82e11a31e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -419,6 +419,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 		goto err_free_key_ls;
 	}
 
+	flow_pay->ingress_dev = egress ? NULL : netdev;
+
 	err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay,
 					    tun_type);
 	if (err)
@@ -428,7 +430,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	if (err)
 		goto err_destroy_flow;
 
-	err = nfp_compile_flow_metadata(app, flow, flow_pay);
+	err = nfp_compile_flow_metadata(app, flow, flow_pay,
+					flow_pay->ingress_dev);
 	if (err)
 		goto err_destroy_flow;
 
@@ -462,6 +465,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
  * @app:	Pointer to the APP handle
  * @netdev:	netdev structure.
  * @flow:	TC flower classifier offload structure
+ * @egress:	Netdev is the egress dev.
  *
  * Removes a flow from the repeated hash structure and clears the
  * action payload.
@@ -470,13 +474,16 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
  */
 static int
 nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
-		       struct tc_cls_flower_offload *flow)
+		       struct tc_cls_flower_offload *flow, bool egress)
 {
 	struct nfp_port *port = nfp_port_from_netdev(netdev);
 	struct nfp_fl_payload *nfp_flow;
+	struct net_device *ingr_dev;
 	int err;
 
-	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+	ingr_dev = egress ? NULL : netdev;
+	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+					      NFP_FL_STATS_CTX_DONT_CARE);
 	if (!nfp_flow)
 		return -ENOENT;
 
@@ -505,7 +512,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 /**
  * nfp_flower_get_stats() - Populates flow stats obtained from hardware.
  * @app:	Pointer to the APP handle
+ * @netdev:	Netdev structure.
  * @flow:	TC flower classifier offload structure
+ * @egress:	Netdev is the egress dev.
  *
  * Populates a flow statistics structure which which corresponds to a
  * specific flow.
@@ -513,11 +522,15 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
  * Return: negative value on error, 0 if stats populated successfully.
  */
 static int
-nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow)
+nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
+		     struct tc_cls_flower_offload *flow, bool egress)
 {
 	struct nfp_fl_payload *nfp_flow;
+	struct net_device *ingr_dev;
 
-	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie);
+	ingr_dev = egress ? NULL : netdev;
+	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
+					      NFP_FL_STATS_CTX_DONT_CARE);
 	if (!nfp_flow)
 		return -EINVAL;
 
@@ -543,9 +556,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
 	case TC_CLSFLOWER_REPLACE:
 		return nfp_flower_add_offload(app, netdev, flower, egress);
 	case TC_CLSFLOWER_DESTROY:
-		return nfp_flower_del_offload(app, netdev, flower);
+		return nfp_flower_del_offload(app, netdev, flower, egress);
 	case TC_CLSFLOWER_STATS:
-		return nfp_flower_get_stats(app, flower);
+		return nfp_flower_get_stats(app, netdev, flower, egress);
 	}
 
 	return -EOPNOTSUPP;
-- 
2.16.2

^ permalink raw reply related

* [PATCH net-next 2/4] nfp: print PCIe link bandwidth on probe
From: Jakub Kicinski @ 2018-04-25  4:17 UTC (permalink / raw)
  To: davem; +Cc: netdev, oss-drivers, Jakub Kicinski
In-Reply-To: <20180425041704.26882-1-jakub.kicinski@netronome.com>

To aid debugging of performance issues caused by limited PCIe
bandwidth print the PCIe link information on probe.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index cd678323bacb..a0e336bd1d85 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -1330,6 +1330,7 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev)
 	/*  Finished with card initialization. */
 	dev_info(&pdev->dev,
 		 "Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n");
+	pcie_print_link_status(pdev);
 
 	nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
 	if (!nfp) {
-- 
2.16.2

^ permalink raw reply related

* [PATCH net-next 1/4] nfp: reset local locks on init
From: Jakub Kicinski @ 2018-04-25  4:17 UTC (permalink / raw)
  To: davem; +Cc: netdev, oss-drivers, Jakub Kicinski
In-Reply-To: <20180425041704.26882-1-jakub.kicinski@netronome.com>

NFP locks record the owner when held, for PCIe devices the owner
ID will be the PCIe link number.  When driver loads it should scan
known locks and if they indicate that they are held by local
endpoint but the driver doesn't hold them - release them.

Locks can be left taken for instance when kernel gets kexec-ed or
after a crash.  Management FW tries to clean up stale locks too,
but it currently depends on PCIe link going down which doesn't
always happen.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c      |  5 ++
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h   |  2 +
 .../net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h   |  2 +
 .../net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c | 45 +++++++++++++++++
 .../ethernet/netronome/nfp/nfpcore/nfp_resource.c  | 59 ++++++++++++++++++++++
 5 files changed, 113 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 6a3e3231e111..c8aef9508109 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -489,6 +489,10 @@ static int nfp_pci_probe(struct pci_dev *pdev,
 		goto err_disable_msix;
 	}
 
+	err = nfp_resource_table_init(pf->cpp);
+	if (err)
+		goto err_cpp_free;
+
 	pf->hwinfo = nfp_hwinfo_read(pf->cpp);
 
 	dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n",
@@ -551,6 +555,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
 	vfree(pf->dumpspec);
 err_hwinfo_free:
 	kfree(pf->hwinfo);
+err_cpp_free:
 	nfp_cpp_free(pf->cpp);
 err_disable_msix:
 	destroy_workqueue(pf->wq);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
index ced62d112aa2..f44d0a857314 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
@@ -94,6 +94,8 @@ int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask,
 /* MAC Statistics Accumulator */
 #define NFP_RESOURCE_MAC_STATISTICS	"mac.stat"
 
+int nfp_resource_table_init(struct nfp_cpp *cpp);
+
 struct nfp_resource *
 nfp_resource_acquire(struct nfp_cpp *cpp, const char *name);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index c8f2c064cce3..4e19add1c539 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -295,6 +295,8 @@ void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex);
 int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex);
+int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target,
+			  unsigned long long address);
 
 /**
  * nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
index cb28ac03e4ca..c88bf673cb76 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c
@@ -59,6 +59,11 @@ static u32 nfp_mutex_unlocked(u16 interface)
 	return (u32)interface << 16 | 0x0000;
 }
 
+static u32 nfp_mutex_owner(u32 val)
+{
+	return val >> 16;
+}
+
 static bool nfp_mutex_is_locked(u32 val)
 {
 	return (val & 0xffff) == 0x000f;
@@ -351,3 +356,43 @@ int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex)
 
 	return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL;
 }
+
+/**
+ * nfp_cpp_mutex_reclaim() - Unlock mutex if held by local endpoint
+ * @cpp:	NFP CPP handle
+ * @target:	NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU)
+ * @address:	Offset into the address space of the NFP CPP target ID
+ *
+ * Release lock if held by local system.  Extreme care is advised, call only
+ * when no local lock users can exist.
+ *
+ * Return:      0 if the lock was OK, 1 if locked by us, -errno on invalid mutex
+ */
+int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target,
+			  unsigned long long address)
+{
+	const u32 mur = NFP_CPP_ID(target, 3, 0);	/* atomic_read */
+	const u32 muw = NFP_CPP_ID(target, 4, 0);	/* atomic_write */
+	u16 interface = nfp_cpp_interface(cpp);
+	int err;
+	u32 tmp;
+
+	err = nfp_cpp_mutex_validate(interface, &target, address);
+	if (err)
+		return err;
+
+	/* Check lock */
+	err = nfp_cpp_readl(cpp, mur, address, &tmp);
+	if (err < 0)
+		return err;
+
+	if (nfp_mutex_is_unlocked(tmp) || nfp_mutex_owner(tmp) != interface)
+		return 0;
+
+	/* Bust the lock */
+	err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_unlocked(interface));
+	if (err < 0)
+		return err;
+
+	return 1;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
index 7e14725055c7..2dd89dba9311 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c
@@ -338,3 +338,62 @@ u64 nfp_resource_size(struct nfp_resource *res)
 {
 	return res->size;
 }
+
+/**
+ * nfp_resource_table_init() - Run initial checks on the resource table
+ * @cpp:	NFP CPP handle
+ *
+ * Start-of-day init procedure for resource table.  Must be called before
+ * any local resource table users may exist.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int nfp_resource_table_init(struct nfp_cpp *cpp)
+{
+	struct nfp_cpp_mutex *dev_mutex;
+	int i, err;
+
+	err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET,
+				    NFP_RESOURCE_TBL_BASE);
+	if (err < 0) {
+		nfp_err(cpp, "Error: failed to reclaim resource table mutex\n");
+		return err;
+	}
+	if (err)
+		nfp_warn(cpp, "Warning: busted main resource table mutex\n");
+
+	dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET,
+					NFP_RESOURCE_TBL_BASE,
+					NFP_RESOURCE_TBL_KEY);
+	if (!dev_mutex)
+		return -ENOMEM;
+
+	if (nfp_cpp_mutex_lock(dev_mutex)) {
+		nfp_err(cpp, "Error: failed to claim resource table mutex\n");
+		nfp_cpp_mutex_free(dev_mutex);
+		return -EINVAL;
+	}
+
+	/* Resource 0 is the dev_mutex, start from 1 */
+	for (i = 1; i < NFP_RESOURCE_TBL_ENTRIES; i++) {
+		u64 addr = NFP_RESOURCE_TBL_BASE +
+			sizeof(struct nfp_resource_entry) * i;
+
+		err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, addr);
+		if (err < 0) {
+			nfp_err(cpp,
+				"Error: failed to reclaim resource %d mutex\n",
+				i);
+			goto err_unlock;
+		}
+		if (err)
+			nfp_warn(cpp, "Warning: busted resource %d mutex\n", i);
+	}
+
+	err = 0;
+err_unlock:
+	nfp_cpp_mutex_unlock(dev_mutex);
+	nfp_cpp_mutex_free(dev_mutex);
+
+	return err;
+}
-- 
2.16.2

^ permalink raw reply related

* [PATCH net-next 0/4] nfp: flower tc block support and nfp PCI updates
From: Jakub Kicinski @ 2018-04-25  4:17 UTC (permalink / raw)
  To: davem; +Cc: netdev, oss-drivers, Jakub Kicinski

This series improves the nfp PCIe code by making use of the new
pcie_print_link_status() helper and resetting NFP locks when
driver loads.  This can help us avoid lock ups after host crashes
and is rebooted with PCIe reset or when kdump kernel is loaded.

The flower changes come from John, he says:

This patchset fixes offload issues when multiple repr netdevs are bound to
a tc block and filter rules added. Previously the rule would be passed to
the reprs and would be rejected in all but the first as the cookie value
will indicate a duplicate. The first patch extends the flow lookup
function to consider both host context and ingress netdev along with the
cookie value. This means that a rule with a given cookie can exist
multiple times assuming the ingress netdev is different. The host context
ensures that stats from fw are associated with the correct instance of the
rule.

The second patch protects against rejecting add/del/stat messages when a
rule has a repr as both an ingress port and an egress dev. In such cases a
callback can be triggered twice (once for ingress and once for egress)
and can lead to duplicate rule detection or incorrect double calls.


Jakub Kicinski (2):
  nfp: reset local locks on init
  nfp: print PCIe link bandwidth on probe

John Hurley (2):
  nfp: flower: support offloading multiple rules with same cookie
  nfp: flower: ignore duplicate cb requests for same rule

 drivers/net/ethernet/netronome/nfp/flower/main.h   |  9 +++-
 .../net/ethernet/netronome/nfp/flower/metadata.c   | 20 +++++---
 .../net/ethernet/netronome/nfp/flower/offload.c    | 50 ++++++++++++++----
 drivers/net/ethernet/netronome/nfp/nfp_main.c      |  5 ++
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h   |  2 +
 .../ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c  |  1 +
 .../net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h   |  2 +
 .../net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c | 45 +++++++++++++++++
 .../ethernet/netronome/nfp/nfpcore/nfp_resource.c  | 59 ++++++++++++++++++++++
 9 files changed, 173 insertions(+), 20 deletions(-)

-- 
2.16.2

^ permalink raw reply

* Re: [PATCH] net: ethernet: ave: fix ave_start_xmit()'s return type
From: Kunihiko Hayashi @ 2018-04-25  1:46 UTC (permalink / raw)
  To: Luc Van Oostenryck
  Cc: linux-kernel, David S. Miller, Jassi Brar, Andrew Lunn, netdev
In-Reply-To: <20180424131731.4458-1-luc.vanoostenryck@gmail.com>

Hi,

On Tue, 24 Apr 2018 15:17:25 +0200
Luc Van Oostenryck <luc.vanoostenryck@gmail.com> wrote:

> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, but the implementation in this
> driver returns an 'int'.
> 
> Fix this by returning 'netdev_tx_t' in this driver too.
> 
> Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>

Thanks for fixing.
I think it's preferable to add 'Fixes'.

Fixes: 4c270b55a5af ("net: ethernet: socionext: add AVE ethernet driver")
Acked-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>

Thank you,

> ---
>  drivers/net/ethernet/socionext/sni_ave.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
> index 0b3b7a460..95625bca1 100644
> --- a/drivers/net/ethernet/socionext/sni_ave.c
> +++ b/drivers/net/ethernet/socionext/sni_ave.c
> @@ -1360,7 +1360,7 @@ static int ave_stop(struct net_device *ndev)
>  	return 0;
>  }
>  
> -static int ave_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> +static netdev_tx_t ave_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  {
>  	struct ave_private *priv = netdev_priv(ndev);
>  	u32 proc_idx, done_idx, ndesc, cmdsts;
> -- 
> 2.17.0

---
Best Regards,
Kunihiko Hayashi

^ permalink raw reply

* Re: [PATCH net-next v2 0/7] net: Extend availability of PHY statistics
From: David Miller @ 2018-04-25  2:13 UTC (permalink / raw)
  To: f.fainelli; +Cc: netdev, andrew, vivien.didelot, cphealy, nikita.yoush
In-Reply-To: <6fbe8b7c-8e8a-d0d7-0df3-9163b9b5284e@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 24 Apr 2018 17:35:09 -0700

> David, sorry looks like there will be a v3, the last patch introduces a
> possible problem with bcm_sf2 which uses b53_common as a library, will
> respin later.

Ok, no problem.

^ permalink raw reply

* [PATCH] [PATCH bpf-next] samples/bpf/bpf_load.c: remove redundant ret assignment in bpf_load_program()
From: Wang Sheng-Hui @ 2018-04-25  2:07 UTC (permalink / raw)
  To: ast, daniel, netdev

2 redundant ret assignments removded:
* 'ret = 1' before the logic 'if (data_maps)', and if any errors jump to
  label 'done'. No 'ret = 1' needed before the error jump.
* After the '/* load programs */' part, if everything goes well, then
  the BPF code will be loaded and 'ret' set to 0 by load_and_attach().
  If something goes wrong, 'ret' set to none-O, the redundant 'ret = 0'
  after the for clause will make the error skipped.
  For example, if some BPF code cannot provide supported program types
  in ELF SEC("unknown"), the for clause will not call load_and_attach()
  to load the BPF code. 1 should be returned to callees instead of 0.

Signed-off-by: Wang Sheng-Hui <shhuiw@foxmail.com>
---
 samples/bpf/bpf_load.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index bebe4188b4b3..feca497d6afd 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -549,7 +549,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		if (nr_maps < 0) {
 			printf("Error: Failed loading ELF maps (errno:%d):%s\n",
 			       nr_maps, strerror(-nr_maps));
-			ret = 1;
 			goto done;
 		}
 		if (load_maps(map_data, nr_maps, fixup_map))
@@ -615,7 +614,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		}
 	}
 
-	ret = 0;
 done:
 	close(fd);
 	return ret;
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH 2/5] ide: kill ide_toggle_bounce
From: Jens Axboe @ 2018-04-25  2:02 UTC (permalink / raw)
  To: Christoph Hellwig, iommu, linux-arch, linux-block, linux-ide,
	linux-scsi, netdev
  Cc: David S. Miller, linux-kernel
In-Reply-To: <20180424181625.22410-3-hch@lst.de>

On 4/24/18 12:16 PM, Christoph Hellwig wrote:
> ide_toggle_bounce did select various strange block bounce limits, including
> not bouncing at all as soon as an iommu is present in the system.  Given
> that the dma_map routines now handle any required bounce buffering except
> for ISA DMA, and the ide code already must handle either ISA DMA or highmem
> at least for iommu equipped systems we can get rid of the block layer
> bounce limit setting entirely.

Pretty sure I was the one to add this code, when highmem page IO was
enabled about two decades ago...

Outside of DMA, the issue was that the PIO code could not handle
highmem. That's not the case anymore, so this should be fine.

Reviewed-by: Jens Axboe <axboe@kernel.dk>

-- 
Jens Axboe

^ permalink raw reply

* Do You Need A Hand?
From: Mavis Wanczyk @ 2018-04-25  0:25 UTC (permalink / raw)





I am Mavis Wanczyk i know you may not know me but am the latest  
largest US Powerball lottery winner of $758.7m just of recent, am  
currently helping out  people in need of financial assistance, i know  
it's hard to believe anything on the internet,  so if you don't need  
my help please don't reply to this message.

Regards
Mavis Wanczyk

^ permalink raw reply

* general protection fault in smc_set_keepalive
From: syzbot @ 2018-04-25  1:40 UTC (permalink / raw)
  To: davem, linux-kernel, linux-s390, netdev, syzkaller-bugs, ubraun

Hello,

syzbot hit the following crash on net-next commit
9c20b9372fbaf6f7d4c05f5f925806a7928f0c73 (Tue Apr 24 03:08:41 2018 +0000)
net: fib_rules: fix l3mdev netlink attr processing
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=cf9012c597c8379d535c

So far this crash happened 2 times on net-next.
C reproducer: https://syzkaller.appspot.com/x/repro.c?id=4775309383565312
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=4978230683500544
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=4770663504019456
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=-2918904850634584293
compiler: gcc (GCC) 8.0.1 20180413 (experimental)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+cf9012c597c8379d535c@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed. See footer for  
details.
If you forward the report, please keep this part and the footer.

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
    (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 4455 Comm: syz-executor060 Not tainted 4.17.0-rc1+ #17
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
RIP: 0010:smc_set_keepalive+0x4e/0xd0 net/smc/af_smc.c:59
RSP: 0018:ffff8801ced8fa68 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff85d72bcb
RDX: 0000000000000004 RSI: ffffffff873f0a94 RDI: 0000000000000020
RBP: ffff8801ced8fa80 R08: ffff8801b67e44c0 R09: 0000000000000006
R10: ffff8801b67e44c0 R11: 0000000000000000 R12: ffff8801b6bff7c0
R13: 0000000000000001 R14: 0000000000000003 R15: ffff8801aee2b540
FS:  00000000009e4880(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000040 CR3: 00000001b6e75000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  sock_setsockopt+0x14e2/0x1fe0 net/core/sock.c:801
  __sys_setsockopt+0x2df/0x390 net/socket.c:1899
  __do_sys_setsockopt net/socket.c:1914 [inline]
  __se_sys_setsockopt net/socket.c:1911 [inline]
  __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911
  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x43fcf9
RSP: 002b:00007ffe62977a78 EFLAGS: 00000217 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fcf9
RDX: 0000000000000009 RSI: 0000000000000001 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 0000000000000004 R09: 00000000004002c8
R10: 0000000020000040 R11: 0000000000000217 R12: 0000000000401620
R13: 00000000004016b0 R14: 0000000000000000 R15: 0000000000000000
Code: ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 78 48 8b 9b 50 04 00 00 48  
b8 00 00 00 00 00 fc ff df 48 8d 7b 20 48 89 fa 48 c1 ea 03 <80> 3c 02 00  
75 6b 48 b8 00 00 00 00 00 fc ff df 48 8b 5b 20 48
RIP: smc_set_keepalive+0x4e/0xd0 net/smc/af_smc.c:59 RSP: ffff8801ced8fa68
---[ end trace a76f9ed0fb111068 ]---


---
This bug is generated by a dumb bot. It may contain errors.
See https://goo.gl/tpsmEJ for details.
Direct all questions to syzkaller@googlegroups.com.

syzbot will keep track of this bug report.
If you forgot to add the Reported-by tag, once the fix for this bug is  
merged
into any tree, please reply to this email with:
#syz fix: exact-commit-title
If you want to test a patch for this bug, please reply with:
#syz test: git://repo/address.git branch
and provide the patch inline or as an attachment.
To mark this as a duplicate of another syzbot report, please reply with:
#syz dup: exact-subject-of-another-report
If it's a one-off invalid bug report, please reply with:
#syz invalid
Note: if the crash happens again, it will cause creation of a new bug  
report.
Note: all commands must start from beginning of the line in the email body.

^ permalink raw reply

* [PATCH net-next] net: rules: Move l3mdev attribute validation to a helper
From: David Ahern @ 2018-04-25  1:36 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

Move the check on FRA_L3MDEV attribute to helper to improve the
readability of fib_nl2rule. Update the extack messages to be
clear when the configuration option is disabled versus an invalid
value has been passed.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/core/fib_rules.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 2271c80fd967..126ffc5bc630 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -454,6 +454,27 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
 	return NULL;
 }
 
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+			      struct netlink_ext_ack *extack)
+{
+	nlrule->l3mdev = nla_get_u8(nla);
+	if (nlrule->l3mdev != 1) {
+		NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute");
+		return -1;
+	}
+
+	return 0;
+}
+#else
+static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule,
+			      struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel");
+	return -1;
+}
+#endif
+
 static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		       struct netlink_ext_ack *extack,
 		       struct fib_rules_ops *ops,
@@ -536,16 +557,9 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
 
 	err = -EINVAL;
-	if (tb[FRA_L3MDEV]) {
-#ifdef CONFIG_NET_L3_MASTER_DEV
-		nlrule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
-		if (nlrule->l3mdev != 1)
-#endif
-		{
-			NL_SET_ERR_MSG(extack, "Invalid l3mdev");
-			goto errout_free;
-		}
-	}
+	if (tb[FRA_L3MDEV] &&
+	    fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
+		goto errout_free;
 
 	nlrule->action = frh->action;
 	nlrule->flags = frh->flags;
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH] net: phy: allow scanning busses with missing phys
From: Florian Fainelli @ 2018-04-25  1:09 UTC (permalink / raw)
  To: Alexandre Belloni, Andrew Lunn
  Cc: David S . Miller, Allan Nielsen, Thomas Petazzoni, netdev,
	linux-kernel
In-Reply-To: <20180424160904.32457-1-alexandre.belloni@bootlin.com>

On 04/24/2018 09:09 AM, Alexandre Belloni wrote:
> Some MDIO busses will error out when trying to read a phy address with no
> phy present at that address. In that case, probing the bus will fail
> because __mdiobus_register() is scanning the bus for all possible phys
> addresses.
> 
> In case MII_PHYSID1 returns -EIO or -ENODEV, consider there is no phy at
> this address and set the phy ID to 0xffffffff which is then properly
> handled in get_phy_device().
> 
> Suggested-by: Andrew Lunn <andrew@lunn.ch>
> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
-- 
Florian

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox