Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH] drm/virtio: call drm_plane_cleanup() at destroy phase
From: Gustavo Padovan @ 2016-12-12 19:35 UTC (permalink / raw)
  To: dri-devel
  Cc: David Airlie, open list, Gustavo Padovan,
	open list:VIRTIO GPU DRIVER

From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

virtio was missing this call to clean up core plane usage.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/gpu/drm/virtio/virtgpu_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index cb75f06..05022ef 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -44,6 +44,7 @@ static const uint32_t virtio_gpu_cursor_formats[] = {
 
 static void virtio_gpu_plane_destroy(struct drm_plane *plane)
 {
+	drm_plane_cleanup(plane);
 	kfree(plane);
 }
 
-- 
2.5.5

^ permalink raw reply related

* [RFC 1/5] drm/virtio: add virtio_gpu_alloc_fence()
From: Gustavo Padovan @ 2016-12-12 20:48 UTC (permalink / raw)
  To: dri-devel
  Cc: David Airlie, open list, open list:VIRTIO GPU DRIVER, robdclark,
	Gustavo Padovan

From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

Refactor fence creation to remove the potential allocation failure from
the cmd_submit and atomic_commit paths. Now the fence should be allocated
first and just after we should proceed with the rest of the execution.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h   | 17 +++++++------
 drivers/gpu/drm/virtio/virtgpu_fence.c | 36 +++++++++++++++++---------
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 32 ++++++++++++++++++++---
 drivers/gpu/drm/virtio/virtgpu_plane.c | 46 ++++++++++++++++++++++++++++++----
 drivers/gpu/drm/virtio/virtgpu_vq.c    | 16 ++++++------
 5 files changed, 111 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 08906c8..806c98b 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -127,6 +127,7 @@ struct virtio_gpu_framebuffer {
 	int x1, y1, x2, y2; /* dirty rect */
 	spinlock_t dirty_lock;
 	uint32_t hw_res_handle;
+	struct virtio_gpu_fence *fence;
 };
 #define to_virtio_gpu_framebuffer(x) \
 	container_of(x, struct virtio_gpu_framebuffer, base)
@@ -268,7 +269,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
 					uint32_t resource_id, uint64_t offset,
 					__le32 width, __le32 height,
 					__le32 x, __le32 y,
-					struct virtio_gpu_fence **fence);
+					struct virtio_gpu_fence *fence);
 void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev,
 				   uint32_t resource_id,
 				   uint32_t x, uint32_t y,
@@ -280,7 +281,7 @@ void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev,
 int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
 			     struct virtio_gpu_object *obj,
 			     uint32_t resource_id,
-			     struct virtio_gpu_fence **fence);
+			     struct virtio_gpu_fence *fence);
 int virtio_gpu_attach_status_page(struct virtio_gpu_device *vgdev);
 int virtio_gpu_detach_status_page(struct virtio_gpu_device *vgdev);
 void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev,
@@ -304,21 +305,21 @@ void virtio_gpu_cmd_context_detach_resource(struct virtio_gpu_device *vgdev,
 					    uint32_t resource_id);
 void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 			   void *data, uint32_t data_size,
-			   uint32_t ctx_id, struct virtio_gpu_fence **fence);
+			   uint32_t ctx_id, struct virtio_gpu_fence *fence);
 void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
 					  uint32_t resource_id, uint32_t ctx_id,
 					  uint64_t offset, uint32_t level,
 					  struct virtio_gpu_box *box,
-					  struct virtio_gpu_fence **fence);
+					  struct virtio_gpu_fence *fence);
 void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev,
 					uint32_t resource_id, uint32_t ctx_id,
 					uint64_t offset, uint32_t level,
 					struct virtio_gpu_box *box,
-					struct virtio_gpu_fence **fence);
+					struct virtio_gpu_fence *fence);
 void
 virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev,
 				  struct virtio_gpu_resource_create_3d *rc_3d,
-				  struct virtio_gpu_fence **fence);
+				  struct virtio_gpu_fence *fence);
 void virtio_gpu_ctrl_ack(struct virtqueue *vq);
 void virtio_gpu_cursor_ack(struct virtqueue *vq);
 void virtio_gpu_fence_ack(struct virtqueue *vq);
@@ -345,9 +346,11 @@ void virtio_gpu_ttm_fini(struct virtio_gpu_device *vgdev);
 int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma);
 
 /* virtio_gpu_fence.c */
+struct virtio_gpu_fence *virtio_gpu_fence_alloc(
+					struct virtio_gpu_device *vgdev);
 int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
-			  struct virtio_gpu_fence **fence);
+			  struct virtio_gpu_fence *fence);
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
 				    u64 last_seq);
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index 2335352..4dbfe44 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -74,28 +74,40 @@ static const struct dma_fence_ops virtio_fence_ops = {
 	.timeline_value_str  = virtio_timeline_value_str,
 };
 
+struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev)
+{
+	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
+	struct virtio_gpu_fence *fence;
+	unsigned long irq_flags;
+
+	fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_ATOMIC);
+	if (!fence)
+		return NULL;
+
+	spin_lock_irqsave(&drv->lock, irq_flags);
+	fence->drv = drv;
+	fence->seq = ++drv->sync_seq;
+	dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock,
+		       drv->context, fence->seq);
+	spin_unlock_irqrestore(&drv->lock, irq_flags);
+
+	return fence;
+}
+
 int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
-			  struct virtio_gpu_fence **fence)
+			  struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
 	unsigned long irq_flags;
 
-	*fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_ATOMIC);
-	if ((*fence) == NULL)
-		return -ENOMEM;
-
 	spin_lock_irqsave(&drv->lock, irq_flags);
-	(*fence)->drv = drv;
-	(*fence)->seq = ++drv->sync_seq;
-	dma_fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
-		       drv->context, (*fence)->seq);
-	dma_fence_get(&(*fence)->f);
-	list_add_tail(&(*fence)->node, &drv->fences);
+	dma_fence_get(&fence->f);
+	list_add_tail(&fence->node, &drv->fences);
 	spin_unlock_irqrestore(&drv->lock, irq_flags);
 
 	cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE);
-	cmd_hdr->fence_id = cpu_to_le64((*fence)->seq);
+	cmd_hdr->fence_id = cpu_to_le64(fence->seq);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 61f3a96..da281103 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -164,8 +164,15 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 		ret = PTR_ERR(buf);
 		goto out_unresv;
 	}
+
+	fence = virtio_gpu_fence_alloc(vgdev);
+	if (!fence) {
+		kfree(buf);
+		ret = -ENOMEM;
+		goto out_unresv;
+	}
 	virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
-			      vfpriv->ctx_id, &fence);
+			      vfpriv->ctx_id, fence);
 
 	ttm_eu_fence_buffer_objects(&ticket, &validate_list, &fence->f);
 
@@ -281,8 +288,14 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data,
 		rc_3d.nr_samples = cpu_to_le32(rc->nr_samples);
 		rc_3d.flags = cpu_to_le32(rc->flags);
 
+		fence = virtio_gpu_fence_alloc(vgdev);
+		if (!fence) {
+			ret = -ENOMEM;
+			goto fail_unref;
+		}
+
 		virtio_gpu_cmd_resource_create_3d(vgdev, &rc_3d, NULL);
-		ret = virtio_gpu_object_attach(vgdev, qobj, res_id, &fence);
+		ret = virtio_gpu_object_attach(vgdev, qobj, res_id, fence);
 		if (ret) {
 			ttm_eu_backoff_reservation(&ticket, &validate_list);
 			goto fail_unref;
@@ -376,10 +389,16 @@ static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev,
 		goto out_unres;
 
 	convert_to_hw_box(&box, &args->box);
+
+	fence = virtio_gpu_fence_alloc(vgdev);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unres;
+	}
 	virtio_gpu_cmd_transfer_from_host_3d
 		(vgdev, qobj->hw_res_handle,
 		 vfpriv->ctx_id, offset, args->level,
-		 &box, &fence);
+		 &box, fence);
 	reservation_object_add_excl_fence(qobj->tbo.resv,
 					  &fence->f);
 
@@ -425,10 +444,15 @@ static int virtio_gpu_transfer_to_host_ioctl(struct drm_device *dev, void *data,
 			(vgdev, qobj->hw_res_handle, offset,
 			 box.w, box.h, box.x, box.y, NULL);
 	} else {
+		fence = virtio_gpu_fence_alloc(vgdev);
+		if (!fence) {
+			ret = -ENOMEM;
+			goto out_unres;
+		}
 		virtio_gpu_cmd_transfer_to_host_3d
 			(vgdev, qobj->hw_res_handle,
 			 vfpriv ? vfpriv->ctx_id : 0, offset,
-			 args->level, &box, &fence);
+			 args->level, &box, fence);
 		reservation_object_add_excl_fence(qobj->tbo.resv,
 						  &fence->f);
 		dma_fence_put(&fence->f);
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 05022ef..77415e5 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -115,6 +115,41 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane,
 				      plane->state->src_h >> 16);
 }
 
+static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane,
+					struct drm_plane_state *new_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct virtio_gpu_framebuffer *vgfb;
+	struct virtio_gpu_object *bo;
+
+	if (!new_state->fb)
+		return 0;
+
+	vgfb = to_virtio_gpu_framebuffer(new_state->fb);
+	bo = gem_to_virtio_gpu_obj(vgfb->obj);
+	if (bo && bo->dumb && (plane->state->fb != new_state->fb)) {
+		vgfb->fence = virtio_gpu_fence_alloc(vgdev);
+		if (!vgfb->fence)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void virtio_gpu_cursor_cleanup_fb(struct drm_plane *plane,
+					 struct drm_plane_state *old_state)
+{
+	struct virtio_gpu_framebuffer *vgfb;
+
+	if (!plane->state->fb)
+		return;
+
+	vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
+	if (vgfb->fence)
+		dma_fence_put(&vgfb->fence->f);
+}
+
 static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 					   struct drm_plane_state *old_state)
 {
@@ -122,7 +157,6 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 	struct virtio_gpu_device *vgdev = dev->dev_private;
 	struct virtio_gpu_output *output = NULL;
 	struct virtio_gpu_framebuffer *vgfb;
-	struct virtio_gpu_fence *fence = NULL;
 	struct virtio_gpu_object *bo = NULL;
 	uint32_t handle;
 	int ret = 0;
@@ -148,13 +182,13 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
 			(vgdev, handle, 0,
 			 cpu_to_le32(plane->state->crtc_w),
 			 cpu_to_le32(plane->state->crtc_h),
-			 0, 0, &fence);
+			 0, 0, vgfb->fence);
 		ret = virtio_gpu_object_reserve(bo, false);
 		if (!ret) {
 			reservation_object_add_excl_fence(bo->tbo.resv,
-							  &fence->f);
-			dma_fence_put(&fence->f);
-			fence = NULL;
+							  &vgfb->fence->f);
+			dma_fence_put(&vgfb->fence->f);
+			vgfb->fence = NULL;
 			virtio_gpu_object_unreserve(bo);
 			virtio_gpu_object_wait(bo, false);
 		}
@@ -196,6 +230,8 @@ static const struct drm_plane_helper_funcs virtio_gpu_primary_helper_funcs = {
 };
 
 static const struct drm_plane_helper_funcs virtio_gpu_cursor_helper_funcs = {
+	.prepare_fb		= virtio_gpu_cursor_prepare_fb,
+	.cleanup_fb		= virtio_gpu_cursor_cleanup_fb,
 	.atomic_check		= virtio_gpu_plane_atomic_check,
 	.atomic_update		= virtio_gpu_cursor_plane_update,
 };
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 974f941..e7c3e8d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -350,7 +350,7 @@ static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
 static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev,
 					       struct virtio_gpu_vbuffer *vbuf,
 					       struct virtio_gpu_ctrl_hdr *hdr,
-					       struct virtio_gpu_fence **fence)
+					       struct virtio_gpu_fence *fence)
 {
 	struct virtqueue *vq = vgdev->ctrlq.vq;
 	int rc;
@@ -515,7 +515,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
 					uint32_t resource_id, uint64_t offset,
 					__le32 width, __le32 height,
 					__le32 x, __le32 y,
-					struct virtio_gpu_fence **fence)
+					struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_transfer_to_host_2d *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -539,7 +539,7 @@ virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev,
 				       uint32_t resource_id,
 				       struct virtio_gpu_mem_entry *ents,
 				       uint32_t nents,
-				       struct virtio_gpu_fence **fence)
+				       struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_resource_attach_backing *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -795,7 +795,7 @@ void virtio_gpu_cmd_context_detach_resource(struct virtio_gpu_device *vgdev,
 void
 virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev,
 				  struct virtio_gpu_resource_create_3d *rc_3d,
-				  struct virtio_gpu_fence **fence)
+				  struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_resource_create_3d *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -814,7 +814,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev,
 					uint32_t resource_id, uint32_t ctx_id,
 					uint64_t offset, uint32_t level,
 					struct virtio_gpu_box *box,
-					struct virtio_gpu_fence **fence)
+					struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_transfer_host_3d *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -836,7 +836,7 @@ void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
 					  uint32_t resource_id, uint32_t ctx_id,
 					  uint64_t offset, uint32_t level,
 					  struct virtio_gpu_box *box,
-					  struct virtio_gpu_fence **fence)
+					  struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_transfer_host_3d *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -856,7 +856,7 @@ void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev,
 
 void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 			   void *data, uint32_t data_size,
-			   uint32_t ctx_id, struct virtio_gpu_fence **fence)
+			   uint32_t ctx_id, struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_cmd_submit *cmd_p;
 	struct virtio_gpu_vbuffer *vbuf;
@@ -877,7 +877,7 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev,
 int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
 			     struct virtio_gpu_object *obj,
 			     uint32_t resource_id,
-			     struct virtio_gpu_fence **fence)
+			     struct virtio_gpu_fence *fence)
 {
 	struct virtio_gpu_mem_entry *ents;
 	struct scatterlist *sg;
-- 
2.5.5

^ permalink raw reply related

* [RFC 2/5] drm/virtio: add uapi for in and out explicit fences
From: Gustavo Padovan @ 2016-12-12 20:48 UTC (permalink / raw)
  To: dri-devel
  Cc: David Airlie, open list, open list:VIRTIO GPU DRIVER, robdclark,
	Gustavo Padovan
In-Reply-To: <1481575710-12535-1-git-send-email-gustavo@padovan.org>

From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

Add a new field called fence_fd that will be used by userspace to send
in-fences to the kernel and receive out-fences created by the kernel.

This uapi enables virtio to take advantage of explicit synchronization of
dma-bufs.

There are two new flags:

* VIRTGPU_EXECBUF_FENCE_FD_IN to be used when passing an in-fence fd.
* VIRTGPU_EXECBUF_FENCE_FD_OUT to be used when requesting an out-fence fd

The execbuffer IOCTL is now read-write to allow the userspace to read the
out-fence.

On error -1 should be returned in the fence_fd field.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c |  2 ++
 include/uapi/drm/virtgpu_drm.h         | 13 ++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index da281103..d164b54 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -113,6 +113,8 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	struct ww_acquire_ctx ticket;
 	void *buf;
 
+	exbuf->fence_fd = -1;
+
 	if (vgdev->has_virgl_3d == false)
 		return -ENOSYS;
 
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index 91a31ff..d1d69be 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -47,6 +47,13 @@ extern "C" {
 #define DRM_VIRTGPU_WAIT     0x08
 #define DRM_VIRTGPU_GET_CAPS  0x09
 
+#define VIRTGPU_EXECBUF_FENCE_FD_IN	0x01
+#define VIRTGPU_EXECBUF_FENCE_FD_OUT	0x02
+#define VIRTGPU_EXECBUF_FLAGS  (\
+		VIRTGPU_EXECBUF_FENCE_FD_IN |\
+		VIRTGPU_EXECBUF_FENCE_FD_OUT |\
+		0)
+
 struct drm_virtgpu_map {
 	__u64 offset; /* use for mmap system call */
 	__u32 handle;
@@ -54,12 +61,12 @@ struct drm_virtgpu_map {
 };
 
 struct drm_virtgpu_execbuffer {
-	__u32		flags;		/* for future use */
+	__u32 flags;
 	__u32 size;
 	__u64 command; /* void* */
 	__u64 bo_handles;
 	__u32 num_bo_handles;
-	__u32 pad;
+	__s32 fence_fd;
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
@@ -136,7 +143,7 @@ struct drm_virtgpu_get_caps {
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
 
 #define DRM_IOCTL_VIRTGPU_EXECBUFFER \
-	DRM_IOW(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\
 		struct drm_virtgpu_execbuffer)
 
 #define DRM_IOCTL_VIRTGPU_GETPARAM \
-- 
2.5.5

^ permalink raw reply related

* [RFC 3/5] drm/virtio: add in-fences support for explicit synchronization
From: Gustavo Padovan @ 2016-12-12 20:48 UTC (permalink / raw)
  To: dri-devel
  Cc: David Airlie, open list, open list:VIRTIO GPU DRIVER, robdclark,
	Gustavo Padovan
In-Reply-To: <1481575710-12535-1-git-send-email-gustavo@padovan.org>

From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

When the execbuf call receives an in-fence it will get the dma_fence
related to that fence fd and wait on it before submitting the draw call.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 41 ++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index d164b54..ac0b4b0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -29,6 +29,7 @@
 #include "virtgpu_drv.h"
 #include <drm/virtgpu_drm.h>
 #include "ttm/ttm_execbuf_util.h"
+#include <linux/sync_file.h>
 
 static void convert_to_hw_box(struct virtio_gpu_box *dst,
 			      const struct drm_virtgpu_3d_box *src)
@@ -111,6 +112,8 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	struct ttm_validate_buffer *buflist = NULL;
 	int i;
 	struct ww_acquire_ctx ticket;
+	struct dma_fence *in_fence = NULL;
+	int in_fence_fd = exbuf->fence_fd;
 	void *buf;
 
 	exbuf->fence_fd = -1;
@@ -118,6 +121,19 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (vgdev->has_virgl_3d == false)
 		return -ENOSYS;
 
+	/* TODO: if the fence is a fence array we need to check
+	 * the context of every single fence */
+	if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_IN) {
+		in_fence = sync_file_get_fence(in_fence_fd);
+		if (!in_fence)
+			return -EINVAL;
+
+		if (in_fence->context == vgdev->fence_drv.context) {
+			dma_fence_put(in_fence);
+			return -EINVAL;
+		}
+	}
+
 	INIT_LIST_HEAD(&validate_list);
 	if (exbuf->num_bo_handles) {
 
@@ -126,26 +142,22 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 		buflist = drm_calloc_large(exbuf->num_bo_handles,
 					   sizeof(struct ttm_validate_buffer));
 		if (!bo_handles || !buflist) {
-			drm_free_large(bo_handles);
-			drm_free_large(buflist);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out_in_fence;
 		}
 
 		user_bo_handles = (void __user *)(uintptr_t)exbuf->bo_handles;
 		if (copy_from_user(bo_handles, user_bo_handles,
 				   exbuf->num_bo_handles * sizeof(uint32_t))) {
 			ret = -EFAULT;
-			drm_free_large(bo_handles);
-			drm_free_large(buflist);
-			return ret;
+			goto out_in_fence;
 		}
 
 		for (i = 0; i < exbuf->num_bo_handles; i++) {
 			gobj = drm_gem_object_lookup(drm_file, bo_handles[i]);
 			if (!gobj) {
-				drm_free_large(bo_handles);
-				drm_free_large(buflist);
-				return -ENOENT;
+				ret = -ENOENT;
+				goto out_in_fence;
 			}
 
 			qobj = gem_to_virtio_gpu_obj(gobj);
@@ -154,6 +166,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 			list_add(&buflist[i].head, &validate_list);
 		}
 		drm_free_large(bo_handles);
+		bo_handles = NULL;
 	}
 
 	ret = virtio_gpu_object_list_validate(&ticket, &validate_list);
@@ -173,6 +186,13 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 		ret = -ENOMEM;
 		goto out_unresv;
 	}
+
+	if (in_fence) {
+		dma_fence_wait(in_fence, true);
+		dma_fence_put(in_fence);
+		in_fence = NULL;
+	}
+
 	virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
 			      vfpriv->ctx_id, fence);
 
@@ -188,7 +208,10 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
 	virtio_gpu_unref_list(&validate_list);
+out_in_fence:
 	drm_free_large(buflist);
+	drm_free_large(bo_handles);
+	dma_fence_put(in_fence);
 	return ret;
 }
 
-- 
2.5.5

^ permalink raw reply related

* [RFC 4/5] drm/virtio: add out-fences support for explicit synchronization
From: Gustavo Padovan @ 2016-12-12 20:48 UTC (permalink / raw)
  To: dri-devel
  Cc: David Airlie, open list, open list:VIRTIO GPU DRIVER, robdclark,
	Gustavo Padovan
In-Reply-To: <1481575710-12535-1-git-send-email-gustavo@padovan.org>

From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

On the out-fence side we get fence returned by the submitted draw call
and attach it to a sync_file and send the sync_file fd to userspace. On
error -1 is returned to userspace.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 51 ++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index ac0b4b0..f441928 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -103,7 +103,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	struct virtio_gpu_device *vgdev = dev->dev_private;
 	struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv;
 	struct drm_gem_object *gobj;
-	struct virtio_gpu_fence *fence;
+	struct virtio_gpu_fence *out_fence;
 	struct virtio_gpu_object *qobj;
 	int ret;
 	uint32_t *bo_handles = NULL;
@@ -113,7 +113,9 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	int i;
 	struct ww_acquire_ctx ticket;
 	struct dma_fence *in_fence = NULL;
+	struct sync_file *sync_file;
 	int in_fence_fd = exbuf->fence_fd;
+	int out_fence_fd = -1;
 	void *buf;
 
 	exbuf->fence_fd = -1;
@@ -134,6 +136,14 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
+	if (exbuf->flags & VIRTGPU_EXECBUF_FENCE_FD_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			goto out_in_fence;
+		}
+	}
+
 	INIT_LIST_HEAD(&validate_list);
 	if (exbuf->num_bo_handles) {
 
@@ -143,21 +153,21 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 					   sizeof(struct ttm_validate_buffer));
 		if (!bo_handles || !buflist) {
 			ret = -ENOMEM;
-			goto out_in_fence;
+			goto out_unused_fd;
 		}
 
 		user_bo_handles = (void __user *)(uintptr_t)exbuf->bo_handles;
 		if (copy_from_user(bo_handles, user_bo_handles,
 				   exbuf->num_bo_handles * sizeof(uint32_t))) {
 			ret = -EFAULT;
-			goto out_in_fence;
+			goto out_unused_fd;
 		}
 
 		for (i = 0; i < exbuf->num_bo_handles; i++) {
 			gobj = drm_gem_object_lookup(drm_file, bo_handles[i]);
 			if (!gobj) {
 				ret = -ENOENT;
-				goto out_in_fence;
+				goto out_unused_fd;
 			}
 
 			qobj = gem_to_virtio_gpu_obj(gobj);
@@ -180,11 +190,22 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 		goto out_unresv;
 	}
 
-	fence = virtio_gpu_fence_alloc(vgdev);
-	if (!fence) {
-		kfree(buf);
+	out_fence = virtio_gpu_fence_alloc(vgdev);
+	if(!out_fence) {
 		ret = -ENOMEM;
-		goto out_unresv;
+		goto out_memdup;
+	}
+
+	if (out_fence_fd >= 0) {
+		sync_file = sync_file_create(dma_fence_get(&out_fence->f));
+		if (!sync_file) {
+			dma_fence_put(&out_fence->f);
+			ret = -ENOMEM;
+			goto out_memdup;
+		}
+
+		exbuf->fence_fd = out_fence_fd;
+		fd_install(out_fence_fd, sync_file->file);
 	}
 
 	if (in_fence) {
@@ -194,23 +215,29 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	}
 
 	virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
-			      vfpriv->ctx_id, fence);
+			      vfpriv->ctx_id, out_fence);
 
-	ttm_eu_fence_buffer_objects(&ticket, &validate_list, &fence->f);
+	ttm_eu_fence_buffer_objects(&ticket, &validate_list, &out_fence->f);
 
 	/* fence the command bo */
 	virtio_gpu_unref_list(&validate_list);
 	drm_free_large(buflist);
-	dma_fence_put(&fence->f);
+	dma_fence_put(&out_fence->f);
 	return 0;
 
+out_memdup:
+	kfree(buf);
 out_unresv:
 	ttm_eu_backoff_reservation(&ticket, &validate_list);
 out_free:
 	virtio_gpu_unref_list(&validate_list);
-out_in_fence:
+out_unused_fd:
 	drm_free_large(buflist);
 	drm_free_large(bo_handles);
+
+	if (out_fence_fd >= 0)
+		put_unused_fd(out_fence_fd);
+out_in_fence:
 	dma_fence_put(in_fence);
 	return ret;
 }
-- 
2.5.5

^ permalink raw reply related

* [RFC 5/5] drm/virtio: bump driver version after explicit synchronization addition
From: Gustavo Padovan @ 2016-12-12 20:48 UTC (permalink / raw)
  To: dri-devel
  Cc: David Airlie, open list, open list:VIRTIO GPU DRIVER, robdclark,
	Gustavo Padovan
In-Reply-To: <1481575710-12535-1-git-send-email-gustavo@padovan.org>

From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

To reflect the (backward compatible) changes in the uabi we are bumping
the driver's version.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 806c98b..b9ab010 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -45,8 +45,8 @@
 #define DRIVER_DATE "0"
 
 #define DRIVER_MAJOR 0
-#define DRIVER_MINOR 0
-#define DRIVER_PATCHLEVEL 1
+#define DRIVER_MINOR 1
+#define DRIVER_PATCHLEVEL 0
 
 /* virtgpu_drm_bus.c */
 int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev);
-- 
2.5.5

^ permalink raw reply related

* Re: [PATCH v6 2/2] crypto: add virtio-crypto driver
From: Michael S. Tsirkin @ 2016-12-12 22:05 UTC (permalink / raw)
  To: Herbert Xu
  Cc: virtio-dev@lists.oasis-open.org, Huangweidong (C),
	Claudio Fontana, Hanweidong (Randy), qemu-devel@nongnu.org,
	Luonengjun, linux-kernel@vger.kernel.org, Wanzongshun (Vincent),
	virtualization@lists.linux-foundation.org, Xuquan (Quan Xu),
	Gonglei (Arei), linux-crypto@vger.kernel.org, stefanha@redhat.com,
	Zhoujian (jay, Euler), longpeng, davem@davemloft.net, Wubin (H)
In-Reply-To: <20161212105407.GA3033@gondor.apana.org.au>

On Mon, Dec 12, 2016 at 06:54:07PM +0800, Herbert Xu wrote:
> On Mon, Dec 12, 2016 at 06:25:12AM +0000, Gonglei (Arei) wrote:
> > Hi, Michael & Herbert
> > 
> > Because the virtio-crypto device emulation had been in QEMU 2.8,
> > would you please merge the virtio-crypto driver for 4.10 if no other
> > comments? If so, Miachel pls ack and/or review the patch, then
> > Herbert will take it (I asked him last week). Thank you!
> > 
> > Ps: Note on 4.10 merge window timing from Linus
> >  https://lkml.org/lkml/2016/12/7/506
> > 
> > Dec 23rd is the deadline for 4.10 merge window.
> 
> Sorry but it's too late for 4.10.  It needed to have been in my
> tree before the merge window opened to make it for this cycle.
> 
> Cheers,


Objections to me merging this? I'm preparing my tree right now.

> -- 
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* RE: [PATCH v6 2/2] crypto: add virtio-crypto driver
From: Gonglei (Arei) @ 2016-12-13  1:13 UTC (permalink / raw)
  To: Michael S. Tsirkin, Herbert Xu
  Cc: virtio-dev@lists.oasis-open.org, Huangweidong (C),
	Claudio Fontana, Hanweidong (Randy), qemu-devel@nongnu.org,
	Luonengjun, linux-kernel@vger.kernel.org, Wanzongshun (Vincent),
	virtualization@lists.linux-foundation.org, Xuquan (Quan Xu),
	linux-crypto@vger.kernel.org, stefanha@redhat.com,
	Zhoujian (jay, Euler), longpeng, davem@davemloft.net, Wubin (H),
	arei.gonglei@hotmail.com
In-Reply-To: <20161212234941-mutt-send-email-mst@kernel.org>

>
> Subject: Re: [PATCH v6 2/2] crypto: add virtio-crypto driver
> 
> On Mon, Dec 12, 2016 at 06:54:07PM +0800, Herbert Xu wrote:
> > On Mon, Dec 12, 2016 at 06:25:12AM +0000, Gonglei (Arei) wrote:
> > > Hi, Michael & Herbert
> > >
> > > Because the virtio-crypto device emulation had been in QEMU 2.8,
> > > would you please merge the virtio-crypto driver for 4.10 if no other
> > > comments? If so, Miachel pls ack and/or review the patch, then
> > > Herbert will take it (I asked him last week). Thank you!
> > >
> > > Ps: Note on 4.10 merge window timing from Linus
> > >  https://lkml.org/lkml/2016/12/7/506
> > >
> > > Dec 23rd is the deadline for 4.10 merge window.
> >
> > Sorry but it's too late for 4.10.  It needed to have been in my
> > tree before the merge window opened to make it for this cycle.
> >
> > Cheers,
> 
> 
> Objections to me merging this? I'm preparing my tree right now.
> 
That's great if so since 4.11 merge window opens 
at least three months later.

Do you agree with it Herbert? Thanks.

Regards,
-Gonglei

^ permalink raw reply

* Re: [PATCH v6 2/2] crypto: add virtio-crypto driver
From: Herbert Xu @ 2016-12-13  3:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: virtio-dev@lists.oasis-open.org, Huangweidong (C),
	Claudio Fontana, Hanweidong (Randy), qemu-devel@nongnu.org,
	Luonengjun, linux-kernel@vger.kernel.org, Wanzongshun (Vincent),
	virtualization@lists.linux-foundation.org, Xuquan (Quan Xu),
	Gonglei (Arei), linux-crypto@vger.kernel.org, stefanha@redhat.com,
	Zhoujian (jay, Euler), longpeng, davem@davemloft.net, Wubin (H)
In-Reply-To: <20161212234941-mutt-send-email-mst@kernel.org>

On Tue, Dec 13, 2016 at 12:05:19AM +0200, Michael S. Tsirkin wrote:
>
> > Sorry but it's too late for 4.10.  It needed to have been in my
> > tree before the merge window opened to make it for this cycle.
> 
> Objections to me merging this? I'm preparing my tree right now.

Sure feel free to merge it through your tree.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH net] virtio-net: correctly enable multiqueue
From: Jason Wang @ 2016-12-13  6:23 UTC (permalink / raw)
  To: netdev, virtualization; +Cc: tytso, Neil Horman, Michael S . Tsirkin

Commit 4490001029012539937ff02778fe6180613fa949 ("virtio-net: enable
multiqueue by default") blindly set the affinity instead of queues
during probe which can cause a mismatch of #queues between guest and
host. This patch fixes it by setting queues.

Reported-by: Theodore Ts'o <tytso@mit.edu>
Tested-by: Theodore Ts'o <tytso@mit.edu>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Fixes: 49000102901 ("virtio-net: enable multiqueue by default")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b425fa1..fe9f772 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1930,7 +1930,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto free_unregister_netdev;
 	}
 
-	virtnet_set_affinity(vi);
+	rtnl_lock();
+	virtnet_set_queues(vi, vi->curr_queue_pairs);
+	rtnl_unlock();
 
 	/* Assume link up if device can't report link status,
 	   otherwise get link status from config. */
-- 
2.7.4

^ permalink raw reply related

* Re: [RFC 1/5] drm/virtio: add virtio_gpu_alloc_fence()
From: Gerd Hoffmann @ 2016-12-13  6:47 UTC (permalink / raw)
  To: Gustavo Padovan
  Cc: David Airlie, open list, dri-devel, open list:VIRTIO GPU DRIVER,
	robdclark, Gustavo Padovan
In-Reply-To: <1481575710-12535-1-git-send-email-gustavo@padovan.org>

  Hi,

> +struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev)
> +{
> +	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
> +	struct virtio_gpu_fence *fence;
> +	unsigned long irq_flags;
> +
> +	fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_ATOMIC);
> +	if (!fence)
> +		return NULL;
> +

> +	spin_lock_irqsave(&drv->lock, irq_flags);
> +	fence->drv = drv;
> +	fence->seq = ++drv->sync_seq;
> +	dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock,
> +		       drv->context, fence->seq);
> +	spin_unlock_irqrestore(&drv->lock, irq_flags);

seq assignment ...

> +
> +	return fence;
> +}
> +
>  int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
>  			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
> -			  struct virtio_gpu_fence **fence)
> +			  struct virtio_gpu_fence *fence)
>  {
>  	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
>  	unsigned long irq_flags;
>  
> -	*fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_ATOMIC);
> -	if ((*fence) == NULL)
> -		return -ENOMEM;
> -
>  	spin_lock_irqsave(&drv->lock, irq_flags);
> -	(*fence)->drv = drv;
> -	(*fence)->seq = ++drv->sync_seq;
> -	dma_fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
> -		       drv->context, (*fence)->seq);

... must stay here.  Otherwise requests can be submitted to the virt
queue with fence sequence numbers out of order.

cheers,
  Gerd

^ permalink raw reply

* Re: [PATCH v4 0/4] vsock: cancel connect packets when failing to connect
From: Stefan Hajnoczi @ 2016-12-13  9:50 UTC (permalink / raw)
  To: Peng Tao
  Cc: kvm, netdev, virtualization, Stefan Hajnoczi, David Miller,
	Jorgen Hansen
In-Reply-To: <1481545269-18104-1-git-send-email-bergwolf@gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 1792 bytes --]

On Mon, Dec 12, 2016 at 08:21:05PM +0800, Peng Tao wrote:
> Currently, if a connect call fails on a signal or timeout (e.g., guest is still
> in the process of starting up), we'll just return to caller and leave the connect
> packet queued and they are sent even though the connection is considered a failure,
> which can confuse applications with unwanted false connect attempt.
> 
> The patchset enables vsock (both host and guest) to cancel queued packets when
> a connect attempt is considered to fail.
> 
> v4 changelog:
>   - drop two unnecessary void * cast
>   - update new callback commnet
> v3 changelog:
>   - define cancel_pkt callback in struct vsock_transport rather than struct virtio_transport
>   - rename virtio_vsock_pkt->vsk to virtio_vsock_pkt->cancel_token
> v2 changelog:
>   - fix queued_replies counting and resume tx/rx when necessary
> 
> Cheers,
> Tao
> 
> Peng Tao (4):
>   vsock: track pkt owner vsock
>   vhost-vsock: add pkt cancel capability
>   vsock: add pkt cancel capability
>   vsock: cancel packets when failing to connect
> 
>  drivers/vhost/vsock.c                   | 41 ++++++++++++++++++++++++++++++++
>  include/linux/virtio_vsock.h            |  2 ++
>  include/net/af_vsock.h                  |  3 +++
>  net/vmw_vsock/af_vsock.c                | 14 +++++++++++
>  net/vmw_vsock/virtio_transport.c        | 42 +++++++++++++++++++++++++++++++++
>  net/vmw_vsock/virtio_transport_common.c |  7 ++++++
>  6 files changed, 109 insertions(+)
> 
> -- 
> 2.7.4
> 
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH v4 2/4] vhost-vsock: add pkt cancel capability
From: Jorgen S. Hansen @ 2016-12-13 10:04 UTC (permalink / raw)
  To: Peng Tao
  Cc: netdev@vger.kernel.org, Stefan Hajnoczi, David Miller,
	kvm@vger.kernel.org, virtualization@lists.linux-foundation.org
In-Reply-To: <1481545269-18104-3-git-send-email-bergwolf@gmail.com>


> On Dec 12, 2016, at 1:21 PM, Peng Tao <bergwolf@gmail.com> wrote:
> 
> To allow canceling all packets of a connection.
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Peng Tao <bergwolf@gmail.com>
> ---
> drivers/vhost/vsock.c  | 41 +++++++++++++++++++++++++++++++++++++++++
> include/net/af_vsock.h |  3 +++
> 2 files changed, 44 insertions(+)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index a504e2e0..fef8808 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -218,6 +218,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
> 	return len;
> }
> 
> +static int
> +vhost_transport_cancel_pkt(struct vsock_sock *vsk)
> +{
> +	struct vhost_vsock *vsock;
> +	struct virtio_vsock_pkt *pkt, *n;
> +	int cnt = 0;
> +	LIST_HEAD(freeme);
> +
> +	/* Find the vhost_vsock according to guest context id  */
> +	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
> +	if (!vsock)
> +		return -ENODEV;
> +
> +	spin_lock_bh(&vsock->send_pkt_list_lock);
> +	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
> +		if (pkt->cancel_token != vsk)
> +			continue;
> +		list_move(&pkt->list, &freeme);
> +	}
> +	spin_unlock_bh(&vsock->send_pkt_list_lock);
> +
> +	list_for_each_entry_safe(pkt, n, &freeme, list) {
> +		if (pkt->reply)
> +			cnt++;
> +		list_del(&pkt->list);
> +		virtio_transport_free_pkt(pkt);
> +	}
> +
> +	if (cnt) {
> +		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
> +		int new_cnt;
> +
> +		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
> +		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
> +			vhost_poll_queue(&tx_vq->poll);
> +	}
> +
> +	return 0;
> +}
> +
> static struct virtio_vsock_pkt *
> vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
> 		      unsigned int out, unsigned int in)
> @@ -664,6 +704,7 @@ static struct virtio_transport vhost_transport = {
> 		.release                  = virtio_transport_release,
> 		.connect                  = virtio_transport_connect,
> 		.shutdown                 = virtio_transport_shutdown,
> +		.cancel_pkt               = vhost_transport_cancel_pkt,
> 
> 		.dgram_enqueue            = virtio_transport_dgram_enqueue,
> 		.dgram_dequeue            = virtio_transport_dgram_dequeue,
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index f275896..f32ed9a 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -100,6 +100,9 @@ struct vsock_transport {
> 	void (*destruct)(struct vsock_sock *);
> 	void (*release)(struct vsock_sock *);
> 
> +	/* Cancel all pending packets sent on vsock. */
> +	int (*cancel_pkt)(struct vsock_sock *vsk);
> +
> 	/* Connections. */
> 	int (*connect)(struct vsock_sock *);
> 
> -- 
> 2.7.4
> 

Reviewed-by: Jorgen Hansen <jhansen@vmware.com>

^ permalink raw reply

* Re: [RFC 1/5] drm/virtio: add virtio_gpu_alloc_fence()
From: Gustavo Padovan @ 2016-12-13 11:53 UTC (permalink / raw)
  To: Gerd Hoffmann
  Cc: David Airlie, open list, dri-devel, open list:VIRTIO GPU DRIVER,
	robdclark, Gustavo Padovan
In-Reply-To: <1481611645.27088.56.camel@redhat.com>

2016-12-13 Gerd Hoffmann <kraxel@redhat.com>:

>   Hi,
> 
> > +struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev)
> > +{
> > +	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
> > +	struct virtio_gpu_fence *fence;
> > +	unsigned long irq_flags;
> > +
> > +	fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_ATOMIC);
> > +	if (!fence)
> > +		return NULL;
> > +
> 
> > +	spin_lock_irqsave(&drv->lock, irq_flags);
> > +	fence->drv = drv;
> > +	fence->seq = ++drv->sync_seq;
> > +	dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock,
> > +		       drv->context, fence->seq);
> > +	spin_unlock_irqrestore(&drv->lock, irq_flags);
> 
> seq assignment ...
> 
> > +
> > +	return fence;
> > +}
> > +
> >  int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
> >  			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
> > -			  struct virtio_gpu_fence **fence)
> > +			  struct virtio_gpu_fence *fence)
> >  {
> >  	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
> >  	unsigned long irq_flags;
> >  
> > -	*fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_ATOMIC);
> > -	if ((*fence) == NULL)
> > -		return -ENOMEM;
> > -
> >  	spin_lock_irqsave(&drv->lock, irq_flags);
> > -	(*fence)->drv = drv;
> > -	(*fence)->seq = ++drv->sync_seq;
> > -	dma_fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
> > -		       drv->context, (*fence)->seq);
> 
> ... must stay here.  Otherwise requests can be submitted to the virt
> queue with fence sequence numbers out of order.

Yes, makes sense. So I'll just leave the kmalloc in there.

Gustavo

^ permalink raw reply

* Re: [PATCH net] virtio-net: correctly enable multiqueue
From: Michael S. Tsirkin @ 2016-12-13 13:56 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, tytso, Neil Horman, virtualization
In-Reply-To: <1481610185-12183-1-git-send-email-jasowang@redhat.com>

On Tue, Dec 13, 2016 at 02:23:05PM +0800, Jason Wang wrote:
> Commit 4490001029012539937ff02778fe6180613fa949 ("virtio-net: enable
> multiqueue by default") blindly set the affinity instead of queues
> during probe which can cause a mismatch of #queues between guest and
> host. This patch fixes it by setting queues.
> 
> Reported-by: Theodore Ts'o <tytso@mit.edu>
> Tested-by: Theodore Ts'o <tytso@mit.edu>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Fixes: 49000102901 ("virtio-net: enable multiqueue by default")
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Acked-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  drivers/net/virtio_net.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index b425fa1..fe9f772 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1930,7 +1930,9 @@ static int virtnet_probe(struct virtio_device *vdev)
>  		goto free_unregister_netdev;
>  	}
>  
> -	virtnet_set_affinity(vi);
> +	rtnl_lock();
> +	virtnet_set_queues(vi, vi->curr_queue_pairs);
> +	rtnl_unlock();
>  
>  	/* Assume link up if device can't report link status,
>  	   otherwise get link status from config. */

I note that virtnet_set_channels also plays with affinity
directly. Can this be changed to rely on cpu notifiers
somehow?


> -- 
> 2.7.4

^ permalink raw reply

* Re: [PATCH net] virtio-net: correctly enable multiqueue
From: David Miller @ 2016-12-13 15:39 UTC (permalink / raw)
  To: jasowang; +Cc: netdev, mst, tytso, nhorman, virtualization
In-Reply-To: <1481610185-12183-1-git-send-email-jasowang@redhat.com>

From: Jason Wang <jasowang@redhat.com>
Date: Tue, 13 Dec 2016 14:23:05 +0800

> Commit 4490001029012539937ff02778fe6180613fa949 ("virtio-net: enable
> multiqueue by default") blindly set the affinity instead of queues
> during probe which can cause a mismatch of #queues between guest and
> host. This patch fixes it by setting queues.
> 
> Reported-by: Theodore Ts'o <tytso@mit.edu>
> Tested-by: Theodore Ts'o <tytso@mit.edu>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Fixes: 49000102901 ("virtio-net: enable multiqueue by default")
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Applied, thanks Jason.

^ permalink raw reply

* Re: [PATCH v6 2/2] crypto: add virtio-crypto driver
From: Halil Pasic @ 2016-12-13 16:11 UTC (permalink / raw)
  To: Gonglei (Arei)
  Cc: virtio-dev@lists.oasis-open.org, Xuquan (Quan Xu),
	Huangweidong (C), Herbert Xu, Michael S. Tsirkin, Claudio Fontana,
	Hanweidong (Randy), Luonengjun, qemu-devel@nongnu.org,
	Wanzongshun (Vincent), linux-kernel@vger.kernel.org,
	linux-crypto@vger.kernel.org, stefanha@redhat.com,
	Zhoujian (jay, Euler), longpeng,
	virtualization@lists.linux-foundation.org, davem@davemloft.net
In-Reply-To: <20161212234941-mutt-send-email-mst@kernel.org>



On 12/12/2016 11:05 PM, Michael S. Tsirkin wrote:
> On Mon, Dec 12, 2016 at 06:54:07PM +0800, Herbert Xu wrote:
>> On Mon, Dec 12, 2016 at 06:25:12AM +0000, Gonglei (Arei) wrote:
>>> Hi, Michael & Herbert
>>>
>>> Because the virtio-crypto device emulation had been in QEMU 2.8,
>>> would you please merge the virtio-crypto driver for 4.10 if no other
>>> comments? If so, Miachel pls ack and/or review the patch, then
>>> Herbert will take it (I asked him last week). Thank you!
>>>
>>> Ps: Note on 4.10 merge window timing from Linus
>>>  https://lkml.org/lkml/2016/12/7/506
>>>
>>> Dec 23rd is the deadline for 4.10 merge window.
>>
>> Sorry but it's too late for 4.10.  It needed to have been in my
>> tree before the merge window opened to make it for this cycle.
>>
>> Cheers,
> 
> 
> Objections to me merging this? I'm preparing my tree right now.

Got this when testing the most recent version on s390x 

[   20.391074] test 0 (128 bit key, 16 byte blocks): [   20.391078] BUG: using smp_processor_id() in preemptible [00000000] code: insmod/97
[   20.391082] caller is virtio_crypto_ablkcipher_setkey+0x44/0x198
[   20.391085] CPU: 0 PID: 97 Comm: insmod Not tainted 4.9.0-02683-gb62a1ab #46
[   20.391088] Hardware name: IBM              2964 NC9              704              (KVM)
[   20.391405] Stack:
[   20.391407]        000000000c0eb6d0 000000000c0eb760 0000000000000003 0000000000000000
[   20.391414]        000000000c0eb800 000000000c0eb778 000000000c0eb778 0000000000000020
[   20.391420]        0000000000000000 000000000000000a 0000000000000020 000003ff0000000a
[   20.391426]        000003ff0000000c 000000000c0eb7c8 0000000000000000 0000000000000000
[   20.391432]        0700000000c173c8 00000000001126ba 000000000c0eb760 000000000c0eb7b8
[   20.391439] Call Trace:
[   20.391442] ([<000000000011259e>] show_trace+0x8e/0xe0)
[   20.391446]  [<0000000000112670>] show_stack+0x80/0xd8 
[   20.391449]  [<0000000000753ab6>] dump_stack+0x96/0xd8 
[   20.391453]  [<00000000007872e6>] check_preemption_disabled+0xfe/0x128 
[   20.391456]  [<0000000000839cc4>] virtio_crypto_ablkcipher_setkey+0x44/0x198 
[   20.391459]  [<0000000000705a40>] skcipher_setkey_ablkcipher+0x50/0x70 
[   20.391476]  [<000003ff80002a48>] test_skcipher_speed+0x328/0xb98 [tcrypt] 
[   20.391492]  [<000003ff800063dc>] do_test+0x1c24/0x28e0 [tcrypt] 
[   20.391509]  [<000003ff8001006a>] tcrypt_mod_init+0x6a/0x1000 [tcrypt] 
[   20.391512]  [<00000000001002cc>] do_one_initcall+0xb4/0x148 
[   20.391515]  [<0000000000298632>] do_init_module+0x7a/0x228 
[   20.391519]  [<00000000001fd380>] load_module+0x2428/0x2de0 
[   20.391522]  [<00000000001fde8a>] SyS_init_module+0x152/0x160 
[   20.391526]  [<00000000009f1306>] system_call+0xd6/0x270 
[   20.391528] no locks held by insmod/97.

Gonglei, any idea? Did not look into it myself yet.

Halil

> 
>> -- 
>> Email: Herbert Xu <herbert@gondor.apana.org.au>
>> Home Page: http://gondor.apana.org.au/~herbert/
>> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> 

^ permalink raw reply

* [PATCH] vhost: introduce O(1) vq metadata cache
From: Jason Wang @ 2016-12-14  7:56 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel
  Cc: vkaplans, maxime.coquelin, wexu, peterx

When device IOTLB is enabled, all address translations were stored in
interval tree. O(lgN) searching time could be slow for virtqueue
metadata (avail, used and descriptors) since they were accessed much
often than other addresses. So this patch introduces an O(1) array
which points to the interval tree nodes that store the translations of
vq metadata. Those array were update during vq IOTLB prefetching and
were reset during each invalidation and tlb update. Each time we want
to access vq metadata, this small array were queried before interval
tree. This would be sufficient for static mappings but not dynamic
mappings, we could do optimizations on top.

Test were done with l2fwd in guest (2M hugepage):

   noiommu  | before        | after
tx 1.32Mpps | 1.06Mpps(82%) | 1.30Mpps(98%)
rx 2.33Mpps | 1.46Mpps(63%) | 2.29Mpps(98%)

We can almost reach the same performance as noiommu mode.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 136 ++++++++++++++++++++++++++++++++++++++++----------
 drivers/vhost/vhost.h |   8 +++
 2 files changed, 118 insertions(+), 26 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c6f2d89..89e40b6 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
+static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
+{
+	int j;
+
+	for (j = 0; j < VHOST_NUM_ADDRS; j++)
+		vq->meta_iotlb[j] = NULL;
+}
+
+static void vhost_vq_meta_reset(struct vhost_dev *d)
+{
+	int i;
+
+	for (i = 0; i < d->nvqs; ++i)
+		__vhost_vq_meta_reset(d->vqs[i]);
+}
+
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
@@ -311,6 +327,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
+	__vhost_vq_meta_reset(vq);
 }
 
 static int vhost_worker(void *data)
@@ -690,6 +707,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
 	return 1;
 }
 
+static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
+					       u64 addr, unsigned int size,
+					       int type)
+{
+	const struct vhost_umem_node *node = vq->meta_iotlb[type];
+
+	if (!node)
+		return NULL;
+
+	return (void *)(node->userspace_addr + (u64)addr - node->start);
+}
+
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
 static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -732,8 +761,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
 		 * could be access through iotlb. So -EAGAIN should
 		 * not happen in this case.
 		 */
-		/* TODO: more fast path */
 		struct iov_iter t;
+		void __user *uaddr = vhost_vq_meta_fetch(vq,
+				     (u64)(uintptr_t)to, size,
+				     VHOST_ADDR_DESC);
+
+		if (uaddr)
+			return __copy_to_user(uaddr, from, size);
+
 		ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
 				     ARRAY_SIZE(vq->iotlb_iov),
 				     VHOST_ACCESS_WO);
@@ -761,8 +796,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 		 * could be access through iotlb. So -EAGAIN should
 		 * not happen in this case.
 		 */
-		/* TODO: more fast path */
+		void __user *uaddr = vhost_vq_meta_fetch(vq,
+				     (u64)(uintptr_t)from, size,
+				     VHOST_ADDR_DESC);
 		struct iov_iter f;
+
+		if (uaddr)
+			return __copy_from_user(to, uaddr, size);
+
 		ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
 				     ARRAY_SIZE(vq->iotlb_iov),
 				     VHOST_ACCESS_RO);
@@ -782,17 +823,12 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 	return ret;
 }
 
-static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
-				     void *addr, unsigned size)
+static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
+					  void *addr, unsigned int size,
+					  int type)
 {
 	int ret;
 
-	/* This function should be called after iotlb
-	 * prefetch, which means we're sure that vq
-	 * could be access through iotlb. So -EAGAIN should
-	 * not happen in this case.
-	 */
-	/* TODO: more fast path */
 	ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
 			     ARRAY_SIZE(vq->iotlb_iov),
 			     VHOST_ACCESS_RO);
@@ -813,14 +849,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	return vq->iotlb_iov[0].iov_base;
 }
 
-#define vhost_put_user(vq, x, ptr) \
+/* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+					    void *addr, unsigned int size,
+					    int type)
+{
+	void __user *uaddr = vhost_vq_meta_fetch(vq,
+			     (u64)(uintptr_t)addr, size, type);
+	if (uaddr)
+		return uaddr;
+
+	return __vhost_get_user_slow(vq, addr, size, type);
+}
+
+#define vhost_put_user(vq, x, ptr)		\
 ({ \
 	int ret = -EFAULT; \
 	if (!vq->iotlb) { \
 		ret = __put_user(x, ptr); \
 	} else { \
 		__typeof__(ptr) to = \
-			(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+			(__typeof__(ptr)) __vhost_get_user(vq, ptr,	\
+					  sizeof(*ptr), VHOST_ADDR_USED); \
 		if (to != NULL) \
 			ret = __put_user(x, to); \
 		else \
@@ -829,14 +883,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
-#define vhost_get_user(vq, x, ptr) \
+#define vhost_get_user(vq, x, ptr, type)		\
 ({ \
 	int ret; \
 	if (!vq->iotlb) { \
 		ret = __get_user(x, ptr); \
 	} else { \
 		__typeof__(ptr) from = \
-			(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+			(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+							   sizeof(*ptr), \
+							   type); \
 		if (from != NULL) \
 			ret = __get_user(x, from); \
 		else \
@@ -845,6 +901,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
+#define vhost_get_avail(vq, x, ptr) \
+	vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
+
+#define vhost_get_used(vq, x, ptr) \
+	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
+
 static void vhost_dev_lock_vqs(struct vhost_dev *d)
 {
 	int i = 0;
@@ -950,6 +1012,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
 			ret = -EFAULT;
 			break;
 		}
+		vhost_vq_meta_reset(dev);
 		if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
 					 msg->iova + msg->size - 1,
 					 msg->uaddr, msg->perm)) {
@@ -959,6 +1022,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		vhost_iotlb_notify_vq(dev, msg);
 		break;
 	case VHOST_IOTLB_INVALIDATE:
+		vhost_vq_meta_reset(dev);
 		vhost_del_umem_range(dev->iotlb, msg->iova,
 				     msg->iova + msg->size - 1);
 		break;
@@ -1102,12 +1166,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			sizeof *used + num * sizeof *used->ring + s);
 }
 
+static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
+				 const struct vhost_umem_node *node,
+				 int type)
+{
+	int access = (type == VHOST_ADDR_USED) ?
+		     VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+
+	if (likely(node->perm & access))
+		vq->meta_iotlb[type] = node;
+}
+
 static int iotlb_access_ok(struct vhost_virtqueue *vq,
-			   int access, u64 addr, u64 len)
+			   int access, u64 addr, u64 len, int type)
 {
 	const struct vhost_umem_node *node;
 	struct vhost_umem *umem = vq->iotlb;
-	u64 s = 0, size;
+	u64 s = 0, size, orig_addr = addr;
+
+	if (vhost_vq_meta_fetch(vq, addr, len, type))
+		return true;
 
 	while (len > s) {
 		node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1124,6 +1202,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
 		}
 
 		size = node->size - addr + node->start;
+
+		if (orig_addr == addr && size >= len)
+			vhost_vq_meta_update(vq, node, type);
+
 		s += size;
 		addr += size;
 	}
@@ -1140,13 +1222,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
 		return 1;
 
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
-			       num * sizeof *vq->desc) &&
+			       num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
 			       sizeof *vq->avail +
-			       num * sizeof *vq->avail->ring + s) &&
+			       num * sizeof(*vq->avail->ring) + s,
+			       VHOST_ADDR_AVAIL) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
 			       sizeof *vq->used +
-			       num * sizeof *vq->used->ring + s);
+			       num * sizeof(*vq->used->ring) + s,
+			       VHOST_ADDR_USED);
 }
 EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
@@ -1729,7 +1813,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
 		r = -EFAULT;
 		goto err;
 	}
-	r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
+	r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
 	if (r) {
 		vq_err(vq, "Can't access used idx at %p\n",
 		       &vq->used->idx);
@@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
 	last_avail_idx = vq->last_avail_idx;
-	if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
+	if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
 		vq_err(vq, "Failed to access avail idx at %p\n",
 		       &vq->avail->idx);
 		return -EFAULT;
@@ -1954,7 +2038,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
-	if (unlikely(vhost_get_user(vq, ring_head,
+	if (unlikely(vhost_get_avail(vq, ring_head,
 		     &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
@@ -2170,7 +2254,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 
 	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		__virtio16 flags;
-		if (vhost_get_user(vq, flags, &vq->avail->flags)) {
+		if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
 			vq_err(vq, "Failed to get flags");
 			return true;
 		}
@@ -2184,7 +2268,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (unlikely(!v))
 		return true;
 
-	if (vhost_get_user(vq, event, vhost_used_event(vq))) {
+	if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
 		vq_err(vq, "Failed to get used event idx");
 		return true;
 	}
@@ -2226,7 +2310,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	__virtio16 avail_idx;
 	int r;
 
-	r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+	r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
 	if (r)
 		return false;
 
@@ -2261,7 +2345,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	/* They could have slipped one in as we were doing that: make
 	 * sure it's written, then check again. */
 	smp_mb();
-	r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+	r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
 	if (r) {
 		vq_err(vq, "Failed to check avail idx at %p: %d\n",
 		       &vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 78f3c5f..034ea18 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -76,6 +76,13 @@ struct vhost_umem {
 	int numem;
 };
 
+enum vhost_uaddr_type {
+	VHOST_ADDR_DESC = 0,
+	VHOST_ADDR_AVAIL = 1,
+	VHOST_ADDR_USED = 2,
+	VHOST_NUM_ADDRS = 3,
+};
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -86,6 +93,7 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
+	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
 	struct file *kick;
 	struct file *call;
 	struct file *error;
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] vhost: introduce O(1) vq metadata cache
From: kbuild test robot @ 2016-12-14  8:14 UTC (permalink / raw)
  To: Jason Wang
  Cc: kvm, mst, netdev, linux-kernel, peterx, virtualization,
	maxime.coquelin, kbuild-all, vkaplans, wexu
In-Reply-To: <1481702183-16088-1-git-send-email-jasowang@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 1838 bytes --]

Hi Jason,

[auto build test WARNING on vhost/linux-next]
[also build test WARNING on v4.9 next-20161214]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Jason-Wang/vhost-introduce-O-1-vq-metadata-cache/20161214-160153
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: i386-randconfig-x005-201650 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   drivers/vhost/vhost.c: In function 'vhost_vq_meta_fetch':
>> drivers/vhost/vhost.c:719:9: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     return (void *)(node->userspace_addr + (u64)addr - node->start);
            ^

vim +719 drivers/vhost/vhost.c

   703							   node->start,
   704							   node->size))
   705				return 0;
   706		}
   707		return 1;
   708	}
   709	
   710	static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
   711						       u64 addr, unsigned int size,
   712						       int type)
   713	{
   714		const struct vhost_umem_node *node = vq->meta_iotlb[type];
   715	
   716		if (!node)
   717			return NULL;
   718	
 > 719		return (void *)(node->userspace_addr + (u64)addr - node->start);
   720	}
   721	
   722	/* Can we switch to this memory table? */
   723	/* Caller should have device mutex but not vq mutex */
   724	static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
   725				    int log_all)
   726	{
   727		int i;

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 27829 bytes --]

[-- Attachment #3: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* RE: [Qemu-devel] [PATCH kernel v5 0/5] Extend virtio-balloon for fast (de)inflating & fast live migration
From: Li, Liang Z @ 2016-12-14  8:20 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: mhocko@suse.com, kvm@vger.kernel.org, mst@redhat.com,
	linux-kernel@vger.kernel.org, qemu-devel@nongnu.org,
	linux-mm@kvack.org, Hansen, Dave, kirill.shutemov@linux.intel.com,
	pbonzini@redhat.com, akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org, dgilbert@redhat.com
In-Reply-To: <20161209164222.GI28786@redhat.com>

> fast (de)inflating & fast live migration
> 
> Hello,
> 
> On Fri, Dec 09, 2016 at 05:35:45AM +0000, Li, Liang Z wrote:
> > > On 12/08/2016 08:45 PM, Li, Liang Z wrote:
> > > > What's the conclusion of your discussion? It seems you want some
> > > > statistic before deciding whether to  ripping the bitmap from the
> > > > ABI, am I right?
> > >
> > > I think Andrea and David feel pretty strongly that we should remove
> > > the bitmap, unless we have some data to support keeping it.  I don't
> > > feel as strongly about it, but I think their critique of it is
> > > pretty valid.  I think the consensus is that the bitmap needs to go.
> > >
> >
> > Thanks for you clarification.
> >
> > > The only real question IMNHO is whether we should do a power-of-2 or
> > > a length.  But, if we have 12 bits, then the argument for doing
> > > length is pretty strong.  We don't need anywhere near 12 bits if doing
> power-of-2.
> > >
> > So each item can max represent 16MB Bytes, seems not big enough, but
> > enough for most case.
> > Things became much more simple without the bitmap, and I like simple
> > solution too. :)
> >
> > I will prepare the v6 and remove all the bitmap related stuffs. Thank you all!
> 
> Sounds great!
> 
> I suggested to check the statistics, because collecting those stats looked
> simpler and quicker than removing all bitmap related stuff from the patchset.
> However if you prefer to prepare a v6 without the bitmap another perhaps
> more interesting way to evaluate the usefulness of the bitmap is to just run
> the same benchmark and verify that there is no regression compared to the
> bitmap enabled code.
> 
> The other issue with the bitmap is, the best case for the bitmap is ever less
> likely to materialize the more RAM is added to the guest. It won't regress
> linearly because after all there can be some locality bias in the buddy splits,
> but if sync compaction is used in the large order allocations tried before
> reaching order 0, the bitmap payoff will regress close to linearly with the
> increase of RAM.
> 
> So it'd be good to check the stats or the benchmark on large guests, at least
> one hundred gigabytes or so.
> 
> Changing topic but still about the ABI features needed, so it may be relevant
> for this discussion:
> 
> 1) vNUMA locality: i.e. allowing host to specify which vNODEs to take
>    memory from, using alloc_pages_node in guest. So you can ask to
>    take X pages from vnode A, Y pages from vnode B, in one vmenter.
> 
> 2) allowing qemu to tell the guest to stop inflating the balloon and
>    report a fragmentation limit being hit, when sync compaction
>    powered allocations fails at certain power-of-two order granularity
>    passed by qemu to the guest. This order constraint will be passed
>    by default for hugetlbfs guests with 2MB hpage size, while it can
>    be used optionally on THP backed guests. This option with THP
>    guests would allow a highlevel management software to provide a
>    "don't reduce guest performance" while shrinking the memory size of
>    the guest from the GUI. If you deselect the option, you can shrink
>    down to the last freeable 4k guest page, but doing so may have to
>    split THP in the host (you don't know for sure if they were really
>    THP but they could have been), and it may regress
>    performance. Inflating the balloon while passing a minimum
>    granularity "order" of the pages being zapped, will guarantee
>    inflating the balloon cannot decrease guest performance
>    instead. Plus it's needed for hugetlbfs anyway as far as I can
>    tell. hugetlbfs would not be host enforceable even if the idea is
>    not to free memory but only reduce the available memory of the
>    guest (not without major changes that maps a hugetlb page with 4k
>    ptes at least). While for a more cooperative usage of hugetlbfs
>    guests, it's simply not useful to inflate the balloon at anything
>    less than the "HPAGE_SIZE" hugetlbfs granularity.
> 
> We also plan to use userfaultfd to make the balloon driver host enforced (will
> work fine on hugetlbfs 2M and tmpfs too) but that's going to be invisible to
> the ABI so it's not strictly relevant for this discussion.
> 
> On a side note, registering userfaultfd on the ballooned range, will keep
> khugepaged at bay so it won't risk to re-inflating the MADV_DONTNEED
> zapped sub-THP fragments no matter the sysfs tunings.
> 

Thanks for your elaboration!

> Thanks!
> Andrea

^ permalink raw reply

* RE: [Qemu-devel] [PATCH kernel v5 0/5] Extend virtio-balloon for fast (de)inflating & fast live migration
From: Li, Liang Z @ 2016-12-14  8:59 UTC (permalink / raw)
  To: Hansen, Dave, Andrea Arcangeli
  Cc: mhocko@suse.com, kvm@vger.kernel.org, mst@redhat.com,
	qemu-devel@nongnu.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, kirill.shutemov@linux.intel.com,
	pbonzini@redhat.com, akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org, dgilbert@redhat.com
In-Reply-To: <060287c7-d1af-45d5-70ea-ad35d4bbeb84@intel.com>

> Subject: Re: [Qemu-devel] [PATCH kernel v5 0/5] Extend virtio-balloon for
> fast (de)inflating & fast live migration
> 
> On 12/08/2016 08:45 PM, Li, Liang Z wrote:
> > What's the conclusion of your discussion? It seems you want some
> > statistic before deciding whether to  ripping the bitmap from the ABI,
> > am I right?
> 
> I think Andrea and David feel pretty strongly that we should remove the
> bitmap, unless we have some data to support keeping it.  I don't feel as
> strongly about it, but I think their critique of it is pretty valid.  I think the
> consensus is that the bitmap needs to go.
> 
> The only real question IMNHO is whether we should do a power-of-2 or a
> length.  But, if we have 12 bits, then the argument for doing length is pretty
> strong.  We don't need anywhere near 12 bits if doing power-of-2.

Just found the MAX_ORDER should be limited to 12 if use length instead of order,
If the MAX_ORDER is configured to a value bigger than 12, it will make things more
complex to handle this case. 

If use order, we need to break a large memory range whose length is not the power of 2 into several
small ranges, it also make the code complex.

It seems we leave too many bit  for the pfn, and the bits leave for length is not enough,
How about keep 45 bits for the pfn and 19 bits for length, 45 bits for pfn can cover 57 bits
physical address, that should be enough in the near feature. 

What's your opinion?


thanks!
Liang

^ permalink raw reply

* Re: [PATCH] vhost: introduce O(1) vq metadata cache
From: Jason Wang @ 2016-12-14  9:33 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kvm, mst, netdev, linux-kernel, peterx, virtualization,
	maxime.coquelin, kbuild-all, vkaplans, wexu
In-Reply-To: <201612141605.QJowNf4i%fengguang.wu@intel.com>



On 2016年12月14日 16:14, kbuild test robot wrote:
> Hi Jason,
>
> [auto build test WARNING on vhost/linux-next]
> [also build test WARNING on v4.9 next-20161214]
> [if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
>
> url:    https://github.com/0day-ci/linux/commits/Jason-Wang/vhost-introduce-O-1-vq-metadata-cache/20161214-160153
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
> config: i386-randconfig-x005-201650 (attached as .config)
> compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> reproduce:
>          # save the attached .config to linux build tree

Thanks, V2 will be posted soon.

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* [PATCH V2] vhost: introduce O(1) vq metadata cache
From: Jason Wang @ 2016-12-14  9:53 UTC (permalink / raw)
  To: mst, kvm, virtualization, netdev, linux-kernel
  Cc: vkaplans, maxime.coquelin, wexu, peterx

When device IOTLB is enabled, all address translations were stored in
interval tree. O(lgN) searching time could be slow for virtqueue
metadata (avail, used and descriptors) since they were accessed much
often than other addresses. So this patch introduces an O(1) array
which points to the interval tree nodes that store the translations of
vq metadata. Those array were update during vq IOTLB prefetching and
were reset during each invalidation and tlb update. Each time we want
to access vq metadata, this small array were queried before interval
tree. This would be sufficient for static mappings but not dynamic
mappings, we could do optimizations on top.

Test were done with l2fwd in guest (2M hugepage):

   noiommu  | before        | after
tx 1.32Mpps | 1.06Mpps(82%) | 1.30Mpps(98%)
rx 2.33Mpps | 1.46Mpps(63%) | 2.29Mpps(98%)

We can almost reach the same performance as noiommu mode.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Changes from V1:
- silent 32bit build warning
---
 drivers/vhost/vhost.c | 136 ++++++++++++++++++++++++++++++++++++++++----------
 drivers/vhost/vhost.h |   8 +++
 2 files changed, 118 insertions(+), 26 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c6f2d89..50ed625 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
+static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
+{
+	int j;
+
+	for (j = 0; j < VHOST_NUM_ADDRS; j++)
+		vq->meta_iotlb[j] = NULL;
+}
+
+static void vhost_vq_meta_reset(struct vhost_dev *d)
+{
+	int i;
+
+	for (i = 0; i < d->nvqs; ++i)
+		__vhost_vq_meta_reset(d->vqs[i]);
+}
+
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
@@ -311,6 +327,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
+	__vhost_vq_meta_reset(vq);
 }
 
 static int vhost_worker(void *data)
@@ -690,6 +707,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
 	return 1;
 }
 
+static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
+					       u64 addr, unsigned int size,
+					       int type)
+{
+	const struct vhost_umem_node *node = vq->meta_iotlb[type];
+
+	if (!node)
+		return NULL;
+
+	return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
+}
+
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
 static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -732,8 +761,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void *to,
 		 * could be access through iotlb. So -EAGAIN should
 		 * not happen in this case.
 		 */
-		/* TODO: more fast path */
 		struct iov_iter t;
+		void __user *uaddr = vhost_vq_meta_fetch(vq,
+				     (u64)(uintptr_t)to, size,
+				     VHOST_ADDR_DESC);
+
+		if (uaddr)
+			return __copy_to_user(uaddr, from, size);
+
 		ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
 				     ARRAY_SIZE(vq->iotlb_iov),
 				     VHOST_ACCESS_WO);
@@ -761,8 +796,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 		 * could be access through iotlb. So -EAGAIN should
 		 * not happen in this case.
 		 */
-		/* TODO: more fast path */
+		void __user *uaddr = vhost_vq_meta_fetch(vq,
+				     (u64)(uintptr_t)from, size,
+				     VHOST_ADDR_DESC);
 		struct iov_iter f;
+
+		if (uaddr)
+			return __copy_from_user(to, uaddr, size);
+
 		ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
 				     ARRAY_SIZE(vq->iotlb_iov),
 				     VHOST_ACCESS_RO);
@@ -782,17 +823,12 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 	return ret;
 }
 
-static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
-				     void *addr, unsigned size)
+static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
+					  void *addr, unsigned int size,
+					  int type)
 {
 	int ret;
 
-	/* This function should be called after iotlb
-	 * prefetch, which means we're sure that vq
-	 * could be access through iotlb. So -EAGAIN should
-	 * not happen in this case.
-	 */
-	/* TODO: more fast path */
 	ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
 			     ARRAY_SIZE(vq->iotlb_iov),
 			     VHOST_ACCESS_RO);
@@ -813,14 +849,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	return vq->iotlb_iov[0].iov_base;
 }
 
-#define vhost_put_user(vq, x, ptr) \
+/* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+					    void *addr, unsigned int size,
+					    int type)
+{
+	void __user *uaddr = vhost_vq_meta_fetch(vq,
+			     (u64)(uintptr_t)addr, size, type);
+	if (uaddr)
+		return uaddr;
+
+	return __vhost_get_user_slow(vq, addr, size, type);
+}
+
+#define vhost_put_user(vq, x, ptr)		\
 ({ \
 	int ret = -EFAULT; \
 	if (!vq->iotlb) { \
 		ret = __put_user(x, ptr); \
 	} else { \
 		__typeof__(ptr) to = \
-			(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+			(__typeof__(ptr)) __vhost_get_user(vq, ptr,	\
+					  sizeof(*ptr), VHOST_ADDR_USED); \
 		if (to != NULL) \
 			ret = __put_user(x, to); \
 		else \
@@ -829,14 +883,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
-#define vhost_get_user(vq, x, ptr) \
+#define vhost_get_user(vq, x, ptr, type)		\
 ({ \
 	int ret; \
 	if (!vq->iotlb) { \
 		ret = __get_user(x, ptr); \
 	} else { \
 		__typeof__(ptr) from = \
-			(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+			(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+							   sizeof(*ptr), \
+							   type); \
 		if (from != NULL) \
 			ret = __get_user(x, from); \
 		else \
@@ -845,6 +901,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
+#define vhost_get_avail(vq, x, ptr) \
+	vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
+
+#define vhost_get_used(vq, x, ptr) \
+	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
+
 static void vhost_dev_lock_vqs(struct vhost_dev *d)
 {
 	int i = 0;
@@ -950,6 +1012,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
 			ret = -EFAULT;
 			break;
 		}
+		vhost_vq_meta_reset(dev);
 		if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
 					 msg->iova + msg->size - 1,
 					 msg->uaddr, msg->perm)) {
@@ -959,6 +1022,7 @@ int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		vhost_iotlb_notify_vq(dev, msg);
 		break;
 	case VHOST_IOTLB_INVALIDATE:
+		vhost_vq_meta_reset(dev);
 		vhost_del_umem_range(dev->iotlb, msg->iova,
 				     msg->iova + msg->size - 1);
 		break;
@@ -1102,12 +1166,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			sizeof *used + num * sizeof *used->ring + s);
 }
 
+static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
+				 const struct vhost_umem_node *node,
+				 int type)
+{
+	int access = (type == VHOST_ADDR_USED) ?
+		     VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+
+	if (likely(node->perm & access))
+		vq->meta_iotlb[type] = node;
+}
+
 static int iotlb_access_ok(struct vhost_virtqueue *vq,
-			   int access, u64 addr, u64 len)
+			   int access, u64 addr, u64 len, int type)
 {
 	const struct vhost_umem_node *node;
 	struct vhost_umem *umem = vq->iotlb;
-	u64 s = 0, size;
+	u64 s = 0, size, orig_addr = addr;
+
+	if (vhost_vq_meta_fetch(vq, addr, len, type))
+		return true;
 
 	while (len > s) {
 		node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1124,6 +1202,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
 		}
 
 		size = node->size - addr + node->start;
+
+		if (orig_addr == addr && size >= len)
+			vhost_vq_meta_update(vq, node, type);
+
 		s += size;
 		addr += size;
 	}
@@ -1140,13 +1222,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
 		return 1;
 
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
-			       num * sizeof *vq->desc) &&
+			       num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
 			       sizeof *vq->avail +
-			       num * sizeof *vq->avail->ring + s) &&
+			       num * sizeof(*vq->avail->ring) + s,
+			       VHOST_ADDR_AVAIL) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
 			       sizeof *vq->used +
-			       num * sizeof *vq->used->ring + s);
+			       num * sizeof(*vq->used->ring) + s,
+			       VHOST_ADDR_USED);
 }
 EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
@@ -1729,7 +1813,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
 		r = -EFAULT;
 		goto err;
 	}
-	r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
+	r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
 	if (r) {
 		vq_err(vq, "Can't access used idx at %p\n",
 		       &vq->used->idx);
@@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
 	/* Check it isn't doing very strange things with descriptor numbers. */
 	last_avail_idx = vq->last_avail_idx;
-	if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
+	if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
 		vq_err(vq, "Failed to access avail idx at %p\n",
 		       &vq->avail->idx);
 		return -EFAULT;
@@ -1954,7 +2038,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
-	if (unlikely(vhost_get_user(vq, ring_head,
+	if (unlikely(vhost_get_avail(vq, ring_head,
 		     &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
@@ -2170,7 +2254,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 
 	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
 		__virtio16 flags;
-		if (vhost_get_user(vq, flags, &vq->avail->flags)) {
+		if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
 			vq_err(vq, "Failed to get flags");
 			return true;
 		}
@@ -2184,7 +2268,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	if (unlikely(!v))
 		return true;
 
-	if (vhost_get_user(vq, event, vhost_used_event(vq))) {
+	if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
 		vq_err(vq, "Failed to get used event idx");
 		return true;
 	}
@@ -2226,7 +2310,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	__virtio16 avail_idx;
 	int r;
 
-	r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+	r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
 	if (r)
 		return false;
 
@@ -2261,7 +2345,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	/* They could have slipped one in as we were doing that: make
 	 * sure it's written, then check again. */
 	smp_mb();
-	r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+	r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
 	if (r) {
 		vq_err(vq, "Failed to check avail idx at %p: %d\n",
 		       &vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 78f3c5f..034ea18 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -76,6 +76,13 @@ struct vhost_umem {
 	int numem;
 };
 
+enum vhost_uaddr_type {
+	VHOST_ADDR_DESC = 0,
+	VHOST_ADDR_AVAIL = 1,
+	VHOST_ADDR_USED = 2,
+	VHOST_NUM_ADDRS = 3,
+};
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -86,6 +93,7 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
+	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
 	struct file *kick;
 	struct file *call;
 	struct file *error;
-- 
2.7.4

^ permalink raw reply related

* [PATCH] arch: x86: kernel: fixed unused label issue
From: Piotr Gregor @ 2016-12-14 11:15 UTC (permalink / raw)
  To: tglx; +Cc: jeremy, x86, linux-kernel, virtualization, mingo, hpa, akataria

The patch_default label is only used from within
	case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock)
and
	case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted)
i.e. when #if defined(CONFIG_PARAVIRT_SPINLOCKS) is true.
Therefore no code jumps to this label in case CONFIG_PARAVIRT_SPINLOCKS
is not defined and label should be removed in that case.
Moving #endif directive just after that label fixes the issue.

In addition,there are three errors reported by checkpatch script
on this file. This commit fixes two of them. The last one is
	ERROR: Macros with multiple statements should be enclosed
	in a do - while loop
which probably is a false alarm here as PATCH_SITE macro defines
a case in switch local to native_patch function not meant to be used
in other places.

Signed-off-by: Piotr Gregor <piotrgregor@rsyncme.org>
---
 arch/x86/kernel/paravirt_patch_64.c | 67 +++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index f4fcf26..7ce2848 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -44,43 +44,44 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 	const unsigned char *start, *end;
 	unsigned ret;
 
-#define PATCH_SITE(ops, x)					\
-		case PARAVIRT_PATCH(ops.x):			\
-			start = start_##ops##_##x;		\
-			end = end_##ops##_##x;			\
-			goto patch_site
-	switch(type) {
-		PATCH_SITE(pv_irq_ops, restore_fl);
-		PATCH_SITE(pv_irq_ops, save_fl);
-		PATCH_SITE(pv_irq_ops, irq_enable);
-		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, usergs_sysret64);
-		PATCH_SITE(pv_cpu_ops, swapgs);
-		PATCH_SITE(pv_mmu_ops, read_cr2);
-		PATCH_SITE(pv_mmu_ops, read_cr3);
-		PATCH_SITE(pv_mmu_ops, write_cr3);
-		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
-		PATCH_SITE(pv_cpu_ops, wbinvd);
+#define PATCH_SITE(ops, x)				\
+	case PARAVIRT_PATCH(ops.x):			\
+		start = start_##ops##_##x;		\
+		end = end_##ops##_##x;			\
+		goto patch_site
+
+	switch (type) {
+	PATCH_SITE(pv_irq_ops, restore_fl);
+	PATCH_SITE(pv_irq_ops, save_fl);
+	PATCH_SITE(pv_irq_ops, irq_enable);
+	PATCH_SITE(pv_irq_ops, irq_disable);
+	PATCH_SITE(pv_cpu_ops, usergs_sysret64);
+	PATCH_SITE(pv_cpu_ops, swapgs);
+	PATCH_SITE(pv_mmu_ops, read_cr2);
+	PATCH_SITE(pv_mmu_ops, read_cr3);
+	PATCH_SITE(pv_mmu_ops, write_cr3);
+	PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+	PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
-		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
-			if (pv_is_native_spin_unlock()) {
-				start = start_pv_lock_ops_queued_spin_unlock;
-				end   = end_pv_lock_ops_queued_spin_unlock;
-				goto patch_site;
-			}
-			goto patch_default;
+	case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
+		if (pv_is_native_spin_unlock()) {
+			start = start_pv_lock_ops_queued_spin_unlock;
+			end   = end_pv_lock_ops_queued_spin_unlock;
+			goto patch_site;
+		}
+		goto patch_default;
 
-		case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
-			if (pv_is_native_vcpu_is_preempted()) {
-				start = start_pv_lock_ops_vcpu_is_preempted;
-				end   = end_pv_lock_ops_vcpu_is_preempted;
-				goto patch_site;
-			}
-			goto patch_default;
-#endif
+	case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
+		if (pv_is_native_vcpu_is_preempted()) {
+			start = start_pv_lock_ops_vcpu_is_preempted;
+			end   = end_pv_lock_ops_vcpu_is_preempted;
+			goto patch_site;
+		}
+		goto patch_default;
 
-	default:
 patch_default:
+#endif
+	default:
 		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
 		break;
 
-- 
2.1.4

^ permalink raw reply related

* RE: [PATCH v6 2/2] crypto: add virtio-crypto driver
From: Gonglei (Arei) @ 2016-12-14 11:32 UTC (permalink / raw)
  To: Halil Pasic
  Cc: virtio-dev@lists.oasis-open.org, Xuquan (Quan Xu),
	Huangweidong (C), Herbert Xu, Michael S. Tsirkin, Claudio Fontana,
	Hanweidong (Randy), Luonengjun, qemu-devel@nongnu.org,
	Wanzongshun (Vincent), linux-kernel@vger.kernel.org,
	linux-crypto@vger.kernel.org, stefanha@redhat.com,
	Zhoujian (jay, Euler), longpeng,
	virtualization@lists.linux-foundation.org, davem@davemloft.net
In-Reply-To: <b55a4666-d71a-352c-8be3-286eef6c5bc0@linux.vnet.ibm.com>

Hi,

>
> Subject: Re: [PATCH v6 2/2] crypto: add virtio-crypto driver
> 
> 
> 
> On 12/12/2016 11:05 PM, Michael S. Tsirkin wrote:
> > On Mon, Dec 12, 2016 at 06:54:07PM +0800, Herbert Xu wrote:
> >> On Mon, Dec 12, 2016 at 06:25:12AM +0000, Gonglei (Arei) wrote:
> >>> Hi, Michael & Herbert
> >>>
> >>> Because the virtio-crypto device emulation had been in QEMU 2.8,
> >>> would you please merge the virtio-crypto driver for 4.10 if no other
> >>> comments? If so, Miachel pls ack and/or review the patch, then
> >>> Herbert will take it (I asked him last week). Thank you!
> >>>
> >>> Ps: Note on 4.10 merge window timing from Linus
> >>>  https://lkml.org/lkml/2016/12/7/506
> >>>
> >>> Dec 23rd is the deadline for 4.10 merge window.
> >>
> >> Sorry but it's too late for 4.10.  It needed to have been in my
> >> tree before the merge window opened to make it for this cycle.
> >>
> >> Cheers,
> >
> >
> > Objections to me merging this? I'm preparing my tree right now.
> 
> Got this when testing the most recent version on s390x
> 
> [   20.391074] test 0 (128 bit key, 16 byte blocks): [   20.391078] BUG: using
> smp_processor_id() in preemptible [00000000] code: insmod/97
> [   20.391082] caller is virtio_crypto_ablkcipher_setkey+0x44/0x198
> [   20.391085] CPU: 0 PID: 97 Comm: insmod Not tainted
> 4.9.0-02683-gb62a1ab #46
> [   20.391088] Hardware name: IBM              2964 NC9
> 704              (KVM)
> [   20.391405] Stack:
> [   20.391407]        000000000c0eb6d0 000000000c0eb760
> 0000000000000003 0000000000000000
> [   20.391414]        000000000c0eb800 000000000c0eb778
> 000000000c0eb778 0000000000000020
> [   20.391420]        0000000000000000 000000000000000a
> 0000000000000020 000003ff0000000a
> [   20.391426]        000003ff0000000c 000000000c0eb7c8
> 0000000000000000 0000000000000000
> [   20.391432]        0700000000c173c8 00000000001126ba
> 000000000c0eb760 000000000c0eb7b8
> [   20.391439] Call Trace:
> [   20.391442] ([<000000000011259e>] show_trace+0x8e/0xe0)
> [   20.391446]  [<0000000000112670>] show_stack+0x80/0xd8
> [   20.391449]  [<0000000000753ab6>] dump_stack+0x96/0xd8
> [   20.391453]  [<00000000007872e6>]
> check_preemption_disabled+0xfe/0x128
> [   20.391456]  [<0000000000839cc4>]
> virtio_crypto_ablkcipher_setkey+0x44/0x198
> [   20.391459]  [<0000000000705a40>]
> skcipher_setkey_ablkcipher+0x50/0x70
> [   20.391476]  [<000003ff80002a48>] test_skcipher_speed+0x328/0xb98
> [tcrypt]
> [   20.391492]  [<000003ff800063dc>] do_test+0x1c24/0x28e0 [tcrypt]
> [   20.391509]  [<000003ff8001006a>] tcrypt_mod_init+0x6a/0x1000
> [tcrypt]
> [   20.391512]  [<00000000001002cc>] do_one_initcall+0xb4/0x148
> [   20.391515]  [<0000000000298632>] do_init_module+0x7a/0x228
> [   20.391519]  [<00000000001fd380>] load_module+0x2428/0x2de0
> [   20.391522]  [<00000000001fde8a>] SyS_init_module+0x152/0x160
> [   20.391526]  [<00000000009f1306>] system_call+0xd6/0x270
> [   20.391528] no locks held by insmod/97.
> 
> Gonglei, any idea? Did not look into it myself yet.
> 
Thanks for report. You must open CONFIG_DEBUG_PREEMPT,
but I didn't do that before. I open some kernel hacking debug switches to test it as well today,
and the following patch can fix it.

diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
index 975404b..518dc7ad 100644
--- a/drivers/crypto/virtio/virtio_crypto_common.h
+++ b/drivers/crypto/virtio/virtio_crypto_common.h
@@ -113,7 +113,13 @@ struct virtio_crypto_request {

 static inline int virtio_crypto_get_current_node(void)
 {
-   return topology_physical_package_id(smp_processor_id());
+   int cpu, node;
+
+   cpu = get_cpu();
+   node = topology_physical_package_id(cpu);
+   put_cpu();
+
+   return node;
 }

Meanwhile I find aother problem, will fix it in the following V7.

Regards,
-Gonglei

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox