* [PATCH 1/4] iommu: Add virtio-iommu driver
From: Jean-Philippe Brucker @ 2018-02-14 14:53 UTC (permalink / raw)
To: iommu, kvm, virtualization, virtio-dev, kvmarm
Cc: jayachandran.nair, lorenzo.pieralisi, tnowicki, mst, marc.zyngier,
will.deacon, jintack, eric.auger, robin.murphy, joro,
eric.auger.pro
In-Reply-To: <20180214145340.1223-1-jean-philippe.brucker@arm.com>
The virtio IOMMU is a para-virtualized device, allowing to send IOMMU
requests such as map/unmap over virtio-mmio transport without emulating
page tables. This implementation handles ATTACH, DETACH, MAP and UNMAP
requests.
The bulk of the code transforms calls coming from the IOMMU API into
corresponding virtio requests. Mappings are kept in an interval tree
instead of page tables.
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
MAINTAINERS | 6 +
drivers/iommu/Kconfig | 11 +
drivers/iommu/Makefile | 1 +
drivers/iommu/virtio-iommu.c | 960 ++++++++++++++++++++++++++++++++++++++
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_iommu.h | 116 +++++
6 files changed, 1095 insertions(+)
create mode 100644 drivers/iommu/virtio-iommu.c
create mode 100644 include/uapi/linux/virtio_iommu.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..2a181924d420 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14818,6 +14818,12 @@ S: Maintained
F: drivers/virtio/virtio_input.c
F: include/uapi/linux/virtio_input.h
+VIRTIO IOMMU DRIVER
+M: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+S: Maintained
+F: drivers/iommu/virtio-iommu.c
+F: include/uapi/linux/virtio_iommu.h
+
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..1ea0ec74524f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -381,4 +381,15 @@ config QCOM_IOMMU
help
Support for IOMMU on certain Qualcomm SoCs.
+config VIRTIO_IOMMU
+ bool "Virtio IOMMU driver"
+ depends on VIRTIO_MMIO
+ select IOMMU_API
+ select INTERVAL_TREE
+ select ARM_DMA_USE_IOMMU if ARM
+ help
+ Para-virtualised IOMMU driver with virtio.
+
+ Say Y here if you intend to run this kernel as a guest.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1fb695854809..9c68be1365e1 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
new file mode 100644
index 000000000000..a9c9245e8ba2
--- /dev/null
+++ b/drivers/iommu/virtio-iommu.c
@@ -0,0 +1,960 @@
+/*
+ * Virtio driver for the paravirtualized IOMMU
+ *
+ * Copyright (C) 2018 ARM Limited
+ * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/amba/bus.h>
+#include <linux/delay.h>
+#include <linux/dma-iommu.h>
+#include <linux/freezer.h>
+#include <linux/interval_tree.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/wait.h>
+
+#include <uapi/linux/virtio_iommu.h>
+
+#define MSI_IOVA_BASE 0x8000000
+#define MSI_IOVA_LENGTH 0x100000
+
+struct viommu_dev {
+ struct iommu_device iommu;
+ struct device *dev;
+ struct virtio_device *vdev;
+
+ struct ida domain_ids;
+
+ struct virtqueue *vq;
+ /* Serialize anything touching the request queue */
+ spinlock_t request_lock;
+
+ /* Device configuration */
+ struct iommu_domain_geometry geometry;
+ u64 pgsize_bitmap;
+ u8 domain_bits;
+};
+
+struct viommu_mapping {
+ phys_addr_t paddr;
+ struct interval_tree_node iova;
+ union {
+ struct virtio_iommu_req_map map;
+ struct virtio_iommu_req_unmap unmap;
+ } req;
+};
+
+struct viommu_domain {
+ struct iommu_domain domain;
+ struct viommu_dev *viommu;
+ struct mutex mutex;
+ unsigned int id;
+
+ spinlock_t mappings_lock;
+ struct rb_root_cached mappings;
+
+ /* Number of endpoints attached to this domain */
+ unsigned long endpoints;
+};
+
+struct viommu_endpoint {
+ struct viommu_dev *viommu;
+ struct viommu_domain *vdomain;
+};
+
+struct viommu_request {
+ struct scatterlist top;
+ struct scatterlist bottom;
+
+ int written;
+ struct list_head list;
+};
+
+#define to_viommu_domain(domain) \
+ container_of(domain, struct viommu_domain, domain)
+
+/* Virtio transport */
+
+static int viommu_status_to_errno(u8 status)
+{
+ switch (status) {
+ case VIRTIO_IOMMU_S_OK:
+ return 0;
+ case VIRTIO_IOMMU_S_UNSUPP:
+ return -ENOSYS;
+ case VIRTIO_IOMMU_S_INVAL:
+ return -EINVAL;
+ case VIRTIO_IOMMU_S_RANGE:
+ return -ERANGE;
+ case VIRTIO_IOMMU_S_NOENT:
+ return -ENOENT;
+ case VIRTIO_IOMMU_S_FAULT:
+ return -EFAULT;
+ case VIRTIO_IOMMU_S_IOERR:
+ case VIRTIO_IOMMU_S_DEVERR:
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * viommu_get_req_size - compute request size
+ *
+ * A virtio-iommu request is split into one device-read-only part (top) and one
+ * device-write-only part (bottom). Given a request, return the sizes of the two
+ * parts in @top and @bottom.
+ *
+ * Return 0 on success, or an error when the request seems invalid.
+ */
+static int viommu_get_req_size(struct viommu_dev *viommu,
+ struct virtio_iommu_req_head *req, size_t *top,
+ size_t *bottom)
+{
+ size_t size;
+ union virtio_iommu_req *r = (void *)req;
+
+ *bottom = sizeof(struct virtio_iommu_req_tail);
+
+ switch (req->type) {
+ case VIRTIO_IOMMU_T_ATTACH:
+ size = sizeof(r->attach);
+ break;
+ case VIRTIO_IOMMU_T_DETACH:
+ size = sizeof(r->detach);
+ break;
+ case VIRTIO_IOMMU_T_MAP:
+ size = sizeof(r->map);
+ break;
+ case VIRTIO_IOMMU_T_UNMAP:
+ size = sizeof(r->unmap);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *top = size - *bottom;
+ return 0;
+}
+
+static int viommu_receive_resp(struct viommu_dev *viommu, int nr_sent,
+ struct list_head *sent)
+{
+
+ unsigned int len;
+ int nr_received = 0;
+ struct viommu_request *req, *pending;
+
+ pending = list_first_entry_or_null(sent, struct viommu_request, list);
+ if (WARN_ON(!pending))
+ return 0;
+
+ while ((req = virtqueue_get_buf(viommu->vq, &len)) != NULL) {
+ if (req != pending) {
+ dev_warn(viommu->dev, "discarding stale request\n");
+ continue;
+ }
+
+ pending->written = len;
+
+ if (++nr_received == nr_sent) {
+ WARN_ON(!list_is_last(&pending->list, sent));
+ break;
+ } else if (WARN_ON(list_is_last(&pending->list, sent))) {
+ break;
+ }
+
+ pending = list_next_entry(pending, list);
+ }
+
+ return nr_received;
+}
+
+static int _viommu_send_reqs_sync(struct viommu_dev *viommu,
+ struct viommu_request *req, int nr,
+ int *nr_sent)
+{
+ int i, ret;
+ ktime_t timeout;
+ LIST_HEAD(pending);
+ int nr_received = 0;
+ struct scatterlist *sg[2];
+ /*
+ * The timeout is chosen arbitrarily. It's only here to prevent locking
+ * up the CPU in case of a device bug.
+ */
+ unsigned long timeout_ms = 1000;
+
+ *nr_sent = 0;
+
+ for (i = 0; i < nr; i++, req++) {
+ req->written = 0;
+
+ sg[0] = &req->top;
+ sg[1] = &req->bottom;
+
+ ret = virtqueue_add_sgs(viommu->vq, sg, 1, 1, req,
+ GFP_ATOMIC);
+ if (ret)
+ break;
+
+ list_add_tail(&req->list, &pending);
+ }
+
+ if (i && !virtqueue_kick(viommu->vq))
+ return -EPIPE;
+
+ timeout = ktime_add_ms(ktime_get(), timeout_ms * i);
+ while (nr_received < i && ktime_before(ktime_get(), timeout)) {
+ nr_received += viommu_receive_resp(viommu, i - nr_received,
+ &pending);
+ if (nr_received < i)
+ cpu_relax();
+ }
+
+ if (nr_received != i)
+ ret = -ETIMEDOUT;
+
+ if (ret == -ENOSPC && nr_received)
+ /*
+ * We've freed some space since virtio told us that the ring is
+ * full, tell the caller to come back for more.
+ */
+ ret = -EAGAIN;
+
+ *nr_sent = nr_received;
+
+ return ret;
+}
+
+/*
+ * viommu_send_reqs_sync - add a batch of requests, kick the host and wait for
+ * them to return
+ *
+ * @req: array of requests
+ * @nr: array length
+ * @nr_sent: on return, contains the number of requests actually sent
+ *
+ * Return 0 on success, or an error if we failed to send some of the requests.
+ */
+static int viommu_send_reqs_sync(struct viommu_dev *viommu,
+ struct viommu_request *req, int nr,
+ int *nr_sent)
+{
+ int ret;
+ int sent = 0;
+ unsigned long flags;
+
+ *nr_sent = 0;
+ do {
+ spin_lock_irqsave(&viommu->request_lock, flags);
+ ret = _viommu_send_reqs_sync(viommu, req, nr, &sent);
+ spin_unlock_irqrestore(&viommu->request_lock, flags);
+
+ *nr_sent += sent;
+ req += sent;
+ nr -= sent;
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+/*
+ * viommu_send_req_sync - send one request and wait for reply
+ *
+ * @top: pointer to a virtio_iommu_req_* structure
+ *
+ * Returns 0 if the request was successful, or an error number otherwise. No
+ * distinction is done between transport and request errors.
+ */
+static int viommu_send_req_sync(struct viommu_dev *viommu, void *top)
+{
+ int ret;
+ int nr_sent;
+ void *bottom;
+ size_t top_size, bottom_size;
+ struct virtio_iommu_req_tail *tail;
+ struct virtio_iommu_req_head *head = top;
+ struct viommu_request req = {
+ .written = 0
+ };
+
+ ret = viommu_get_req_size(viommu, head, &top_size, &bottom_size);
+ if (ret)
+ return ret;
+
+ bottom = top + top_size;
+ tail = bottom + bottom_size - sizeof(*tail);
+
+ sg_init_one(&req.top, top, top_size);
+ sg_init_one(&req.bottom, bottom, bottom_size);
+
+ ret = viommu_send_reqs_sync(viommu, &req, 1, &nr_sent);
+ if (ret || !req.written || nr_sent != 1) {
+ dev_err(viommu->dev, "failed to send request\n");
+ return -EIO;
+ }
+
+ return viommu_status_to_errno(tail->status);
+}
+
+/*
+ * viommu_add_mapping - add a mapping to the internal tree
+ *
+ * On success, return the new mapping. Otherwise return NULL.
+ */
+static struct viommu_mapping *
+viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova,
+ phys_addr_t paddr, size_t size)
+{
+ unsigned long flags;
+ struct viommu_mapping *mapping;
+
+ mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC);
+ if (!mapping)
+ return NULL;
+
+ mapping->paddr = paddr;
+ mapping->iova.start = iova;
+ mapping->iova.last = iova + size - 1;
+
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ interval_tree_insert(&mapping->iova, &vdomain->mappings);
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ return mapping;
+}
+
+/*
+ * viommu_del_mappings - remove mappings from the internal tree
+ *
+ * @vdomain: the domain
+ * @iova: start of the range
+ * @size: size of the range. A size of 0 corresponds to the entire address
+ * space.
+ * @out_mapping: if not NULL, the first removed mapping is returned in there.
+ * This allows the caller to reuse the buffer for the unmap request. When
+ * the returned size is greater than zero, if a mapping is returned, the
+ * caller must free it.
+ *
+ * On success, returns the number of unmapped bytes (>= size)
+ */
+static size_t viommu_del_mappings(struct viommu_domain *vdomain,
+ unsigned long iova, size_t size,
+ struct viommu_mapping **out_mapping)
+{
+ size_t unmapped = 0;
+ unsigned long flags;
+ unsigned long last = iova + size - 1;
+ struct viommu_mapping *mapping = NULL;
+ struct interval_tree_node *node, *next;
+
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ next = interval_tree_iter_first(&vdomain->mappings, iova, last);
+
+ if (next) {
+ mapping = container_of(next, struct viommu_mapping, iova);
+ /* Trying to split a mapping? */
+ if (WARN_ON(mapping->iova.start < iova))
+ next = NULL;
+ }
+
+ while (next) {
+ node = next;
+ mapping = container_of(node, struct viommu_mapping, iova);
+
+ next = interval_tree_iter_next(node, iova, last);
+
+ /*
+ * Note that for a partial range, this will return the full
+ * mapping so we avoid sending split requests to the device.
+ */
+ unmapped += mapping->iova.last - mapping->iova.start + 1;
+
+ interval_tree_remove(node, &vdomain->mappings);
+
+ if (out_mapping && !(*out_mapping))
+ *out_mapping = mapping;
+ else
+ kfree(mapping);
+ }
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ return unmapped;
+}
+
+/*
+ * viommu_replay_mappings - re-send MAP requests
+ *
+ * When reattaching a domain that was previously detached from all endpoints,
+ * mappings were deleted from the device. Re-create the mappings available in
+ * the internal tree.
+ */
+static int viommu_replay_mappings(struct viommu_domain *vdomain)
+{
+ unsigned long flags;
+ int i = 1, ret, nr_sent;
+ struct viommu_request *reqs;
+ struct viommu_mapping *mapping;
+ struct interval_tree_node *node;
+ size_t top_size, bottom_size;
+
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
+ if (!node) {
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+ return 0;
+ }
+
+ while ((node = interval_tree_iter_next(node, 0, -1UL)) != NULL)
+ i++;
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ reqs = kcalloc(i, sizeof(*reqs), GFP_KERNEL);
+ if (!reqs)
+ return -ENOMEM;
+
+ bottom_size = sizeof(struct virtio_iommu_req_tail);
+ top_size = sizeof(struct virtio_iommu_req_map) - bottom_size;
+
+ i = 0;
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
+ while (node) {
+ mapping = container_of(node, struct viommu_mapping, iova);
+ sg_init_one(&reqs[i].top, &mapping->req.map, top_size);
+ sg_init_one(&reqs[i].bottom, &mapping->req.map.tail,
+ bottom_size);
+
+ node = interval_tree_iter_next(node, 0, -1UL);
+ i++;
+ }
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ ret = viommu_send_reqs_sync(vdomain->viommu, reqs, i, &nr_sent);
+ kfree(reqs);
+
+ return ret;
+}
+
+/* IOMMU API */
+
+static bool viommu_capable(enum iommu_cap cap)
+{
+ return false;
+}
+
+static struct iommu_domain *viommu_domain_alloc(unsigned type)
+{
+ struct viommu_domain *vdomain;
+
+ if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+ return NULL;
+
+ vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
+ if (!vdomain)
+ return NULL;
+
+ mutex_init(&vdomain->mutex);
+ spin_lock_init(&vdomain->mappings_lock);
+ vdomain->mappings = RB_ROOT_CACHED;
+
+ if (type == IOMMU_DOMAIN_DMA &&
+ iommu_get_dma_cookie(&vdomain->domain)) {
+ kfree(vdomain);
+ return NULL;
+ }
+
+ return &vdomain->domain;
+}
+
+static int viommu_domain_finalise(struct viommu_dev *viommu,
+ struct iommu_domain *domain)
+{
+ int ret;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+ /* ida limits size to 31 bits. A value of 0 means "max" */
+ unsigned int max_domain = viommu->domain_bits >= 31 ? 0 :
+ 1U << viommu->domain_bits;
+
+ vdomain->viommu = viommu;
+
+ domain->pgsize_bitmap = viommu->pgsize_bitmap;
+ domain->geometry = viommu->geometry;
+
+ ret = ida_simple_get(&viommu->domain_ids, 0, max_domain, GFP_KERNEL);
+ if (ret >= 0)
+ vdomain->id = (unsigned int)ret;
+
+ return ret > 0 ? 0 : ret;
+}
+
+static void viommu_domain_free(struct iommu_domain *domain)
+{
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ iommu_put_dma_cookie(domain);
+
+ /* Free all remaining mappings (size 2^64) */
+ viommu_del_mappings(vdomain, 0, 0, NULL);
+
+ if (vdomain->viommu)
+ ida_simple_remove(&vdomain->viommu->domain_ids, vdomain->id);
+
+ kfree(vdomain);
+}
+
+static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
+{
+ int i;
+ int ret = 0;
+ struct virtio_iommu_req_attach *req;
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct viommu_endpoint *vdev = fwspec->iommu_priv;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ mutex_lock(&vdomain->mutex);
+ if (!vdomain->viommu) {
+ /*
+ * Initialize the domain proper now that we know which viommu
+ * owns it.
+ */
+ ret = viommu_domain_finalise(vdev->viommu, domain);
+ } else if (vdomain->viommu != vdev->viommu) {
+ dev_err(dev, "cannot attach to foreign vIOMMU\n");
+ ret = -EXDEV;
+ }
+ mutex_unlock(&vdomain->mutex);
+
+ if (ret)
+ return ret;
+
+ /*
+ * In the virtio-iommu device, when attaching the endpoint to a new
+ * domain, it is detached from the old one and, if as as a result the
+ * old domain isn't attached to any endpoint, all mappings are removed
+ * from the old domain and it is freed.
+ *
+ * In the driver the old domain still exists, and its mappings will be
+ * recreated if it gets reattached to an endpoint. Otherwise it will be
+ * freed explicitly.
+ *
+ * vdev->vdomain is protected by group->mutex
+ */
+ if (vdev->vdomain)
+ vdev->vdomain->endpoints--;
+
+ /* DMA to the stack is forbidden, store request on the heap */
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ *req = (struct virtio_iommu_req_attach) {
+ .head.type = VIRTIO_IOMMU_T_ATTACH,
+ .domain = cpu_to_le32(vdomain->id),
+ };
+
+ for (i = 0; i < fwspec->num_ids; i++) {
+ req->endpoint = cpu_to_le32(fwspec->ids[i]);
+
+ ret = viommu_send_req_sync(vdomain->viommu, req);
+ if (ret)
+ break;
+ }
+
+ kfree(req);
+
+ if (ret)
+ return ret;
+
+ if (!vdomain->endpoints) {
+ /*
+ * This endpoint is the first to be attached to the domain.
+ * Replay existing mappings if any (e.g. SW MSI).
+ */
+ ret = viommu_replay_mappings(vdomain);
+ if (ret)
+ return ret;
+ }
+
+ vdomain->endpoints++;
+ vdev->vdomain = vdomain;
+
+ return 0;
+}
+
+static int viommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ int ret;
+ int flags;
+ struct viommu_mapping *mapping;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ mapping = viommu_add_mapping(vdomain, iova, paddr, size);
+ if (!mapping)
+ return -ENOMEM;
+
+ flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) |
+ (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0);
+
+ mapping->req.map = (struct virtio_iommu_req_map) {
+ .head.type = VIRTIO_IOMMU_T_MAP,
+ .domain = cpu_to_le32(vdomain->id),
+ .virt_start = cpu_to_le64(iova),
+ .phys_start = cpu_to_le64(paddr),
+ .virt_end = cpu_to_le64(iova + size - 1),
+ .flags = cpu_to_le32(flags),
+ };
+
+ if (!vdomain->endpoints)
+ return 0;
+
+ ret = viommu_send_req_sync(vdomain->viommu, &mapping->req);
+ if (ret)
+ viommu_del_mappings(vdomain, iova, size, NULL);
+
+ return ret;
+}
+
+static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ int ret = 0;
+ size_t unmapped;
+ struct viommu_mapping *mapping = NULL;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ unmapped = viommu_del_mappings(vdomain, iova, size, &mapping);
+ if (unmapped < size) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ /* Device already removed all mappings after detach. */
+ if (!vdomain->endpoints)
+ goto out_free;
+
+ if (WARN_ON(!mapping))
+ return 0;
+
+ mapping->req.unmap = (struct virtio_iommu_req_unmap) {
+ .head.type = VIRTIO_IOMMU_T_UNMAP,
+ .domain = cpu_to_le32(vdomain->id),
+ .virt_start = cpu_to_le64(iova),
+ .virt_end = cpu_to_le64(iova + unmapped - 1),
+ };
+
+ ret = viommu_send_req_sync(vdomain->viommu, &mapping->req);
+
+out_free:
+ kfree(mapping);
+
+ return ret ? 0 : unmapped;
+}
+
+static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ u64 paddr = 0;
+ unsigned long flags;
+ struct viommu_mapping *mapping;
+ struct interval_tree_node *node;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ node = interval_tree_iter_first(&vdomain->mappings, iova, iova);
+ if (node) {
+ mapping = container_of(node, struct viommu_mapping, iova);
+ paddr = mapping->paddr + (iova - mapping->iova.start);
+ }
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ return paddr;
+}
+
+static struct iommu_ops viommu_ops;
+static struct virtio_driver virtio_iommu_drv;
+
+static int viommu_match_node(struct device *dev, void *data)
+{
+ return dev->parent->fwnode == data;
+}
+
+static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+ struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL,
+ fwnode, viommu_match_node);
+ put_device(dev);
+
+ return dev ? dev_to_virtio(dev)->priv : NULL;
+}
+
+static int viommu_add_device(struct device *dev)
+{
+ struct iommu_group *group;
+ struct viommu_endpoint *vdev;
+ struct viommu_dev *viommu = NULL;
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+ if (!fwspec || fwspec->ops != &viommu_ops)
+ return -ENODEV;
+
+ viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
+ if (!viommu)
+ return -ENODEV;
+
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev)
+ return -ENOMEM;
+
+ vdev->viommu = viommu;
+ fwspec->iommu_priv = vdev;
+
+ /*
+ * Last step creates a default domain and attaches to it. Everything
+ * must be ready.
+ */
+ group = iommu_group_get_for_dev(dev);
+ if (!IS_ERR(group))
+ iommu_group_put(group);
+
+ return PTR_ERR_OR_ZERO(group);
+}
+
+static void viommu_remove_device(struct device *dev)
+{
+ kfree(dev->iommu_fwspec->iommu_priv);
+}
+
+static struct iommu_group *viommu_device_group(struct device *dev)
+{
+ if (dev_is_pci(dev))
+ return pci_device_group(dev);
+ else
+ return generic_device_group(dev);
+}
+
+static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+ return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
+static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
+{
+ struct iommu_resv_region *region;
+ int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+ region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
+ IOMMU_RESV_SW_MSI);
+ if (!region)
+ return;
+
+ list_add_tail(®ion->list, head);
+ iommu_dma_get_resv_regions(dev, head);
+}
+
+static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
+{
+ struct iommu_resv_region *entry, *next;
+
+ list_for_each_entry_safe(entry, next, head, list)
+ kfree(entry);
+}
+
+static struct iommu_ops viommu_ops = {
+ .capable = viommu_capable,
+ .domain_alloc = viommu_domain_alloc,
+ .domain_free = viommu_domain_free,
+ .attach_dev = viommu_attach_dev,
+ .map = viommu_map,
+ .unmap = viommu_unmap,
+ .map_sg = default_iommu_map_sg,
+ .iova_to_phys = viommu_iova_to_phys,
+ .add_device = viommu_add_device,
+ .remove_device = viommu_remove_device,
+ .device_group = viommu_device_group,
+ .of_xlate = viommu_of_xlate,
+ .get_resv_regions = viommu_get_resv_regions,
+ .put_resv_regions = viommu_put_resv_regions,
+};
+
+static int viommu_init_vq(struct viommu_dev *viommu)
+{
+ struct virtio_device *vdev = dev_to_virtio(viommu->dev);
+ const char *name = "request";
+ void *ret;
+
+ ret = virtio_find_single_vq(vdev, NULL, name);
+ if (IS_ERR(ret)) {
+ dev_err(viommu->dev, "cannot find VQ\n");
+ return PTR_ERR(ret);
+ }
+
+ viommu->vq = ret;
+
+ return 0;
+}
+
+static int viommu_probe(struct virtio_device *vdev)
+{
+ struct device *parent_dev = vdev->dev.parent;
+ struct viommu_dev *viommu = NULL;
+ struct device *dev = &vdev->dev;
+ u64 input_start = 0;
+ u64 input_end = -1UL;
+ int ret;
+
+ viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL);
+ if (!viommu)
+ return -ENOMEM;
+
+ spin_lock_init(&viommu->request_lock);
+ ida_init(&viommu->domain_ids);
+ viommu->dev = dev;
+ viommu->vdev = vdev;
+
+ ret = viommu_init_vq(viommu);
+ if (ret)
+ return ret;
+
+ virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
+ &viommu->pgsize_bitmap);
+
+ if (!viommu->pgsize_bitmap) {
+ ret = -EINVAL;
+ goto err_free_vqs;
+ }
+
+ viommu->domain_bits = 32;
+
+ /* Optional features */
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.start,
+ &input_start);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.end,
+ &input_end);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS,
+ struct virtio_iommu_config, domain_bits,
+ &viommu->domain_bits);
+
+ viommu->geometry = (struct iommu_domain_geometry) {
+ .aperture_start = input_start,
+ .aperture_end = input_end,
+ .force_aperture = true,
+ };
+
+ viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
+
+ virtio_device_ready(vdev);
+
+ ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
+ virtio_bus_name(vdev));
+ if (ret)
+ goto err_free_vqs;
+
+ iommu_device_set_ops(&viommu->iommu, &viommu_ops);
+ iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode);
+
+ iommu_device_register(&viommu->iommu);
+
+#ifdef CONFIG_PCI
+ if (pci_bus_type.iommu_ops != &viommu_ops) {
+ pci_request_acs();
+ ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+#endif
+#ifdef CONFIG_ARM_AMBA
+ if (amba_bustype.iommu_ops != &viommu_ops) {
+ ret = bus_set_iommu(&amba_bustype, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+#endif
+ if (platform_bus_type.iommu_ops != &viommu_ops) {
+ ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+
+ vdev->priv = viommu;
+
+ dev_info(dev, "input address: %u bits\n",
+ order_base_2(viommu->geometry.aperture_end));
+ dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);
+
+ return 0;
+
+err_unregister:
+ iommu_device_sysfs_remove(&viommu->iommu);
+ iommu_device_unregister(&viommu->iommu);
+err_free_vqs:
+ vdev->config->del_vqs(vdev);
+
+ return ret;
+}
+
+static void viommu_remove(struct virtio_device *vdev)
+{
+ struct viommu_dev *viommu = vdev->priv;
+
+ iommu_device_sysfs_remove(&viommu->iommu);
+ iommu_device_unregister(&viommu->iommu);
+
+ /* Stop all virtqueues */
+ vdev->config->reset(vdev);
+ vdev->config->del_vqs(vdev);
+
+ dev_info(&vdev->dev, "device removed\n");
+}
+
+static void viommu_config_changed(struct virtio_device *vdev)
+{
+ dev_warn(&vdev->dev, "config changed\n");
+}
+
+static unsigned int features[] = {
+ VIRTIO_IOMMU_F_MAP_UNMAP,
+ VIRTIO_IOMMU_F_DOMAIN_BITS,
+ VIRTIO_IOMMU_F_INPUT_RANGE,
+};
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct virtio_driver virtio_iommu_drv = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .probe = viommu_probe,
+ .remove = viommu_remove,
+ .config_changed = viommu_config_changed,
+};
+
+module_virtio_driver(virtio_iommu_drv);
+
+IOMMU_OF_DECLARE(viommu, "virtio,mmio");
+
+MODULE_DESCRIPTION("Virtio IOMMU driver");
+MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 6d5c3b2d4f4d..cfe47c5d9a56 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -43,5 +43,6 @@
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
+#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
new file mode 100644
index 000000000000..0de9b44db14d
--- /dev/null
+++ b/include/uapi/linux/virtio_iommu.h
@@ -0,0 +1,116 @@
+/*
+ * Virtio-iommu definition v0.6
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
+#define _UAPI_LINUX_VIRTIO_IOMMU_H
+
+#include <linux/types.h>
+
+/* Feature bits */
+#define VIRTIO_IOMMU_F_INPUT_RANGE 0
+#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
+#define VIRTIO_IOMMU_F_MAP_UNMAP 2
+#define VIRTIO_IOMMU_F_BYPASS 3
+
+struct virtio_iommu_config {
+ /* Supported page sizes */
+ __u64 page_size_mask;
+ /* Supported IOVA range */
+ struct virtio_iommu_range {
+ __u64 start;
+ __u64 end;
+ } input_range;
+ /* Max domain ID size */
+ __u8 domain_bits;
+} __packed;
+
+/* Request types */
+#define VIRTIO_IOMMU_T_ATTACH 0x01
+#define VIRTIO_IOMMU_T_DETACH 0x02
+#define VIRTIO_IOMMU_T_MAP 0x03
+#define VIRTIO_IOMMU_T_UNMAP 0x04
+
+/* Status types */
+#define VIRTIO_IOMMU_S_OK 0x00
+#define VIRTIO_IOMMU_S_IOERR 0x01
+#define VIRTIO_IOMMU_S_UNSUPP 0x02
+#define VIRTIO_IOMMU_S_DEVERR 0x03
+#define VIRTIO_IOMMU_S_INVAL 0x04
+#define VIRTIO_IOMMU_S_RANGE 0x05
+#define VIRTIO_IOMMU_S_NOENT 0x06
+#define VIRTIO_IOMMU_S_FAULT 0x07
+
+struct virtio_iommu_req_head {
+ __u8 type;
+ __u8 reserved[3];
+} __packed;
+
+struct virtio_iommu_req_tail {
+ __u8 status;
+ __u8 reserved[3];
+} __packed;
+
+struct virtio_iommu_req_attach {
+ struct virtio_iommu_req_head head;
+
+ __le32 domain;
+ __le32 endpoint;
+ __le32 reserved;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+struct virtio_iommu_req_detach {
+ struct virtio_iommu_req_head head;
+
+ __le32 endpoint;
+ __le32 reserved;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
+#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
+#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2)
+
+#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \
+ VIRTIO_IOMMU_MAP_F_WRITE | \
+ VIRTIO_IOMMU_MAP_F_EXEC)
+
+struct virtio_iommu_req_map {
+ struct virtio_iommu_req_head head;
+
+ __le32 domain;
+ __le64 virt_start;
+ __le64 virt_end;
+ __le64 phys_start;
+ __le32 flags;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+struct virtio_iommu_req_unmap {
+ struct virtio_iommu_req_head head;
+
+ __le32 domain;
+ __le64 virt_start;
+ __le64 virt_end;
+ __le32 reserved;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+union virtio_iommu_req {
+ struct virtio_iommu_req_head head;
+
+ struct virtio_iommu_req_attach attach;
+ struct virtio_iommu_req_detach detach;
+ struct virtio_iommu_req_map map;
+ struct virtio_iommu_req_unmap unmap;
+};
+
+#endif
--
2.16.1
^ permalink raw reply related
* [PATCH 0/4] Add virtio-iommu driver
From: Jean-Philippe Brucker @ 2018-02-14 14:53 UTC (permalink / raw)
To: iommu, kvm, virtualization, virtio-dev, kvmarm
Cc: jayachandran.nair, lorenzo.pieralisi, tnowicki, mst, marc.zyngier,
will.deacon, jintack, eric.auger, robin.murphy, joro,
eric.auger.pro
Implement the virtio-iommu driver following version 0.6 of the
specification [1]. Previous version, RFCv2, was sent in November [2].
This version addresses Eric's comments and changes the device number.
(Since last week I also tested and fixed the probe/release functions,
they now use devm properly.)
I did not include ACPI support because the next IORT specifications
isn't ready yet (even though the virtio-iommu spec describes the node
format, a new node type has to be allocated). Therefore only device-tree
guests are supported for the moment but the x86 prototype, on branch
virtio-iommu/devel, doesn't add much complexity.
git://linux-arm.org/linux-jpb.git virtio-iommu/v0.6
Test it with Eric's latest QEMU device [3], for example with the
following command-line:
$ qemu-system-aarch64 -M virt -cpu cortex-a57 -nographic
-kernel Image -append 'console=ttyAMA0 root=/dev/vda rw'
-device virtio-iommu-device
-device virtio-blk-pci,iommu_platform,disable-legacy=on,drive=hd0
-drive if=none,file=rootfs.bin,id=hd0
You can also try the kvmtool device [4]. For example on AMD Seattle I
use the following commands to boot a guest with all devices behind a
virtio-iommu, then display mapping stats.
$ lkvm run -k Image --irqchip gicv2m --console virtio --vfio-pci 01:00.0
--viommu vfio --viommu virtio
$ lkvm debug -a -i stats
[1] [RFC] virtio-iommu version 0.6
https://www.spinics.net/lists/linux-virtualization/msg32628.html
[2] [RFC PATCH v2 0/5] Add virtio-iommu driver
https://www.spinics.net/lists/kvm/msg159047.html
[3] [RFC v6 00/22] VIRTIO-IOMMU device
http://lists.gnu.org/archive/html/qemu-arm/2018-02/msg00274.html
[4] git://linux-arm.org/kvmtool-jpb.git virtio-iommu/v0.6
Jean-Philippe Brucker (4):
iommu: Add virtio-iommu driver
iommu/virtio: Add probe request
iommu/virtio: Add event queue
vfio: Allow type-1 IOMMU instantiation with a virtio-iommu
MAINTAINERS | 6 +
drivers/iommu/Kconfig | 11 +
drivers/iommu/Makefile | 1 +
drivers/iommu/virtio-iommu.c | 1220 +++++++++++++++++++++++++++++++++++++
drivers/vfio/Kconfig | 2 +-
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_iommu.h | 171 ++++++
7 files changed, 1411 insertions(+), 1 deletion(-)
create mode 100644 drivers/iommu/virtio-iommu.c
create mode 100644 include/uapi/linux/virtio_iommu.h
--
2.16.1
^ permalink raw reply
* Re: [PATCH v2 2/6] crypto: engine - Permit to enqueue all async requests
From: Fabien DESSENNE @ 2018-02-14 13:31 UTC (permalink / raw)
To: Corentin Labbe, Alexandre TORGUE, arei.gonglei@huawei.com,
corbet@lwn.net, davem@davemloft.net, herbert@gondor.apana.org.au,
jasowang@redhat.com, mcoquelin.stm32@gmail.com, mst@redhat.com
Cc: linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
virtualization@lists.linux-foundation.org,
linux-sunxi@googlegroups.com, linux-crypto@vger.kernel.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20180126191534.17569-3-clabbe.montjoie@gmail.com>
Adding my tested-by for the AEAD part which is new in v2
On 26/01/18 20:15, Corentin Labbe wrote:
> The crypto engine could actually only enqueue hash and ablkcipher request.
> This patch permit it to enqueue any type of crypto_async_request.
>
> Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
> Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
> ---
> crypto/crypto_engine.c | 301 ++++++++++++++++++++++++++----------------------
> include/crypto/engine.h | 68 ++++++-----
> 2 files changed, 203 insertions(+), 166 deletions(-)
>
> diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
> index 61e7c4e02fd2..992e8d8dcdd9 100644
> --- a/crypto/crypto_engine.c
> +++ b/crypto/crypto_engine.c
> @@ -15,13 +15,50 @@
> #include <linux/err.h>
> #include <linux/delay.h>
> #include <crypto/engine.h>
> -#include <crypto/internal/hash.h>
> #include <uapi/linux/sched/types.h>
> #include "internal.h"
>
> #define CRYPTO_ENGINE_MAX_QLEN 10
>
> /**
> + * crypto_finalize_request - finalize one request if the request is done
> + * @engine: the hardware engine
> + * @req: the request need to be finalized
> + * @err: error number
> + */
> +static void crypto_finalize_request(struct crypto_engine *engine,
> + struct crypto_async_request *req, int err)
> +{
> + unsigned long flags;
> + bool finalize_cur_req = false;
> + int ret;
> + struct crypto_engine_ctx *enginectx;
> +
> + spin_lock_irqsave(&engine->queue_lock, flags);
> + if (engine->cur_req == req)
> + finalize_cur_req = true;
> + spin_unlock_irqrestore(&engine->queue_lock, flags);
> +
> + if (finalize_cur_req) {
> + enginectx = crypto_tfm_ctx(req->tfm);
> + if (engine->cur_req_prepared &&
> + enginectx->op.unprepare_request) {
> + ret = enginectx->op.unprepare_request(engine, req);
> + if (ret)
> + dev_err(engine->dev, "failed to unprepare request\n");
> + }
> + spin_lock_irqsave(&engine->queue_lock, flags);
> + engine->cur_req = NULL;
> + engine->cur_req_prepared = false;
> + spin_unlock_irqrestore(&engine->queue_lock, flags);
> + }
> +
> + req->complete(req, err);
> +
> + kthread_queue_work(engine->kworker, &engine->pump_requests);
> +}
> +
> +/**
> * crypto_pump_requests - dequeue one request from engine queue to process
> * @engine: the hardware engine
> * @in_kthread: true if we are in the context of the request pump thread
> @@ -34,11 +71,10 @@ static void crypto_pump_requests(struct crypto_engine *engine,
> bool in_kthread)
> {
> struct crypto_async_request *async_req, *backlog;
> - struct ahash_request *hreq;
> - struct ablkcipher_request *breq;
> unsigned long flags;
> bool was_busy = false;
> - int ret, rtype;
> + int ret;
> + struct crypto_engine_ctx *enginectx;
>
> spin_lock_irqsave(&engine->queue_lock, flags);
>
> @@ -94,7 +130,6 @@ static void crypto_pump_requests(struct crypto_engine *engine,
>
> spin_unlock_irqrestore(&engine->queue_lock, flags);
>
> - rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
> /* Until here we get the request need to be encrypted successfully */
> if (!was_busy && engine->prepare_crypt_hardware) {
> ret = engine->prepare_crypt_hardware(engine);
> @@ -104,57 +139,31 @@ static void crypto_pump_requests(struct crypto_engine *engine,
> }
> }
>
> - switch (rtype) {
> - case CRYPTO_ALG_TYPE_AHASH:
> - hreq = ahash_request_cast(engine->cur_req);
> - if (engine->prepare_hash_request) {
> - ret = engine->prepare_hash_request(engine, hreq);
> - if (ret) {
> - dev_err(engine->dev, "failed to prepare request: %d\n",
> - ret);
> - goto req_err;
> - }
> - engine->cur_req_prepared = true;
> - }
> - ret = engine->hash_one_request(engine, hreq);
> - if (ret) {
> - dev_err(engine->dev, "failed to hash one request from queue\n");
> - goto req_err;
> - }
> - return;
> - case CRYPTO_ALG_TYPE_ABLKCIPHER:
> - breq = ablkcipher_request_cast(engine->cur_req);
> - if (engine->prepare_cipher_request) {
> - ret = engine->prepare_cipher_request(engine, breq);
> - if (ret) {
> - dev_err(engine->dev, "failed to prepare request: %d\n",
> - ret);
> - goto req_err;
> - }
> - engine->cur_req_prepared = true;
> - }
> - ret = engine->cipher_one_request(engine, breq);
> + enginectx = crypto_tfm_ctx(async_req->tfm);
> +
> + if (enginectx->op.prepare_request) {
> + ret = enginectx->op.prepare_request(engine, async_req);
> if (ret) {
> - dev_err(engine->dev, "failed to cipher one request from queue\n");
> + dev_err(engine->dev, "failed to prepare request: %d\n",
> + ret);
> goto req_err;
> }
> - return;
> - default:
> - dev_err(engine->dev, "failed to prepare request of unknown type\n");
> - return;
> + engine->cur_req_prepared = true;
> + }
> + if (!enginectx->op.do_one_request) {
> + dev_err(engine->dev, "failed to do request\n");
> + ret = -EINVAL;
> + goto req_err;
> }
> + ret = enginectx->op.do_one_request(engine, async_req);
> + if (ret) {
> + dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret);
> + goto req_err;
> + }
> + return;
>
> req_err:
> - switch (rtype) {
> - case CRYPTO_ALG_TYPE_AHASH:
> - hreq = ahash_request_cast(engine->cur_req);
> - crypto_finalize_hash_request(engine, hreq, ret);
> - break;
> - case CRYPTO_ALG_TYPE_ABLKCIPHER:
> - breq = ablkcipher_request_cast(engine->cur_req);
> - crypto_finalize_cipher_request(engine, breq, ret);
> - break;
> - }
> + crypto_finalize_request(engine, async_req, ret);
> return;
>
> out:
> @@ -170,13 +179,12 @@ static void crypto_pump_work(struct kthread_work *work)
> }
>
> /**
> - * crypto_transfer_cipher_request - transfer the new request into the
> - * enginequeue
> + * crypto_transfer_request - transfer the new request into the engine queue
> * @engine: the hardware engine
> * @req: the request need to be listed into the engine queue
> */
> -int crypto_transfer_cipher_request(struct crypto_engine *engine,
> - struct ablkcipher_request *req,
> +static int crypto_transfer_request(struct crypto_engine *engine,
> + struct crypto_async_request *req,
> bool need_pump)
> {
> unsigned long flags;
> @@ -189,7 +197,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
> return -ESHUTDOWN;
> }
>
> - ret = ablkcipher_enqueue_request(&engine->queue, req);
> + ret = crypto_enqueue_request(&engine->queue, req);
>
> if (!engine->busy && need_pump)
> kthread_queue_work(engine->kworker, &engine->pump_requests);
> @@ -197,102 +205,131 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
> spin_unlock_irqrestore(&engine->queue_lock, flags);
> return ret;
> }
> -EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
>
> /**
> - * crypto_transfer_cipher_request_to_engine - transfer one request to list
> + * crypto_transfer_request_to_engine - transfer one request to list
> * into the engine queue
> * @engine: the hardware engine
> * @req: the request need to be listed into the engine queue
> */
> -int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
> - struct ablkcipher_request *req)
> +static int crypto_transfer_request_to_engine(struct crypto_engine *engine,
> + struct crypto_async_request *req)
> {
> - return crypto_transfer_cipher_request(engine, req, true);
> + return crypto_transfer_request(engine, req, true);
> }
> -EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
>
> /**
> - * crypto_transfer_hash_request - transfer the new request into the
> - * enginequeue
> + * crypto_transfer_ablkcipher_request_to_engine - transfer one ablkcipher_request
> + * to list into the engine queue
> * @engine: the hardware engine
> * @req: the request need to be listed into the engine queue
> + * TODO: Remove this function when skcipher conversion is finished
> */
> -int crypto_transfer_hash_request(struct crypto_engine *engine,
> - struct ahash_request *req, bool need_pump)
> +int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
> + struct ablkcipher_request *req)
> {
> - unsigned long flags;
> - int ret;
> -
> - spin_lock_irqsave(&engine->queue_lock, flags);
> -
> - if (!engine->running) {
> - spin_unlock_irqrestore(&engine->queue_lock, flags);
> - return -ESHUTDOWN;
> - }
> -
> - ret = ahash_enqueue_request(&engine->queue, req);
> + return crypto_transfer_request_to_engine(engine, &req->base);
> +}
> +EXPORT_SYMBOL_GPL(crypto_transfer_ablkcipher_request_to_engine);
>
> - if (!engine->busy && need_pump)
> - kthread_queue_work(engine->kworker, &engine->pump_requests);
> +/**
> + * crypto_transfer_aead_request_to_engine - transfer one aead_request
> + * to list into the engine queue
> + * @engine: the hardware engine
> + * @req: the request need to be listed into the engine queue
> + */
> +int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
> + struct aead_request *req)
> +{
> + return crypto_transfer_request_to_engine(engine, &req->base);
> +}
> +EXPORT_SYMBOL_GPL(crypto_transfer_aead_request_to_engine);
>
> - spin_unlock_irqrestore(&engine->queue_lock, flags);
> - return ret;
> +/**
> + * crypto_transfer_akcipher_request_to_engine - transfer one akcipher_request
> + * to list into the engine queue
> + * @engine: the hardware engine
> + * @req: the request need to be listed into the engine queue
> + */
> +int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
> + struct akcipher_request *req)
> +{
> + return crypto_transfer_request_to_engine(engine, &req->base);
> }
> -EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
> +EXPORT_SYMBOL_GPL(crypto_transfer_akcipher_request_to_engine);
>
> /**
> - * crypto_transfer_hash_request_to_engine - transfer one request to list
> - * into the engine queue
> + * crypto_transfer_hash_request_to_engine - transfer one ahash_request
> + * to list into the engine queue
> * @engine: the hardware engine
> * @req: the request need to be listed into the engine queue
> */
> int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
> struct ahash_request *req)
> {
> - return crypto_transfer_hash_request(engine, req, true);
> + return crypto_transfer_request_to_engine(engine, &req->base);
> }
> EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
>
> /**
> - * crypto_finalize_cipher_request - finalize one request if the request is done
> + * crypto_transfer_skcipher_request_to_engine - transfer one skcipher_request
> + * to list into the engine queue
> + * @engine: the hardware engine
> + * @req: the request need to be listed into the engine queue
> + */
> +int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
> + struct skcipher_request *req)
> +{
> + return crypto_transfer_request_to_engine(engine, &req->base);
> +}
> +EXPORT_SYMBOL_GPL(crypto_transfer_skcipher_request_to_engine);
> +
> +/**
> + * crypto_finalize_ablkcipher_request - finalize one ablkcipher_request if
> + * the request is done
> * @engine: the hardware engine
> * @req: the request need to be finalized
> * @err: error number
> + * TODO: Remove this function when skcipher conversion is finished
> */
> -void crypto_finalize_cipher_request(struct crypto_engine *engine,
> - struct ablkcipher_request *req, int err)
> +void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
> + struct ablkcipher_request *req, int err)
> {
> - unsigned long flags;
> - bool finalize_cur_req = false;
> - int ret;
> -
> - spin_lock_irqsave(&engine->queue_lock, flags);
> - if (engine->cur_req == &req->base)
> - finalize_cur_req = true;
> - spin_unlock_irqrestore(&engine->queue_lock, flags);
> -
> - if (finalize_cur_req) {
> - if (engine->cur_req_prepared &&
> - engine->unprepare_cipher_request) {
> - ret = engine->unprepare_cipher_request(engine, req);
> - if (ret)
> - dev_err(engine->dev, "failed to unprepare request\n");
> - }
> - spin_lock_irqsave(&engine->queue_lock, flags);
> - engine->cur_req = NULL;
> - engine->cur_req_prepared = false;
> - spin_unlock_irqrestore(&engine->queue_lock, flags);
> - }
> + return crypto_finalize_request(engine, &req->base, err);
> +}
> +EXPORT_SYMBOL_GPL(crypto_finalize_ablkcipher_request);
>
> - req->base.complete(&req->base, err);
> +/**
> + * crypto_finalize_aead_request - finalize one aead_request if
> + * the request is done
> + * @engine: the hardware engine
> + * @req: the request need to be finalized
> + * @err: error number
> + */
> +void crypto_finalize_aead_request(struct crypto_engine *engine,
> + struct aead_request *req, int err)
> +{
> + return crypto_finalize_request(engine, &req->base, err);
> +}
> +EXPORT_SYMBOL_GPL(crypto_finalize_aead_request);
>
> - kthread_queue_work(engine->kworker, &engine->pump_requests);
> +/**
> + * crypto_finalize_akcipher_request - finalize one akcipher_request if
> + * the request is done
> + * @engine: the hardware engine
> + * @req: the request need to be finalized
> + * @err: error number
> + */
> +void crypto_finalize_akcipher_request(struct crypto_engine *engine,
> + struct akcipher_request *req, int err)
> +{
> + return crypto_finalize_request(engine, &req->base, err);
> }
> -EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
> +EXPORT_SYMBOL_GPL(crypto_finalize_akcipher_request);
>
> /**
> - * crypto_finalize_hash_request - finalize one request if the request is done
> + * crypto_finalize_hash_request - finalize one ahash_request if
> + * the request is done
> * @engine: the hardware engine
> * @req: the request need to be finalized
> * @err: error number
> @@ -300,35 +337,25 @@ EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
> void crypto_finalize_hash_request(struct crypto_engine *engine,
> struct ahash_request *req, int err)
> {
> - unsigned long flags;
> - bool finalize_cur_req = false;
> - int ret;
> -
> - spin_lock_irqsave(&engine->queue_lock, flags);
> - if (engine->cur_req == &req->base)
> - finalize_cur_req = true;
> - spin_unlock_irqrestore(&engine->queue_lock, flags);
> -
> - if (finalize_cur_req) {
> - if (engine->cur_req_prepared &&
> - engine->unprepare_hash_request) {
> - ret = engine->unprepare_hash_request(engine, req);
> - if (ret)
> - dev_err(engine->dev, "failed to unprepare request\n");
> - }
> - spin_lock_irqsave(&engine->queue_lock, flags);
> - engine->cur_req = NULL;
> - engine->cur_req_prepared = false;
> - spin_unlock_irqrestore(&engine->queue_lock, flags);
> - }
> -
> - req->base.complete(&req->base, err);
> -
> - kthread_queue_work(engine->kworker, &engine->pump_requests);
> + return crypto_finalize_request(engine, &req->base, err);
> }
> EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
>
> /**
> + * crypto_finalize_skcipher_request - finalize one skcipher_request if
> + * the request is done
> + * @engine: the hardware engine
> + * @req: the request need to be finalized
> + * @err: error number
> + */
> +void crypto_finalize_skcipher_request(struct crypto_engine *engine,
> + struct skcipher_request *req, int err)
> +{
> + return crypto_finalize_request(engine, &req->base, err);
> +}
> +EXPORT_SYMBOL_GPL(crypto_finalize_skcipher_request);
> +
> +/**
> * crypto_engine_start - start the hardware engine
> * @engine: the hardware engine need to be started
> *
> diff --git a/include/crypto/engine.h b/include/crypto/engine.h
> index dd04c1699b51..1cbec29af3d6 100644
> --- a/include/crypto/engine.h
> +++ b/include/crypto/engine.h
> @@ -17,7 +17,10 @@
> #include <linux/kernel.h>
> #include <linux/kthread.h>
> #include <crypto/algapi.h>
> +#include <crypto/aead.h>
> +#include <crypto/akcipher.h>
> #include <crypto/hash.h>
> +#include <crypto/skcipher.h>
>
> #define ENGINE_NAME_LEN 30
> /*
> @@ -37,12 +40,6 @@
> * @unprepare_crypt_hardware: there are currently no more requests on the
> * queue so the subsystem notifies the driver that it may relax the
> * hardware by issuing this call
> - * @prepare_cipher_request: do some prepare if need before handle the current request
> - * @unprepare_cipher_request: undo any work done by prepare_cipher_request()
> - * @cipher_one_request: do encryption for current request
> - * @prepare_hash_request: do some prepare if need before handle the current request
> - * @unprepare_hash_request: undo any work done by prepare_hash_request()
> - * @hash_one_request: do hash for current request
> * @kworker: kthread worker struct for request pump
> * @pump_requests: work struct for scheduling work to the request pump
> * @priv_data: the engine private data
> @@ -65,19 +62,6 @@ struct crypto_engine {
> int (*prepare_crypt_hardware)(struct crypto_engine *engine);
> int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
>
> - int (*prepare_cipher_request)(struct crypto_engine *engine,
> - struct ablkcipher_request *req);
> - int (*unprepare_cipher_request)(struct crypto_engine *engine,
> - struct ablkcipher_request *req);
> - int (*prepare_hash_request)(struct crypto_engine *engine,
> - struct ahash_request *req);
> - int (*unprepare_hash_request)(struct crypto_engine *engine,
> - struct ahash_request *req);
> - int (*cipher_one_request)(struct crypto_engine *engine,
> - struct ablkcipher_request *req);
> - int (*hash_one_request)(struct crypto_engine *engine,
> - struct ahash_request *req);
> -
> struct kthread_worker *kworker;
> struct kthread_work pump_requests;
>
> @@ -85,19 +69,45 @@ struct crypto_engine {
> struct crypto_async_request *cur_req;
> };
>
> -int crypto_transfer_cipher_request(struct crypto_engine *engine,
> - struct ablkcipher_request *req,
> - bool need_pump);
> -int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
> - struct ablkcipher_request *req);
> -int crypto_transfer_hash_request(struct crypto_engine *engine,
> - struct ahash_request *req, bool need_pump);
> +/*
> + * struct crypto_engine_op - crypto hardware engine operations
> + * @prepare__request: do some prepare if need before handle the current request
> + * @unprepare_request: undo any work done by prepare_request()
> + * @do_one_request: do encryption for current request
> + */
> +struct crypto_engine_op {
> + int (*prepare_request)(struct crypto_engine *engine,
> + void *areq);
> + int (*unprepare_request)(struct crypto_engine *engine,
> + void *areq);
> + int (*do_one_request)(struct crypto_engine *engine,
> + void *areq);
> +};
> +
> +struct crypto_engine_ctx {
> + struct crypto_engine_op op;
> +};
> +
> +int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
> + struct ablkcipher_request *req);
> +int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
> + struct aead_request *req);
> +int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
> + struct akcipher_request *req);
> int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
> - struct ahash_request *req);
> -void crypto_finalize_cipher_request(struct crypto_engine *engine,
> - struct ablkcipher_request *req, int err);
> + struct ahash_request *req);
> +int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
> + struct skcipher_request *req);
> +void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
> + struct ablkcipher_request *req, int err);
> +void crypto_finalize_aead_request(struct crypto_engine *engine,
> + struct aead_request *req, int err);
> +void crypto_finalize_akcipher_request(struct crypto_engine *engine,
> + struct akcipher_request *req, int err);
> void crypto_finalize_hash_request(struct crypto_engine *engine,
> struct ahash_request *req, int err);
> +void crypto_finalize_skcipher_request(struct crypto_engine *engine,
> + struct skcipher_request *req, int err);
> int crypto_engine_start(struct crypto_engine *engine);
> int crypto_engine_stop(struct crypto_engine *engine);
> struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
^ permalink raw reply
* Re: [PULL v2 1/1] virtio/s390: implement PM operations for virtio_ccw
From: Michael S. Tsirkin @ 2018-02-14 12:37 UTC (permalink / raw)
To: Cornelia Huck; +Cc: linux-s390, kvm, virtualization
In-Reply-To: <20180214131652.1908763e.cohuck@redhat.com>
On Wed, Feb 14, 2018 at 01:16:52PM +0100, Cornelia Huck wrote:
> On Mon, 12 Feb 2018 09:52:00 +0100
> Christian Borntraeger <borntraeger@de.ibm.com> wrote:
>
> > Michael, Conny,
> > it seems that this patch did not make it into 4.16-rc1.
>
> Michael, please let me know what you plan to do with the
> virtio-s390-20171218-v2 pull request.
My bad, I did not realize you expect me to merge pulls.
I pretty much can't because I rebase my tree.
I've applied this patch on the vhost branch - will merge
as a bugfix.
> >
> >
> >
> > On 12/18/2017 05:21 PM, Cornelia Huck wrote:
> > > From: Christian Borntraeger <borntraeger@de.ibm.com>
> > >
> > > Suspend/Resume to/from disk currently fails. Let us wire
> > > up the necessary callbacks. This is mostly just forwarding
> > > the requests to the virtio drivers. The only thing that
> > > has to be done in virtio_ccw itself is to re-set the
> > > virtio revision.
> > >
> > > Suggested-by: Thomas Huth <thuth@redhat.com>
> > > Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> > > Message-Id: <20171207141102.70190-2-borntraeger@de.ibm.com>
> > > Reviewed-by: David Hildenbrand <david@redhat.com>
> > > [CH: merged <20171218083706.223836-1-borntraeger@de.ibm.com> to fix
> > > !CONFIG_PM configs]
> > > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > > ---
> > > drivers/s390/virtio/virtio_ccw.c | 29 +++++++++++++++++++++++++++++
> > > 1 file changed, 29 insertions(+)
> > >
> > > diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
> > > index b18fe2014cf2..985184ebda45 100644
> > > --- a/drivers/s390/virtio/virtio_ccw.c
> > > +++ b/drivers/s390/virtio/virtio_ccw.c
> > > @@ -1300,6 +1300,9 @@ static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
> > > vcdev->device_lost = true;
> > > rc = NOTIFY_DONE;
> > > break;
> > > + case CIO_OPER:
> > > + rc = NOTIFY_OK;
> > > + break;
> > > default:
> > > rc = NOTIFY_DONE;
> > > break;
> > > @@ -1312,6 +1315,27 @@ static struct ccw_device_id virtio_ids[] = {
> > > {},
> > > };
> > >
> > > +#ifdef CONFIG_PM_SLEEP
> > > +static int virtio_ccw_freeze(struct ccw_device *cdev)
> > > +{
> > > + struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
> > > +
> > > + return virtio_device_freeze(&vcdev->vdev);
> > > +}
> > > +
> > > +static int virtio_ccw_restore(struct ccw_device *cdev)
> > > +{
> > > + struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
> > > + int ret;
> > > +
> > > + ret = virtio_ccw_set_transport_rev(vcdev);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + return virtio_device_restore(&vcdev->vdev);
> > > +}
> > > +#endif
> > > +
> > > static struct ccw_driver virtio_ccw_driver = {
> > > .driver = {
> > > .owner = THIS_MODULE,
> > > @@ -1324,6 +1348,11 @@ static struct ccw_driver virtio_ccw_driver = {
> > > .set_online = virtio_ccw_online,
> > > .notify = virtio_ccw_cio_notify,
> > > .int_class = IRQIO_VIR,
> > > +#ifdef CONFIG_PM_SLEEP
> > > + .freeze = virtio_ccw_freeze,
> > > + .thaw = virtio_ccw_restore,
> > > + .restore = virtio_ccw_restore,
> > > +#endif
> > > };
> > >
> > > static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
> > >
> >
^ permalink raw reply
* Re: [PULL v2 1/1] virtio/s390: implement PM operations for virtio_ccw
From: Cornelia Huck @ 2018-02-14 12:16 UTC (permalink / raw)
To: mst; +Cc: linux-s390, kvm, virtualization
In-Reply-To: <1e030d8d-0185-ee75-eda4-5551412db096@de.ibm.com>
On Mon, 12 Feb 2018 09:52:00 +0100
Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> Michael, Conny,
> it seems that this patch did not make it into 4.16-rc1.
Michael, please let me know what you plan to do with the
virtio-s390-20171218-v2 pull request.
>
>
>
> On 12/18/2017 05:21 PM, Cornelia Huck wrote:
> > From: Christian Borntraeger <borntraeger@de.ibm.com>
> >
> > Suspend/Resume to/from disk currently fails. Let us wire
> > up the necessary callbacks. This is mostly just forwarding
> > the requests to the virtio drivers. The only thing that
> > has to be done in virtio_ccw itself is to re-set the
> > virtio revision.
> >
> > Suggested-by: Thomas Huth <thuth@redhat.com>
> > Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> > Message-Id: <20171207141102.70190-2-borntraeger@de.ibm.com>
> > Reviewed-by: David Hildenbrand <david@redhat.com>
> > [CH: merged <20171218083706.223836-1-borntraeger@de.ibm.com> to fix
> > !CONFIG_PM configs]
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> > drivers/s390/virtio/virtio_ccw.c | 29 +++++++++++++++++++++++++++++
> > 1 file changed, 29 insertions(+)
> >
> > diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
> > index b18fe2014cf2..985184ebda45 100644
> > --- a/drivers/s390/virtio/virtio_ccw.c
> > +++ b/drivers/s390/virtio/virtio_ccw.c
> > @@ -1300,6 +1300,9 @@ static int virtio_ccw_cio_notify(struct ccw_device *cdev, int event)
> > vcdev->device_lost = true;
> > rc = NOTIFY_DONE;
> > break;
> > + case CIO_OPER:
> > + rc = NOTIFY_OK;
> > + break;
> > default:
> > rc = NOTIFY_DONE;
> > break;
> > @@ -1312,6 +1315,27 @@ static struct ccw_device_id virtio_ids[] = {
> > {},
> > };
> >
> > +#ifdef CONFIG_PM_SLEEP
> > +static int virtio_ccw_freeze(struct ccw_device *cdev)
> > +{
> > + struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
> > +
> > + return virtio_device_freeze(&vcdev->vdev);
> > +}
> > +
> > +static int virtio_ccw_restore(struct ccw_device *cdev)
> > +{
> > + struct virtio_ccw_device *vcdev = dev_get_drvdata(&cdev->dev);
> > + int ret;
> > +
> > + ret = virtio_ccw_set_transport_rev(vcdev);
> > + if (ret)
> > + return ret;
> > +
> > + return virtio_device_restore(&vcdev->vdev);
> > +}
> > +#endif
> > +
> > static struct ccw_driver virtio_ccw_driver = {
> > .driver = {
> > .owner = THIS_MODULE,
> > @@ -1324,6 +1348,11 @@ static struct ccw_driver virtio_ccw_driver = {
> > .set_online = virtio_ccw_online,
> > .notify = virtio_ccw_cio_notify,
> > .int_class = IRQIO_VIR,
> > +#ifdef CONFIG_PM_SLEEP
> > + .freeze = virtio_ccw_freeze,
> > + .thaw = virtio_ccw_restore,
> > + .restore = virtio_ccw_restore,
> > +#endif
> > };
> >
> > static int __init pure_hex(char **cp, unsigned int *val, int min_digit,
> >
>
^ permalink raw reply
* Re: [PATCH RFC 0/2] Packed ring for vhost
From: Jason Wang @ 2018-02-14 3:48 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: tiwei.bie, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180214044634-mutt-send-email-mst@kernel.org>
On 2018年02月14日 10:47, Michael S. Tsirkin wrote:
> On Wed, Feb 14, 2018 at 10:37:07AM +0800, Jason Wang wrote:
>> Hi all:
>>
>> This RFC implement a subset of packed ring which was described at
>> https://github.com/oasis-tcs/virtio-docs/blob/master/virtio-v1.1-packed-wd07.pdf
>> . The code were tested with pmd implement by Jens at
>> http://dpdk.org/ml/archives/dev/2018-January/089417.html. Minor
>> change was needed for pmd codes to kick virtqueue since it assumes a
>> busy polling backend.
>>
>> Test were done between localhost and guest. Testpmd (rxonly) in guest
>> reports 2.4Mpps. Testpmd (txonly) repots about 2.1Mpps.
> How does this compare with the split ring design?
No obvious difference (+-5%). I believe we reach the bottleneck of vhost.
>
>> It's not a complete implemention, here's what were missed:
>>
>> - Device Area
>> - Driver Area
>> - Descriptor indirection
>> - Zerocopy may not be functional
>> - Migration path is not tested
>> - Vhost devices except for net
>> - vIOMMU can not work (mainly because the metadata prefetch is not
>> implemented).
>> - See FIXME/TODO in the codes for more details
>> - No batching or other optimizations were implemented
> ioeventfd for PIO/mmio/s390.
>
Probably, but this is not the stuffs of packed ring I think.
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH RFC 0/2] Packed ring for vhost
From: Michael S. Tsirkin @ 2018-02-14 2:47 UTC (permalink / raw)
To: Jason Wang; +Cc: tiwei.bie, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <1518575829-1431-1-git-send-email-jasowang@redhat.com>
On Wed, Feb 14, 2018 at 10:37:07AM +0800, Jason Wang wrote:
> Hi all:
>
> This RFC implement a subset of packed ring which was described at
> https://github.com/oasis-tcs/virtio-docs/blob/master/virtio-v1.1-packed-wd07.pdf
> . The code were tested with pmd implement by Jens at
> http://dpdk.org/ml/archives/dev/2018-January/089417.html. Minor
> change was needed for pmd codes to kick virtqueue since it assumes a
> busy polling backend.
>
> Test were done between localhost and guest. Testpmd (rxonly) in guest
> reports 2.4Mpps. Testpmd (txonly) repots about 2.1Mpps.
How does this compare with the split ring design?
> It's not a complete implemention, here's what were missed:
>
> - Device Area
> - Driver Area
> - Descriptor indirection
> - Zerocopy may not be functional
> - Migration path is not tested
> - Vhost devices except for net
> - vIOMMU can not work (mainly because the metadata prefetch is not
> implemented).
> - See FIXME/TODO in the codes for more details
> - No batching or other optimizations were implemented
ioeventfd for PIO/mmio/s390.
> For a quick prototype, this series open code the tracking of warp
> counter and descriptor index at net device. This will be addressed in
> the future by:
>
> - Move get_rx_bufs() from net.c to vhost.c
> - Let vhost_get_vq_desc() returns vring_used_elem instad of head id
>
> With the above, we can hide the internal (at least part of) vring
> layout from specific device.
>
> Please review.
>
> Thanks
>
> Jason Wang (2):
> virtio: introduce packed ring defines
> vhost: packed ring support
>
> drivers/vhost/net.c | 14 +-
> drivers/vhost/vhost.c | 351 ++++++++++++++++++++++++++++++++++---
> drivers/vhost/vhost.h | 6 +-
> include/uapi/linux/virtio_config.h | 9 +
> include/uapi/linux/virtio_ring.h | 17 ++
> 5 files changed, 369 insertions(+), 28 deletions(-)
>
> --
> 2.7.4
^ permalink raw reply
* Re: [PATCH RFC 0/2] Packed ring for vhost
From: Jason Wang @ 2018-02-14 2:43 UTC (permalink / raw)
To: mst, virtualization, linux-kernel, netdev; +Cc: wexu, tiwei.bie
In-Reply-To: <1518575829-1431-1-git-send-email-jasowang@redhat.com>
On 2018年02月14日 10:37, Jason Wang wrote:
> Hi all:
>
> This RFC implement a subset of packed ring which was described at
> https://github.com/oasis-tcs/virtio-docs/blob/master/virtio-v1.1-packed-wd07.pdf
> . The code were tested with pmd implement by Jens at
> http://dpdk.org/ml/archives/dev/2018-January/089417.html. Minor
> change was needed for pmd codes to kick virtqueue since it assumes a
> busy polling backend.
>
> Test were done between localhost and guest. Testpmd (rxonly) in guest
> reports 2.4Mpps. Testpmd (txonly) repots about 2.1Mpps.
>
> It's not a complete implemention, here's what were missed:
>
> - Device Area
> - Driver Area
> - Descriptor indirection
> - Zerocopy may not be functional
> - Migration path is not tested
> - Vhost devices except for net
> - vIOMMU can not work (mainly because the metadata prefetch is not
> implemented).
> - See FIXME/TODO in the codes for more details
> - No batching or other optimizations were implemented
>
> For a quick prototype, this series open code the tracking of warp
> counter and descriptor index at net device. This will be addressed in
> the future by:
>
> - Move get_rx_bufs() from net.c to vhost.c
> - Let vhost_get_vq_desc() returns vring_used_elem instad of head id
>
> With the above, we can hide the internal (at least part of) vring
> layout from specific device.
>
> Please review.
>
> Thanks
It's near spring festival in China, will probably reply after the holiday.
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH RFC 2/2] vhost: packed ring support
From: Jason Wang @ 2018-02-14 2:37 UTC (permalink / raw)
To: mst, virtualization, linux-kernel, netdev; +Cc: wexu, tiwei.bie
In-Reply-To: <1518575829-1431-1-git-send-email-jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/net.c | 14 +-
drivers/vhost/vhost.c | 351 ++++++++++++++++++++++++++++++++++++++++++++++----
drivers/vhost/vhost.h | 6 +-
3 files changed, 343 insertions(+), 28 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c613d2e..65b27c9 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -67,7 +67,8 @@ enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
- (1ULL << VIRTIO_F_IOMMU_PLATFORM)
+ (1ULL << VIRTIO_F_IOMMU_PLATFORM) |
+ (1ULL << VIRTIO_F_RING_PACKED)
};
enum {
@@ -473,6 +474,7 @@ static void handle_tx(struct vhost_net *net)
struct socket *sock;
struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
bool zcopy, zcopy_used;
+ struct vring_used_elem used;
mutex_lock(&vq->mutex);
sock = vq->private_data;
@@ -494,6 +496,8 @@ static void handle_tx(struct vhost_net *net)
vhost_zerocopy_signal_used(net, vq);
+ used.idx = vq->last_avail_idx & (vq->num - 1);
+ used.wrap_counter = vq->used_wrap_counter;
head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
ARRAY_SIZE(vq->iov),
&out, &in);
@@ -515,6 +519,8 @@ static void handle_tx(struct vhost_net *net)
}
/* Skip header. TODO: support TSO. */
len = iov_length(vq->iov, out);
+ used.id = head;
+ used.len = 0;
iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
iov_iter_advance(&msg.msg_iter, hdr_size);
/* Sanity check */
@@ -576,7 +582,7 @@ static void handle_tx(struct vhost_net *net)
pr_debug("Truncated TX packet: "
" len %d != %zd\n", err, len);
if (!zcopy_used)
- vhost_add_used_and_signal(&net->dev, vq, head, 0);
+ vhost_add_used_and_signal_n(&net->dev, vq, &used, 1);
else
vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
@@ -691,6 +697,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
r = -ENOBUFS;
goto err;
}
+ heads[headcount].idx = vq->last_avail_idx & (vq->num - 1);
+ heads[headcount].wrap_counter = vq->used_wrap_counter;
r = vhost_get_vq_desc(vq, vq->iov + seg,
ARRAY_SIZE(vq->iov) - seg, &out,
&in, log, log_num);
@@ -780,6 +788,8 @@ static void handle_rx(struct vhost_net *net)
vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+ /* FIXME: workaround for current dpdk prototype */
+ mergeable = false;
while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
sock_len += sock_hlen;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2db5af8..5667d03 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -324,6 +324,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vhost_reset_is_le(vq);
vhost_disable_cross_endian(vq);
vq->busyloop_timeout = 0;
+ vq->used_wrap_counter = true;
vq->umem = NULL;
vq->iotlb = NULL;
__vhost_vq_meta_reset(vq);
@@ -1136,10 +1137,22 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
return 0;
}
-static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
- struct vring_desc __user *desc,
- struct vring_avail __user *avail,
- struct vring_used __user *used)
+static int vq_access_ok_packed(struct vhost_virtqueue *vq, unsigned int num,
+ struct vring_desc __user *desc,
+ struct vring_avail __user *avail,
+ struct vring_used __user *used)
+{
+ struct vring_desc_packed *packed = (struct vring_desc_packed *)desc;
+
+ /* FIXME: check device area and driver area */
+ return access_ok(VERIFY_READ, packed, num * sizeof(*packed)) &&
+ access_ok(VERIFY_WRITE, packed, num * sizeof(*packed));
+}
+
+static int vq_access_ok_split(struct vhost_virtqueue *vq, unsigned int num,
+ struct vring_desc __user *desc,
+ struct vring_avail __user *avail,
+ struct vring_used __user *used)
{
size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
@@ -1151,6 +1164,17 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
sizeof *used + num * sizeof *used->ring + s);
}
+static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
+ struct vring_desc __user *desc,
+ struct vring_avail __user *avail,
+ struct vring_used __user *used)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return vq_access_ok_packed(vq, num, desc, avail, used);
+ else
+ return vq_access_ok_split(vq, num, desc, avail, used);
+}
+
static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
const struct vhost_umem_node *node,
int type)
@@ -1763,6 +1787,9 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
vhost_init_is_le(vq);
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return 0;
+
r = vhost_update_used_flags(vq);
if (r)
goto err;
@@ -1947,18 +1974,136 @@ static int get_indirect(struct vhost_virtqueue *vq,
return 0;
}
-/* This looks in the virtqueue and for the first available buffer, and converts
- * it to an iovec for convenient access. Since descriptors consist of some
- * number of output then some number of input descriptors, it's actually two
- * iovecs, but we pack them into one and note how many of each there were.
- *
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found. A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num)
+#define DESC_AVAIL (1 << VRING_DESC_F_AVAIL)
+#define DESC_USED (1 << VRING_DESC_F_USED)
+static bool desc_is_avail(struct vhost_virtqueue *vq,
+ struct vring_desc_packed *desc)
+{
+ if (vq->used_wrap_counter)
+ if ((desc->flags & DESC_AVAIL) && !(desc->flags & DESC_USED))
+ return true;
+ if (vq->used_wrap_counter == false)
+ if (!(desc->flags & DESC_AVAIL) && (desc->flags & DESC_USED))
+ return true;
+
+ return false;
+}
+
+static void set_desc_used(struct vhost_virtqueue *vq,
+ struct vring_desc_packed *desc, bool wrap_counter)
+{
+ __virtio16 flags = desc->flags;
+
+ if (wrap_counter) {
+ desc->flags |= cpu_to_vhost16(vq, DESC_AVAIL);
+ desc->flags |= cpu_to_vhost16(vq, DESC_USED);
+ } else {
+ desc->flags &= ~cpu_to_vhost16(vq, DESC_AVAIL);
+ desc->flags &= ~cpu_to_vhost16(vq, DESC_USED);
+ }
+
+ desc->flags = flags;
+}
+
+static int vhost_get_vq_desc_packed(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log,
+ unsigned int *log_num)
+{
+ struct vring_desc_packed desc;
+ int ret, access, i;
+ u16 avail_idx = vq->last_avail_idx;
+
+ /* When we start there are none of either input nor output. */
+ *out_num = *in_num = 0;
+ if (unlikely(log))
+ *log_num = 0;
+
+ do {
+ unsigned int iov_count = *in_num + *out_num;
+
+ i = vq->last_avail_idx & (vq->num - 1);
+ ret = vhost_copy_from_user(vq, &desc, vq->desc_packed + i,
+ sizeof(desc));
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc_packed + i);
+ return -EFAULT;
+ }
+
+ if (!desc_is_avail(vq, &desc)) {
+ /* If there's nothing new since last we looked, return
+ * invalid.
+ */
+ if (likely(avail_idx == vq->last_avail_idx))
+ return vq->num;
+ }
+
+ /* Only start to read descriptor after we're sure it was
+ * available.
+ */
+ smp_rmb();
+
+ /* FIXME: support indirect */
+ if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) {
+ vq_err(vq, "Indirect descriptor is not supported\n");
+ return -EFAULT;
+ }
+
+ if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
+ access = VHOST_ACCESS_WO;
+ else
+ access = VHOST_ACCESS_RO;
+ ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
+ vhost32_to_cpu(vq, desc.len),
+ iov + iov_count, iov_size - iov_count,
+ access);
+ if (unlikely(ret < 0)) {
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d idx %d\n",
+ ret, i);
+ return ret;
+ }
+
+ if (access == VHOST_ACCESS_WO) {
+ /* If this is an input descriptor,
+ * increment that count.
+ */
+ *in_num += ret;
+ if (unlikely(log)) {
+ log[*log_num].addr =
+ vhost64_to_cpu(vq, desc.addr);
+ log[*log_num].len =
+ vhost32_to_cpu(vq, desc.len);
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors.
+ */
+ if (unlikely(*in_num)) {
+ vq_err(vq, "Desc out after in: idx %d\n",
+ i);
+ return -EINVAL;
+ }
+ *out_num += ret;
+ }
+
+ /* On success, increment avail index. */
+ if ((++vq->last_avail_idx & (vq->num - 1)) == 0)
+ vq->used_wrap_counter ^= 1;
+
+ /* If this descriptor says it doesn't chain, we're done. */
+ } while (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT));
+
+ return desc.id;
+}
+
+static int vhost_get_vq_desc_split(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
{
struct vring_desc desc;
unsigned int i, head, found = 0;
@@ -2096,6 +2241,30 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
return head;
}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found. A negative code is
+ * returned on error.
+ */
+int vhost_get_vq_desc(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return vhost_get_vq_desc_packed(vq, iov, iov_size,
+ out_num, in_num,
+ log, log_num);
+ else
+ return vhost_get_vq_desc_split(vq, iov, iov_size,
+ out_num, in_num,
+ log, log_num);
+}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
@@ -2161,10 +2330,48 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
return 0;
}
-/* After we've used one of their buffers, we tell them about it. We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
- unsigned count)
+static int vhost_add_used_n_packed(struct vhost_virtqueue *vq,
+ struct vring_used_elem *heads,
+ unsigned int count)
+{
+ struct vring_desc_packed desc = {
+ .addr = 0,
+ .flags = 0,
+ };
+ int i, ret;
+
+ for (i = 0; i < count; i++) {
+ desc.id = heads[i].id;
+ desc.len = heads[i].len;
+ set_desc_used(vq, &desc, heads[i].wrap_counter);
+
+ /* Update flags etc before desc is written */
+ smp_mb();
+
+ ret = vhost_copy_to_user(vq, vq->desc_packed + heads[i].idx,
+ &desc, sizeof(desc));
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to set descriptor: idx %d addr %p\n",
+ heads[i].idx, vq->desc_packed + heads[i].idx);
+ return -EFAULT;
+ }
+ if (unlikely(vq->log_used)) {
+ /* Make sure desc is written before update log. */
+ smp_wmb();
+ log_write(vq->log_base,
+ vq->log_addr + heads[i].idx * sizeof(desc),
+ sizeof(desc));
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ }
+ }
+
+ return 0;
+}
+
+static int vhost_add_used_n_split(struct vhost_virtqueue *vq,
+ struct vring_used_elem *heads,
+ unsigned int count)
{
int start, n, r;
@@ -2196,6 +2403,19 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
}
return r;
}
+
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd.
+ */
+int vhost_add_used_n(struct vhost_virtqueue *vq,
+ struct vring_used_elem *heads,
+ unsigned int count)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return vhost_add_used_n_packed(vq, heads, count);
+ else
+ return vhost_add_used_n_split(vq, heads, count);
+}
EXPORT_SYMBOL_GPL(vhost_add_used_n);
static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
@@ -2203,6 +2423,11 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
__u16 old, new;
__virtio16 event;
bool v;
+
+ /* FIXME: check driver area */
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return false;
+
/* Flush out used index updates. This is paired
* with the barrier that the Guest executes when enabling
* interrupts. */
@@ -2265,7 +2490,8 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
/* return true if we're sure that avaiable ring is empty */
-bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+static bool vhost_vq_avail_empty_split(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
{
__virtio16 avail_idx;
int r;
@@ -2280,10 +2506,61 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
return vq->avail_idx == vq->last_avail_idx;
}
+
+/* FIXME: unify codes with vhost_enable_notify_packed() */
+static bool vhost_vq_avail_empty_packed(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
+{
+ struct vring_desc_packed desc;
+ int ret, i = vq->last_avail_idx & (vq->num - 1);
+
+ ret = vhost_copy_from_user(vq, &desc, vq->desc_packed + i,
+ sizeof(desc));
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc_packed + i);
+ return -EFAULT;
+ }
+
+ /* Read flag after desc is read */
+ smp_mb();
+
+ return desc_is_avail(vq, &desc);
+}
+
+bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return vhost_vq_avail_empty_packed(dev, vq);
+ else
+ return vhost_vq_avail_empty_split(dev, vq);
+}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
-/* OK, now we need to know about added descriptors. */
-bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+static bool vhost_enable_notify_packed(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
+{
+ struct vring_desc_packed desc;
+ int ret, i = vq->last_avail_idx & (vq->num - 1);
+
+ /* FIXME: disable notification through device area */
+
+ ret = vhost_copy_from_user(vq, &desc, vq->desc_packed + i,
+ sizeof(desc));
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc_packed + i);
+ return -EFAULT;
+ }
+
+ /* Read flag after desc is read */
+ smp_mb();
+
+ return desc_is_avail(vq, &desc);
+}
+
+static bool vhost_enable_notify_split(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
{
__virtio16 avail_idx;
int r;
@@ -2318,10 +2595,25 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx;
}
+
+/* OK, now we need to know about added descriptors. */
+bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return vhost_enable_notify_packed(dev, vq);
+ else
+ return vhost_enable_notify_split(dev, vq);
+}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
-/* We don't need to be notified again. */
-void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+static void vhost_disable_notify_packed(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
+{
+ /* FIXME: disable notification through device area */
+}
+
+static void vhost_disable_notify_split(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq)
{
int r;
@@ -2335,6 +2627,15 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->used->flags, r);
}
}
+
+/* We don't need to be notified again. */
+void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ return vhost_disable_notify_packed(dev, vq);
+ else
+ return vhost_disable_notify_split(dev, vq);
+}
EXPORT_SYMBOL_GPL(vhost_disable_notify);
/* Create a new message. */
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index ac4b605..cf6533a 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -87,7 +87,10 @@ struct vhost_virtqueue {
/* The actual ring of buffers. */
struct mutex mutex;
unsigned int num;
- struct vring_desc __user *desc;
+ union {
+ struct vring_desc __user *desc;
+ struct vring_desc_packed __user *desc_packed;
+ };
struct vring_avail __user *avail;
struct vring_used __user *used;
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
@@ -144,6 +147,7 @@ struct vhost_virtqueue {
bool user_be;
#endif
u32 busyloop_timeout;
+ bool used_wrap_counter;
};
struct vhost_msg_node {
--
2.7.4
^ permalink raw reply related
* [PATCH RFC 1/2] virtio: introduce packed ring defines
From: Jason Wang @ 2018-02-14 2:37 UTC (permalink / raw)
To: mst, virtualization, linux-kernel, netdev; +Cc: wexu, tiwei.bie
In-Reply-To: <1518575829-1431-1-git-send-email-jasowang@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
include/uapi/linux/virtio_config.h | 9 +++++++++
include/uapi/linux/virtio_ring.h | 17 +++++++++++++++++
2 files changed, 26 insertions(+)
diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 308e209..5903d51 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -71,4 +71,13 @@
* this is for compatibility with legacy systems.
*/
#define VIRTIO_F_IOMMU_PLATFORM 33
+
+#define VIRTIO_F_RING_PACKED 34
+
+/*
+ * This feature indicates that all buffers are used by the device in
+ * the same order in which they have been made available.
+ */
+#define VIRTIO_F_IN_ORDER 35
+
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index 6d5d5fa..a169b53 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -43,6 +43,8 @@
#define VRING_DESC_F_WRITE 2
/* This means the buffer contains a list of buffer descriptors. */
#define VRING_DESC_F_INDIRECT 4
+#define VRING_DESC_F_AVAIL 7
+#define VRING_DESC_F_USED 15
/* The Host uses this in used->flags to advise the Guest: don't kick me when
* you add a buffer. It's unreliable, so it's simply an optimization. Guest
@@ -62,6 +64,17 @@
* at the end of the used ring. Guest should ignore the used->flags field. */
#define VIRTIO_RING_F_EVENT_IDX 29
+struct vring_desc_packed {
+ /* Buffer Address. */
+ __virtio64 addr;
+ /* Buffer Length. */
+ __virtio32 len;
+ /* Buffer ID. */
+ __virtio16 id;
+ /* The flags depending on descriptor type. */
+ __virtio16 flags;
+};
+
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc {
/* Address (guest-physical). */
@@ -86,6 +99,10 @@ struct vring_used_elem {
__virtio32 id;
/* Total length of the descriptor chain which was used (written to) */
__virtio32 len;
+ /* Index of the descriptor that needs to write, used by packed ring. */
+ u16 idx;
+ /* Wrap counter for this used desc, used by packed ring. */
+ bool wrap_counter;
};
struct vring_used {
--
2.7.4
^ permalink raw reply related
* [PATCH RFC 0/2] Packed ring for vhost
From: Jason Wang @ 2018-02-14 2:37 UTC (permalink / raw)
To: mst, virtualization, linux-kernel, netdev; +Cc: wexu, tiwei.bie
Hi all:
This RFC implement a subset of packed ring which was described at
https://github.com/oasis-tcs/virtio-docs/blob/master/virtio-v1.1-packed-wd07.pdf
. The code were tested with pmd implement by Jens at
http://dpdk.org/ml/archives/dev/2018-January/089417.html. Minor
change was needed for pmd codes to kick virtqueue since it assumes a
busy polling backend.
Test were done between localhost and guest. Testpmd (rxonly) in guest
reports 2.4Mpps. Testpmd (txonly) repots about 2.1Mpps.
It's not a complete implemention, here's what were missed:
- Device Area
- Driver Area
- Descriptor indirection
- Zerocopy may not be functional
- Migration path is not tested
- Vhost devices except for net
- vIOMMU can not work (mainly because the metadata prefetch is not
implemented).
- See FIXME/TODO in the codes for more details
- No batching or other optimizations were implemented
For a quick prototype, this series open code the tracking of warp
counter and descriptor index at net device. This will be addressed in
the future by:
- Move get_rx_bufs() from net.c to vhost.c
- Let vhost_get_vq_desc() returns vring_used_elem instad of head id
With the above, we can hide the internal (at least part of) vring
layout from specific device.
Please review.
Thanks
Jason Wang (2):
virtio: introduce packed ring defines
vhost: packed ring support
drivers/vhost/net.c | 14 +-
drivers/vhost/vhost.c | 351 ++++++++++++++++++++++++++++++++++---
drivers/vhost/vhost.h | 6 +-
include/uapi/linux/virtio_config.h | 9 +
include/uapi/linux/virtio_ring.h | 17 ++
5 files changed, 369 insertions(+), 28 deletions(-)
--
2.7.4
^ permalink raw reply
* Re: [vhost:vhost 22/23] drivers/firmware/qemu_fw_cfg.c:130:36: sparse: incorrect type in initializer (different base types)
From: Michael S. Tsirkin @ 2018-02-14 1:31 UTC (permalink / raw)
To: kbuild test robot
Cc: Marc-André Lureau, netdev, kbuild-all, kvm, virtualization
In-Reply-To: <201802140843.NMqq2jAs%fengguang.wu@intel.com>
On Wed, Feb 14, 2018 at 08:46:46AM +0800, kbuild test robot wrote:
> tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
> head: 3d22d7c1190db3209b644b8a13a75a9802b4587f
> commit: b3a8771f409b74c42deee28aee3092fc5d2c8dab [22/23] fw_cfg: write vmcoreinfo details
> reproduce:
> # apt-get install sparse
> git checkout b3a8771f409b74c42deee28aee3092fc5d2c8dab
> make ARCH=x86_64 allmodconfig
> make C=1 CF=-D__CHECK_ENDIAN__
Please make sure there are no sparse warnings when you build
a driver.
>
> sparse warnings: (new ones prefixed by >>)
>
> drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
> >> drivers/firmware/qemu_fw_cfg.c:130:36: sparse: incorrect type in initializer (different base types) @@ expected unsigned long long address @@ got gned] address @@
> drivers/firmware/qemu_fw_cfg.c:130:36: expected unsigned long long address
> drivers/firmware/qemu_fw_cfg.c:130:36: got restricted __be64
> drivers/firmware/qemu_fw_cfg.c:131:27: sparse: incorrect type in initializer (different base types) @@ expected unsigned int length @@ got ed int length @@
> drivers/firmware/qemu_fw_cfg.c:131:27: expected unsigned int length
> drivers/firmware/qemu_fw_cfg.c:131:27: got restricted __be32 <noident>
> drivers/firmware/qemu_fw_cfg.c:132:28: sparse: incorrect type in initializer (different base types) @@ expected unsigned int control @@ got ed int control @@
> drivers/firmware/qemu_fw_cfg.c:132:28: expected unsigned int control
> drivers/firmware/qemu_fw_cfg.c:132:28: got restricted __be32 <noident>
> drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
> drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
> drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
> drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
> drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
> drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
> drivers/firmware/qemu_fw_cfg.c:717:22: sparse: cast to restricted __le32
>
> vim +130 drivers/firmware/qemu_fw_cfg.c
>
> 103
> 104 /* qemu fw_cfg device is sync today, but spec says it may become async */
> 105 static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
> 106 {
> 107 do {
> > 108 u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
> 109
> 110 if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
> 111 return;
> 112
> 113 usleep_range(50, 100);
> 114 } while (true);
> 115 }
> 116
> 117 static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
> 118 {
> 119 phys_addr_t dma;
> 120 struct fw_cfg_dma *d = NULL;
> 121 ssize_t ret = length;
> 122
> 123 d = kmalloc(sizeof(*d), GFP_KERNEL);
> 124 if (!d) {
> 125 ret = -ENOMEM;
> 126 goto end;
> 127 }
> 128
> 129 *d = (struct fw_cfg_dma) {
> > 130 .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
> 131 .length = cpu_to_be32(length),
> 132 .control = cpu_to_be32(control)
> 133 };
> 134
> 135 dma = virt_to_phys(d);
> 136
> 137 iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
> 138 iowrite32be(dma, fw_cfg_reg_dma + 4);
> 139
> 140 fw_cfg_wait_for_control(d);
> 141
> 142 if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
> 143 ret = -EIO;
> 144 }
> 145
> 146 end:
> 147 kfree(d);
> 148
> 149 return ret;
> 150 }
> 151
>
> ---
> 0-DAY kernel test infrastructure Open Source Technology Center
> https://lists.01.org/pipermail/kbuild-all Intel Corporation
^ permalink raw reply
* Re: [PATCH] headers: untangle kmemleak.h from mm.h
From: Randy Dunlap @ 2018-02-14 0:48 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-s390, John Johansen, netdev@vger.kernel.org, X86 ML,
linux-wireless, LKML, virtualization, Linux MM, iommu,
Greg Kroah-Hartman, sparclinux, Andrew Morton, Fengguang Wu,
linuxppc-dev
In-Reply-To: <20180212072727.saupl35jvwex6hbe@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1830 bytes --]
On 02/11/2018 11:27 PM, Ingo Molnar wrote:
>
> * Randy Dunlap <rdunlap@infradead.org> wrote:
>
>> From: Randy Dunlap <rdunlap@infradead.org>
>>
>> Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
>> reason. It looks like it's only a convenience, so remove kmemleak.h
>> from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
>> that don't already #include it.
>> Also remove <linux/kmemleak.h> from source files that do not use it.
>>
>> This is tested on i386 allmodconfig and x86_64 allmodconfig. It
>> would be good to run it through the 0day bot for other $ARCHes.
>> I have neither the horsepower nor the storage space for the other
>> $ARCHes.
>>
>> [slab.h is the second most used header file after module.h; kernel.h
>> is right there with slab.h. There could be some minor error in the
>> counting due to some #includes having comments after them and I
>> didn't combine all of those.]
>>
>> This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
>> header files).
>>
>> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
>
> Nice find:
>
> Reviewed-by: Ingo Molnar <mingo@kernel.org>
>
> I agree that it needs to go through 0-day to find any hidden dependencies we might
> have grown due to this.
Andrew,
This patch has mostly survived both 0day and ozlabs multi-arch testing with
2 build errors being reported by both of them. I have posted patches for
those separately. (and are attached here)
other-patch-1:
lkml.kernel.org/r/5664ced1-a0cd-7e4e-71b6-9c3a97d68927@infradead.org
"lib/test_firmware: add header file to prevent build errors"
other-patch-2:
lkml.kernel.org/r/b3b7eebb-0e9f-f175-94a8-379c5ddcaa86@infradead.org
"integrity/security: fix digsig.c build error"
Will you see that these are merged or do you want me to repost them?
thanks,
--
~Randy
[-- Attachment #2: integrity_security_digsig_add_header.patch --]
[-- Type: text/x-patch, Size: 872 bytes --]
From: Randy Dunlap <rdunlap@infradead.org>
security/integrity/digsig.c has build errors on some $ARCH due to a
missing header file, so add it.
security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: linux-integrity@vger.kernel.org
Link: http://kisskb.ellerman.id.au/kisskb/head/13396/
---
security/integrity/digsig.c | 1 +
1 file changed, 1 insertion(+)
--- lnx-416-rc1.orig/security/integrity/digsig.c
+++ lnx-416-rc1/security/integrity/digsig.c
@@ -18,6 +18,7 @@
#include <linux/cred.h>
#include <linux/key-type.h>
#include <linux/digsig.h>
+#include <linux/vmalloc.h>
#include <crypto/public_key.h>
#include <keys/system_keyring.h>
[-- Attachment #3: lib_test_firmware_add_header_file.patch --]
[-- Type: text/x-patch, Size: 1024 bytes --]
From: Randy Dunlap <rdunlap@infradead.org>
lib/test_firmware.c has build errors on some $ARCH due to a
missing header file, so add it.
lib/test_firmware.c:134:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
lib/test_firmware.c:620:25: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: http://kisskb.ellerman.id.au/kisskb/head/13396/
---
lib/test_firmware.c | 1 +
1 file changed, 1 insertion(+)
--- lnx-416-rc1.orig/lib/test_firmware.c
+++ lnx-416-rc1/lib/test_firmware.c
@@ -21,6 +21,7 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
+#include <linux/vfree.h>
#define TEST_FIRMWARE_NAME "test-firmware.bin"
#define TEST_FIRMWARE_NUM_REQS 4
[-- Attachment #4: Type: text/plain, Size: 183 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [vhost:vhost 22/23] drivers/firmware/qemu_fw_cfg.c:130:36: sparse: incorrect type in initializer (different base types)
From: kbuild test robot @ 2018-02-14 0:46 UTC (permalink / raw)
To: Marc-André Lureau
Cc: netdev, Michael S. Tsirkin, kbuild-all, kvm, virtualization
tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
head: 3d22d7c1190db3209b644b8a13a75a9802b4587f
commit: b3a8771f409b74c42deee28aee3092fc5d2c8dab [22/23] fw_cfg: write vmcoreinfo details
reproduce:
# apt-get install sparse
git checkout b3a8771f409b74c42deee28aee3092fc5d2c8dab
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__
sparse warnings: (new ones prefixed by >>)
drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:108:28: sparse: cast to restricted __be32
>> drivers/firmware/qemu_fw_cfg.c:130:36: sparse: incorrect type in initializer (different base types) @@ expected unsigned long long address @@ got gned] address @@
drivers/firmware/qemu_fw_cfg.c:130:36: expected unsigned long long address
drivers/firmware/qemu_fw_cfg.c:130:36: got restricted __be64
drivers/firmware/qemu_fw_cfg.c:131:27: sparse: incorrect type in initializer (different base types) @@ expected unsigned int length @@ got ed int length @@
drivers/firmware/qemu_fw_cfg.c:131:27: expected unsigned int length
drivers/firmware/qemu_fw_cfg.c:131:27: got restricted __be32 <noident>
drivers/firmware/qemu_fw_cfg.c:132:28: sparse: incorrect type in initializer (different base types) @@ expected unsigned int control @@ got ed int control @@
drivers/firmware/qemu_fw_cfg.c:132:28: expected unsigned int control
drivers/firmware/qemu_fw_cfg.c:132:28: got restricted __be32 <noident>
drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:142:13: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:660:17: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:670:31: sparse: cast to restricted __be32
drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
drivers/firmware/qemu_fw_cfg.c:671:33: sparse: cast to restricted __be16
drivers/firmware/qemu_fw_cfg.c:96:33: sparse: restricted __be16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:96:52: sparse: restricted __le16 degrades to integer
drivers/firmware/qemu_fw_cfg.c:717:22: sparse: cast to restricted __le32
vim +130 drivers/firmware/qemu_fw_cfg.c
103
104 /* qemu fw_cfg device is sync today, but spec says it may become async */
105 static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
106 {
107 do {
> 108 u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
109
110 if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
111 return;
112
113 usleep_range(50, 100);
114 } while (true);
115 }
116
117 static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
118 {
119 phys_addr_t dma;
120 struct fw_cfg_dma *d = NULL;
121 ssize_t ret = length;
122
123 d = kmalloc(sizeof(*d), GFP_KERNEL);
124 if (!d) {
125 ret = -ENOMEM;
126 goto end;
127 }
128
129 *d = (struct fw_cfg_dma) {
> 130 .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
131 .length = cpu_to_be32(length),
132 .control = cpu_to_be32(control)
133 };
134
135 dma = virt_to_phys(d);
136
137 iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
138 iowrite32be(dma, fw_cfg_reg_dma + 4);
139
140 fw_cfg_wait_for_control(d);
141
142 if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
143 ret = -EIO;
144 }
145
146 end:
147 kfree(d);
148
149 return ret;
150 }
151
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
^ permalink raw reply
* Re: [PATCH] headers: untangle kmemleak.h from mm.h
From: Randy Dunlap @ 2018-02-13 17:51 UTC (permalink / raw)
To: Michael Ellerman, LKML, Linux MM, Andrew Morton, Fengguang Wu
Cc: linux-s390, John Johansen, netdev@vger.kernel.org, X86 ML,
linux-wireless, virtualization, iommu, Dmitry Kasatkin,
Greg Kroah-Hartman, sparclinux, linuxppc-dev
In-Reply-To: <87fu652oce.fsf@concordia.ellerman.id.au>
On 02/13/2018 02:09 AM, Michael Ellerman wrote:
> Randy Dunlap <rdunlap@infradead.org> writes:
>
>> On 02/12/2018 04:28 AM, Michael Ellerman wrote:
>>> Randy Dunlap <rdunlap@infradead.org> writes:
>>>
>>>> From: Randy Dunlap <rdunlap@infradead.org>
>>>>
>>>> Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
>>>> reason. It looks like it's only a convenience, so remove kmemleak.h
>>>> from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
>>>> that don't already #include it.
>>>> Also remove <linux/kmemleak.h> from source files that do not use it.
>>>>
>>>> This is tested on i386 allmodconfig and x86_64 allmodconfig. It
>>>> would be good to run it through the 0day bot for other $ARCHes.
>>>> I have neither the horsepower nor the storage space for the other
>>>> $ARCHes.
>>>>
>>>> [slab.h is the second most used header file after module.h; kernel.h
>>>> is right there with slab.h. There could be some minor error in the
>>>> counting due to some #includes having comments after them and I
>>>> didn't combine all of those.]
>>>>
>>>> This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
>>>> header files).
>>>>
>>>> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
>>>
>>> I threw it at a random selection of configs and so far the only failures
>>> I'm seeing are:
>>>
>>> lib/test_firmware.c:134:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
>>> lib/test_firmware.c:620:25: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
>>> lib/test_firmware.c:620:2: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
>>> security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
>>
>> Both of those source files need to #include <linux/vmalloc.h>.
>
> Yep, I added those and rebuilt. I don't see any more failures that look
> related to your patch.
Great, thanks.
I also sent patches for both of those.
> http://kisskb.ellerman.id.au/kisskb/head/13399/
>
>
> I haven't gone through the defconfigs I have enabled for a while, so
> it's possible I have some missing but it's still a reasonable cross
> section.
--
~Randy
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Tomeu Vizoso @ 2018-02-13 14:27 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Michael S. Tsirkin, David Airlie, linux-kernel, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <20180212114540.iygbha554busy4ip@sirius.home.kraxel.org>
On 02/12/2018 12:45 PM, Gerd Hoffmann wrote:
> Hi,
>
>>> (a) software rendering: client allocates shared memory buffer, renders
>>> into it, then passes a file handle for that shmem block together
>>> with some meta data (size, format, ...) to the wayland server.
>>>
>>> (b) gpu rendering: client opens a render node, allocates a buffer,
>>> asks the cpu to renders into it, exports the buffer as dma-buf
>>> (DRM_IOCTL_PRIME_HANDLE_TO_FD), passes this to the wayland server
>>> (again including meta data of course).
>>>
>>> Is that correct?
>>
>> Both are correct descriptions of typical behaviors. But it isn't spec'ed
>> anywhere who has to do the buffer allocation.
>
> Well, according to Pekka's reply it is spec'ed that way, for the
> existing buffer types. So for server allocated buffers you need
> (a) a wayland protocol extension and (b) support for the extension
> in the clients.
>
>> That's to say that if we cannot come up with a zero-copy solution for
>> unmodified clients, we should at least support zero-copy for cooperative
>> clients.
>
> "cooperative clients" == "client which has support for the wayland
> protocol extension", correct?
Guess it could be that, but I was rather thinking of clients that would
allocate the buffer for wl_shm_pool with DRM_VIRTGPU_RESOURCE_CREATE or
equivalent. Then that buffer would be exported and the fd passed using
the standard wl_shm protocol.
>>>> 4. QEMU maps that buffer to the guest's address space
>>>> (KVM_SET_USER_MEMORY_REGION), passes the guest PFN to the virtio driver
>>>
>>> That part is problematic. The host can't simply allocate something in
>>> the physical address space, because most physical address space
>>> management is done by the guest. All pci bars are mapped by the guest
>>> firmware for example (or by the guest OS in case of hotplug).
>>
>> How can KVM_SET_USER_MEMORY_REGION ever be safely used then? I would have
>> expected that callers of that ioctl have enough knowledge to be able to
>> choose a physical address that won't conflict with the guest's kernel.
>
> Depends on the kind of region. Guest RAM is allocated and mapped by
> qemu, guest firmware can query qemu about RAM mappings using a special
> interface, then create a e820 memory map for the guest os. PCI device
> bars are mapped according to the pci config space registers, which in
> turn are initialized by the guest firmware, so it is basically in the
> guests hand where they show up.
>
>> I see that the ivshmem device in QEMU registers the memory region in BAR 2
>> of a PCI device instead. Would that be better in your opinion?
>
> Yes.
Would it make sense for virtio-gpu to map buffers to the guest via PCI
BARs? So we can use a single drm driver for both 2d and 3d.
>>>> 4. QEMU pops data+buffers from the virtqueue, looks up shmem FD for each
>>>> resource, sends data + FDs to the compositor with SCM_RIGHTS
>>>
>>> BTW: Is there a 1:1 relationship between buffers and shmem blocks? Or
>>> does the wayland protocol allow for offsets in buffer meta data, so you
>>> can place multiple buffers in a single shmem block?
>>
>> The latter:
>> https://wayland.freedesktop.org/docs/html/apa.html#protocol-spec-wl_shm_pool
>
> Ah, good, that makes it alot easier.
>
> So, yes, using ivshmem would be one option. Tricky part here is the
> buffer management though. It's just a raw piece of memory. The guest
> proxy could mmap the pci bar and manage it. But then it is again either
> unmodified guest + copying the data, or modified client (which requests
> buffers from guest proxy) for zero-copy.
>
> Another idea would be extending stdvga. Basically qemu would have to
> use shmem as backing storage for vga memory instead of anonymous memory,
> so it would be very simliar to ivshmem on the host side. But on the
> guest side we have a drm driver for it (bochs-drm). So clients can
> allocate dumb drm buffers for software rendering, and the buffer would
> already be backed by a host shmem segment. Given that wayland already
> supports drm buffers for 3d rendering that could work without extending
> the wayland protocol. The client proxy would have to translate the drm
> buffer into an pci bar offset and pass it to the host side. The host
> proxy could register the pci bar as wl_shm_pool, then just pass through
> the offset to reference the individual buffers.
>
> Drawback of both approaches would be that software rendering and gpu
> rendering would use quite different code paths.
Yeah, would be great if we could find a way to avoid that.
> We also need a solution for the keymap shmem block. I guess the keymap
> doesn't change all that often, so maybe it is easiest to just copy it
> over (host proxy -> guest proxy) instead of trying to map the host shmem
> into the guest?
I think that should be fine for now. Something similar will have to
happen for the clipboard, which currently uses pipes to exchange data.
Thanks,
Tomeu
^ permalink raw reply
* (unknown),
From: Solen win @ 2018-02-13 11:58 UTC (permalink / raw)
To: virtualization
[-- Attachment #1.1: Type: text/plain, Size: 22 bytes --]
solenwin2.zendesk.com
[-- Attachment #1.2: Type: text/html, Size: 139 bytes --]
[-- Attachment #2: Type: text/plain, Size: 183 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH] mm/page_poison: move PAGE_POISON to page_poison.c
From: Michal Hocko @ 2018-02-13 10:16 UTC (permalink / raw)
To: Wei Wang; +Cc: linux-mm, akpm, mst, linux-kernel, virtualization
In-Reply-To: <1518163694-27155-1-git-send-email-wei.w.wang@intel.com>
On Fri 09-02-18 16:08:14, Wei Wang wrote:
> The PAGE_POISON macro is used in page_poison.c only, so avoid exporting
> it. Also remove the "mm/debug-pagealloc.c" related comment, which is
> obsolete.
Why is this an improvement? I thought the whole point of poison.h is to
keep all the poison value at a single place to make them obviously
unique.
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> ---
> include/linux/poison.h | 7 -------
> mm/page_poison.c | 6 ++++++
> 2 files changed, 6 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/poison.h b/include/linux/poison.h
> index 15927eb..348bf67 100644
> --- a/include/linux/poison.h
> +++ b/include/linux/poison.h
> @@ -30,13 +30,6 @@
> */
> #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
>
> -/********** mm/debug-pagealloc.c **********/
> -#ifdef CONFIG_PAGE_POISONING_ZERO
> -#define PAGE_POISON 0x00
> -#else
> -#define PAGE_POISON 0xaa
> -#endif
> -
> /********** mm/page_alloc.c ************/
>
> #define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA)
> diff --git a/mm/page_poison.c b/mm/page_poison.c
> index e83fd44..8aaf076 100644
> --- a/mm/page_poison.c
> +++ b/mm/page_poison.c
> @@ -7,6 +7,12 @@
> #include <linux/poison.h>
> #include <linux/ratelimit.h>
>
> +#ifdef CONFIG_PAGE_POISONING_ZERO
> +#define PAGE_POISON 0x00
> +#else
> +#define PAGE_POISON 0xaa
> +#endif
> +
> static bool want_page_poisoning __read_mostly;
>
> static int early_page_poison_param(char *buf)
> --
> 2.7.4
--
Michal Hocko
SUSE Labs
^ permalink raw reply
* Re: [PATCH] headers: untangle kmemleak.h from mm.h
From: Michael Ellerman @ 2018-02-13 10:09 UTC (permalink / raw)
To: Randy Dunlap, LKML, Linux MM, Andrew Morton, Fengguang Wu
Cc: linux-s390, John Johansen, netdev@vger.kernel.org, X86 ML,
linux-wireless, virtualization, iommu, Dmitry Kasatkin,
Greg Kroah-Hartman, sparclinux, linuxppc-dev
In-Reply-To: <f119a273-2e86-1b7f-346f-7627ad8b51ed@infradead.org>
Randy Dunlap <rdunlap@infradead.org> writes:
> On 02/12/2018 04:28 AM, Michael Ellerman wrote:
>> Randy Dunlap <rdunlap@infradead.org> writes:
>>
>>> From: Randy Dunlap <rdunlap@infradead.org>
>>>
>>> Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
>>> reason. It looks like it's only a convenience, so remove kmemleak.h
>>> from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
>>> that don't already #include it.
>>> Also remove <linux/kmemleak.h> from source files that do not use it.
>>>
>>> This is tested on i386 allmodconfig and x86_64 allmodconfig. It
>>> would be good to run it through the 0day bot for other $ARCHes.
>>> I have neither the horsepower nor the storage space for the other
>>> $ARCHes.
>>>
>>> [slab.h is the second most used header file after module.h; kernel.h
>>> is right there with slab.h. There could be some minor error in the
>>> counting due to some #includes having comments after them and I
>>> didn't combine all of those.]
>>>
>>> This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
>>> header files).
>>>
>>> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
>>
>> I threw it at a random selection of configs and so far the only failures
>> I'm seeing are:
>>
>> lib/test_firmware.c:134:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
>> lib/test_firmware.c:620:25: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
>> lib/test_firmware.c:620:2: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
>> security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
>
> Both of those source files need to #include <linux/vmalloc.h>.
Yep, I added those and rebuilt. I don't see any more failures that look
related to your patch.
http://kisskb.ellerman.id.au/kisskb/head/13399/
I haven't gone through the defconfigs I have enabled for a while, so
it's possible I have some missing but it's still a reasonable cross
section.
cheers
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Pekka Paalanen @ 2018-02-13 7:41 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Tomeu Vizoso, Michael S. Tsirkin, David Airlie, linux-kernel,
dri-devel, virtualization, kernel
In-Reply-To: <20180212114540.iygbha554busy4ip@sirius.home.kraxel.org>
[-- Attachment #1.1: Type: text/plain, Size: 1849 bytes --]
On Mon, 12 Feb 2018 12:45:40 +0100
Gerd Hoffmann <kraxel@redhat.com> wrote:
> Hi,
>
> > > (a) software rendering: client allocates shared memory buffer, renders
> > > into it, then passes a file handle for that shmem block together
> > > with some meta data (size, format, ...) to the wayland server.
> > >
> > > (b) gpu rendering: client opens a render node, allocates a buffer,
> > > asks the cpu to renders into it, exports the buffer as dma-buf
> > > (DRM_IOCTL_PRIME_HANDLE_TO_FD), passes this to the wayland server
> > > (again including meta data of course).
> > >
> > > Is that correct?
> >
> > Both are correct descriptions of typical behaviors. But it isn't spec'ed
> > anywhere who has to do the buffer allocation.
>
> Well, according to Pekka's reply it is spec'ed that way, for the
> existing buffer types. So for server allocated buffers you need
> (a) a wayland protocol extension and (b) support for the extension
> in the clients.
Correct. Or simply a libEGL that uses such Wayland extension behind
everyone's back. I believe such things did at least exist, but are
probably not relevant for this discussion.
(If there is a standard library, like libEGL, loaded and used by both a
server and a client, that library can advertise custom private Wayland
protocol extensions and the client side can take advantage of them,
both without needing any code changes on either the server or the
client.)
> We also need a solution for the keymap shmem block. I guess the keymap
> doesn't change all that often, so maybe it is easiest to just copy it
> over (host proxy -> guest proxy) instead of trying to map the host shmem
> into the guest?
Yes, I believe that would be a perfectly valid solution for that
particular case.
Thanks,
pq
[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
[-- Attachment #2: Type: text/plain, Size: 183 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH] headers: untangle kmemleak.h from mm.h
From: Randy Dunlap @ 2018-02-12 21:16 UTC (permalink / raw)
To: Michael Ellerman, LKML, Linux MM, Andrew Morton, Fengguang Wu
Cc: linux-s390, John Johansen, netdev@vger.kernel.org, X86 ML,
linux-wireless, virtualization, iommu, Greg Kroah-Hartman,
sparclinux, linuxppc-dev
In-Reply-To: <87zi4ev1d2.fsf@concordia.ellerman.id.au>
On 02/12/2018 04:28 AM, Michael Ellerman wrote:
> Randy Dunlap <rdunlap@infradead.org> writes:
>
>> From: Randy Dunlap <rdunlap@infradead.org>
>>
>> Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
>> reason. It looks like it's only a convenience, so remove kmemleak.h
>> from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
>> that don't already #include it.
>> Also remove <linux/kmemleak.h> from source files that do not use it.
>>
>> This is tested on i386 allmodconfig and x86_64 allmodconfig. It
>> would be good to run it through the 0day bot for other $ARCHes.
>> I have neither the horsepower nor the storage space for the other
>> $ARCHes.
>>
>> [slab.h is the second most used header file after module.h; kernel.h
>> is right there with slab.h. There could be some minor error in the
>> counting due to some #includes having comments after them and I
>> didn't combine all of those.]
>>
>> This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
>> header files).
>>
>> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
>
> I threw it at a random selection of configs and so far the only failures
> I'm seeing are:
>
> lib/test_firmware.c:134:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
> lib/test_firmware.c:620:25: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
> lib/test_firmware.c:620:2: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
> security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
>
> Full results trickling in here, not all the failures there are caused by
> this patch, ie. some configs are broken in mainline:
>
> http://kisskb.ellerman.id.au/kisskb/head/13396/
That's very useful, thanks.
I'll send a few patches for those.
--
~Randy
^ permalink raw reply
* Re: [PATCH] headers: untangle kmemleak.h from mm.h
From: Randy Dunlap @ 2018-02-12 19:40 UTC (permalink / raw)
To: Michael Ellerman, LKML, Linux MM, Andrew Morton, Fengguang Wu
Cc: linux-s390, John Johansen, netdev@vger.kernel.org, X86 ML,
linux-wireless, virtualization, iommu, Dmitry Kasatkin,
Greg Kroah-Hartman, sparclinux, linuxppc-dev
In-Reply-To: <87zi4ev1d2.fsf@concordia.ellerman.id.au>
On 02/12/2018 04:28 AM, Michael Ellerman wrote:
> Randy Dunlap <rdunlap@infradead.org> writes:
>
>> From: Randy Dunlap <rdunlap@infradead.org>
>>
>> Currently <linux/slab.h> #includes <linux/kmemleak.h> for no obvious
>> reason. It looks like it's only a convenience, so remove kmemleak.h
>> from slab.h and add <linux/kmemleak.h> to any users of kmemleak_*
>> that don't already #include it.
>> Also remove <linux/kmemleak.h> from source files that do not use it.
>>
>> This is tested on i386 allmodconfig and x86_64 allmodconfig. It
>> would be good to run it through the 0day bot for other $ARCHes.
>> I have neither the horsepower nor the storage space for the other
>> $ARCHes.
>>
>> [slab.h is the second most used header file after module.h; kernel.h
>> is right there with slab.h. There could be some minor error in the
>> counting due to some #includes having comments after them and I
>> didn't combine all of those.]
>>
>> This is Lingchi patch #1 (death by a thousand cuts, applied to kernel
>> header files).
>>
>> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
>
> I threw it at a random selection of configs and so far the only failures
> I'm seeing are:
>
> lib/test_firmware.c:134:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
> lib/test_firmware.c:620:25: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
> lib/test_firmware.c:620:2: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
> security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration]
>
Both of those source files need to #include <linux/vmalloc.h>.
> Full results trickling in here, not all the failures there are caused by
> this patch, ie. some configs are broken in mainline:
>
> http://kisskb.ellerman.id.au/kisskb/head/13396/
>
> cheers
:)
--
~Randy
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Gerd Hoffmann @ 2018-02-12 15:29 UTC (permalink / raw)
To: Tomeu Vizoso
Cc: Michael S. Tsirkin, David Airlie, linux-kernel, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <6cf0c06a-b884-3251-166b-6ff3dec3ebc7@collabora.com>
> > I was more thinking about a struct containing enough info to allow the
> > proxy on the host side find the buffer, something like:
> >
> > struct {
> > enum type { stdvga, virtio-cpu, ... }
> > pcislot device;
> > union {
> > int stdvga_pcibar_offset;
> > int virtio_gpu_resource_id;
> > }
> > }
> >
> > So when the guest proxy gets a message with a fd referencing a buffer it
> > would have to figure where the buffer is, rewrite the message into the
> > struct above for the host proxy. The host proxy would rewrite the
> > message again for the server.
>
> What I don't understand yet is how we can keep the buffer descriptions
> together with the protocol data that references them.
>
> With SCM_RIGHTS, the FDs are placed in the ancillary data that "travels"
> together with the protocol data that references them.
Place the buffer description into the wayland extension protocol messages?
i.e. have some wl_virt_proxy protocol extension. Then, for the stdvga case:
(1) client sends wl_drm/create_prime_buffer request to the guest proxy
(2) guest proxy rewrites this into wl_virt_proxy/create_buffer, or
maybe a create_stdvga_buffer request, carrying all the information
listed above, and sends it to the host proxy.
(3) host proxy rewrites it again into a wl_shm_pool/create_buffer and
forwards it to the server.
cheers,
Gerd
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Tomeu Vizoso @ 2018-02-12 14:42 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Michael S. Tsirkin, David Airlie, linux-kernel, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <20180212142730.g2646v77qsvzd5ff@sirius.home.kraxel.org>
On 02/12/2018 03:27 PM, Gerd Hoffmann wrote:
> On Mon, Feb 12, 2018 at 03:00:24PM +0100, Tomeu Vizoso wrote:
>> On 02/12/2018 12:52 PM, Gerd Hoffmann wrote:
>>> Hi,
>>>
>>>> can we reach agreement on whether vsock should be involved in this?
>>>
>>> I think the best approach would be to have guest proxy and host proxy
>>> use vsock for the wayland protocol. Use a wayland protocol extension to
>>> reference the buffers in stdvga / ivshmem / virtio-gpu. Only the two
>>> proxies need to understand the extension, the client <=> guest proxy and
>>> host proxy <=> server communication would be standard wayland protocol.
>>
>> Thanks for the ideas. What I haven't understood yet is how you see the
>> actual passing of buffers via vsock. Are you thinking of using ancillary
>> data to pass FDs, or something else?
>
> I was more thinking about a struct containing enough info to allow the
> proxy on the host side find the buffer, something like:
>
> struct {
> enum type { stdvga, virtio-cpu, ... }
> pcislot device;
> union {
> int stdvga_pcibar_offset;
> int virtio_gpu_resource_id;
> }
> }
>
> So when the guest proxy gets a message with a fd referencing a buffer it
> would have to figure where the buffer is, rewrite the message into the
> struct above for the host proxy. The host proxy would rewrite the
> message again for the server.
What I don't understand yet is how we can keep the buffer descriptions
together with the protocol data that references them.
With SCM_RIGHTS, the FDs are placed in the ancillary data that "travels"
together with the protocol data that references them.
With the present series, the DRM_IOCTL_VIRTGPU_WINSRV_TX ioctl struct has
a field for the protocol data and an array of FDs.
How are you proposing to pass instances of that struct from above along
the protocol data that refers to them?
Thanks,
Tomeu
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Gerd Hoffmann @ 2018-02-12 14:27 UTC (permalink / raw)
To: Tomeu Vizoso
Cc: Michael S. Tsirkin, David Airlie, linux-kernel, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <eea72b9d-d180-6a57-81ce-4366dc0f2bcc@collabora.com>
On Mon, Feb 12, 2018 at 03:00:24PM +0100, Tomeu Vizoso wrote:
> On 02/12/2018 12:52 PM, Gerd Hoffmann wrote:
> > Hi,
> >
> > > can we reach agreement on whether vsock should be involved in this?
> >
> > I think the best approach would be to have guest proxy and host proxy
> > use vsock for the wayland protocol. Use a wayland protocol extension to
> > reference the buffers in stdvga / ivshmem / virtio-gpu. Only the two
> > proxies need to understand the extension, the client <=> guest proxy and
> > host proxy <=> server communication would be standard wayland protocol.
>
> Thanks for the ideas. What I haven't understood yet is how you see the
> actual passing of buffers via vsock. Are you thinking of using ancillary
> data to pass FDs, or something else?
I was more thinking about a struct containing enough info to allow the
proxy on the host side find the buffer, something like:
struct {
enum type { stdvga, virtio-cpu, ... }
pcislot device;
union {
int stdvga_pcibar_offset;
int virtio_gpu_resource_id;
}
}
So when the guest proxy gets a message with a fd referencing a buffer it
would have to figure where the buffer is, rewrite the message into the
struct above for the host proxy. The host proxy would rewrite the
message again for the server.
cheers,
Gerd
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox