* [PATCH 1/4] iommu: Add virtio-iommu driver
From: Jean-Philippe Brucker @ 2018-02-14 14:53 UTC (permalink / raw)
To: iommu, kvm, virtualization, virtio-dev, kvmarm
Cc: jayachandran.nair, lorenzo.pieralisi, tnowicki, mst, marc.zyngier,
will.deacon, jintack, eric.auger, robin.murphy, joro,
eric.auger.pro
In-Reply-To: <20180214145340.1223-1-jean-philippe.brucker@arm.com>
The virtio IOMMU is a para-virtualized device that allows sending IOMMU
requests such as map/unmap over the virtio-mmio transport without emulating
page tables. This implementation handles ATTACH, DETACH, MAP and UNMAP
requests.
The bulk of the code transforms calls coming from the IOMMU API into
corresponding virtio requests. Mappings are kept in an interval tree
instead of page tables.
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
MAINTAINERS | 6 +
drivers/iommu/Kconfig | 11 +
drivers/iommu/Makefile | 1 +
drivers/iommu/virtio-iommu.c | 960 ++++++++++++++++++++++++++++++++++++++
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_iommu.h | 116 +++++
6 files changed, 1095 insertions(+)
create mode 100644 drivers/iommu/virtio-iommu.c
create mode 100644 include/uapi/linux/virtio_iommu.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..2a181924d420 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14818,6 +14818,12 @@ S: Maintained
F: drivers/virtio/virtio_input.c
F: include/uapi/linux/virtio_input.h
+VIRTIO IOMMU DRIVER
+M: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+S: Maintained
+F: drivers/iommu/virtio-iommu.c
+F: include/uapi/linux/virtio_iommu.h
+
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..1ea0ec74524f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -381,4 +381,15 @@ config QCOM_IOMMU
help
Support for IOMMU on certain Qualcomm SoCs.
+config VIRTIO_IOMMU
+ bool "Virtio IOMMU driver"
+ depends on VIRTIO_MMIO
+ select IOMMU_API
+ select INTERVAL_TREE
+ select ARM_DMA_USE_IOMMU if ARM
+ help
+ Para-virtualised IOMMU driver with virtio.
+
+ Say Y here if you intend to run this kernel as a guest.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1fb695854809..9c68be1365e1 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
new file mode 100644
index 000000000000..a9c9245e8ba2
--- /dev/null
+++ b/drivers/iommu/virtio-iommu.c
@@ -0,0 +1,960 @@
+/*
+ * Virtio driver for the paravirtualized IOMMU
+ *
+ * Copyright (C) 2018 ARM Limited
+ * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/amba/bus.h>
+#include <linux/delay.h>
+#include <linux/dma-iommu.h>
+#include <linux/freezer.h>
+#include <linux/interval_tree.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/wait.h>
+
+#include <uapi/linux/virtio_iommu.h>
+
+/*
+ * IOVA window reported as the software-managed MSI region
+ * (IOMMU_RESV_SW_MSI) by viommu_get_resv_regions().
+ */
+#define MSI_IOVA_BASE 0x8000000
+#define MSI_IOVA_LENGTH 0x100000
+
+/*
+ * Driver state for one virtio-iommu instance. Allocated in viommu_probe()
+ * and stored in the virtio device's priv pointer.
+ */
+struct viommu_dev {
+ struct iommu_device iommu;
+ struct device *dev;
+ struct virtio_device *vdev;
+
+ /* Allocator for the domain IDs placed in requests */
+ struct ida domain_ids;
+
+ struct virtqueue *vq;
+ /* Serialize anything touching the request queue */
+ spinlock_t request_lock;
+
+ /* Device configuration */
+ struct iommu_domain_geometry geometry;
+ u64 pgsize_bitmap;
+ u8 domain_bits;
+};
+
+/*
+ * One IOVA range tracked in a domain's interval tree. The embedded request
+ * buffer is the memory handed to the device by viommu_map() and
+ * viommu_unmap(), and re-sent by viommu_replay_mappings().
+ */
+struct viommu_mapping {
+ /* Physical address backing iova.start */
+ phys_addr_t paddr;
+ /* Inclusive range [start, last] */
+ struct interval_tree_node iova;
+ union {
+ struct virtio_iommu_req_map map;
+ struct virtio_iommu_req_unmap unmap;
+ } req;
+};
+
+/*
+ * Driver-side view of an IOMMU domain. 'viommu' and 'id' are only set once
+ * the domain is finalised on first attach (see viommu_attach_dev()).
+ */
+struct viommu_domain {
+ struct iommu_domain domain;
+ struct viommu_dev *viommu;
+ /* Taken around domain finalisation in viommu_attach_dev() */
+ struct mutex mutex;
+ unsigned int id;
+
+ /* Protects the 'mappings' interval tree */
+ spinlock_t mappings_lock;
+ struct rb_root_cached mappings;
+
+ /* Number of endpoints attached to this domain */
+ unsigned long endpoints;
+};
+
+/*
+ * Per-endpoint data, allocated in viommu_add_device() and stored in
+ * fwspec->iommu_priv.
+ */
+struct viommu_endpoint {
+ struct viommu_dev *viommu;
+ /* Domain the endpoint is currently attached to, NULL before first attach */
+ struct viommu_domain *vdomain;
+};
+
+/* One in-flight request: a device-readable and a device-writable buffer. */
+struct viommu_request {
+ /* Device-read-only part of the request */
+ struct scatterlist top;
+ /* Device-write-only part of the request */
+ struct scatterlist bottom;
+
+ /* Length reported by virtqueue_get_buf(); 0 until a reply is received */
+ int written;
+ /* Link in the list of pending requests while waiting for replies */
+ struct list_head list;
+};
+
+/* Get the viommu_domain that embeds the given struct iommu_domain */
+#define to_viommu_domain(domain) \
+ container_of(domain, struct viommu_domain, domain)
+
+/* Virtio transport */
+
+/*
+ * Translate the status byte written by the device into 0 or a negative errno.
+ * IOERR, DEVERR and any status value this driver does not know are all
+ * reported as -EIO.
+ */
+static int viommu_status_to_errno(u8 status)
+{
+	static const int status_errno[] = {
+		[VIRTIO_IOMMU_S_OK]	= 0,
+		[VIRTIO_IOMMU_S_IOERR]	= -EIO,
+		[VIRTIO_IOMMU_S_UNSUPP]	= -ENOSYS,
+		[VIRTIO_IOMMU_S_DEVERR]	= -EIO,
+		[VIRTIO_IOMMU_S_INVAL]	= -EINVAL,
+		[VIRTIO_IOMMU_S_RANGE]	= -ERANGE,
+		[VIRTIO_IOMMU_S_NOENT]	= -ENOENT,
+		[VIRTIO_IOMMU_S_FAULT]	= -EFAULT,
+	};
+
+	/* Unknown status codes fall outside the table */
+	if (status >= ARRAY_SIZE(status_errno))
+		return -EIO;
+
+	return status_errno[status];
+}
+
+/*
+ * viommu_get_req_size - compute request size
+ *
+ * @viommu: the device (not used by the request types handled here)
+ * @req: head of the request; its type selects the request structure
+ * @top: on success, size of the device-read-only part
+ * @bottom: size of the device-write-only part (the request tail)
+ *
+ * A virtio-iommu request is split into one device-read-only part (top) and one
+ * device-write-only part (bottom). Given a request, return the sizes of the two
+ * parts in @top and @bottom.
+ *
+ * Return 0 on success, or -EINVAL when the request type is not recognised.
+ * Note that @bottom is written even in the error case.
+ */
+static int viommu_get_req_size(struct viommu_dev *viommu,
+ struct virtio_iommu_req_head *req, size_t *top,
+ size_t *bottom)
+{
+ size_t size;
+ union virtio_iommu_req *r = (void *)req;
+
+ *bottom = sizeof(struct virtio_iommu_req_tail);
+
+ switch (req->type) {
+ case VIRTIO_IOMMU_T_ATTACH:
+ size = sizeof(r->attach);
+ break;
+ case VIRTIO_IOMMU_T_DETACH:
+ size = sizeof(r->detach);
+ break;
+ case VIRTIO_IOMMU_T_MAP:
+ size = sizeof(r->map);
+ break;
+ case VIRTIO_IOMMU_T_UNMAP:
+ size = sizeof(r->unmap);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Each request structure ends with the tail, so top is what precedes it */
+ *top = size - *bottom;
+ return 0;
+}
+
+/*
+ * viommu_receive_resp - collect completed requests from the virtqueue
+ *
+ * @viommu: the device
+ * @nr_sent: number of replies still expected
+ * @sent: list of requests still waiting for a reply, in submission order
+ *
+ * Pop used buffers from the virtqueue and match each one against the head of
+ * @sent. A matched request gets the reported length stored in its 'written'
+ * field and is unlinked from @sent, so that when replies trickle in over
+ * several calls the next call resumes at the first request that is really
+ * still pending, instead of restarting at one that already completed and
+ * discarding every following reply as stale. Buffers that do not match the
+ * expected request are logged and dropped.
+ *
+ * Return the number of requests completed by this call.
+ */
+static int viommu_receive_resp(struct viommu_dev *viommu, int nr_sent,
+			       struct list_head *sent)
+{
+	unsigned int len;
+	int nr_received = 0;
+	struct viommu_request *req, *pending;
+
+	pending = list_first_entry_or_null(sent, struct viommu_request, list);
+	if (WARN_ON(!pending))
+		return 0;
+
+	while ((req = virtqueue_get_buf(viommu->vq, &len)) != NULL) {
+		if (req != pending) {
+			dev_warn(viommu->dev, "discarding stale request\n");
+			continue;
+		}
+
+		pending->written = len;
+		/* Done with this one, the list head is now the next expected */
+		list_del(&pending->list);
+
+		if (++nr_received == nr_sent) {
+			/* Caller's count and the list must agree */
+			WARN_ON(!list_empty(sent));
+			break;
+		} else if (WARN_ON(list_empty(sent))) {
+			break;
+		}
+
+		pending = list_first_entry(sent, struct viommu_request, list);
+	}
+
+	return nr_received;
+}
+
+/*
+ * _viommu_send_reqs_sync - queue a batch of requests and poll for the replies
+ *
+ * @viommu: the device
+ * @req: array of requests whose top/bottom scatterlists are initialised
+ * @nr: array length
+ * @nr_sent: on return, number of requests for which a reply was received
+ *
+ * Called by viommu_send_reqs_sync() with request_lock held. As many requests
+ * as fit are added to the virtqueue, the device is kicked, and the CPU spins
+ * until all queued requests are answered or the timeout expires.
+ *
+ * Return 0 when every request was queued and answered, -EPIPE when the kick
+ * failed, -ETIMEDOUT when replies are missing, -EAGAIN when the ring was full
+ * but some replies came back (the caller should submit the remainder), or the
+ * error returned by virtqueue_add_sgs().
+ */
+static int _viommu_send_reqs_sync(struct viommu_dev *viommu,
+				  struct viommu_request *req, int nr,
+				  int *nr_sent)
+{
+	/* ret must be defined even when @nr is 0 and nothing gets queued */
+	int i, ret = 0;
+	ktime_t timeout;
+	LIST_HEAD(pending);
+	int nr_received = 0;
+	struct scatterlist *sg[2];
+	/*
+	 * The timeout is chosen arbitrarily. It's only here to prevent locking
+	 * up the CPU in case of a device bug.
+	 */
+	unsigned long timeout_ms = 1000;
+
+	*nr_sent = 0;
+
+	for (i = 0; i < nr; i++, req++) {
+		req->written = 0;
+
+		sg[0] = &req->top;
+		sg[1] = &req->bottom;
+
+		/* One device-readable sg followed by one device-writable sg */
+		ret = virtqueue_add_sgs(viommu->vq, sg, 1, 1, req,
+					GFP_ATOMIC);
+		if (ret)
+			break;
+
+		list_add_tail(&req->list, &pending);
+	}
+
+	/* From here on, i is the number of requests actually queued */
+	if (i && !virtqueue_kick(viommu->vq))
+		return -EPIPE;
+
+	timeout = ktime_add_ms(ktime_get(), timeout_ms * i);
+	while (nr_received < i && ktime_before(ktime_get(), timeout)) {
+		nr_received += viommu_receive_resp(viommu, i - nr_received,
+						   &pending);
+		if (nr_received < i)
+			cpu_relax();
+	}
+
+	if (nr_received != i)
+		ret = -ETIMEDOUT;
+
+	if (ret == -ENOSPC && nr_received)
+		/*
+		 * We've freed some space since virtio told us that the ring is
+		 * full, tell the caller to come back for more.
+		 */
+		ret = -EAGAIN;
+
+	*nr_sent = nr_received;
+
+	return ret;
+}
+
+/*
+ * viommu_send_reqs_sync - add a batch of requests, kick the host and wait for
+ * them to return
+ *
+ * @viommu: the device
+ * @req: array of requests
+ * @nr: array length
+ * @nr_sent: on return, contains the number of requests actually sent
+ *
+ * The batch is submitted in as many rounds as needed: whenever the helper
+ * reports -EAGAIN, the remaining requests are submitted again. request_lock
+ * is held (with interrupts disabled) for the duration of each round.
+ *
+ * Return 0 on success, or an error if we failed to send some of the requests.
+ */
+static int viommu_send_reqs_sync(struct viommu_dev *viommu,
+ struct viommu_request *req, int nr,
+ int *nr_sent)
+{
+ int ret;
+ int sent = 0;
+ unsigned long flags;
+
+ *nr_sent = 0;
+ do {
+ spin_lock_irqsave(&viommu->request_lock, flags);
+ ret = _viommu_send_reqs_sync(viommu, req, nr, &sent);
+ spin_unlock_irqrestore(&viommu->request_lock, flags);
+
+ /* Skip over the requests that were answered in this round */
+ *nr_sent += sent;
+ req += sent;
+ nr -= sent;
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+/*
+ * viommu_send_req_sync - send one request and wait for reply
+ *
+ * @viommu: the device
+ * @top: pointer to a virtio_iommu_req_* structure
+ *
+ * The structure is split in place into its device-readable and
+ * device-writable parts, using the sizes given by viommu_get_req_size().
+ *
+ * Returns 0 if the request was successful, or an error number otherwise. No
+ * distinction is done between transport and request errors.
+ */
+static int viommu_send_req_sync(struct viommu_dev *viommu, void *top)
+{
+ int ret;
+ int nr_sent;
+ void *bottom;
+ size_t top_size, bottom_size;
+ struct virtio_iommu_req_tail *tail;
+ struct virtio_iommu_req_head *head = top;
+ struct viommu_request req = {
+ .written = 0
+ };
+
+ ret = viommu_get_req_size(viommu, head, &top_size, &bottom_size);
+ if (ret)
+ return ret;
+
+ /* The writable part directly follows the readable one; tail is last */
+ bottom = top + top_size;
+ tail = bottom + bottom_size - sizeof(*tail);
+
+ sg_init_one(&req.top, top, top_size);
+ sg_init_one(&req.bottom, bottom, bottom_size);
+
+ ret = viommu_send_reqs_sync(viommu, &req, 1, &nr_sent);
+ /* Any transport failure, or a reply of length zero, is reported as -EIO */
+ if (ret || !req.written || nr_sent != 1) {
+ dev_err(viommu->dev, "failed to send request\n");
+ return -EIO;
+ }
+
+ return viommu_status_to_errno(tail->status);
+}
+
+/*
+ * viommu_add_mapping - record a new IOVA range in the domain's interval tree
+ *
+ * @vdomain: the domain
+ * @iova: first address of the range
+ * @paddr: physical address backing @iova
+ * @size: length of the range in bytes
+ *
+ * The mapping is allocated with GFP_ATOMIC and inserted under mappings_lock.
+ *
+ * Return the new mapping, or NULL if the allocation failed.
+ */
+static struct viommu_mapping *
+viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova,
+		   phys_addr_t paddr, size_t size)
+{
+	unsigned long irqflags;
+	struct viommu_mapping *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+
+	if (entry) {
+		entry->paddr = paddr;
+		/* The interval tree works on inclusive ranges */
+		entry->iova.start = iova;
+		entry->iova.last = iova + size - 1;
+
+		spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
+		interval_tree_insert(&entry->iova, &vdomain->mappings);
+		spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);
+	}
+
+	return entry;
+}
+
+/*
+ * viommu_del_mappings - remove mappings from the internal tree
+ *
+ * @vdomain: the domain
+ * @iova: start of the range
+ * @size: size of the range. A size of 0 corresponds to the entire address
+ * space.
+ * @out_mapping: if not NULL, the first removed mapping is returned in there.
+ * This allows the caller to reuse the buffer for the unmap request. When
+ * the returned size is greater than zero, if a mapping is returned, the
+ * caller must free it.
+ *
+ * Nothing is removed (and 0 is returned) when the first mapping found starts
+ * below @iova, since that would require splitting it.
+ *
+ * On success, returns the number of unmapped bytes (>= size)
+ */
+static size_t viommu_del_mappings(struct viommu_domain *vdomain,
+ unsigned long iova, size_t size,
+ struct viommu_mapping **out_mapping)
+{
+ size_t unmapped = 0;
+ unsigned long flags;
+ /* With size == 0 this wraps to the top of the address space */
+ unsigned long last = iova + size - 1;
+ struct viommu_mapping *mapping = NULL;
+ struct interval_tree_node *node, *next;
+
+ spin_lock_irqsave(&vdomain->mappings_lock, flags);
+ next = interval_tree_iter_first(&vdomain->mappings, iova, last);
+
+ if (next) {
+ mapping = container_of(next, struct viommu_mapping, iova);
+ /* Trying to split a mapping? */
+ if (WARN_ON(mapping->iova.start < iova))
+ next = NULL;
+ }
+
+ while (next) {
+ node = next;
+ mapping = container_of(node, struct viommu_mapping, iova);
+
+ /* Fetch the successor before the current node is removed */
+ next = interval_tree_iter_next(node, iova, last);
+
+ /*
+ * Note that for a partial range, this will return the full
+ * mapping so we avoid sending split requests to the device.
+ */
+ unmapped += mapping->iova.last - mapping->iova.start + 1;
+
+ interval_tree_remove(node, &vdomain->mappings);
+
+ /* Hand the first removed mapping to the caller, free the others */
+ if (out_mapping && !(*out_mapping))
+ *out_mapping = mapping;
+ else
+ kfree(mapping);
+ }
+ spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+ return unmapped;
+}
+
+/*
+ * viommu_replay_mappings - re-send MAP requests
+ *
+ * @vdomain: the domain whose mappings are replayed
+ *
+ * When reattaching a domain that was previously detached from all endpoints,
+ * mappings were deleted from the device. Re-create the mappings available in
+ * the internal tree.
+ *
+ * The tree is walked twice: once to count the mappings, then, after
+ * allocating the request array without the spinlock held, once more to fill
+ * it. The second walk is bounded by the allocated count so that a tree that
+ * grew in between cannot overflow the array.
+ *
+ * Return 0 on success (including when there is nothing to replay), -ENOMEM if
+ * the request array cannot be allocated, or the error returned by
+ * viommu_send_reqs_sync().
+ */
+static int viommu_replay_mappings(struct viommu_domain *vdomain)
+{
+	unsigned long flags;
+	int i = 1, nr, ret, nr_sent;
+	struct viommu_request *reqs;
+	struct viommu_mapping *mapping;
+	struct interval_tree_node *node;
+	size_t top_size, bottom_size;
+
+	spin_lock_irqsave(&vdomain->mappings_lock, flags);
+	node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
+	if (!node) {
+		spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+		return 0;
+	}
+
+	while ((node = interval_tree_iter_next(node, 0, -1UL)) != NULL)
+		i++;
+	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+	nr = i;
+	reqs = kcalloc(nr, sizeof(*reqs), GFP_KERNEL);
+	if (!reqs)
+		return -ENOMEM;
+
+	bottom_size = sizeof(struct virtio_iommu_req_tail);
+	top_size = sizeof(struct virtio_iommu_req_map) - bottom_size;
+
+	i = 0;
+	spin_lock_irqsave(&vdomain->mappings_lock, flags);
+	node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
+	/* Never write past the nr entries that were allocated above */
+	while (node && i < nr) {
+		mapping = container_of(node, struct viommu_mapping, iova);
+		/* Reuse the MAP request stored in the mapping by viommu_map() */
+		sg_init_one(&reqs[i].top, &mapping->req.map, top_size);
+		sg_init_one(&reqs[i].bottom, &mapping->req.map.tail,
+			    bottom_size);
+
+		node = interval_tree_iter_next(node, 0, -1UL);
+		i++;
+	}
+	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);
+
+	ret = viommu_send_reqs_sync(vdomain->viommu, reqs, i, &nr_sent);
+	kfree(reqs);
+
+	return ret;
+}
+
+/* IOMMU API */
+
+/* iommu_ops.capable callback: no capability is reported. */
+static bool viommu_capable(enum iommu_cap cap)
+{
+ return false;
+}
+
+/*
+ * iommu_ops.domain_alloc callback. Only UNMANAGED and DMA domains are
+ * supported; DMA domains additionally get a DMA cookie. The domain is not
+ * bound to a viommu instance yet, that happens on first attach.
+ *
+ * Return the new domain, or NULL on unsupported type or allocation failure.
+ */
+static struct iommu_domain *viommu_domain_alloc(unsigned type)
+{
+	struct viommu_domain *vdomain;
+
+	switch (type) {
+	case IOMMU_DOMAIN_UNMANAGED:
+	case IOMMU_DOMAIN_DMA:
+		break;
+	default:
+		return NULL;
+	}
+
+	vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
+	if (!vdomain)
+		return NULL;
+
+	mutex_init(&vdomain->mutex);
+	spin_lock_init(&vdomain->mappings_lock);
+	vdomain->mappings = RB_ROOT_CACHED;
+
+	if (type != IOMMU_DOMAIN_DMA)
+		return &vdomain->domain;
+
+	if (iommu_get_dma_cookie(&vdomain->domain)) {
+		kfree(vdomain);
+		return NULL;
+	}
+
+	return &vdomain->domain;
+}
+
+/*
+ * viommu_domain_finalise - bind a domain to the viommu that will own it
+ *
+ * @viommu: the device the first endpoint of the domain belongs to
+ * @domain: the domain to finalise
+ *
+ * Allocate a domain ID and copy the device's page-size bitmap and geometry
+ * into the domain. vdomain->viommu is only set once the ID was obtained:
+ * viommu_domain_free() uses it to decide whether an ID must be released, and
+ * viommu_attach_dev() uses it to decide whether finalisation is still needed.
+ *
+ * Return 0 on success, or the negative error from the ID allocator.
+ */
+static int viommu_domain_finalise(struct viommu_dev *viommu,
+				  struct iommu_domain *domain)
+{
+	int ret;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+	/* ida limits size to 31 bits. A value of 0 means "max" */
+	unsigned int max_domain = viommu->domain_bits >= 31 ? 0 :
+				  1U << viommu->domain_bits;
+
+	ret = ida_simple_get(&viommu->domain_ids, 0, max_domain, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	vdomain->id = (unsigned int)ret;
+
+	domain->pgsize_bitmap = viommu->pgsize_bitmap;
+	domain->geometry = viommu->geometry;
+
+	/* Mark the domain as finalised only now that it owns a valid ID */
+	vdomain->viommu = viommu;
+
+	return 0;
+}
+
+/*
+ * iommu_ops.domain_free callback: release the DMA cookie, every mapping still
+ * in the tree, the domain ID (if the domain was ever finalised) and the
+ * domain itself.
+ */
+static void viommu_domain_free(struct iommu_domain *domain)
+{
+	struct viommu_dev *owner;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+	iommu_put_dma_cookie(domain);
+
+	/* Free all remaining mappings (size 2^64) */
+	viommu_del_mappings(vdomain, 0, 0, NULL);
+
+	owner = vdomain->viommu;
+	if (owner)
+		ida_simple_remove(&owner->domain_ids, vdomain->id);
+
+	kfree(vdomain);
+}
+
+/*
+ * iommu_ops.attach_dev callback: attach every endpoint ID of @dev to @domain.
+ *
+ * The domain is finalised on its first attach. One ATTACH request is sent per
+ * ID in the device's fwspec, and if this is the first endpoint of the domain
+ * the mappings already recorded in the tree are replayed to the device.
+ *
+ * Return 0 on success, -EXDEV if the domain belongs to another viommu,
+ * -ENOMEM on allocation failure, or the error from a failed request.
+ */
+static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
+{
+ int i;
+ int ret = 0;
+ struct virtio_iommu_req_attach *req;
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct viommu_endpoint *vdev = fwspec->iommu_priv;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ mutex_lock(&vdomain->mutex);
+ if (!vdomain->viommu) {
+ /*
+ * Initialize the domain proper now that we know which viommu
+ * owns it.
+ */
+ ret = viommu_domain_finalise(vdev->viommu, domain);
+ } else if (vdomain->viommu != vdev->viommu) {
+ dev_err(dev, "cannot attach to foreign vIOMMU\n");
+ ret = -EXDEV;
+ }
+ mutex_unlock(&vdomain->mutex);
+
+ if (ret)
+ return ret;
+
+ /*
+ * In the virtio-iommu device, when attaching the endpoint to a new
+ * domain, it is detached from the old one and, if as a result the
+ * old domain isn't attached to any endpoint, all mappings are removed
+ * from the old domain and it is freed.
+ *
+ * In the driver the old domain still exists, and its mappings will be
+ * recreated if it gets reattached to an endpoint. Otherwise it will be
+ * freed explicitly.
+ *
+ * vdev->vdomain is protected by group->mutex
+ */
+ /*
+ * NOTE(review): the old domain's count is dropped before the ATTACH
+ * request is sent, and is not restored on the error paths below.
+ */
+ if (vdev->vdomain)
+ vdev->vdomain->endpoints--;
+
+ /* DMA to the stack is forbidden, store request on the heap */
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ *req = (struct virtio_iommu_req_attach) {
+ .head.type = VIRTIO_IOMMU_T_ATTACH,
+ .domain = cpu_to_le32(vdomain->id),
+ };
+
+ /* Same request buffer for every ID, only the endpoint field changes */
+ for (i = 0; i < fwspec->num_ids; i++) {
+ req->endpoint = cpu_to_le32(fwspec->ids[i]);
+
+ ret = viommu_send_req_sync(vdomain->viommu, req);
+ if (ret)
+ break;
+ }
+
+ kfree(req);
+
+ if (ret)
+ return ret;
+
+ if (!vdomain->endpoints) {
+ /*
+ * This endpoint is the first to be attached to the domain.
+ * Replay existing mappings if any (e.g. SW MSI).
+ */
+ ret = viommu_replay_mappings(vdomain);
+ if (ret)
+ return ret;
+ }
+
+ vdomain->endpoints++;
+ vdev->vdomain = vdomain;
+
+ return 0;
+}
+
+/*
+ * iommu_ops.map callback: record the mapping in the domain's tree and, if at
+ * least one endpoint is attached, send a MAP request to the device.
+ *
+ * IOMMU_READ and IOMMU_WRITE in @prot are translated to the corresponding
+ * VIRTIO_IOMMU_MAP_F_* flags; other protection bits are not forwarded.
+ *
+ * Return 0 on success, -ENOMEM if the mapping cannot be allocated, or the
+ * error from the request, in which case the mapping is removed again.
+ */
+static int viommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ int ret;
+ int flags;
+ struct viommu_mapping *mapping;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ mapping = viommu_add_mapping(vdomain, iova, paddr, size);
+ if (!mapping)
+ return -ENOMEM;
+
+ flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) |
+ (prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0);
+
+ /* Kept in the mapping so that it can be replayed on a later attach */
+ mapping->req.map = (struct virtio_iommu_req_map) {
+ .head.type = VIRTIO_IOMMU_T_MAP,
+ .domain = cpu_to_le32(vdomain->id),
+ .virt_start = cpu_to_le64(iova),
+ .phys_start = cpu_to_le64(paddr),
+ .virt_end = cpu_to_le64(iova + size - 1),
+ .flags = cpu_to_le32(flags),
+ };
+
+ /* Nothing attached yet: the request is sent by viommu_replay_mappings() */
+ if (!vdomain->endpoints)
+ return 0;
+
+ ret = viommu_send_req_sync(vdomain->viommu, &mapping->req);
+ if (ret)
+ viommu_del_mappings(vdomain, iova, size, NULL);
+
+ return ret;
+}
+
+/*
+ * iommu_ops.unmap callback: drop the mappings covering the range from the
+ * domain's tree and, if at least one endpoint is attached, send a single
+ * UNMAP request for the whole removed range. The first removed mapping is
+ * reused as the buffer for that request.
+ *
+ * Return the number of bytes unmapped, or 0 on failure.
+ */
+static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ int ret = 0;
+ size_t unmapped;
+ struct viommu_mapping *mapping = NULL;
+ struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+ unmapped = viommu_del_mappings(vdomain, iova, size, &mapping);
+ if (unmapped < size) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ /* Device already removed all mappings after detach. */
+ if (!vdomain->endpoints)
+ goto out_free;
+
+ if (WARN_ON(!mapping))
+ return 0;
+
+ /* The mapping is out of the tree, its request buffer is free for reuse */
+ mapping->req.unmap = (struct virtio_iommu_req_unmap) {
+ .head.type = VIRTIO_IOMMU_T_UNMAP,
+ .domain = cpu_to_le32(vdomain->id),
+ .virt_start = cpu_to_le64(iova),
+ .virt_end = cpu_to_le64(iova + unmapped - 1),
+ };
+
+ ret = viommu_send_req_sync(vdomain->viommu, &mapping->req);
+
+out_free:
+ kfree(mapping);
+
+ return ret ? 0 : unmapped;
+}
+
+/*
+ * iommu_ops.iova_to_phys callback: look @iova up in the domain's interval
+ * tree, under mappings_lock.
+ *
+ * Return the physical address @iova translates to, or 0 if no mapping
+ * contains it.
+ */
+static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
+				       dma_addr_t iova)
+{
+	u64 phys = 0;
+	unsigned long irqflags;
+	struct interval_tree_node *hit;
+	struct viommu_domain *vdomain = to_viommu_domain(domain);
+
+	spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
+	hit = interval_tree_iter_first(&vdomain->mappings, iova, iova);
+	if (hit) {
+		struct viommu_mapping *found;
+
+		found = container_of(hit, struct viommu_mapping, iova);
+		/* Add the offset of iova within the mapping */
+		phys = found->paddr + (iova - found->iova.start);
+	}
+	spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);
+
+	return phys;
+}
+
+/* Forward declarations: both are used before their definitions further down */
+static struct iommu_ops viommu_ops;
+static struct virtio_driver virtio_iommu_drv;
+
+/*
+ * driver_find_device() match callback: true when the parent of @dev has the
+ * firmware node passed in @data.
+ */
+static int viommu_match_node(struct device *dev, void *data)
+{
+	struct fwnode_handle *parent_fwnode = dev->parent->fwnode;
+
+	return parent_fwnode == data;
+}
+
+/*
+ * Find the viommu instance whose parent device has firmware node @fwnode,
+ * among the devices bound to this driver.
+ *
+ * Return the instance (the virtio device's priv pointer), or NULL when no
+ * device matches. The device reference taken by the lookup is dropped before
+ * returning.
+ */
+static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+	struct device *dev;
+
+	dev = driver_find_device(&virtio_iommu_drv.driver, NULL, fwnode,
+				 viommu_match_node);
+	put_device(dev);
+
+	if (!dev)
+		return NULL;
+
+	return dev_to_virtio(dev)->priv;
+}
+
+/*
+ * iommu_ops.add_device callback: set up the per-endpoint data for a device
+ * managed by a virtio-iommu and add it to an IOMMU group.
+ *
+ * Return 0 on success, -ENODEV when the device is not ours or its viommu is
+ * not found, -ENOMEM on allocation failure, or the error from
+ * iommu_group_get_for_dev(). On failure the per-endpoint data is freed and
+ * fwspec->iommu_priv is reset.
+ */
+static int viommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+	struct viommu_endpoint *vdev;
+	struct viommu_dev *viommu = NULL;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	if (!fwspec || fwspec->ops != &viommu_ops)
+		return -ENODEV;
+
+	viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
+	if (!viommu)
+		return -ENODEV;
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+
+	vdev->viommu = viommu;
+	fwspec->iommu_priv = vdev;
+
+	/*
+	 * Last step creates a default domain and attaches to it. Everything
+	 * must be ready.
+	 */
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group)) {
+		/* Undo the allocation, otherwise vdev leaks on this path */
+		fwspec->iommu_priv = NULL;
+		kfree(vdev);
+		return PTR_ERR(group);
+	}
+
+	iommu_group_put(group);
+
+	return 0;
+}
+
+/*
+ * iommu_ops.remove_device callback: undo viommu_add_device().
+ *
+ * Devices that are not managed by this driver are ignored. The device is
+ * taken out of its IOMMU group before the per-endpoint data is freed, and
+ * iommu_priv is cleared so that no stale pointer is left behind.
+ */
+static void viommu_remove_device(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	if (!fwspec || fwspec->ops != &viommu_ops)
+		return;
+
+	iommu_group_remove_device(dev);
+
+	kfree(fwspec->iommu_priv);
+	fwspec->iommu_priv = NULL;
+}
+
+/*
+ * iommu_ops.device_group callback: PCI devices use the PCI grouping helper,
+ * everything else gets the generic one.
+ */
+static struct iommu_group *viommu_device_group(struct device *dev)
+{
+	return dev_is_pci(dev) ? pci_device_group(dev) :
+				 generic_device_group(dev);
+}
+
+/*
+ * iommu_ops.of_xlate callback: record the first cell of the phandle arguments
+ * as an endpoint ID in the device's fwspec.
+ */
+static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+ return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
+/*
+ * iommu_ops.get_resv_regions callback: report the software MSI window
+ * (MSI_IOVA_BASE, MSI_IOVA_LENGTH) followed by the regions provided by
+ * iommu_dma_get_resv_regions().
+ *
+ * If the MSI region cannot be allocated, nothing is added to @head.
+ */
+static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
+{
+	struct iommu_resv_region *region;
+	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
+					 IOMMU_RESV_SW_MSI);
+	if (!region)
+		return;
+
+	list_add_tail(&region->list, head);
+	iommu_dma_get_resv_regions(dev, head);
+}
+
+/*
+ * iommu_ops.put_resv_regions callback: free every region on @head. The
+ * entries are freed without being unlinked, so @head must not be walked
+ * again afterwards.
+ */
+static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
+{
+	struct iommu_resv_region *region, *tmp;
+
+	list_for_each_entry_safe(region, tmp, head, list) {
+		kfree(region);
+	}
+}
+
+/*
+ * IOMMU API callbacks. Not const: viommu_probe() fills in pgsize_bitmap once
+ * the device configuration has been read.
+ */
+static struct iommu_ops viommu_ops = {
+ .capable = viommu_capable,
+ .domain_alloc = viommu_domain_alloc,
+ .domain_free = viommu_domain_free,
+ .attach_dev = viommu_attach_dev,
+ .map = viommu_map,
+ .unmap = viommu_unmap,
+ .map_sg = default_iommu_map_sg,
+ .iova_to_phys = viommu_iova_to_phys,
+ .add_device = viommu_add_device,
+ .remove_device = viommu_remove_device,
+ .device_group = viommu_device_group,
+ .of_xlate = viommu_of_xlate,
+ .get_resv_regions = viommu_get_resv_regions,
+ .put_resv_regions = viommu_put_resv_regions,
+};
+
+/*
+ * Look up the single "request" virtqueue of the device (no callback is
+ * registered for it) and store it in viommu->vq.
+ *
+ * Return 0 on success, or the error from virtio_find_single_vq().
+ */
+static int viommu_init_vq(struct viommu_dev *viommu)
+{
+	struct virtqueue *vq;
+
+	vq = virtio_find_single_vq(dev_to_virtio(viommu->dev), NULL,
+				   "request");
+	if (IS_ERR(vq)) {
+		dev_err(viommu->dev, "cannot find VQ\n");
+		return PTR_ERR(vq);
+	}
+
+	viommu->vq = vq;
+
+	return 0;
+}
+
+/*
+ * virtio driver probe callback.
+ *
+ * Sets up the request virtqueue, reads the device configuration (page size
+ * mask, and optionally input range and domain bits), then registers the
+ * IOMMU with the core and installs viommu_ops on the PCI, AMBA and platform
+ * buses where they are not already installed.
+ *
+ * Return 0 on success or a negative error; -EINVAL when the device reports an
+ * empty page size mask.
+ */
+static int viommu_probe(struct virtio_device *vdev)
+{
+ struct device *parent_dev = vdev->dev.parent;
+ struct viommu_dev *viommu = NULL;
+ struct device *dev = &vdev->dev;
+ /* Defaults used when VIRTIO_IOMMU_F_INPUT_RANGE is not offered */
+ u64 input_start = 0;
+ u64 input_end = -1UL;
+ int ret;
+
+ viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL);
+ if (!viommu)
+ return -ENOMEM;
+
+ spin_lock_init(&viommu->request_lock);
+ ida_init(&viommu->domain_ids);
+ viommu->dev = dev;
+ viommu->vdev = vdev;
+
+ ret = viommu_init_vq(viommu);
+ if (ret)
+ return ret;
+
+ virtio_cread(vdev, struct virtio_iommu_config, page_size_mask,
+ &viommu->pgsize_bitmap);
+
+ if (!viommu->pgsize_bitmap) {
+ ret = -EINVAL;
+ goto err_free_vqs;
+ }
+
+ /* Default used when VIRTIO_IOMMU_F_DOMAIN_BITS is not offered */
+ viommu->domain_bits = 32;
+
+ /* Optional features */
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.start,
+ &input_start);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
+ struct virtio_iommu_config, input_range.end,
+ &input_end);
+
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS,
+ struct virtio_iommu_config, domain_bits,
+ &viommu->domain_bits);
+
+ viommu->geometry = (struct iommu_domain_geometry) {
+ .aperture_start = input_start,
+ .aperture_end = input_end,
+ .force_aperture = true,
+ };
+
+ /* viommu_ops is shared by all instances; the last probed one wins */
+ viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
+
+ virtio_device_ready(vdev);
+
+ ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
+ virtio_bus_name(vdev));
+ if (ret)
+ goto err_free_vqs;
+
+ iommu_device_set_ops(&viommu->iommu, &viommu_ops);
+ iommu_device_set_fwnode(&viommu->iommu, parent_dev->fwnode);
+
+ /* NOTE(review): the return value of iommu_device_register() is ignored */
+ iommu_device_register(&viommu->iommu);
+
+#ifdef CONFIG_PCI
+ if (pci_bus_type.iommu_ops != &viommu_ops) {
+ pci_request_acs();
+ ret = bus_set_iommu(&pci_bus_type, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+#endif
+#ifdef CONFIG_ARM_AMBA
+ if (amba_bustype.iommu_ops != &viommu_ops) {
+ ret = bus_set_iommu(&amba_bustype, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+#endif
+ if (platform_bus_type.iommu_ops != &viommu_ops) {
+ ret = bus_set_iommu(&platform_bus_type, &viommu_ops);
+ if (ret)
+ goto err_unregister;
+ }
+
+ /*
+ * NOTE(review): priv is what viommu_get_by_fwnode() returns, and it is
+ * only set here, after the bus_set_iommu() calls above.
+ */
+ vdev->priv = viommu;
+
+ dev_info(dev, "input address: %u bits\n",
+ order_base_2(viommu->geometry.aperture_end));
+ dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);
+
+ return 0;
+
+err_unregister:
+ iommu_device_sysfs_remove(&viommu->iommu);
+ iommu_device_unregister(&viommu->iommu);
+err_free_vqs:
+ vdev->config->del_vqs(vdev);
+
+ return ret;
+}
+
+/*
+ * virtio driver remove callback: unregister the IOMMU from the core, then
+ * reset the device and delete its virtqueues.
+ */
+static void viommu_remove(struct virtio_device *vdev)
+{
+ struct viommu_dev *viommu = vdev->priv;
+
+ iommu_device_sysfs_remove(&viommu->iommu);
+ iommu_device_unregister(&viommu->iommu);
+
+ /* Stop all virtqueues */
+ vdev->config->reset(vdev);
+ vdev->config->del_vqs(vdev);
+
+ dev_info(&vdev->dev, "device removed\n");
+}
+
+/*
+ * virtio driver config_changed callback: configuration changes are only
+ * logged, the new values are not read back.
+ */
+static void viommu_config_changed(struct virtio_device *vdev)
+{
+ dev_warn(&vdev->dev, "config changed\n");
+}
+
+/* Feature bits listed in the driver's feature table */
+static unsigned int features[] = {
+ VIRTIO_IOMMU_F_MAP_UNMAP,
+ VIRTIO_IOMMU_F_DOMAIN_BITS,
+ VIRTIO_IOMMU_F_INPUT_RANGE,
+};
+
+/* Match virtio IOMMU devices from any vendor; the zero entry ends the table */
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+/* The virtio driver, registered through module_virtio_driver() below */
+static struct virtio_driver virtio_iommu_drv = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .probe = viommu_probe,
+ .remove = viommu_remove,
+ .config_changed = viommu_config_changed,
+};
+
+module_virtio_driver(virtio_iommu_drv);
+
+IOMMU_OF_DECLARE(viommu, "virtio,mmio");
+
+MODULE_DESCRIPTION("Virtio IOMMU driver");
+MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 6d5c3b2d4f4d..cfe47c5d9a56 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -43,5 +43,6 @@
#define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
+#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
new file mode 100644
index 000000000000..0de9b44db14d
--- /dev/null
+++ b/include/uapi/linux/virtio_iommu.h
@@ -0,0 +1,116 @@
+/*
+ * Virtio-iommu definition v0.6
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
+#define _UAPI_LINUX_VIRTIO_IOMMU_H
+
+#include <linux/types.h>
+
+/* Feature bits */
+#define VIRTIO_IOMMU_F_INPUT_RANGE 0
+#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
+#define VIRTIO_IOMMU_F_MAP_UNMAP 2
+#define VIRTIO_IOMMU_F_BYPASS 3
+
+/* Layout of the device configuration space */
+struct virtio_iommu_config {
+ /* Supported page sizes */
+ __u64 page_size_mask;
+ /* Supported IOVA range */
+ struct virtio_iommu_range {
+ __u64 start;
+ __u64 end;
+ } input_range;
+ /* Max domain ID size */
+ __u8 domain_bits;
+} __packed;
+
+/* Request types */
+#define VIRTIO_IOMMU_T_ATTACH 0x01
+#define VIRTIO_IOMMU_T_DETACH 0x02
+#define VIRTIO_IOMMU_T_MAP 0x03
+#define VIRTIO_IOMMU_T_UNMAP 0x04
+
+/* Status types */
+#define VIRTIO_IOMMU_S_OK 0x00
+#define VIRTIO_IOMMU_S_IOERR 0x01
+#define VIRTIO_IOMMU_S_UNSUPP 0x02
+#define VIRTIO_IOMMU_S_DEVERR 0x03
+#define VIRTIO_IOMMU_S_INVAL 0x04
+#define VIRTIO_IOMMU_S_RANGE 0x05
+#define VIRTIO_IOMMU_S_NOENT 0x06
+#define VIRTIO_IOMMU_S_FAULT 0x07
+
+/* Common start of every request */
+struct virtio_iommu_req_head {
+ /* One of VIRTIO_IOMMU_T_* */
+ __u8 type;
+ __u8 reserved[3];
+} __packed;
+
+/* Common end of every request */
+struct virtio_iommu_req_tail {
+ /* One of VIRTIO_IOMMU_S_* */
+ __u8 status;
+ __u8 reserved[3];
+} __packed;
+
+/* VIRTIO_IOMMU_T_ATTACH request */
+struct virtio_iommu_req_attach {
+ struct virtio_iommu_req_head head;
+
+ /* Domain ID and endpoint ID, little-endian */
+ __le32 domain;
+ __le32 endpoint;
+ __le32 reserved;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+/* VIRTIO_IOMMU_T_DETACH request */
+struct virtio_iommu_req_detach {
+ struct virtio_iommu_req_head head;
+
+ /* Endpoint ID, little-endian */
+ __le32 endpoint;
+ __le32 reserved;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
+#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
+#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2)
+
+#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \
+ VIRTIO_IOMMU_MAP_F_WRITE | \
+ VIRTIO_IOMMU_MAP_F_EXEC)
+
+/* VIRTIO_IOMMU_T_MAP request. All multi-byte fields are little-endian. */
+struct virtio_iommu_req_map {
+ struct virtio_iommu_req_head head;
+
+ __le32 domain;
+ /* First and last (inclusive) virtual address of the range */
+ __le64 virt_start;
+ __le64 virt_end;
+ /* Physical address that virt_start maps to */
+ __le64 phys_start;
+ /* Combination of VIRTIO_IOMMU_MAP_F_* */
+ __le32 flags;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+/* VIRTIO_IOMMU_T_UNMAP request. All multi-byte fields are little-endian. */
+struct virtio_iommu_req_unmap {
+ struct virtio_iommu_req_head head;
+
+ __le32 domain;
+ /* First and last (inclusive) virtual address of the range */
+ __le64 virt_start;
+ __le64 virt_end;
+ __le32 reserved;
+
+ struct virtio_iommu_req_tail tail;
+} __packed;
+
+/* Any request; head.type tells which member is valid */
+union virtio_iommu_req {
+ struct virtio_iommu_req_head head;
+
+ struct virtio_iommu_req_attach attach;
+ struct virtio_iommu_req_detach detach;
+ struct virtio_iommu_req_map map;
+ struct virtio_iommu_req_unmap unmap;
+};
+
+#endif
--
2.16.1
^ permalink raw reply related
* [PATCH 2/4] iommu/virtio: Add probe request
From: Jean-Philippe Brucker @ 2018-02-14 14:53 UTC (permalink / raw)
To: iommu, kvm, virtualization, virtio-dev, kvmarm
Cc: jayachandran.nair, lorenzo.pieralisi, tnowicki, mst, marc.zyngier,
will.deacon, jintack, eric.auger, robin.murphy, joro,
eric.auger.pro
In-Reply-To: <20180214145340.1223-1-jean-philippe.brucker@arm.com>
When the device offers the probe feature, send a probe request for each
device managed by the IOMMU. Extract RESV_MEM information. When we
encounter an MSI doorbell region, set it up as an IOMMU_RESV_MSI region.
This will tell other subsystems that there is no need to map the MSI
doorbell in the virtio-iommu, because MSIs bypass it.
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
drivers/iommu/virtio-iommu.c | 163 ++++++++++++++++++++++++++++++++++++--
include/uapi/linux/virtio_iommu.h | 37 +++++++++
2 files changed, 193 insertions(+), 7 deletions(-)
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index a9c9245e8ba2..3ac4b38eaf19 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -45,6 +45,7 @@ struct viommu_dev {
struct iommu_domain_geometry geometry;
u64 pgsize_bitmap;
u8 domain_bits;
+ u32 probe_size;
};
struct viommu_mapping {
@@ -72,6 +73,7 @@ struct viommu_domain {
struct viommu_endpoint {
struct viommu_dev *viommu;
struct viommu_domain *vdomain;
+ struct list_head resv_regions;
};
struct viommu_request {
@@ -140,6 +142,10 @@ static int viommu_get_req_size(struct viommu_dev *viommu,
case VIRTIO_IOMMU_T_UNMAP:
size = sizeof(r->unmap);
break;
+ case VIRTIO_IOMMU_T_PROBE:
+ *bottom += viommu->probe_size;
+ size = sizeof(r->probe) + *bottom;
+ break;
default:
return -EINVAL;
}
@@ -448,6 +454,105 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
return ret;
}
+/*
+ * Record one RESV_MEM property reported by the device for an endpoint.
+ *
+ * @vdev: endpoint whose resv_regions list receives the new region
+ * @mem:  property payload as written by the device (little-endian fields)
+ * @len:  payload length announced in the property header
+ *
+ * Returns 0 on success, -EINVAL if the payload is too short to hold a
+ * struct virtio_iommu_probe_resv_mem, or -ENOMEM if the region cannot be
+ * allocated.
+ */
+static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
+			       struct virtio_iommu_probe_resv_mem *mem,
+			       size_t len)
+{
+	u64 addr, size;
+	struct iommu_resv_region *region = NULL;
+	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+	/* Validate the length before reading any field of the payload */
+	if (len < sizeof(*mem))
+		return -EINVAL;
+
+	addr = le64_to_cpu(mem->addr);
+	size = le64_to_cpu(mem->size);
+
+	switch (mem->subtype) {
+	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
+		region = iommu_alloc_resv_region(addr, size, prot,
+						 IOMMU_RESV_MSI);
+		break;
+	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
+	default:
+		region = iommu_alloc_resv_region(addr, size, 0,
+						 IOMMU_RESV_RESERVED);
+		break;
+	}
+
+	/* iommu_alloc_resv_region() returns NULL when allocation fails */
+	if (!region)
+		return -ENOMEM;
+
+	/* list_add() takes the new entry first, then the list head */
+	list_add(&region->list, &vdev->resv_regions);
+
+	/*
+	 * Treat unknown subtype as RESERVED, but urge users to update their
+	 * driver.
+	 */
+	if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
+	    mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI)
+		pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype);
+
+	return 0;
+}
+
+/*
+ * Send a PROBE request for an endpoint and parse the properties returned by
+ * the device.
+ *
+ * @viommu: IOMMU device the request is sent to; probe_size gives the size of
+ *          the property buffer
+ * @dev:    endpoint device; its fwspec supplies the endpoint ID and the
+ *          viommu_endpoint that collects the parsed properties
+ *
+ * Returns 0 on success or a negative errno: -EINVAL when the fwspec has no
+ * ID or a property does not fit in the probe buffer, -ENOMEM when the request
+ * cannot be allocated, the error from viommu_send_req_sync(), or the result of
+ * parsing the last property.
+ */
+static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
+{
+	int ret;
+	u16 type, len;
+	size_t cur = 0;
+	struct virtio_iommu_req_probe *probe;
+	struct virtio_iommu_probe_property *prop;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct viommu_endpoint *vdev = fwspec->iommu_priv;
+
+	if (!fwspec->num_ids)
+		/* Trouble ahead. */
+		return -EINVAL;
+
+	/* Request header, probe_size bytes of properties, then the tail */
+	probe = kzalloc(sizeof(*probe) + viommu->probe_size +
+			sizeof(struct virtio_iommu_req_tail), GFP_KERNEL);
+	if (!probe)
+		return -ENOMEM;
+
+	probe->head.type = VIRTIO_IOMMU_T_PROBE;
+	/*
+	 * For now, assume that properties of an endpoint that outputs multiple
+	 * IDs are consistent. Only probe the first one.
+	 */
+	probe->endpoint = cpu_to_le32(fwspec->ids[0]);
+
+	ret = viommu_send_req_sync(viommu, probe);
+	if (ret)
+		goto out_free;
+
+	prop = (void *)probe->properties;
+	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
+
+	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
+	       cur < viommu->probe_size) {
+		len = le16_to_cpu(prop->length);
+
+		/*
+		 * The length is supplied by the device: make sure the whole
+		 * property lies inside the probe buffer before its value is
+		 * parsed. cur < probe_size here, so the subtraction cannot
+		 * wrap.
+		 */
+		if (sizeof(*prop) + len > viommu->probe_size - cur) {
+			dev_err(dev, "viommu prop 0x%x exceeds probe buffer\n",
+				type);
+			ret = -EINVAL;
+			break;
+		}
+
+		switch (type) {
+		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
+			ret = viommu_add_resv_mem(vdev, (void *)prop->value, len);
+			break;
+		default:
+			dev_dbg(dev, "unknown viommu prop 0x%x\n", type);
+		}
+
+		if (ret)
+			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);
+
+		/* Step over this property's header and value */
+		cur += sizeof(*prop) + len;
+		if (cur >= viommu->probe_size)
+			break;
+
+		prop = (void *)probe->properties + cur;
+		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
+	}
+
+out_free:
+	kfree(probe);
+	return ret;
+}
+
/* IOMMU API */
static bool viommu_capable(enum iommu_cap cap)
@@ -703,6 +808,7 @@ static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
static int viommu_add_device(struct device *dev)
{
+ int ret;
struct iommu_group *group;
struct viommu_endpoint *vdev;
struct viommu_dev *viommu = NULL;
@@ -720,8 +826,16 @@ static int viommu_add_device(struct device *dev)
return -ENOMEM;
vdev->viommu = viommu;
+ INIT_LIST_HEAD(&vdev->resv_regions);
fwspec->iommu_priv = vdev;
+ if (viommu->probe_size) {
+ /* Get additional information for this endpoint */
+ ret = viommu_probe_endpoint(viommu, dev);
+ if (ret)
+ return ret;
+ }
+
/*
* Last step creates a default domain and attaches to it. Everything
* must be ready.
@@ -735,7 +849,19 @@ static int viommu_add_device(struct device *dev)
static void viommu_remove_device(struct device *dev)
{
- kfree(dev->iommu_fwspec->iommu_priv);
+ struct viommu_endpoint *vdev;
+ struct iommu_resv_region *entry, *next;
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+ if (!fwspec || fwspec->ops != &viommu_ops)
+ return;
+
+ vdev = fwspec->iommu_priv;
+
+ list_for_each_entry_safe(entry, next, &vdev->resv_regions, list)
+ kfree(entry);
+
+ kfree(vdev);
}
static struct iommu_group *viommu_device_group(struct device *dev)
@@ -753,15 +879,33 @@ static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args)
static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
{
- struct iommu_resv_region *region;
+ struct iommu_resv_region *entry, *new_entry, *msi = NULL;
+ struct viommu_endpoint *vdev = dev->iommu_fwspec->iommu_priv;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
- region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
- IOMMU_RESV_SW_MSI);
- if (!region)
- return;
+ list_for_each_entry(entry, &vdev->resv_regions, list) {
+ /*
+ * If the device registered a bypass MSI window, use it.
+ * Otherwise add a software-mapped region
+ */
+ if (entry->type == IOMMU_RESV_MSI)
+ msi = entry;
+
+ new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
+ if (!new_entry)
+ return;
+ list_add_tail(&new_entry->list, head);
+ }
+
+ if (!msi) {
+ msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+ prot, IOMMU_RESV_SW_MSI);
+ if (!msi)
+ return;
+
+ list_add_tail(&msi->list, head);
+ }
- list_add_tail(&region->list, head);
iommu_dma_get_resv_regions(dev, head);
}
@@ -852,6 +996,10 @@ static int viommu_probe(struct virtio_device *vdev)
struct virtio_iommu_config, domain_bits,
&viommu->domain_bits);
+ virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
+ struct virtio_iommu_config, probe_size,
+ &viommu->probe_size);
+
viommu->geometry = (struct iommu_domain_geometry) {
.aperture_start = input_start,
.aperture_end = input_end,
@@ -933,6 +1081,7 @@ static unsigned int features[] = {
VIRTIO_IOMMU_F_MAP_UNMAP,
VIRTIO_IOMMU_F_DOMAIN_BITS,
VIRTIO_IOMMU_F_INPUT_RANGE,
+ VIRTIO_IOMMU_F_PROBE,
};
static struct virtio_device_id id_table[] = {
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index 0de9b44db14d..2335d9ed4676 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -15,6 +15,7 @@
#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
#define VIRTIO_IOMMU_F_MAP_UNMAP 2
#define VIRTIO_IOMMU_F_BYPASS 3
+#define VIRTIO_IOMMU_F_PROBE 4
struct virtio_iommu_config {
/* Supported page sizes */
@@ -26,6 +27,9 @@ struct virtio_iommu_config {
} input_range;
/* Max domain ID size */
__u8 domain_bits;
+ __u8 padding[3];
+ /* Probe buffer size */
+ __u32 probe_size;
} __packed;
/* Request types */
@@ -33,6 +37,7 @@ struct virtio_iommu_config {
#define VIRTIO_IOMMU_T_DETACH 0x02
#define VIRTIO_IOMMU_T_MAP 0x03
#define VIRTIO_IOMMU_T_UNMAP 0x04
+#define VIRTIO_IOMMU_T_PROBE 0x05
/* Status types */
#define VIRTIO_IOMMU_S_OK 0x00
@@ -104,6 +109,37 @@ struct virtio_iommu_req_unmap {
struct virtio_iommu_req_tail tail;
} __packed;
+#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
+#define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
+
+/* Value of a VIRTIO_IOMMU_PROBE_T_RESV_MEM property */
+struct virtio_iommu_probe_resv_mem {
+ /* One of VIRTIO_IOMMU_RESV_MEM_T_* */
+ __u8 subtype;
+ __u8 reserved[3];
+ /* Start and length of the reserved region, little-endian */
+ __le64 addr;
+ __le64 size;
+} __packed;
+
+#define VIRTIO_IOMMU_PROBE_T_NONE 0
+#define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
+
+#define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
+
+/* Header found in front of each property of a PROBE reply */
+struct virtio_iommu_probe_property {
+ /* VIRTIO_IOMMU_PROBE_T_* once masked with VIRTIO_IOMMU_PROBE_T_MASK */
+ __le16 type;
+ /* Size of value[] in bytes; this header is not counted */
+ __le16 length;
+ __u8 value[];
+} __packed;
+
+/* PROBE request (VIRTIO_IOMMU_T_PROBE): query the properties of one endpoint */
+struct virtio_iommu_req_probe {
+ struct virtio_iommu_req_head head;
+ /* ID of the endpoint to probe, little-endian */
+ __le32 endpoint;
+ __u8 reserved[64];
+
+ /* Sequence of virtio_iommu_probe_property entries; the driver reserves probe_size bytes */
+ __u8 properties[];
+
+ /* Tail follows the variable-length properties array (no padding) */
+} __packed;
+
union virtio_iommu_req {
struct virtio_iommu_req_head head;
@@ -111,6 +147,7 @@ union virtio_iommu_req {
struct virtio_iommu_req_detach detach;
struct virtio_iommu_req_map map;
struct virtio_iommu_req_unmap unmap;
+ struct virtio_iommu_req_probe probe;
};
#endif
--
2.16.1
^ permalink raw reply related
* [PATCH 3/4] iommu/virtio: Add event queue
From: Jean-Philippe Brucker @ 2018-02-14 14:53 UTC (permalink / raw)
To: iommu, kvm, virtualization, virtio-dev, kvmarm
Cc: jayachandran.nair, lorenzo.pieralisi, tnowicki, mst, marc.zyngier,
will.deacon, jintack, eric.auger, robin.murphy, joro,
eric.auger.pro
In-Reply-To: <20180214145340.1223-1-jean-philippe.brucker@arm.com>
The event queue offers a way for the device to report access faults from
endpoints. It is implemented on virtqueue #1. Whenever the host needs to
signal a fault, it fills one of the buffers offered by the guest and
interrupts it.
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
drivers/iommu/virtio-iommu.c | 139 ++++++++++++++++++++++++++++++++++----
include/uapi/linux/virtio_iommu.h | 18 +++++
2 files changed, 143 insertions(+), 14 deletions(-)
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 3ac4b38eaf19..6b96f1b36d5a 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -30,6 +30,12 @@
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
+/* Indices into viommu_dev.vqs[] */
+enum viommu_vq_idx {
+ /* Queue carrying the driver's requests */
+ VIOMMU_REQUEST_VQ = 0,
+ /* Queue on which the device returns event buffers */
+ VIOMMU_EVENT_VQ = 1,
+ /* Number of virtqueues, used to size vqs[] */
+ VIOMMU_NUM_VQS = 2,
+};
+
struct viommu_dev {
struct iommu_device iommu;
struct device *dev;
@@ -37,9 +43,10 @@ struct viommu_dev {
struct ida domain_ids;
- struct virtqueue *vq;
+ struct virtqueue *vqs[VIOMMU_NUM_VQS];
/* Serialize anything touching the request queue */
spinlock_t request_lock;
+ void *evts;
/* Device configuration */
struct iommu_domain_geometry geometry;
@@ -84,6 +91,15 @@ struct viommu_request {
struct list_head list;
};
+#define VIOMMU_FAULT_RESV_MASK 0xffffff00
+
+/*
+ * Buffer posted on the event queue. head aliases the first four bytes of the
+ * buffer; the event handler tests it against VIOMMU_FAULT_RESV_MASK before
+ * treating the buffer as a fault.
+ */
+struct viommu_event {
+ union {
+ u32 head;
+ struct virtio_iommu_fault fault;
+ };
+};
+
#define to_viommu_domain(domain) \
container_of(domain, struct viommu_domain, domain)
@@ -161,12 +177,13 @@ static int viommu_receive_resp(struct viommu_dev *viommu, int nr_sent,
unsigned int len;
int nr_received = 0;
struct viommu_request *req, *pending;
+ struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
pending = list_first_entry_or_null(sent, struct viommu_request, list);
if (WARN_ON(!pending))
return 0;
- while ((req = virtqueue_get_buf(viommu->vq, &len)) != NULL) {
+ while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
if (req != pending) {
dev_warn(viommu->dev, "discarding stale request\n");
continue;
@@ -201,6 +218,7 @@ static int _viommu_send_reqs_sync(struct viommu_dev *viommu,
* up the CPU in case of a device bug.
*/
unsigned long timeout_ms = 1000;
+ struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];
*nr_sent = 0;
@@ -210,15 +228,14 @@ static int _viommu_send_reqs_sync(struct viommu_dev *viommu,
sg[0] = &req->top;
sg[1] = &req->bottom;
- ret = virtqueue_add_sgs(viommu->vq, sg, 1, 1, req,
- GFP_ATOMIC);
+ ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
if (ret)
break;
list_add_tail(&req->list, &pending);
}
- if (i && !virtqueue_kick(viommu->vq))
+ if (i && !virtqueue_kick(vq))
return -EPIPE;
timeout = ktime_add_ms(ktime_get(), timeout_ms * i);
@@ -553,6 +570,70 @@ static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
return ret;
}
+/*
+ * Log a fault reported by the device.
+ *
+ * @viommu: IOMMU device that delivered the event; its struct device is used
+ *          for the rate-limited log message
+ * @fault:  fault record as written by the device (little-endian fields)
+ *
+ * The faulting address and access flags are printed only when the device set
+ * VIRTIO_IOMMU_FAULT_F_ADDRESS. Always returns 0.
+ */
+static int viommu_fault_handler(struct viommu_dev *viommu,
+				struct virtio_iommu_fault *fault)
+{
+	/* Anything other than the two known reasons is reported as unknown */
+	char *why = "unknown";
+	u32 fl = le32_to_cpu(fault->flags);
+	u32 ep = le32_to_cpu(fault->endpoint);
+	u64 addr = le64_to_cpu(fault->address);
+
+	if (fault->reason == VIRTIO_IOMMU_FAULT_R_DOMAIN)
+		why = "domain";
+	else if (fault->reason == VIRTIO_IOMMU_FAULT_R_MAPPING)
+		why = "page";
+
+	/* TODO: find EP by ID and report_iommu_fault */
+	if (!(fl & VIRTIO_IOMMU_FAULT_F_ADDRESS)) {
+		dev_err_ratelimited(viommu->dev, "%s fault from EP %u\n",
+				    why, ep);
+		return 0;
+	}
+
+	dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx [%s%s%s]\n",
+			    why, ep, addr,
+			    fl & VIRTIO_IOMMU_FAULT_F_READ ? "R" : "",
+			    fl & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : "",
+			    fl & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : "");
+
+	return 0;
+}
+
+/*
+ * Callback of the event virtqueue.
+ *
+ * Drains every buffer the device has returned, reports the faults they carry
+ * and hands each buffer back to the device.
+ *
+ * @vq: the event virtqueue; vq->vdev->priv holds the viommu_dev
+ */
+static void viommu_event_handler(struct virtqueue *vq)
+{
+	int ret;
+	unsigned int len;
+	struct scatterlist sg[1];
+	struct viommu_event *evt;
+	struct viommu_dev *viommu = vq->vdev->priv;
+
+	while ((evt = virtqueue_get_buf(vq, &len)) != NULL) {
+		if (len > sizeof(*evt)) {
+			/* Only oversized buffers are rejected; say so */
+			dev_err(viommu->dev,
+				"invalid event buffer (len %u > %zu)\n",
+				len, sizeof(*evt));
+		} else if (!(evt->head & VIOMMU_FAULT_RESV_MASK)) {
+			/* No bit of the reserved mask is set: treat as a fault */
+			viommu_fault_handler(viommu, &evt->fault);
+		}
+
+		/* Recycle the buffer, whatever it contained */
+		sg_init_one(sg, evt, sizeof(*evt));
+		ret = virtqueue_add_inbuf(vq, sg, 1, evt, GFP_ATOMIC);
+		if (ret)
+			dev_err(viommu->dev, "could not add event buffer\n");
+	}
+
+	if (!virtqueue_kick(vq))
+		dev_err(viommu->dev, "kick failed\n");
+}
+
/* IOMMU API */
static bool viommu_capable(enum iommu_cap cap)
@@ -934,19 +1015,44 @@ static struct iommu_ops viommu_ops = {
.put_resv_regions = viommu_put_resv_regions,
};
-static int viommu_init_vq(struct viommu_dev *viommu)
+static int viommu_init_vqs(struct viommu_dev *viommu)
{
struct virtio_device *vdev = dev_to_virtio(viommu->dev);
- const char *name = "request";
- void *ret;
+ const char *names[] = { "request", "event" };
+ vq_callback_t *callbacks[] = {
+ NULL, /* No async requests */
+ viommu_event_handler,
+ };
+
+ return virtio_find_vqs(vdev, VIOMMU_NUM_VQS, viommu->vqs, callbacks,
+ names, NULL);
+}
- ret = virtio_find_single_vq(vdev, NULL, name);
- if (IS_ERR(ret)) {
- dev_err(viommu->dev, "cannot find VQ\n");
- return PTR_ERR(ret);
+static int viommu_fill_evtq(struct viommu_dev *viommu)
+{
+ int i, ret;
+ struct scatterlist sg[1];
+ struct viommu_event *evts;
+ struct virtqueue *vq = viommu->vqs[VIOMMU_EVENT_VQ];
+ size_t nr_evts = min_t(size_t, PAGE_SIZE / sizeof(struct viommu_event),
+ viommu->vqs[VIOMMU_EVENT_VQ]->num_free);
+
+ viommu->evts = evts = devm_kmalloc_array(viommu->dev, nr_evts,
+ sizeof(*evts), GFP_KERNEL);
+ if (!evts)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_evts; i++) {
+ sg_init_one(sg, &evts[i], sizeof(*evts));
+ ret = virtqueue_add_inbuf(vq, sg, 1, &evts[i], GFP_KERNEL);
+ if (ret)
+ return ret;
}
- viommu->vq = ret;
+ if (!virtqueue_kick(vq))
+ return -EPIPE;
+
+ dev_info(viommu->dev, "%zu event buffers\n", nr_evts);
return 0;
}
@@ -969,7 +1075,7 @@ static int viommu_probe(struct virtio_device *vdev)
viommu->dev = dev;
viommu->vdev = vdev;
- ret = viommu_init_vq(viommu);
+ ret = viommu_init_vqs(viommu);
if (ret)
return ret;
@@ -1010,6 +1116,11 @@ static int viommu_probe(struct virtio_device *vdev)
virtio_device_ready(vdev);
+ /* Populate the event queue with buffers */
+ ret = viommu_fill_evtq(viommu);
+ if (ret)
+ goto err_free_vqs;
+
ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
virtio_bus_name(vdev));
if (ret)
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index 2335d9ed4676..d6c0224efe61 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -150,4 +150,22 @@ union virtio_iommu_req {
struct virtio_iommu_req_probe probe;
};
+/* Fault types */
+#define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0
+#define VIRTIO_IOMMU_FAULT_R_DOMAIN 1
+#define VIRTIO_IOMMU_FAULT_R_MAPPING 2
+
+#define VIRTIO_IOMMU_FAULT_F_READ (1 << 0)
+#define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1)
+#define VIRTIO_IOMMU_FAULT_F_EXEC (1 << 2)
+#define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8)
+
+/* Fault record written by the device into an event buffer */
+struct virtio_iommu_fault {
+ /* One of VIRTIO_IOMMU_FAULT_R_* */
+ __u8 reason;
+ __u8 padding[3];
+ /* Combination of VIRTIO_IOMMU_FAULT_F_*, little-endian */
+ __le32 flags;
+ /* ID of the endpoint that faulted, little-endian */
+ __le32 endpoint;
+ /* Faulting address; the driver reports it only when VIRTIO_IOMMU_FAULT_F_ADDRESS is set */
+ __le64 address;
+} __packed;
+
#endif
--
2.16.1
^ permalink raw reply related
* [PATCH 4/4] vfio: Allow type-1 IOMMU instantiation with a virtio-iommu
From: Jean-Philippe Brucker @ 2018-02-14 14:53 UTC (permalink / raw)
To: iommu, kvm, virtualization, virtio-dev, kvmarm
Cc: jayachandran.nair, lorenzo.pieralisi, tnowicki, mst, marc.zyngier,
will.deacon, jintack, eric.auger, robin.murphy, joro,
eric.auger.pro
In-Reply-To: <20180214145340.1223-1-jean-philippe.brucker@arm.com>
When enabling both VFIO and VIRTIO_IOMMU modules, automatically select
VFIO_IOMMU_TYPE1 as well.
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
drivers/vfio/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index c84333eb5eb5..65a1e691110c 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -21,7 +21,7 @@ config VFIO_VIRQFD
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
- select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
+ select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3 || VIRTIO_IOMMU)
select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
--
2.16.1
^ permalink raw reply related
* Re: [PATCH 4/4] vfio: Allow type-1 IOMMU instantiation with a virtio-iommu
From: Alex Williamson @ 2018-02-14 15:26 UTC (permalink / raw)
To: Jean-Philippe Brucker
Cc: virtio-dev, jayachandran.nair, lorenzo.pieralisi, tnowicki, kvm,
mst, joro, will.deacon, virtualization, marc.zyngier, iommu,
jintack, eric.auger, robin.murphy, kvmarm, eric.auger.pro
In-Reply-To: <20180214145340.1223-5-jean-philippe.brucker@arm.com>
On Wed, 14 Feb 2018 14:53:40 +0000
Jean-Philippe Brucker <jean-philippe.brucker@arm.com> wrote:
> When enabling both VFIO and VIRTIO_IOMMU modules, automatically select
> VFIO_IOMMU_TYPE1 as well.
>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
> drivers/vfio/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
> index c84333eb5eb5..65a1e691110c 100644
> --- a/drivers/vfio/Kconfig
> +++ b/drivers/vfio/Kconfig
> @@ -21,7 +21,7 @@ config VFIO_VIRQFD
> menuconfig VFIO
> tristate "VFIO Non-Privileged userspace driver framework"
> depends on IOMMU_API
> - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
> + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3 || VIRTIO_IOMMU)
> select ANON_INODES
> help
> VFIO provides a framework for secure userspace device drivers.
Why are we basing this on specific IOMMU drivers in the first place?
Only ARM is doing that. Shouldn't IOMMU_API only be enabled for ARM
targets that support it and therefore we can forget about the specific
IOMMU drivers? Thanks,
Alex
^ permalink raw reply
* Re: [PATCH 4/4] vfio: Allow type-1 IOMMU instantiation with a virtio-iommu
From: Robin Murphy @ 2018-02-14 15:35 UTC (permalink / raw)
To: Alex Williamson, Jean-Philippe Brucker
Cc: virtio-dev, jayachandran.nair, lorenzo.pieralisi, tnowicki, kvm,
mst, joro, will.deacon, virtualization, marc.zyngier, iommu,
jintack, eric.auger, kvmarm, eric.auger.pro
In-Reply-To: <20180214082639.54556efb@w520.home>
On 14/02/18 15:26, Alex Williamson wrote:
> On Wed, 14 Feb 2018 14:53:40 +0000
> Jean-Philippe Brucker <jean-philippe.brucker@arm.com> wrote:
>
>> When enabling both VFIO and VIRTIO_IOMMU modules, automatically select
>> VFIO_IOMMU_TYPE1 as well.
>>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>> ---
>> drivers/vfio/Kconfig | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
>> index c84333eb5eb5..65a1e691110c 100644
>> --- a/drivers/vfio/Kconfig
>> +++ b/drivers/vfio/Kconfig
>> @@ -21,7 +21,7 @@ config VFIO_VIRQFD
>> menuconfig VFIO
>> tristate "VFIO Non-Privileged userspace driver framework"
>> depends on IOMMU_API
>> - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
>> + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3 || VIRTIO_IOMMU)
>> select ANON_INODES
>> help
>> VFIO provides a framework for secure userspace device drivers.
>
> Why are we basing this on specific IOMMU drivers in the first place?
> Only ARM is doing that. Shouldn't IOMMU_API only be enabled for ARM
> targets that support it and therefore we can forget about the specific
> IOMMU drivers? Thanks,
Makes sense - the majority of ARM systems (and mobile/embedded ARM64
ones) making use of IOMMU_API won't actually support VFIO, but it can't
hurt to allow them to select the type 1 driver regardless. Especially as
multiplatform configs are liable to be pulling in the SMMU driver(s) anyway.
Robin.
^ permalink raw reply
* Re: [PATCH v2 4/6] crypto: virtio: convert to new crypto engine API
From: Michael S. Tsirkin @ 2018-02-14 15:51 UTC (permalink / raw)
To: Corentin Labbe
Cc: herbert, corbet, linux-doc, linux-kernel, fabien.dessenne,
virtualization, linux-sunxi, linux-crypto, mcoquelin.stm32, davem,
linux-arm-kernel, alexandre.torgue
In-Reply-To: <20180126191534.17569-5-clabbe.montjoie@gmail.com>
On Fri, Jan 26, 2018 at 08:15:32PM +0100, Corentin Labbe wrote:
> This patch convert the driver to the new crypto engine API.
>
> Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Pls queue when/if rest of changes go in.
> ---
> drivers/crypto/virtio/virtio_crypto_algs.c | 16 ++++++++++------
> drivers/crypto/virtio/virtio_crypto_common.h | 3 +--
> drivers/crypto/virtio/virtio_crypto_core.c | 3 ---
> 3 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c
> index abe8c15450df..ba190cfa7aa1 100644
> --- a/drivers/crypto/virtio/virtio_crypto_algs.c
> +++ b/drivers/crypto/virtio/virtio_crypto_algs.c
> @@ -29,6 +29,7 @@
>
>
> struct virtio_crypto_ablkcipher_ctx {
> + struct crypto_engine_ctx enginectx;
> struct virtio_crypto *vcrypto;
> struct crypto_tfm *tfm;
>
> @@ -491,7 +492,7 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
> vc_sym_req->ablkcipher_req = req;
> vc_sym_req->encrypt = true;
>
> - return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
> + return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
> }
>
> static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
> @@ -511,7 +512,7 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
> vc_sym_req->ablkcipher_req = req;
> vc_sym_req->encrypt = false;
>
> - return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
> + return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
> }
>
> static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
> @@ -521,6 +522,9 @@ static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
> tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_sym_request);
> ctx->tfm = tfm;
>
> + ctx->enginectx.op.do_one_request = virtio_crypto_ablkcipher_crypt_req;
> + ctx->enginectx.op.prepare_request = NULL;
> + ctx->enginectx.op.unprepare_request = NULL;
> return 0;
> }
>
> @@ -538,9 +542,9 @@ static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm)
> }
>
> int virtio_crypto_ablkcipher_crypt_req(
> - struct crypto_engine *engine,
> - struct ablkcipher_request *req)
> + struct crypto_engine *engine, void *vreq)
> {
> + struct ablkcipher_request *req = container_of(vreq, struct ablkcipher_request, base);
> struct virtio_crypto_sym_request *vc_sym_req =
> ablkcipher_request_ctx(req);
> struct virtio_crypto_request *vc_req = &vc_sym_req->base;
> @@ -561,8 +565,8 @@ static void virtio_crypto_ablkcipher_finalize_req(
> struct ablkcipher_request *req,
> int err)
> {
> - crypto_finalize_cipher_request(vc_sym_req->base.dataq->engine,
> - req, err);
> + crypto_finalize_ablkcipher_request(vc_sym_req->base.dataq->engine,
> + req, err);
> kzfree(vc_sym_req->iv);
> virtcrypto_clear_request(&vc_sym_req->base);
> }
> diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
> index e976539a05d9..72621bd67211 100644
> --- a/drivers/crypto/virtio/virtio_crypto_common.h
> +++ b/drivers/crypto/virtio/virtio_crypto_common.h
> @@ -107,8 +107,7 @@ struct virtio_crypto *virtcrypto_get_dev_node(int node);
> int virtcrypto_dev_start(struct virtio_crypto *vcrypto);
> void virtcrypto_dev_stop(struct virtio_crypto *vcrypto);
> int virtio_crypto_ablkcipher_crypt_req(
> - struct crypto_engine *engine,
> - struct ablkcipher_request *req);
> + struct crypto_engine *engine, void *vreq);
>
> void
> virtcrypto_clear_request(struct virtio_crypto_request *vc_req);
> diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
> index ff1410a32c2b..83326986c113 100644
> --- a/drivers/crypto/virtio/virtio_crypto_core.c
> +++ b/drivers/crypto/virtio/virtio_crypto_core.c
> @@ -111,9 +111,6 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
> ret = -ENOMEM;
> goto err_engine;
> }
> -
> - vi->data_vq[i].engine->cipher_one_request =
> - virtio_crypto_ablkcipher_crypt_req;
> }
>
> kfree(names);
> --
> 2.13.6
^ permalink raw reply
* [vhost:vhost 22/28] drivers/firmware/qemu_fw_cfg.c:35:10: fatal error: linux/fw_cfg.h: No such file or directory
From: kbuild test robot @ 2018-02-14 18:21 UTC (permalink / raw)
To: Marc-André Lureau
Cc: netdev, Michael S. Tsirkin, kbuild-all, kvm, virtualization
[-- Attachment #1: Type: text/plain, Size: 929 bytes --]
tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
head: 20b12f92d80433b9bd0d54b9712897501ac66fdd
commit: f59055103f6930c771fc597c42a92cbe997a765d [22/28] fw_cfg: add a public uapi header
config: i386-randconfig-i0-201806 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
git checkout f59055103f6930c771fc597c42a92cbe997a765d
# save the attached .config to linux build tree
make ARCH=i386
All errors (new ones prefixed by >>):
>> drivers/firmware/qemu_fw_cfg.c:35:10: fatal error: linux/fw_cfg.h: No such file or directory
#include <linux/fw_cfg.h>
^~~~~~~~~~~~~~~~
compilation terminated.
vim +35 drivers/firmware/qemu_fw_cfg.c
> 35 #include <linux/fw_cfg.h>
36
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31348 bytes --]
[-- Attachment #3: Type: text/plain, Size: 183 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [vhost:vhost 24/24] drivers/firmware/qemu_fw_cfg.c:499:22: error: storage size of 'files' isn't known
From: kbuild test robot @ 2018-02-14 20:27 UTC (permalink / raw)
To: Marc-André Lureau
Cc: netdev, Michael S. Tsirkin, kbuild-all, kvm, virtualization
[-- Attachment #1: Type: text/plain, Size: 1905 bytes --]
tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
head: 5d457fe6aeaab9d0a1665eafc8af7139bc6b6f2e
commit: 5d457fe6aeaab9d0a1665eafc8af7139bc6b6f2e [24/24] fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
config: i386-randconfig-x015-201806 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
git checkout 5d457fe6aeaab9d0a1665eafc8af7139bc6b6f2e
# save the attached .config to linux build tree
make ARCH=i386
All errors (new ones prefixed by >>):
drivers/firmware/qemu_fw_cfg.c: In function 'fw_cfg_register_dir_entries':
>> drivers/firmware/qemu_fw_cfg.c:499:22: error: storage size of 'files' isn't known
struct fw_cfg_files files;
^~~~~
drivers/firmware/qemu_fw_cfg.c:499:22: warning: unused variable 'files' [-Wunused-variable]
vim +499 drivers/firmware/qemu_fw_cfg.c
493
494 /* iterate over all fw_cfg directory entries, registering each one */
495 static int fw_cfg_register_dir_entries(void)
496 {
497 int ret = 0;
498 u32 count, i;
> 499 struct fw_cfg_files files;
500 struct fw_cfg_file *dir;
501 size_t dir_size;
502
503 fw_cfg_read_blob(FW_CFG_FILE_DIR, &files.count, 0, sizeof(files.count));
504 count = be32_to_cpu(files.count);
505 dir_size = count * sizeof(struct fw_cfg_file);
506
507 dir = kmalloc(dir_size, GFP_KERNEL);
508 if (!dir)
509 return -ENOMEM;
510
511 fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files.count), dir_size);
512
513 for (i = 0; i < count; i++) {
514 ret = fw_cfg_register_file(&dir[i]);
515 if (ret)
516 break;
517 }
518
519 kfree(dir);
520 return ret;
521 }
522
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32274 bytes --]
[-- Attachment #3: Type: text/plain, Size: 183 bytes --]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH 4/4] vfio: Allow type-1 IOMMU instantiation with a virtio-iommu
From: Jean-Philippe Brucker @ 2018-02-15 13:53 UTC (permalink / raw)
To: Robin Murphy, Alex Williamson
Cc: virtio-dev@lists.oasis-open.org, jayachandran.nair@cavium.com,
Lorenzo Pieralisi, tnowicki@caviumnetworks.com,
kvm@vger.kernel.org, mst@redhat.com, joro@8bytes.org, Will Deacon,
virtualization@lists.linux-foundation.org, Marc Zyngier,
iommu@lists.linux-foundation.org, jintack@cs.columbia.edu,
eric.auger@redhat.com, kvmarm@lists.cs.columbia.edu,
eric.auger.pro@gmail.com
In-Reply-To: <9f98aa85-3160-e285-cacd-2f429c58a775@arm.com>
On 14/02/18 15:35, Robin Murphy wrote:
> On 14/02/18 15:26, Alex Williamson wrote:
>> On Wed, 14 Feb 2018 14:53:40 +0000
>> Jean-Philippe Brucker <jean-philippe.brucker@arm.com> wrote:
>>
>>> When enabling both VFIO and VIRTIO_IOMMU modules, automatically select
>>> VFIO_IOMMU_TYPE1 as well.
>>>
>>> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
>>> ---
>>> drivers/vfio/Kconfig | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
>>> index c84333eb5eb5..65a1e691110c 100644
>>> --- a/drivers/vfio/Kconfig
>>> +++ b/drivers/vfio/Kconfig
>>> @@ -21,7 +21,7 @@ config VFIO_VIRQFD
>>> menuconfig VFIO
>>> tristate "VFIO Non-Privileged userspace driver framework"
>>> depends on IOMMU_API
>>> - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
>>> + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3 || VIRTIO_IOMMU)
>>> select ANON_INODES
>>> help
>>> VFIO provides a framework for secure userspace device drivers.
>>
>> Why are we basing this on specific IOMMU drivers in the first place?
>> Only ARM is doing that. Shouldn't IOMMU_API only be enabled for ARM
>> targets that support it and therefore we can forget about the specific
>> IOMMU drivers? Thanks,
>
> Makes sense - the majority of ARM systems (and mobile/embedded ARM64
> ones) making use of IOMMU_API won't actually support VFIO, but it can't
> hurt to allow them to select the type 1 driver regardless. Especially as
> multiplatform configs are liable to be pulling in the SMMU driver(s) anyway.
Cool, then I'll change that line to:
+ select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
Thanks,
Jean
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Tomeu Vizoso @ 2018-02-15 15:28 UTC (permalink / raw)
To: Gerd Hoffmann
Cc: Michael S. Tsirkin, David Airlie, linux-kernel, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <20180212114540.iygbha554busy4ip@sirius.home.kraxel.org>
On 02/12/2018 12:45 PM, Gerd Hoffmann wrote:
>>>> 4. QEMU pops data+buffers from the virtqueue, looks up shmem FD for each
>>>> resource, sends data + FDs to the compositor with SCM_RIGHTS
>>>
>>> BTW: Is there a 1:1 relationship between buffers and shmem blocks? Or
>>> does the wayland protocol allow for offsets in buffer meta data, so you
>>> can place multiple buffers in a single shmem block?
>>
>> The latter:
>>
https://wayland.freedesktop.org/docs/html/apa.html#protocol-spec-wl_shm_pool
>
> Ah, good, that makes it a lot easier.
>
> So, yes, using ivshmem would be one option. Tricky part here is the
> buffer management though. It's just a raw piece of memory. The guest
> proxy could mmap the pci bar and manage it. But then it is again either
> unmodified guest + copying the data, or modified client (which requests
> buffers from guest proxy) for zero-copy.
What if at VIRTIO_GPU_CMD_RESOURCE_CREATE_2D time we created an ivshmem
device to back that resource? The ivshmem device would in turn be backed
by a hostmem device that wraps a shmem FD.
The guest client can then export that resource/BO and pass the FD to the
guest proxy. The guest proxy would import it and put the resource_id in
the equivalent message in our protocol extension.
QEMU would get that resource id from vsock, look up which hostmem device
is associated with that resource, and pass its FD to the compositor.
> We also need a solution for the keymap shmem block. I guess the keymap
> doesn't change all that often, so maybe it is easiest to just copy it
> over (host proxy -> guest proxy) instead of trying to map the host shmem
> into the guest?
Not sure if that would be much simpler than creating an ivshmem+hostmem
combo that wraps the incoming shmem FD and then having virtio-gpu create
a BO that imports it.
Regards,
Tomeu
^ permalink raw reply
* Re: [PATCH v2 0/6] crypto: engine - Permit to enqueue all async requests
From: Herbert Xu @ 2018-02-15 15:51 UTC (permalink / raw)
To: Corentin Labbe
Cc: alexandre.torgue, corbet, mst, linux-doc, linux-kernel,
fabien.dessenne, virtualization, linux-sunxi, linux-crypto,
mcoquelin.stm32, davem, linux-arm-kernel
In-Reply-To: <20180126191534.17569-1-clabbe.montjoie@gmail.com>
On Fri, Jan 26, 2018 at 08:15:28PM +0100, Corentin Labbe wrote:
> Hello
>
> The current crypto_engine supports only ahash and ablkcipher requests.
> My first patch, which tried to add skcipher, was Nacked: it would add too many functions,
> and adding other algs (aead, asymmetric_key) would make the situation worse.
>
> This patchset removes all alg-specific stuff and now only processes generic crypto_async_request.
>
> The requests handler function pointer are now moved out of struct engine and
> are now stored directly in a crypto_engine_reqctx.
>
> The original proposal of Herbert [1] cannot be done completely since the crypto_engine
> could only dequeue crypto_async_request and it is impossible to access any request_ctx
> without knowing the underlying request type.
>
> So I did something close to what was requested: adding crypto_engine_reqctx in the TFM context.
> Note that the current implementation expects that crypto_engine_reqctx
> is the first member of the context.
>
> The first patch is an attempt to document the crypto engine API.
> The second patch converts the crypto engine to the new way,
> while the following patches convert the 4 existing users of crypto_engine.
> Note that this split breaks bisection, so probably the final commit will be all merged.
>
> Apart from virtio, all 4 latest patches were compile tested only.
> But the crypto engine is tested with my new sun8i-ce driver.
>
> Regards
>
> [1] https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1474434.html
>
> Changes since V1:
> - renamed crypto_engine_reqctx to crypto_engine_ctx
> - indentation fix in function parameter
> - do not export crypto_transfer_request
> - Add aead support
> - crypto_finalize_request is now static
>
> Changes since RFC:
> - Added a documentation patch
> - Added patch for stm32-cryp
> - Changed parameter of all crypto_engine_op functions from
> crypto_async_request to void*
> - Reintroduced crypto_transfer_xxx_request_to_engine functions
>
> Corentin Labbe (6):
> Documentation: crypto: document crypto engine API
> crypto: engine - Permit to enqueue all async requests
> crypto: omap: convert to new crypto engine API
> crypto: virtio: convert to new crypto engine API
> crypto: stm32-hash: convert to the new crypto engine API
> crypto: stm32-cryp: convert to the new crypto engine API
All applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [vhost:vhost 24/24] drivers/firmware/qemu_fw_cfg.c:499:22: error: storage size of 'files' isn't known
From: Michael S. Tsirkin @ 2018-02-15 18:26 UTC (permalink / raw)
To: Marc-Andre Lureau
Cc: kvm, netdev, virtualization, kbuild-all, Marc-André Lureau,
kbuild test robot
In-Reply-To: <CAMxuvawhzjNA8gQSCpVtff-A6-Umg6fPz37nPhqRrDiRwKqUCQ@mail.gmail.com>
On Thu, Feb 15, 2018 at 10:46:50AM +0100, Marc-Andre Lureau wrote:
> Hi
>
> On Wed, Feb 14, 2018 at 9:27 PM, kbuild test robot
> <fengguang.wu@intel.com> wrote:
> > tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
> > head: 5d457fe6aeaab9d0a1665eafc8af7139bc6b6f2e
> > commit: 5d457fe6aeaab9d0a1665eafc8af7139bc6b6f2e [24/24] fw_cfg: fix sparse warnings around FW_CFG_FILE_DIR read
> > config: i386-randconfig-x015-201806 (attached as .config)
> > compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
> > reproduce:
> > git checkout 5d457fe6aeaab9d0a1665eafc8af7139bc6b6f2e
> > # save the attached .config to linux build tree
> > make ARCH=i386
> >
> > All errors (new ones prefixed by >>):
> >
> > drivers/firmware/qemu_fw_cfg.c: In function 'fw_cfg_register_dir_entries':
> >>> drivers/firmware/qemu_fw_cfg.c:499:22: error: storage size of 'files' isn't known
> > struct fw_cfg_files files;
> > ^~~~~
>
> struct fw_cfg_files {
> __be32 count; /* number of entries */
> struct fw_cfg_file f[];
> };
>
> Interesting, I don't have that warning with 7.3.1.
>
> I thought the size would be sizeof(count) by standard.
>
> I replaced it with a __be32 files_count variable instead.
>
> > drivers/firmware/qemu_fw_cfg.c:499:22: warning: unused variable 'files' [-Wunused-variable]
> >
>
> files.count is used 3 lines below, that looks like a compiler bug to me.
No - I tried dropping one patch out of the series; this did not work out.
So the whole series is out for now.
> > vim +499 drivers/firmware/qemu_fw_cfg.c
> >
> > 493
> > 494 /* iterate over all fw_cfg directory entries, registering each one */
> > 495 static int fw_cfg_register_dir_entries(void)
> > 496 {
> > 497 int ret = 0;
> > 498 u32 count, i;
> > > 499 struct fw_cfg_files files;
> > 500 struct fw_cfg_file *dir;
> > 501 size_t dir_size;
> > 502
> > 503 fw_cfg_read_blob(FW_CFG_FILE_DIR, &files.count, 0, sizeof(files.count));
> > 504 count = be32_to_cpu(files.count);
> > 505 dir_size = count * sizeof(struct fw_cfg_file);
> > 506
> > 507 dir = kmalloc(dir_size, GFP_KERNEL);
> > 508 if (!dir)
> > 509 return -ENOMEM;
> > 510
> > 511 fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files.count), dir_size);
> > 512
> > 513 for (i = 0; i < count; i++) {
> > 514 ret = fw_cfg_register_file(&dir[i]);
> > 515 if (ret)
> > 516 break;
> > 517 }
> > 518
> > 519 kfree(dir);
> > 520 return ret;
> > 521 }
> > 522
> >
> > ---
> > 0-DAY kernel test infrastructure Open Source Technology Center
> > https://lists.01.org/pipermail/kbuild-all Intel Corporation
^ permalink raw reply
* [PULL] virtio: cleanups and fixes
From: Michael S. Tsirkin @ 2018-02-15 18:27 UTC (permalink / raw)
To: Linus Torvalds
Cc: kvm, mst, netdev, cohuck, linux-kernel, stable, virtualization,
groug
The following changes since commit d25cc43c6775bff6b8e3dad97c747954b805e421:
vhost: don't hold onto file pointer for VHOST_SET_LOG_FD (2018-02-01 16:26:47 +0200)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus
for you to fetch changes up to fa08a3b4eba59429cf7e241a7af089103e79160f:
virtio/s390: implement PM operations for virtio_ccw (2018-02-14 14:34:09 +0200)
----------------------------------------------------------------
virtio: bugfixes
This includes a bugfix for virtio 9p fs.
It also fixes hibernation for s390 guests with virtio devices.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
----------------------------------------------------------------
Christian Borntraeger (1):
virtio/s390: implement PM operations for virtio_ccw
Greg Kurz (1):
9p/trans_virtio: discard zero-length reply
drivers/s390/virtio/virtio_ccw.c | 29 +++++++++++++++++++++++++++++
net/9p/trans_virtio.c | 3 ++-
2 files changed, 31 insertions(+), 1 deletion(-)
^ permalink raw reply
* Re: [PATCH v3 1/2] drm/virtio: Add window server support
From: Gerd Hoffmann @ 2018-02-16 10:48 UTC (permalink / raw)
To: Tomeu Vizoso
Cc: Michael S. Tsirkin, David Airlie, linux-kernel, dri-devel,
virtualization, Zach Reizner, kernel
In-Reply-To: <37179029-8ccb-8eb2-0901-04b64cef3608@collabora.com>
> > Yes.
>
> Would it make sense for virtio-gpu to map buffers to the guest via PCI BARs?
> So we can use a single drm driver for both 2d and 3d.
Should be doable.
I'm wondering two things though:
(1) Will shmem actually help avoiding a copy?
virtio-gpu with virgl will (even if the guest doesn't use opengl) store
the resources in gpu memory. So the VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D
copy goes from guest memory directly to gpu memory, and if we export
that as dma-buf and pass it to the wayland server it should be able to
render it without doing another copy.
What does the wl_shm_pool workflow look like inside the wayland server?
Can it ask the gpu to render directly from the pool? Or is a copy to
gpu memory needed here? If the latter we would effectively trade one
copy for another ...
(2) Could we handle the mapping without needing shmem?
Possibly we could extend the vgem driver. So we pass in an iov (which
qemu gets from guest via VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), get
back a drm object. Which effectively creates drm objects on the host
which match the drm object in the guest (both backed by the same set of
physical pages).
cheers,
Gerd
^ permalink raw reply
* [PATCH 1/4] qxl: remove qxl_io_log()
From: Gerd Hoffmann @ 2018-02-16 13:28 UTC (permalink / raw)
To: dri-devel
Cc: David Airlie, Dave Airlie, open list,
open list:DRM DRIVER FOR QXL VIRTUAL GPU
In-Reply-To: <20180216132832.5374-1-kraxel@redhat.com>
qxl_io_log() sends messages over to the host (qemu) for logging.
Remove the function and all callers, we can just use standard
DRM_DEBUG calls (and if needed a serial console).
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
drivers/gpu/drm/qxl/qxl_drv.h | 3 ---
drivers/gpu/drm/qxl/qxl_cmd.c | 34 ++--------------------------------
drivers/gpu/drm/qxl/qxl_display.c | 27 ++++-----------------------
drivers/gpu/drm/qxl/qxl_fb.c | 2 --
drivers/gpu/drm/qxl/qxl_irq.c | 3 +--
5 files changed, 7 insertions(+), 62 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 00a1a66b05..4b89840173 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -298,9 +298,6 @@ struct qxl_device {
int monitors_config_height;
};
-/* forward declaration for QXL_INFO_IO */
-__printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...);
-
extern const struct drm_ioctl_desc qxl_ioctls[];
extern int qxl_max_ioctl;
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index c0fb52c6d4..850f8d7d37 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -341,12 +341,9 @@ int qxl_io_update_area(struct qxl_device *qdev, struct qxl_bo *surf,
surface_height = surf->surf.height;
if (area->left < 0 || area->top < 0 ||
- area->right > surface_width || area->bottom > surface_height) {
- qxl_io_log(qdev, "%s: not doing area update for "
- "%d, (%d,%d,%d,%d) (%d,%d)\n", __func__, surface_id, area->left,
- area->top, area->right, area->bottom, surface_width, surface_height);
+ area->right > surface_width || area->bottom > surface_height)
return -EINVAL;
- }
+
mutex_lock(&qdev->update_area_mutex);
qdev->ram_header->update_area = *area;
qdev->ram_header->update_surface = surface_id;
@@ -407,20 +404,6 @@ void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id)
wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC);
}
-void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vsnprintf(qdev->ram_header->log_buf, QXL_LOG_BUF_SIZE, fmt, args);
- va_end(args);
- /*
- * DO not do a DRM output here - this will call printk, which will
- * call back into qxl for rendering (qxl_fb)
- */
- outb(0, qdev->io_base + QXL_IO_LOG);
-}
-
void qxl_io_reset(struct qxl_device *qdev)
{
outb(0, qdev->io_base + QXL_IO_RESET);
@@ -428,19 +411,6 @@ void qxl_io_reset(struct qxl_device *qdev)
void qxl_io_monitors_config(struct qxl_device *qdev)
{
- qxl_io_log(qdev, "%s: %d [%dx%d+%d+%d]\n", __func__,
- qdev->monitors_config ?
- qdev->monitors_config->count : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].width : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].height : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].x : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].y : -1
- );
-
wait_for_io_cmd(qdev, 0, QXL_IO_MONITORS_CONFIG_ASYNC);
}
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 9a9214ae0f..a0b6bced03 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -48,12 +48,8 @@ static void qxl_alloc_client_monitors_config(struct qxl_device *qdev, unsigned c
qdev->client_monitors_config = kzalloc(
sizeof(struct qxl_monitors_config) +
sizeof(struct qxl_head) * count, GFP_KERNEL);
- if (!qdev->client_monitors_config) {
- qxl_io_log(qdev,
- "%s: allocation failure for %u heads\n",
- __func__, count);
+ if (!qdev->client_monitors_config)
return;
- }
}
qdev->client_monitors_config->count = count;
}
@@ -74,12 +70,8 @@ static int qxl_display_copy_rom_client_monitors_config(struct qxl_device *qdev)
num_monitors = qdev->rom->client_monitors_config.count;
crc = crc32(0, (const uint8_t *)&qdev->rom->client_monitors_config,
sizeof(qdev->rom->client_monitors_config));
- if (crc != qdev->rom->client_monitors_config_crc) {
- qxl_io_log(qdev, "crc mismatch: have %X (%zd) != %X\n", crc,
- sizeof(qdev->rom->client_monitors_config),
- qdev->rom->client_monitors_config_crc);
+ if (crc != qdev->rom->client_monitors_config_crc)
return MONITORS_CONFIG_BAD_CRC;
- }
if (!num_monitors) {
DRM_DEBUG_KMS("no client monitors configured\n");
return status;
@@ -170,12 +162,10 @@ void qxl_display_read_client_monitors_config(struct qxl_device *qdev)
udelay(5);
}
if (status == MONITORS_CONFIG_BAD_CRC) {
- qxl_io_log(qdev, "config: bad crc\n");
DRM_DEBUG_KMS("ignoring client monitors config: bad crc");
return;
}
if (status == MONITORS_CONFIG_UNCHANGED) {
- qxl_io_log(qdev, "config: unchanged\n");
DRM_DEBUG_KMS("ignoring client monitors config: unchanged");
return;
}
@@ -385,14 +375,6 @@ static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
- struct drm_device *dev = crtc->dev;
- struct qxl_device *qdev = dev->dev_private;
-
- qxl_io_log(qdev, "%s: (%d,%d) => (%d,%d)\n",
- __func__,
- mode->hdisplay, mode->vdisplay,
- adjusted_mode->hdisplay,
- adjusted_mode->vdisplay);
return true;
}
@@ -403,10 +385,9 @@ qxl_send_monitors_config(struct qxl_device *qdev)
BUG_ON(!qdev->ram_header->monitors_config);
- if (qdev->monitors_config->count == 0) {
- qxl_io_log(qdev, "%s: 0 monitors??\n", __func__);
+ if (qdev->monitors_config->count == 0)
return;
- }
+
for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
struct qxl_head *head = &qdev->monitors_config->heads[i];
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index 23af3e3526..603bf564ef 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -185,8 +185,6 @@ static int qxlfb_framebuffer_dirty(struct drm_framebuffer *fb,
/*
* we are using a shadow draw buffer, at qdev->surface0_shadow
*/
- qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]\n", clips->x1, clips->x2,
- clips->y1, clips->y2);
image->dx = clips->x1;
image->dy = clips->y1;
image->width = clips->x2 - clips->x1;
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 23a40106ab..3bb31add63 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -57,10 +57,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg)
* to avoid endless loops).
*/
qdev->irq_received_error++;
- qxl_io_log(qdev, "%s: driver is in bug mode.\n", __func__);
+ DRM_WARN("driver is in bug mode\n");
}
if (pending & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG) {
- qxl_io_log(qdev, "QXL_INTERRUPT_CLIENT_MONITORS_CONFIG\n");
schedule_work(&qdev->client_monitors_config_work);
}
qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
--
2.9.3
^ permalink raw reply related
* [PATCH 2/4] qxl: move qxl_send_monitors_config()
From: Gerd Hoffmann @ 2018-02-16 13:28 UTC (permalink / raw)
To: dri-devel
Cc: David Airlie, Dave Airlie, open list,
open list:DRM DRIVER FOR QXL VIRTUAL GPU
In-Reply-To: <20180216132832.5374-1-kraxel@redhat.com>
Needed to avoid a forward declaration in a followup patch.
Pure code move, no functional change.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
drivers/gpu/drm/qxl/qxl_display.c | 47 +++++++++++++++++++--------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index a0b6bced03..8efd07f677 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -258,6 +258,29 @@ static int qxl_add_common_modes(struct drm_connector *connector,
return i - 1;
}
+static void qxl_send_monitors_config(struct qxl_device *qdev)
+{
+ int i;
+
+ BUG_ON(!qdev->ram_header->monitors_config);
+
+ if (qdev->monitors_config->count == 0)
+ return;
+
+ for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
+ struct qxl_head *head = &qdev->monitors_config->heads[i];
+
+ if (head->y > 8192 || head->x > 8192 ||
+ head->width > 8192 || head->height > 8192) {
+ DRM_ERROR("head %d wrong: %dx%d+%d+%d\n",
+ i, head->width, head->height,
+ head->x, head->y);
+ return;
+ }
+ }
+ qxl_io_monitors_config(qdev);
+}
+
static void qxl_crtc_atomic_flush(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state)
{
@@ -378,30 +401,6 @@ static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc,
return true;
}
-static void
-qxl_send_monitors_config(struct qxl_device *qdev)
-{
- int i;
-
- BUG_ON(!qdev->ram_header->monitors_config);
-
- if (qdev->monitors_config->count == 0)
- return;
-
- for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
- struct qxl_head *head = &qdev->monitors_config->heads[i];
-
- if (head->y > 8192 || head->x > 8192 ||
- head->width > 8192 || head->height > 8192) {
- DRM_ERROR("head %d wrong: %dx%d+%d+%d\n",
- i, head->width, head->height,
- head->x, head->y);
- return;
- }
- }
- qxl_io_monitors_config(qdev);
-}
-
static void qxl_monitors_config_set(struct qxl_device *qdev,
int index,
unsigned x, unsigned y,
--
2.9.3
^ permalink raw reply related
* [PATCH 3/4] qxl: hook monitors_config updates into crtc, not encoder.
From: Gerd Hoffmann @ 2018-02-16 13:28 UTC (permalink / raw)
To: dri-devel
Cc: David Airlie, Dave Airlie, open list,
open list:DRM DRIVER FOR QXL VIRTUAL GPU
In-Reply-To: <20180216132832.5374-1-kraxel@redhat.com>
The encoder callbacks are only called in case the video mode changes.
So any layout changes without mode changes will go unnoticed.
Add qxl_crtc_update_monitors_config(), based on the old
qxl_write_monitors_config_for_encoder() function. Hook it into the
enable, disable and flush atomic crtc callbacks. Remove monitors_config
updates from all other places.
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1544322
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
drivers/gpu/drm/qxl/qxl_cmd.c | 2 +
drivers/gpu/drm/qxl/qxl_display.c | 156 ++++++++++++++++----------------------
2 files changed, 66 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 850f8d7d37..95db20f214 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -371,6 +371,7 @@ void qxl_io_flush_surfaces(struct qxl_device *qdev)
void qxl_io_destroy_primary(struct qxl_device *qdev)
{
wait_for_io_cmd(qdev, 0, QXL_IO_DESTROY_PRIMARY_ASYNC);
+ qdev->primary_created = false;
}
void qxl_io_create_primary(struct qxl_device *qdev,
@@ -396,6 +397,7 @@ void qxl_io_create_primary(struct qxl_device *qdev,
create->type = QXL_SURF_TYPE_PRIMARY;
wait_for_io_cmd(qdev, 0, QXL_IO_CREATE_PRIMARY_ASYNC);
+ qdev->primary_created = true;
}
void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id)
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 8efd07f677..b7dac01f5e 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -281,6 +281,66 @@ static void qxl_send_monitors_config(struct qxl_device *qdev)
qxl_io_monitors_config(qdev);
}
+static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc,
+ const char *reason)
+{
+ struct drm_device *dev = crtc->dev;
+ struct qxl_device *qdev = dev->dev_private;
+ struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
+ struct qxl_head head;
+ int oldcount, i = qcrtc->index;
+
+ if (!qdev->primary_created) {
+ DRM_DEBUG_KMS("no primary surface, skip (%s)\n", reason);
+ return;
+ }
+
+ if (!qdev->monitors_config ||
+ qdev->monitors_config->max_allowed <= i)
+ return;
+
+ head.id = i;
+ head.flags = 0;
+ oldcount = qdev->monitors_config->count;
+ if (crtc->state->active) {
+ struct drm_display_mode *mode = &crtc->mode;
+ head.width = mode->hdisplay;
+ head.height = mode->vdisplay;
+ head.x = crtc->x;
+ head.y = crtc->y;
+ if (qdev->monitors_config->count < i + 1)
+ qdev->monitors_config->count = i + 1;
+ } else if (i > 0) {
+ head.width = 0;
+ head.height = 0;
+ head.x = 0;
+ head.y = 0;
+ if (qdev->monitors_config->count == i + 1)
+ qdev->monitors_config->count = i;
+ } else {
+ DRM_DEBUG_KMS("inactive head 0, skip (%s)\n", reason);
+ return;
+ }
+
+ if (head.width == qdev->monitors_config->heads[i].width &&
+ head.height == qdev->monitors_config->heads[i].height &&
+ head.x == qdev->monitors_config->heads[i].x &&
+ head.y == qdev->monitors_config->heads[i].y &&
+ oldcount == qdev->monitors_config->count)
+ return;
+
+ DRM_DEBUG_KMS("head %d, %dx%d, at +%d+%d, %s (%s)\n",
+ i, head.width, head.height, head.x, head.y,
+ crtc->state->active ? "on" : "off", reason);
+ if (oldcount != qdev->monitors_config->count)
+ DRM_DEBUG_KMS("active heads %d -> %d (%d total)\n",
+ oldcount, qdev->monitors_config->count,
+ qdev->monitors_config->max_allowed);
+
+ qdev->monitors_config->heads[i] = head;
+ qxl_send_monitors_config(qdev);
+}
+
static void qxl_crtc_atomic_flush(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state)
{
@@ -296,6 +356,8 @@ static void qxl_crtc_atomic_flush(struct drm_crtc *crtc,
drm_crtc_send_vblank_event(crtc, event);
spin_unlock_irqrestore(&dev->event_lock, flags);
}
+
+ qxl_crtc_update_monitors_config(crtc, "flush");
}
static void qxl_crtc_destroy(struct drm_crtc *crtc)
@@ -401,55 +463,20 @@ static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc,
return true;
}
-static void qxl_monitors_config_set(struct qxl_device *qdev,
- int index,
- unsigned x, unsigned y,
- unsigned width, unsigned height,
- unsigned surf_id)
-{
- DRM_DEBUG_KMS("%d:%dx%d+%d+%d\n", index, width, height, x, y);
- qdev->monitors_config->heads[index].x = x;
- qdev->monitors_config->heads[index].y = y;
- qdev->monitors_config->heads[index].width = width;
- qdev->monitors_config->heads[index].height = height;
- qdev->monitors_config->heads[index].surface_id = surf_id;
-
-}
-
-static void qxl_mode_set_nofb(struct drm_crtc *crtc)
-{
- struct qxl_device *qdev = crtc->dev->dev_private;
- struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
- struct drm_display_mode *mode = &crtc->mode;
-
- DRM_DEBUG("Mode set (%d,%d)\n",
- mode->hdisplay, mode->vdisplay);
-
- qxl_monitors_config_set(qdev, qcrtc->index, 0, 0,
- mode->hdisplay, mode->vdisplay, 0);
-
-}
-
static void qxl_crtc_atomic_enable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
- DRM_DEBUG("\n");
+ qxl_crtc_update_monitors_config(crtc, "enable");
}
static void qxl_crtc_atomic_disable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
- struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
- struct qxl_device *qdev = crtc->dev->dev_private;
-
- qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, 0, 0, 0);
-
- qxl_send_monitors_config(qdev);
+ qxl_crtc_update_monitors_config(crtc, "disable");
}
static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = {
.mode_fixup = qxl_crtc_mode_fixup,
- .mode_set_nofb = qxl_mode_set_nofb,
.atomic_flush = qxl_crtc_atomic_flush,
.atomic_enable = qxl_crtc_atomic_enable,
.atomic_disable = qxl_crtc_atomic_disable,
@@ -939,61 +966,8 @@ static void qxl_enc_prepare(struct drm_encoder *encoder)
DRM_DEBUG("\n");
}
-static void qxl_write_monitors_config_for_encoder(struct qxl_device *qdev,
- struct drm_encoder *encoder)
-{
- int i;
- struct qxl_output *output = drm_encoder_to_qxl_output(encoder);
- struct qxl_head *head;
- struct drm_display_mode *mode;
-
- BUG_ON(!encoder);
- /* TODO: ugly, do better */
- i = output->index;
- if (!qdev->monitors_config ||
- qdev->monitors_config->max_allowed <= i) {
- DRM_ERROR(
- "head number too large or missing monitors config: %p, %d",
- qdev->monitors_config,
- qdev->monitors_config ?
- qdev->monitors_config->max_allowed : -1);
- return;
- }
- if (!encoder->crtc) {
- DRM_ERROR("missing crtc on encoder %p\n", encoder);
- return;
- }
- if (i != 0)
- DRM_DEBUG("missing for multiple monitors: no head holes\n");
- head = &qdev->monitors_config->heads[i];
- head->id = i;
- if (encoder->crtc->enabled) {
- mode = &encoder->crtc->mode;
- head->width = mode->hdisplay;
- head->height = mode->vdisplay;
- head->x = encoder->crtc->x;
- head->y = encoder->crtc->y;
- if (qdev->monitors_config->count < i + 1)
- qdev->monitors_config->count = i + 1;
- } else {
- head->width = 0;
- head->height = 0;
- head->x = 0;
- head->y = 0;
- }
- DRM_DEBUG_KMS("setting head %d to +%d+%d %dx%d out of %d\n",
- i, head->x, head->y, head->width, head->height, qdev->monitors_config->count);
- head->flags = 0;
- /* TODO - somewhere else to call this for multiple monitors
- * (config_commit?) */
- qxl_send_monitors_config(qdev);
-}
-
static void qxl_enc_commit(struct drm_encoder *encoder)
{
- struct qxl_device *qdev = encoder->dev->dev_private;
-
- qxl_write_monitors_config_for_encoder(qdev, encoder);
DRM_DEBUG("\n");
}
@@ -1080,8 +1054,6 @@ static enum drm_connector_status qxl_conn_detect(
qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]);
DRM_DEBUG("#%d connected: %d\n", output->index, connected);
- if (!connected)
- qxl_monitors_config_set(qdev, output->index, 0, 0, 0, 0, 0);
return connected ? connector_status_connected
: connector_status_disconnected;
--
2.9.3
^ permalink raw reply related
* [PATCH 4/4] qxl: drop dummy functions
From: Gerd Hoffmann @ 2018-02-16 13:28 UTC (permalink / raw)
To: dri-devel
Cc: David Airlie, Dave Airlie, open list,
open list:DRM DRIVER FOR QXL VIRTUAL GPU
In-Reply-To: <20180216132832.5374-1-kraxel@redhat.com>
These days drm core checks function pointers everywhere before calling
them. So we can drop a bunch of dummy functions now.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
drivers/gpu/drm/qxl/qxl_display.c | 50 ---------------------------------------
1 file changed, 50 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index b7dac01f5e..4a8c80bde5 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -456,13 +456,6 @@ qxl_framebuffer_init(struct drm_device *dev,
return 0;
}
-static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
static void qxl_crtc_atomic_enable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
@@ -476,7 +469,6 @@ static void qxl_crtc_atomic_disable(struct drm_crtc *crtc,
}
static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = {
- .mode_fixup = qxl_crtc_mode_fixup,
.atomic_flush = qxl_crtc_atomic_flush,
.atomic_enable = qxl_crtc_atomic_enable,
.atomic_disable = qxl_crtc_atomic_disable,
@@ -620,12 +612,6 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane,
}
}
-static int qxl_plane_atomic_check(struct drm_plane *plane,
- struct drm_plane_state *state)
-{
- return 0;
-}
-
static void qxl_cursor_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
@@ -831,7 +817,6 @@ static const uint32_t qxl_cursor_plane_formats[] = {
};
static const struct drm_plane_helper_funcs qxl_cursor_helper_funcs = {
- .atomic_check = qxl_plane_atomic_check,
.atomic_update = qxl_cursor_atomic_update,
.atomic_disable = qxl_cursor_atomic_disable,
.prepare_fb = qxl_plane_prepare_fb,
@@ -956,28 +941,6 @@ static int qdev_crtc_init(struct drm_device *dev, int crtc_id)
return r;
}
-static void qxl_enc_dpms(struct drm_encoder *encoder, int mode)
-{
- DRM_DEBUG("\n");
-}
-
-static void qxl_enc_prepare(struct drm_encoder *encoder)
-{
- DRM_DEBUG("\n");
-}
-
-static void qxl_enc_commit(struct drm_encoder *encoder)
-{
- DRM_DEBUG("\n");
-}
-
-static void qxl_enc_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- DRM_DEBUG("\n");
-}
-
static int qxl_conn_get_modes(struct drm_connector *connector)
{
unsigned pwidth = 1024;
@@ -1023,10 +986,6 @@ static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector)
static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = {
- .dpms = qxl_enc_dpms,
- .prepare = qxl_enc_prepare,
- .mode_set = qxl_enc_mode_set,
- .commit = qxl_enc_commit,
};
static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = {
@@ -1059,14 +1018,6 @@ static enum drm_connector_status qxl_conn_detect(
: connector_status_disconnected;
}
-static int qxl_conn_set_property(struct drm_connector *connector,
- struct drm_property *property,
- uint64_t value)
-{
- DRM_DEBUG("\n");
- return 0;
-}
-
static void qxl_conn_destroy(struct drm_connector *connector)
{
struct qxl_output *qxl_output =
@@ -1081,7 +1032,6 @@ static const struct drm_connector_funcs qxl_connector_funcs = {
.dpms = drm_helper_connector_dpms,
.detect = qxl_conn_detect,
.fill_modes = drm_helper_probe_single_connector_modes,
- .set_property = qxl_conn_set_property,
.destroy = qxl_conn_destroy,
.reset = drm_atomic_helper_connector_reset,
.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
--
2.9.3
^ permalink raw reply related
* Re: [PATCH v2 0/6] crypto: engine - Permit to enqueue all async requests
From: Corentin Labbe @ 2018-02-16 15:36 UTC (permalink / raw)
To: Herbert Xu
Cc: alexandre.torgue, corbet, mst, linux-doc, linux-kernel,
fabien.dessenne, virtualization, linux-sunxi, linux-crypto,
mcoquelin.stm32, davem, linux-arm-kernel
In-Reply-To: <20180215155100.GJ7352@gondor.apana.org.au>
On Thu, Feb 15, 2018 at 11:51:00PM +0800, Herbert Xu wrote:
> On Fri, Jan 26, 2018 at 08:15:28PM +0100, Corentin Labbe wrote:
> > Hello
> >
> > The current crypto_engine support only ahash and ablkcipher request.
> > My first patch which try to add skcipher was Nacked, it will add too many functions
> > and adding other algs(aead, asymetric_key) will make the situation worst.
> >
> > This patchset remove all algs specific stuff and now only process generic crypto_async_request.
> >
> > The requests handler function pointer are now moved out of struct engine and
> > are now stored directly in a crypto_engine_reqctx.
> >
> > The original proposal of Herbert [1] cannot be done completly since the crypto_engine
> > could only dequeue crypto_async_request and it is impossible to access any request_ctx
> > without knowing the underlying request type.
> >
> > So I do something near that was requested: adding crypto_engine_reqctx in TFM context.
> > Note that the current implementation expect that crypto_engine_reqctx
> > is the first member of the context.
> >
> > The first patch is a try to document the crypto engine API.
> > The second patch convert the crypto engine with the new way,
> > while the following patchs convert the 4 existing users of crypto_engine.
> > Note that this split break bisection, so probably the final commit will be all merged.
> >
> > Appart from virtio, all 4 latest patch were compile tested only.
> > But the crypto engine is tested with my new sun8i-ce driver.
> >
> > Regards
> >
> > [1] https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1474434.html
> >
> > Changes since V1:
> > - renamed crypto_engine_reqctx to crypto_engine_ctx
> > - indentation fix in function parameter
> > - do not export crypto_transfer_request
> > - Add aead support
> > - crypto_finalize_request is now static
> >
> > Changes since RFC:
> > - Added a documentation patch
> > - Added patch for stm32-cryp
> > - Changed parameter of all crypto_engine_op functions from
> > crypto_async_request to void*
> > - Reintroduced crypto_transfer_xxx_request_to_engine functions
> >
> > Corentin Labbe (6):
> > Documentation: crypto: document crypto engine API
> > crypto: engine - Permit to enqueue all async requests
> > crypto: omap: convert to new crypto engine API
> > crypto: virtio: convert to new crypto engine API
> > crypto: stm32-hash: convert to the new crypto engine API
> > crypto: stm32-cryp: convert to the new crypto engine API
>
> All applied. Thanks.
Hello
As mentioned in the cover letter, all patches (except the documentation one) should be squashed.
A kbuild robot reported a build error on cryptodev due to this.
Regards
^ permalink raw reply
* Re: [PATCH v21 1/5] xbitmap: Introduce xbitmap
From: Andy Shevchenko @ 2018-02-16 17:44 UTC (permalink / raw)
To: Wei Wang
Cc: yang.zhang.wz, kvm, Michael S. Tsirkin, Tetsuo Handa,
liliang.opensource, qemu-devel, virtualization, linux-mm,
aarcange, virtio-dev, Matthew Wilcox, Matthew Wilcox, nilal, riel,
cornelia.huck, mhocko, quan.xu0, Linux Kernel Mailing List,
amit.shah, Paolo Bonzini, Andrew Morton, mgorman
In-Reply-To: <1515496262-7533-2-git-send-email-wei.w.wang@intel.com>
On Tue, Jan 9, 2018 at 1:10 PM, Wei Wang <wei.w.wang@intel.com> wrote:
> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> The eXtensible Bitmap is a sparse bitmap representation which is
> efficient for set bits which tend to cluster. It supports up to
> 'unsigned long' worth of bits.
> lib/xbitmap.c | 444 +++++++++++++++++++++++++++++++
Please, split tests to a separate module.
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* [RFC PATCH v3 0/3] Enable virtio_net to act as a backup for a passthru device
From: Sridhar Samudrala @ 2018-02-16 18:11 UTC (permalink / raw)
To: mst, stephen, davem, netdev, virtualization, virtio-dev,
jesse.brandeburg, alexander.h.duyck, kubakici, sridhar.samudrala,
jasowang, loseweigh
Patch 1 introduces a new feature bit VIRTIO_NET_F_BACKUP that can be
used by hypervisor to indicate that virtio_net interface should act as
a backup for another device with the same MAC address.
Patch 2 is in response to the community request for a 3 netdev
solution. However, it creates some issues we'll get into in a moment.
It extends virtio_net to use alternate datapath when available and
registered. When BACKUP feature is enabled, virtio_net driver creates
an additional 'bypass' netdev that acts as a master device and controls
2 slave devices. The original virtio_net netdev is registered as
'backup' netdev and a passthru/vf device with the same MAC gets
registered as 'active' netdev. Both 'bypass' and 'backup' netdevs are
associated with the same 'pci' device. The user accesses the network
interface via 'bypass' netdev. The 'bypass' netdev chooses 'active' netdev
as default for transmits when it is available with link up and running.
We noticed a couple of issues with this approach during testing.
- As both 'bypass' and 'backup' netdevs are associated with the same
virtio pci device, udev tries to rename both of them with the same name
and the 2nd rename will fail. This would be OK as long as the first netdev
to be renamed is the 'bypass' netdev, but the order in which udev gets
to rename the 2 netdevs is not reliable.
- When the 'active' netdev is unplugged OR not present on a destination
system after live migration, the user will see 2 virtio_net netdevs.
Patch 3 refactors much of the changes made in patch 2, which was done on
purpose just to show the solution we recommend as part of one patch set.
If we submit a final version of this, we would combine patch 2/3 together.
This patch removes the creation of an additional netdev. Instead, it
uses a new virtnet_bypass_info struct added to the original 'backup' netdev
to track the 'bypass' information and introduces an additional set of ndo and
ethtool ops that are used when BACKUP feature is enabled.
One difference with the 3 netdev model compared to the 2 netdev model is that
the 'bypass' netdev is created with 'noqueue' qdisc marked as 'NETIF_F_LLTX'.
This avoids going through an additional qdisc and acquiring an additional
qdisc and tx lock during transmits.
If we can replace the qdisc of virtio netdev dynamically, it should be
possible to get these optimizations enabled even with 2 netdev model when
BACKUP feature is enabled.
As this patch series is initially focusing on usecases where hypervisor
fully controls the VM networking and the guest is not expected to directly
configure any hardware settings, it doesn't expose all the ndo/ethtool ops
that are supported by virtio_net at this time. To support additional usecases,
it should be possible to enable additional ops later by caching the state
in virtio netdev and replaying when the 'active' netdev gets registered.
The hypervisor needs to enable only one datapath at any time so that packets
don't get looped back to the VM over the other datapath. When a VF is
plugged, the virtio datapath link state can be marked as down.
At the time of live migration, the hypervisor needs to unplug the VF device
from the guest on the source host and reset the MAC filter of the VF to
initiate failover of datapath to virtio before starting the migration. After
the migration is completed, the destination hypervisor sets the MAC filter
on the VF and plugs it back to the guest to switch over to VF datapath.
This patch is based on the discussion initiated by Jesse on this thread.
https://marc.info/?l=linux-virtualization&m=151189725224231&w=2
Sridhar Samudrala (3):
virtio_net: Introduce VIRTIO_NET_F_BACKUP feature bit
virtio_net: Extend virtio to use VF datapath when available
virtio_net: Enable alternate datapath without creating an additional
netdev
drivers/net/virtio_net.c | 564 +++++++++++++++++++++++++++++++++++++++-
include/uapi/linux/virtio_net.h | 3 +
2 files changed, 563 insertions(+), 4 deletions(-)
--
2.14.3
^ permalink raw reply
* [RFC PATCH v3 1/3] virtio_net: Introduce VIRTIO_NET_F_BACKUP feature bit
From: Sridhar Samudrala @ 2018-02-16 18:11 UTC (permalink / raw)
To: mst, stephen, davem, netdev, virtualization, virtio-dev,
jesse.brandeburg, alexander.h.duyck, kubakici, sridhar.samudrala,
jasowang, loseweigh
In-Reply-To: <1518804682-16881-1-git-send-email-sridhar.samudrala@intel.com>
This feature bit can be used by the hypervisor to indicate that the virtio_net
device should act as a backup for another device with the same MAC address.
VIRTIO_NET_F_BACKUP is defined as bit 62 as it is a device feature bit.
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
drivers/net/virtio_net.c | 2 +-
include/uapi/linux/virtio_net.h | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 626c27352ae2..bcd13fe906ca 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2920,7 +2920,7 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
- VIRTIO_NET_F_SPEED_DUPLEX
+ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_BACKUP
static unsigned int features[] = {
VIRTNET_FEATURES,
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 5de6ed37695b..c7c35fd1a5ed 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -57,6 +57,9 @@
* Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
+#define VIRTIO_NET_F_BACKUP 62 /* Act as backup for another device
+ * with the same MAC.
+ */
#define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */
#ifndef VIRTIO_NET_NO_LEGACY
--
2.14.3
^ permalink raw reply related
* [RFC PATCH v3 2/3] virtio_net: Extend virtio to use VF datapath when available
From: Sridhar Samudrala @ 2018-02-16 18:11 UTC (permalink / raw)
To: mst, stephen, davem, netdev, virtualization, virtio-dev,
jesse.brandeburg, alexander.h.duyck, kubakici, sridhar.samudrala,
jasowang, loseweigh
In-Reply-To: <1518804682-16881-1-git-send-email-sridhar.samudrala@intel.com>
This patch enables virtio_net to switch over to a VF datapath when a VF
netdev is present with the same MAC address. It allows live migration
of a VM with a direct attached VF without the need to set up a bond/team
between a VF and virtio net device in the guest.
The hypervisor needs to enable only one datapath at any time so that
packets don't get looped back to the VM over the other datapath. When a VF
is plugged, the virtio datapath link state can be marked as down. The
hypervisor needs to unplug the VF device from the guest on the source host
and reset the MAC filter of the VF to initiate failover of datapath to
virtio before starting the migration. After the migration is completed,
the destination hypervisor sets the MAC filter on the VF and plugs it back
to the guest to switch over to VF datapath.
When BACKUP feature is enabled, an additional netdev(bypass netdev) is
created that acts as a master device and tracks the state of the 2 lower
netdevs. The original virtio_net netdev is marked as 'backup' netdev and a
passthru device with the same MAC is registered as 'active' netdev.
This patch is based on the discussion initiated by Jesse on this thread.
https://marc.info/?l=linux-virtualization&m=151189725224231&w=2
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/virtio_net.c | 639 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 638 insertions(+), 1 deletion(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index bcd13fe906ca..14679806c1b1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -30,6 +30,7 @@
#include <linux/cpu.h>
#include <linux/average.h>
#include <linux/filter.h>
+#include <linux/netdevice.h>
#include <net/route.h>
#include <net/xdp.h>
@@ -147,6 +148,27 @@ struct receive_queue {
struct xdp_rxq_info xdp_rxq;
};
+/* bypass state maintained when BACKUP feature is enabled */
+struct virtnet_bypass_info {
+ /* passthru netdev with same MAC */
+ struct net_device __rcu *active_netdev;
+
+ /* virtio_net netdev */
+ struct net_device __rcu *backup_netdev;
+
+ /* active netdev stats */
+ struct rtnl_link_stats64 active_stats;
+
+ /* backup netdev stats */
+ struct rtnl_link_stats64 backup_stats;
+
+ /* aggregated stats */
+ struct rtnl_link_stats64 bypass_stats;
+
+ /* spinlock while updating stats */
+ spinlock_t stats_lock;
+};
+
struct virtnet_info {
struct virtio_device *vdev;
struct virtqueue *cvq;
@@ -206,6 +228,9 @@ struct virtnet_info {
u32 speed;
unsigned long guest_offloads;
+
+ /* upper netdev created when BACKUP feature enabled */
+ struct net_device *bypass_netdev;
};
struct padded_vnet_hdr {
@@ -2255,6 +2280,11 @@ static const struct net_device_ops virtnet_netdev = {
.ndo_features_check = passthru_features_check,
};
+static bool virtnet_bypass_xmit_ready(struct net_device *dev)
+{
+ return netif_running(dev) && netif_carrier_ok(dev);
+}
+
static void virtnet_config_changed_work(struct work_struct *work)
{
struct virtnet_info *vi =
@@ -2647,6 +2677,601 @@ static int virtnet_validate(struct virtio_device *vdev)
return 0;
}
+static void
+virtnet_bypass_child_open(struct net_device *dev,
+ struct net_device *child_netdev)
+{
+ int err = dev_open(child_netdev);
+
+ if (err)
+ netdev_warn(dev, "unable to open slave: %s: %d\n",
+ child_netdev->name, err);
+}
+
+static int virtnet_bypass_open(struct net_device *dev)
+{
+ struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct net_device *child_netdev;
+
+ netif_carrier_off(dev);
+ netif_tx_wake_all_queues(dev);
+
+ child_netdev = rtnl_dereference(vbi->active_netdev);
+ if (child_netdev)
+ virtnet_bypass_child_open(dev, child_netdev);
+
+ child_netdev = rtnl_dereference(vbi->backup_netdev);
+ if (child_netdev)
+ virtnet_bypass_child_open(dev, child_netdev);
+
+ return 0;
+}
+
+static int virtnet_bypass_close(struct net_device *dev)
+{
+ struct virtnet_bypass_info *vi = netdev_priv(dev);
+ struct net_device *child_netdev;
+
+ netif_tx_disable(dev);
+
+ child_netdev = rtnl_dereference(vi->active_netdev);
+ if (child_netdev)
+ dev_close(child_netdev);
+
+ child_netdev = rtnl_dereference(vi->backup_netdev);
+ if (child_netdev)
+ dev_close(child_netdev);
+
+ return 0;
+}
+
+static netdev_tx_t
+virtnet_bypass_drop_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ atomic_long_inc(&dev->tx_dropped);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t
+virtnet_bypass_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct net_device *xmit_dev;
+
+ /* Try xmit via active netdev followed by backup netdev */
+ xmit_dev = rcu_dereference_bh(vbi->active_netdev);
+ if (!xmit_dev || !virtnet_bypass_xmit_ready(xmit_dev)) {
+ xmit_dev = rcu_dereference_bh(vbi->backup_netdev);
+ if (!xmit_dev || !virtnet_bypass_xmit_ready(xmit_dev))
+ return virtnet_bypass_drop_xmit(skb, dev);
+ }
+
+ skb->dev = xmit_dev;
+ skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+
+ return dev_queue_xmit(skb);
+}
+
+static u16
+virtnet_bypass_select_queue(struct net_device *dev, struct sk_buff *skb,
+ void *accel_priv, select_queue_fallback_t fallback)
+{
+ /* This helper function exists to help dev_pick_tx get the correct
+ * destination queue. Using a helper function skips a call to
+ * skb_tx_hash and will put the skbs in the queue we expect on their
+ * way down to the bonding driver.
+ */
+ u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+
+ /* Save the original txq to restore before passing to the driver */
+ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+
+ if (unlikely(txq >= dev->real_num_tx_queues)) {
+ do {
+ txq -= dev->real_num_tx_queues;
+ } while (txq >= dev->real_num_tx_queues);
+ }
+
+ return txq;
+}
+
+/* fold stats, assuming all rtnl_link_stats64 fields are u64, but
+ * that some drivers can provide 32bit values only.
+ */
+static void
+virtnet_bypass_fold_stats(struct rtnl_link_stats64 *_res,
+ const struct rtnl_link_stats64 *_new,
+ const struct rtnl_link_stats64 *_old)
+{
+ const u64 *new = (const u64 *)_new;
+ const u64 *old = (const u64 *)_old;
+ u64 *res = (u64 *)_res;
+ int i;
+
+ for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) {
+ u64 nv = new[i];
+ u64 ov = old[i];
+ s64 delta = nv - ov;
+
+ /* detects if this particular field is 32bit only */
+ if (((nv | ov) >> 32) == 0)
+ delta = (s64)(s32)((u32)nv - (u32)ov);
+
+ /* filter anomalies, some drivers reset their stats
+ * at down/up events.
+ */
+ if (delta > 0)
+ res[i] += delta;
+ }
+}
+
+static void
+virtnet_bypass_get_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ const struct rtnl_link_stats64 *new;
+ struct rtnl_link_stats64 temp;
+ struct net_device *child_netdev;
+
+ spin_lock(&vbi->stats_lock);
+ memcpy(stats, &vbi->bypass_stats, sizeof(*stats));
+
+ rcu_read_lock();
+
+ child_netdev = rcu_dereference(vbi->active_netdev);
+ if (child_netdev) {
+ new = dev_get_stats(child_netdev, &temp);
+ virtnet_bypass_fold_stats(stats, new, &vbi->active_stats);
+ memcpy(&vbi->active_stats, new, sizeof(*new));
+ }
+
+ child_netdev = rcu_dereference(vbi->backup_netdev);
+ if (child_netdev) {
+ new = dev_get_stats(child_netdev, &temp);
+ virtnet_bypass_fold_stats(stats, new, &vbi->backup_stats);
+ memcpy(&vbi->backup_stats, new, sizeof(*new));
+ }
+
+ rcu_read_unlock();
+
+ memcpy(&vbi->bypass_stats, stats, sizeof(*stats));
+ spin_unlock(&vbi->stats_lock);
+}
+
+static int virtnet_bypass_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct net_device *child_netdev;
+ int ret = 0;
+
+ child_netdev = rcu_dereference(vbi->active_netdev);
+ if (child_netdev) {
+ ret = dev_set_mtu(child_netdev, new_mtu);
+ if (ret)
+ return ret;
+ }
+
+ child_netdev = rcu_dereference(vbi->backup_netdev);
+ if (child_netdev) {
+ ret = dev_set_mtu(child_netdev, new_mtu);
+ if (ret)
+ netdev_err(child_netdev,
+ "Unexpected failure to set mtu to %d\n",
+ new_mtu);
+ }
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static const struct net_device_ops virtnet_bypass_netdev_ops = {
+ .ndo_open = virtnet_bypass_open,
+ .ndo_stop = virtnet_bypass_close,
+ .ndo_start_xmit = virtnet_bypass_start_xmit,
+ .ndo_select_queue = virtnet_bypass_select_queue,
+ .ndo_get_stats64 = virtnet_bypass_get_stats,
+ .ndo_change_mtu = virtnet_bypass_change_mtu,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_features_check = passthru_features_check,
+};
+
+static int
+virtnet_bypass_ethtool_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct net_device *child_netdev;
+
+ child_netdev = rtnl_dereference(vbi->active_netdev);
+ if (!child_netdev || !virtnet_bypass_xmit_ready(child_netdev)) {
+ child_netdev = rtnl_dereference(vbi->backup_netdev);
+ if (!child_netdev || !virtnet_bypass_xmit_ready(child_netdev)) {
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ cmd->base.port = PORT_OTHER;
+ cmd->base.speed = SPEED_UNKNOWN;
+
+ return 0;
+ }
+ }
+
+ return __ethtool_get_link_ksettings(child_netdev, cmd);
+}
+
+#define BYPASS_DRV_NAME "virtnet_bypass"
+#define BYPASS_DRV_VERSION "0.1"
+
+static void
+virtnet_bypass_ethtool_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, BYPASS_DRV_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, BYPASS_DRV_VERSION, sizeof(drvinfo->version));
+}
+
+static const struct ethtool_ops virtnet_bypass_ethtool_ops = {
+ .get_drvinfo = virtnet_bypass_ethtool_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = virtnet_bypass_ethtool_get_link_ksettings,
+};
+
+static struct net_device *
+get_virtnet_bypass_bymac(struct net *net, const u8 *mac)
+{
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+
+ for_each_netdev(net, dev) {
+ if (dev->netdev_ops != &virtnet_bypass_netdev_ops)
+ continue; /* not a virtnet_bypass device */
+
+ if (ether_addr_equal(mac, dev->perm_addr))
+ return dev;
+ }
+
+ return NULL;
+}
+
+static struct net_device *
+get_virtnet_bypass_byref(struct net_device *child_netdev)
+{
+ struct net *net = dev_net(child_netdev);
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+
+ for_each_netdev(net, dev) {
+ struct virtnet_bypass_info *vbi;
+
+ if (dev->netdev_ops != &virtnet_bypass_netdev_ops)
+ continue; /* not a virtnet_bypass device */
+
+ vbi = netdev_priv(dev);
+
+ if ((rtnl_dereference(vbi->active_netdev) == child_netdev) ||
+ (rtnl_dereference(vbi->backup_netdev) == child_netdev))
+ return dev; /* a match */
+ }
+
+ return NULL;
+}
+
+/* Called when child dev is injecting data into network stack.
+ * Change the associated network device from lower dev to virtio.
+ * note: already called with rcu_read_lock
+ */
+static rx_handler_result_t virtnet_bypass_handle_frame(struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
+
+ skb->dev = ndev;
+
+ return RX_HANDLER_ANOTHER;
+}
+
+static int virtnet_bypass_register_child(struct net_device *child_netdev)
+{
+ struct virtnet_bypass_info *vbi;
+ struct net_device *dev;
+ bool backup;
+ int ret;
+
+ if (child_netdev->addr_len != ETH_ALEN)
+ return NOTIFY_DONE;
+
+ /* We will use the MAC address to locate the virtnet_bypass netdev
+ * to associate with the child netdev. If we don't find a matching
+ * bypass netdev, move on.
+ */
+ dev = get_virtnet_bypass_bymac(dev_net(child_netdev),
+ child_netdev->perm_addr);
+ if (!dev)
+ return NOTIFY_DONE;
+
+ vbi = netdev_priv(dev);
+ backup = (child_netdev->dev.parent == dev->dev.parent);
+ if (backup ? rtnl_dereference(vbi->backup_netdev) :
+ rtnl_dereference(vbi->active_netdev)) {
+ netdev_info(dev,
+ "%s attempting to join bypass dev when %s already present\n",
+ child_netdev->name,
+ backup ? "backup" : "active");
+ return NOTIFY_DONE;
+ }
+
+ ret = netdev_rx_handler_register(child_netdev,
+ virtnet_bypass_handle_frame, dev);
+ if (ret != 0) {
+ netdev_err(child_netdev,
+ "can not register bypass receive handler (err = %d)\n",
+ ret);
+ goto rx_handler_failed;
+ }
+
+ ret = netdev_upper_dev_link(child_netdev, dev, NULL);
+ if (ret != 0) {
+ netdev_err(child_netdev,
+ "can not set master device %s (err = %d)\n",
+ dev->name, ret);
+ goto upper_link_failed;
+ }
+
+ child_netdev->flags |= IFF_SLAVE;
+
+ if (netif_running(dev)) {
+ ret = dev_open(child_netdev);
+ if (ret && (ret != -EBUSY)) {
+ netdev_err(dev, "Opening child %s failed ret:%d\n",
+ child_netdev->name, ret);
+ goto err_interface_up;
+ }
+ }
+
+ /* Align MTU of child with master */
+ ret = dev_set_mtu(child_netdev, dev->mtu);
+ if (ret) {
+ netdev_err(dev,
+ "unable to change mtu of %s to %u register failed\n",
+ child_netdev->name, dev->mtu);
+ goto err_set_mtu;
+ }
+
+ call_netdevice_notifiers(NETDEV_JOIN, child_netdev);
+
+ netdev_info(dev, "registering %s\n", child_netdev->name);
+
+ dev_hold(child_netdev);
+ if (backup) {
+ rcu_assign_pointer(vbi->backup_netdev, child_netdev);
+ dev_get_stats(vbi->backup_netdev, &vbi->backup_stats);
+ } else {
+ rcu_assign_pointer(vbi->active_netdev, child_netdev);
+ dev_get_stats(vbi->active_netdev, &vbi->active_stats);
+ dev->min_mtu = child_netdev->min_mtu;
+ dev->max_mtu = child_netdev->max_mtu;
+ }
+
+ return NOTIFY_OK;
+
+err_set_mtu:
+ dev_close(child_netdev);
+err_interface_up:
+ netdev_upper_dev_unlink(child_netdev, dev);
+ child_netdev->flags &= ~IFF_SLAVE;
+upper_link_failed:
+ netdev_rx_handler_unregister(child_netdev);
+rx_handler_failed:
+ return NOTIFY_DONE;
+}
+
+static int virtnet_bypass_unregister_child(struct net_device *child_netdev)
+{
+ struct virtnet_bypass_info *vbi;
+ struct net_device *dev, *backup;
+
+ dev = get_virtnet_bypass_byref(child_netdev);
+ if (!dev)
+ return NOTIFY_DONE;
+
+ vbi = netdev_priv(dev);
+
+ netdev_info(dev, "unregistering %s\n", child_netdev->name);
+
+ netdev_rx_handler_unregister(child_netdev);
+ netdev_upper_dev_unlink(child_netdev, dev);
+ child_netdev->flags &= ~IFF_SLAVE;
+
+ if (child_netdev->dev.parent == dev->dev.parent) {
+ RCU_INIT_POINTER(vbi->backup_netdev, NULL);
+ } else {
+ RCU_INIT_POINTER(vbi->active_netdev, NULL);
+ backup = rtnl_dereference(vbi->backup_netdev);
+ if (backup) {
+ dev->min_mtu = backup->min_mtu;
+ dev->max_mtu = backup->max_mtu;
+ }
+ }
+
+ dev_put(child_netdev);
+
+ return NOTIFY_OK;
+}
+
+static int virtnet_bypass_update_link(struct net_device *child_netdev)
+{
+ struct net_device *dev, *active, *backup;
+ struct virtnet_bypass_info *vbi;
+
+ dev = get_virtnet_bypass_byref(child_netdev);
+ if (!dev || !netif_running(dev))
+ return NOTIFY_DONE;
+
+ vbi = netdev_priv(dev);
+
+ active = rtnl_dereference(vbi->active_netdev);
+ backup = rtnl_dereference(vbi->backup_netdev);
+
+ if ((active && virtnet_bypass_xmit_ready(active)) ||
+ (backup && virtnet_bypass_xmit_ready(backup))) {
+ netif_carrier_on(dev);
+ netif_tx_wake_all_queues(dev);
+ } else {
+ netif_carrier_off(dev);
+ netif_tx_stop_all_queues(dev);
+ }
+
+ return NOTIFY_OK;
+}
+
+static int
+virtnet_bypass_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+
+ /* Skip our own events */
+ if (event_dev->netdev_ops == &virtnet_bypass_netdev_ops)
+ return NOTIFY_DONE;
+
+ /* Avoid non-Ethernet type devices */
+ if (event_dev->type != ARPHRD_ETHER)
+ return NOTIFY_DONE;
+
+ /* Avoid Vlan dev with same MAC registering as child dev */
+ if (is_vlan_dev(event_dev))
+ return NOTIFY_DONE;
+
+ /* Avoid Bonding master dev with same MAC registering as child dev */
+ if ((event_dev->priv_flags & IFF_BONDING) &&
+ (event_dev->flags & IFF_MASTER))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return virtnet_bypass_register_child(event_dev);
+ case NETDEV_UNREGISTER:
+ return virtnet_bypass_unregister_child(event_dev);
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ return virtnet_bypass_update_link(event_dev);
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block virtnet_bypass_notifier = {
+ .notifier_call = virtnet_bypass_event,
+};
+
+static int virtnet_bypass_create(struct virtnet_info *vi)
+{
+ struct net_device *backup_netdev = vi->dev;
+ struct device *dev = &vi->vdev->dev;
+ struct net_device *bypass_netdev;
+ int res;
+
+ /* Alloc at least 2 queues, for now we are going with 16 assuming
+ * that most devices being bonded won't have too many queues.
+ */
+ bypass_netdev = alloc_etherdev_mq(sizeof(struct virtnet_bypass_info),
+ 16);
+ if (!bypass_netdev) {
+ dev_err(dev, "Unable to allocate bypass_netdev!\n");
+ return -ENOMEM;
+ }
+
+ dev_net_set(bypass_netdev, dev_net(backup_netdev));
+ SET_NETDEV_DEV(bypass_netdev, dev);
+
+ bypass_netdev->netdev_ops = &virtnet_bypass_netdev_ops;
+ bypass_netdev->ethtool_ops = &virtnet_bypass_ethtool_ops;
+
+ /* Initialize the device options */
+ bypass_netdev->flags |= IFF_MASTER;
+ bypass_netdev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT |
+ IFF_NO_QUEUE;
+ bypass_netdev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
+ IFF_TX_SKB_SHARING);
+
+ /* don't acquire bypass netdev's netif_tx_lock when transmitting */
+ bypass_netdev->features |= NETIF_F_LLTX;
+
+ /* Don't allow bypass devices to change network namespaces. */
+ bypass_netdev->features |= NETIF_F_NETNS_LOCAL;
+
+ bypass_netdev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
+ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
+ NETIF_F_HIGHDMA | NETIF_F_LRO;
+
+ bypass_netdev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+ bypass_netdev->features |= bypass_netdev->hw_features;
+
+ /* For now treat bypass netdev as VLAN challenged since we
+ * cannot assume VLAN functionality with a VF
+ */
+ bypass_netdev->features |= NETIF_F_VLAN_CHALLENGED;
+
+ memcpy(bypass_netdev->dev_addr, backup_netdev->dev_addr,
+ bypass_netdev->addr_len);
+
+ bypass_netdev->min_mtu = backup_netdev->min_mtu;
+ bypass_netdev->max_mtu = backup_netdev->max_mtu;
+
+ res = register_netdev(bypass_netdev);
+ if (res < 0) {
+ dev_err(dev, "Unable to register bypass_netdev!\n");
+ free_netdev(bypass_netdev);
+ return res;
+ }
+
+ netif_carrier_off(bypass_netdev);
+
+ vi->bypass_netdev = bypass_netdev;
+
+ /* Change the name of the backup interface to vbkup0
+ * we may need to revisit naming later but this gets it out
+ * of the way for now.
+ */
+ strcpy(backup_netdev->name, "vbkup%d");
+
+ return 0;
+}
+
+static void virtnet_bypass_destroy(struct virtnet_info *vi)
+{
+ struct net_device *bypass_netdev = vi->bypass_netdev;
+ struct virtnet_bypass_info *vbi;
+ struct net_device *child_netdev;
+
+ /* no device found, nothing to free */
+ if (!bypass_netdev)
+ return;
+
+ vbi = netdev_priv(bypass_netdev);
+
+ netif_device_detach(bypass_netdev);
+
+ rtnl_lock();
+
+ child_netdev = rtnl_dereference(vbi->active_netdev);
+ if (child_netdev)
+ virtnet_bypass_unregister_child(child_netdev);
+
+ child_netdev = rtnl_dereference(vbi->backup_netdev);
+ if (child_netdev)
+ virtnet_bypass_unregister_child(child_netdev);
+
+ unregister_netdevice(bypass_netdev);
+
+ rtnl_unlock();
+
+ free_netdev(bypass_netdev);
+}
+
static int virtnet_probe(struct virtio_device *vdev)
{
int i, err = -ENOMEM;
@@ -2797,10 +3422,15 @@ static int virtnet_probe(struct virtio_device *vdev)
virtnet_init_settings(dev);
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_BACKUP)) {
+ if (virtnet_bypass_create(vi) != 0)
+ goto free_vqs;
+ }
+
err = register_netdev(dev);
if (err) {
pr_debug("virtio_net: registering device failed\n");
- goto free_vqs;
+ goto free_bypass;
}
virtio_device_ready(vdev);
@@ -2837,6 +3467,8 @@ static int virtnet_probe(struct virtio_device *vdev)
vi->vdev->config->reset(vdev);
unregister_netdev(dev);
+free_bypass:
+ virtnet_bypass_destroy(vi);
free_vqs:
cancel_delayed_work_sync(&vi->refill);
free_receive_page_frags(vi);
@@ -2871,6 +3503,8 @@ static void virtnet_remove(struct virtio_device *vdev)
unregister_netdev(vi->dev);
+ virtnet_bypass_destroy(vi);
+
remove_vq_common(vi);
free_netdev(vi->dev);
@@ -2968,6 +3602,8 @@ static __init int virtio_net_driver_init(void)
ret = register_virtio_driver(&virtio_net_driver);
if (ret)
goto err_virtio;
+
+ register_netdevice_notifier(&virtnet_bypass_notifier);
return 0;
err_virtio:
cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
@@ -2980,6 +3616,7 @@ module_init(virtio_net_driver_init);
static __exit void virtio_net_driver_exit(void)
{
+ unregister_netdevice_notifier(&virtnet_bypass_notifier);
unregister_virtio_driver(&virtio_net_driver);
cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
cpuhp_remove_multi_state(virtionet_online);
--
2.14.3
^ permalink raw reply related
* [RFC PATCH v3 3/3] virtio_net: Enable alternate datapath without creating an additional netdev
From: Sridhar Samudrala @ 2018-02-16 18:11 UTC (permalink / raw)
To: mst, stephen, davem, netdev, virtualization, virtio-dev,
jesse.brandeburg, alexander.h.duyck, kubakici, sridhar.samudrala,
jasowang, loseweigh
In-Reply-To: <1518804682-16881-1-git-send-email-sridhar.samudrala@intel.com>
This patch addresses the issues that were seen with the 3-netdev model by
avoiding the creation of an additional netdev. Instead, the bypass state
information is tracked in the original netdev, and a different set of
ndo_ops and ethtool_ops is used when the BACKUP feature is enabled.
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/virtio_net.c | 283 +++++++++++++++++------------------------------
1 file changed, 101 insertions(+), 182 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 14679806c1b1..c85b2949f151 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -154,7 +154,7 @@ struct virtnet_bypass_info {
struct net_device __rcu *active_netdev;
/* virtio_net netdev */
- struct net_device __rcu *backup_netdev;
+ struct net_device *backup_netdev;
/* active netdev stats */
struct rtnl_link_stats64 active_stats;
@@ -229,8 +229,8 @@ struct virtnet_info {
unsigned long guest_offloads;
- /* upper netdev created when BACKUP feature enabled */
- struct net_device *bypass_netdev;
+ /* bypass state maintained when BACKUP feature is enabled */
+ struct virtnet_bypass_info *vbi;
};
struct padded_vnet_hdr {
@@ -2285,6 +2285,22 @@ static bool virtnet_bypass_xmit_ready(struct net_device *dev)
return netif_running(dev) && netif_carrier_ok(dev);
}
+static bool virtnet_bypass_active_ready(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
+ struct net_device *active;
+
+ if (!vbi)
+ return false;
+
+ active = rcu_dereference(vbi->active_netdev);
+ if (!active || !virtnet_bypass_xmit_ready(active))
+ return false;
+
+ return true;
+}
+
static void virtnet_config_changed_work(struct work_struct *work)
{
struct virtnet_info *vi =
@@ -2312,7 +2328,7 @@ static void virtnet_config_changed_work(struct work_struct *work)
virtnet_update_settings(vi);
netif_carrier_on(vi->dev);
netif_tx_wake_all_queues(vi->dev);
- } else {
+ } else if (!virtnet_bypass_active_ready(vi->dev)) {
netif_carrier_off(vi->dev);
netif_tx_stop_all_queues(vi->dev);
}
@@ -2501,7 +2517,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
if (vi->has_cvq) {
vi->cvq = vqs[total_vqs - 1];
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN) &&
+ !virtio_has_feature(vi->vdev, VIRTIO_NET_F_BACKUP))
vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
@@ -2690,62 +2707,54 @@ virtnet_bypass_child_open(struct net_device *dev,
static int virtnet_bypass_open(struct net_device *dev)
{
- struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
struct net_device *child_netdev;
-
- netif_carrier_off(dev);
- netif_tx_wake_all_queues(dev);
+ int err;
child_netdev = rtnl_dereference(vbi->active_netdev);
if (child_netdev)
virtnet_bypass_child_open(dev, child_netdev);
- child_netdev = rtnl_dereference(vbi->backup_netdev);
- if (child_netdev)
- virtnet_bypass_child_open(dev, child_netdev);
+ err = virtnet_open(dev);
+ if (err < 0) {
+ dev_close(child_netdev);
+ return err;
+ }
return 0;
}
static int virtnet_bypass_close(struct net_device *dev)
{
- struct virtnet_bypass_info *vi = netdev_priv(dev);
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
struct net_device *child_netdev;
- netif_tx_disable(dev);
+ virtnet_close(dev);
- child_netdev = rtnl_dereference(vi->active_netdev);
- if (child_netdev)
- dev_close(child_netdev);
+ if (!vbi)
+ goto done;
- child_netdev = rtnl_dereference(vi->backup_netdev);
+ child_netdev = rtnl_dereference(vbi->active_netdev);
if (child_netdev)
dev_close(child_netdev);
+done:
return 0;
}
-static netdev_tx_t
-virtnet_bypass_drop_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- atomic_long_inc(&dev->tx_dropped);
- dev_kfree_skb_any(skb);
- return NETDEV_TX_OK;
-}
-
static netdev_tx_t
virtnet_bypass_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
struct net_device *xmit_dev;
/* Try xmit via active netdev followed by backup netdev */
xmit_dev = rcu_dereference_bh(vbi->active_netdev);
- if (!xmit_dev || !virtnet_bypass_xmit_ready(xmit_dev)) {
- xmit_dev = rcu_dereference_bh(vbi->backup_netdev);
- if (!xmit_dev || !virtnet_bypass_xmit_ready(xmit_dev))
- return virtnet_bypass_drop_xmit(skb, dev);
- }
+ if (!xmit_dev || !virtnet_bypass_xmit_ready(xmit_dev))
+ return start_xmit(skb, dev);
skb->dev = xmit_dev;
skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
@@ -2810,7 +2819,8 @@ static void
virtnet_bypass_get_stats(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
- struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
const struct rtnl_link_stats64 *new;
struct rtnl_link_stats64 temp;
struct net_device *child_netdev;
@@ -2827,12 +2837,10 @@ virtnet_bypass_get_stats(struct net_device *dev,
memcpy(&vbi->active_stats, new, sizeof(*new));
}
- child_netdev = rcu_dereference(vbi->backup_netdev);
- if (child_netdev) {
- new = dev_get_stats(child_netdev, &temp);
- virtnet_bypass_fold_stats(stats, new, &vbi->backup_stats);
- memcpy(&vbi->backup_stats, new, sizeof(*new));
- }
+ memset(&temp, 0, sizeof(temp));
+ virtnet_stats(vbi->backup_netdev, &temp);
+ virtnet_bypass_fold_stats(stats, &temp, &vbi->backup_stats);
+ memcpy(&vbi->backup_stats, &temp, sizeof(temp));
rcu_read_unlock();
@@ -2842,7 +2850,8 @@ virtnet_bypass_get_stats(struct net_device *dev,
static int virtnet_bypass_change_mtu(struct net_device *dev, int new_mtu)
{
- struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
struct net_device *child_netdev;
int ret = 0;
@@ -2853,15 +2862,6 @@ static int virtnet_bypass_change_mtu(struct net_device *dev, int new_mtu)
return ret;
}
- child_netdev = rcu_dereference(vbi->backup_netdev);
- if (child_netdev) {
- ret = dev_set_mtu(child_netdev, new_mtu);
- if (ret)
- netdev_err(child_netdev,
- "Unexpected failure to set mtu to %d\n",
- new_mtu);
- }
-
dev->mtu = new_mtu;
return 0;
}
@@ -2881,20 +2881,13 @@ static int
virtnet_bypass_ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
- struct virtnet_bypass_info *vbi = netdev_priv(dev);
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_bypass_info *vbi = vi->vbi;
struct net_device *child_netdev;
child_netdev = rtnl_dereference(vbi->active_netdev);
- if (!child_netdev || !virtnet_bypass_xmit_ready(child_netdev)) {
- child_netdev = rtnl_dereference(vbi->backup_netdev);
- if (!child_netdev || !virtnet_bypass_xmit_ready(child_netdev)) {
- cmd->base.duplex = DUPLEX_UNKNOWN;
- cmd->base.port = PORT_OTHER;
- cmd->base.speed = SPEED_UNKNOWN;
-
- return 0;
- }
- }
+ if (!child_netdev || !virtnet_bypass_xmit_ready(child_netdev))
+ return virtnet_get_link_ksettings(dev, cmd);
return __ethtool_get_link_ksettings(child_netdev, cmd);
}
@@ -2944,14 +2937,15 @@ get_virtnet_bypass_byref(struct net_device *child_netdev)
for_each_netdev(net, dev) {
struct virtnet_bypass_info *vbi;
+ struct virtnet_info *vi;
if (dev->netdev_ops != &virtnet_bypass_netdev_ops)
continue; /* not a virtnet_bypass device */
- vbi = netdev_priv(dev);
+ vi = netdev_priv(dev);
+ vbi = vi->vbi;
- if ((rtnl_dereference(vbi->active_netdev) == child_netdev) ||
- (rtnl_dereference(vbi->backup_netdev) == child_netdev))
+ if (rtnl_dereference(vbi->active_netdev) == child_netdev)
return dev; /* a match */
}
@@ -2974,9 +2968,9 @@ static rx_handler_result_t virtnet_bypass_handle_frame(struct sk_buff **pskb)
static int virtnet_bypass_register_child(struct net_device *child_netdev)
{
+ struct net_device *dev, *active;
struct virtnet_bypass_info *vbi;
- struct net_device *dev;
- bool backup;
+ struct virtnet_info *vi;
int ret;
if (child_netdev->addr_len != ETH_ALEN)
@@ -2991,14 +2985,14 @@ static int virtnet_bypass_register_child(struct net_device *child_netdev)
if (!dev)
return NOTIFY_DONE;
- vbi = netdev_priv(dev);
- backup = (child_netdev->dev.parent == dev->dev.parent);
- if (backup ? rtnl_dereference(vbi->backup_netdev) :
- rtnl_dereference(vbi->active_netdev)) {
+ vi = netdev_priv(dev);
+ vbi = vi->vbi;
+
+ active = rtnl_dereference(vbi->active_netdev);
+ if (active) {
netdev_info(dev,
"%s attempting to join bypass dev when %s already present\n",
- child_netdev->name,
- backup ? "backup" : "active");
+ child_netdev->name, active->name);
return NOTIFY_DONE;
}
@@ -3030,7 +3024,7 @@ static int virtnet_bypass_register_child(struct net_device *child_netdev)
}
}
- /* Align MTU of child with master */
+ /* Align MTU of child with virtio */
ret = dev_set_mtu(child_netdev, dev->mtu);
if (ret) {
netdev_err(dev,
@@ -3044,15 +3038,10 @@ static int virtnet_bypass_register_child(struct net_device *child_netdev)
netdev_info(dev, "registering %s\n", child_netdev->name);
dev_hold(child_netdev);
- if (backup) {
- rcu_assign_pointer(vbi->backup_netdev, child_netdev);
- dev_get_stats(vbi->backup_netdev, &vbi->backup_stats);
- } else {
- rcu_assign_pointer(vbi->active_netdev, child_netdev);
- dev_get_stats(vbi->active_netdev, &vbi->active_stats);
- dev->min_mtu = child_netdev->min_mtu;
- dev->max_mtu = child_netdev->max_mtu;
- }
+ rcu_assign_pointer(vbi->active_netdev, child_netdev);
+ dev_get_stats(vbi->active_netdev, &vbi->active_stats);
+ dev->min_mtu = child_netdev->min_mtu;
+ dev->max_mtu = child_netdev->max_mtu;
return NOTIFY_OK;
@@ -3070,13 +3059,15 @@ static int virtnet_bypass_register_child(struct net_device *child_netdev)
static int virtnet_bypass_unregister_child(struct net_device *child_netdev)
{
struct virtnet_bypass_info *vbi;
- struct net_device *dev, *backup;
+ struct virtnet_info *vi;
+ struct net_device *dev;
dev = get_virtnet_bypass_byref(child_netdev);
if (!dev)
return NOTIFY_DONE;
- vbi = netdev_priv(dev);
+ vi = netdev_priv(dev);
+ vbi = vi->vbi;
netdev_info(dev, "unregistering %s\n", child_netdev->name);
@@ -3084,41 +3075,35 @@ static int virtnet_bypass_unregister_child(struct net_device *child_netdev)
netdev_upper_dev_unlink(child_netdev, dev);
child_netdev->flags &= ~IFF_SLAVE;
- if (child_netdev->dev.parent == dev->dev.parent) {
- RCU_INIT_POINTER(vbi->backup_netdev, NULL);
- } else {
- RCU_INIT_POINTER(vbi->active_netdev, NULL);
- backup = rtnl_dereference(vbi->backup_netdev);
- if (backup) {
- dev->min_mtu = backup->min_mtu;
- dev->max_mtu = backup->max_mtu;
- }
- }
+ RCU_INIT_POINTER(vbi->active_netdev, NULL);
+ dev->min_mtu = MIN_MTU;
+ dev->max_mtu = MAX_MTU;
dev_put(child_netdev);
+ if (!(vi->status & VIRTIO_NET_S_LINK_UP)) {
+ netif_carrier_off(dev);
+ netif_tx_stop_all_queues(dev);
+ }
+
return NOTIFY_OK;
}
static int virtnet_bypass_update_link(struct net_device *child_netdev)
{
- struct net_device *dev, *active, *backup;
- struct virtnet_bypass_info *vbi;
+ struct virtnet_info *vi;
+ struct net_device *dev;
dev = get_virtnet_bypass_byref(child_netdev);
- if (!dev || !netif_running(dev))
+ if (!dev)
return NOTIFY_DONE;
- vbi = netdev_priv(dev);
-
- active = rtnl_dereference(vbi->active_netdev);
- backup = rtnl_dereference(vbi->backup_netdev);
+ vi = netdev_priv(dev);
- if ((active && virtnet_bypass_xmit_ready(active)) ||
- (backup && virtnet_bypass_xmit_ready(backup))) {
+ if (virtnet_bypass_xmit_ready(child_netdev)) {
netif_carrier_on(dev);
netif_tx_wake_all_queues(dev);
- } else {
+ } else if (!(vi->status & VIRTIO_NET_S_LINK_UP)) {
netif_carrier_off(dev);
netif_tx_stop_all_queues(dev);
}
@@ -3169,107 +3154,41 @@ static struct notifier_block virtnet_bypass_notifier = {
static int virtnet_bypass_create(struct virtnet_info *vi)
{
- struct net_device *backup_netdev = vi->dev;
- struct device *dev = &vi->vdev->dev;
- struct net_device *bypass_netdev;
- int res;
+ struct net_device *dev = vi->dev;
+ struct virtnet_bypass_info *vbi;
- /* Alloc at least 2 queues, for now we are going with 16 assuming
- * that most devices being bonded won't have too many queues.
- */
- bypass_netdev = alloc_etherdev_mq(sizeof(struct virtnet_bypass_info),
- 16);
- if (!bypass_netdev) {
- dev_err(dev, "Unable to allocate bypass_netdev!\n");
+ vbi = kzalloc(sizeof(*vbi), GFP_KERNEL);
+ if (!vbi)
return -ENOMEM;
- }
-
- dev_net_set(bypass_netdev, dev_net(backup_netdev));
- SET_NETDEV_DEV(bypass_netdev, dev);
-
- bypass_netdev->netdev_ops = &virtnet_bypass_netdev_ops;
- bypass_netdev->ethtool_ops = &virtnet_bypass_ethtool_ops;
-
- /* Initialize the device options */
- bypass_netdev->flags |= IFF_MASTER;
- bypass_netdev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT |
- IFF_NO_QUEUE;
- bypass_netdev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
- IFF_TX_SKB_SHARING);
-
- /* don't acquire bypass netdev's netif_tx_lock when transmitting */
- bypass_netdev->features |= NETIF_F_LLTX;
-
- /* Don't allow bypass devices to change network namespaces. */
- bypass_netdev->features |= NETIF_F_NETNS_LOCAL;
-
- bypass_netdev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
- NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
- NETIF_F_HIGHDMA | NETIF_F_LRO;
-
- bypass_netdev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
- bypass_netdev->features |= bypass_netdev->hw_features;
-
- /* For now treat bypass netdev as VLAN challenged since we
- * cannot assume VLAN functionality with a VF
- */
- bypass_netdev->features |= NETIF_F_VLAN_CHALLENGED;
-
- memcpy(bypass_netdev->dev_addr, backup_netdev->dev_addr,
- bypass_netdev->addr_len);
- bypass_netdev->min_mtu = backup_netdev->min_mtu;
- bypass_netdev->max_mtu = backup_netdev->max_mtu;
+ dev->netdev_ops = &virtnet_bypass_netdev_ops;
+ dev->ethtool_ops = &virtnet_bypass_ethtool_ops;
- res = register_netdev(bypass_netdev);
- if (res < 0) {
- dev_err(dev, "Unable to register bypass_netdev!\n");
- free_netdev(bypass_netdev);
- return res;
- }
-
- netif_carrier_off(bypass_netdev);
-
- vi->bypass_netdev = bypass_netdev;
-
- /* Change the name of the backup interface to vbkup0
- * we may need to revisit naming later but this gets it out
- * of the way for now.
- */
- strcpy(backup_netdev->name, "vbkup%d");
+ vbi->backup_netdev = dev;
+ virtnet_stats(vbi->backup_netdev, &vbi->backup_stats);
+ vi->vbi = vbi;
return 0;
}
static void virtnet_bypass_destroy(struct virtnet_info *vi)
{
- struct net_device *bypass_netdev = vi->bypass_netdev;
- struct virtnet_bypass_info *vbi;
+ struct virtnet_bypass_info *vbi = vi->vbi;
struct net_device *child_netdev;
- /* no device found, nothing to free */
- if (!bypass_netdev)
+ if (!vbi)
return;
- vbi = netdev_priv(bypass_netdev);
-
- netif_device_detach(bypass_netdev);
-
rtnl_lock();
child_netdev = rtnl_dereference(vbi->active_netdev);
if (child_netdev)
virtnet_bypass_unregister_child(child_netdev);
- child_netdev = rtnl_dereference(vbi->backup_netdev);
- if (child_netdev)
- virtnet_bypass_unregister_child(child_netdev);
-
- unregister_netdevice(bypass_netdev);
-
rtnl_unlock();
- free_netdev(bypass_netdev);
+ kfree(vbi);
+ vi->vbi = NULL;
}
static int virtnet_probe(struct virtio_device *vdev)
--
2.14.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox