* [PATCH 1/3] virtio: Basic implementation of virtio pstore driver
From: Namhyung Kim @ 2016-08-31 8:08 UTC (permalink / raw)
To: virtio-dev, virtualization, kvm, qemu-devel
Cc: Tony Luck, Radim Krčmář, Kees Cook,
Michael S. Tsirkin, Anton Vorontsov, Will Deacon, LKML,
Steven Rostedt, Minchan Kim, Anthony Liguori, Colin Cross,
Paolo Bonzini, Ingo Molnar
In-Reply-To: <20160831080802.13408-1-namhyung@kernel.org>
The virtio pstore driver provides an interface to the pstore subsystem so
that the guest kernel's log/dump message can be saved on the host
machine. Users can access the log file directly on the host, or on the
guest at the next boot using pstore filesystem. It currently deals with
kernel log (printk) buffer only, but we can extend it to have other
information (like ftrace dump) later.
It supports a legacy PCI device using a single order-2 page buffer. It uses
two virtqueues - one for (sync) read and another for (async) write.
Since it cannot wait for a write to finish, it supports up to 128
concurrent I/O requests. The buffer size is configurable now.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Anthony Liguori <aliguori@amazon.com>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: kvm@vger.kernel.org
Cc: qemu-devel@nongnu.org
Cc: virtualization@lists.linux-foundation.org
Cc: virtio-dev@lists.oasis-open.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
drivers/virtio/Kconfig | 10 +
drivers/virtio/Makefile | 1 +
drivers/virtio/virtio_pstore.c | 417 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/Kbuild | 1 +
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_pstore.h | 74 +++++++
6 files changed, 504 insertions(+)
create mode 100644 drivers/virtio/virtio_pstore.c
create mode 100644 include/uapi/linux/virtio_pstore.h
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 77590320d44c..8f0e6c796c12 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -58,6 +58,16 @@ config VIRTIO_INPUT
If unsure, say M.
+config VIRTIO_PSTORE
+ tristate "Virtio pstore driver"
+ depends on VIRTIO
+ depends on PSTORE
+ ---help---
+ This driver supports virtio pstore devices to save/restore
+ panic and oops messages on the host.
+
+ If unsure, say M.
+
config VIRTIO_MMIO
tristate "Platform bus driver for memory mapped virtio devices"
depends on HAS_IOMEM && HAS_DMA
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 41e30e3dc842..bee68cb26d48 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -5,3 +5,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
+obj-$(CONFIG_VIRTIO_PSTORE) += virtio_pstore.o
diff --git a/drivers/virtio/virtio_pstore.c b/drivers/virtio/virtio_pstore.c
new file mode 100644
index 000000000000..ec41f0d2f0b7
--- /dev/null
+++ b/drivers/virtio/virtio_pstore.c
@@ -0,0 +1,417 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pstore.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_pstore.h>
+
+#define VIRT_PSTORE_ORDER 2
+#define VIRT_PSTORE_BUFSIZE (4096 << VIRT_PSTORE_ORDER)
+#define VIRT_PSTORE_NR_REQ 128
+
+/*
+ * Per-device driver state. Allocated in virtpstore_probe() and stored in
+ * vdev->priv; also reachable from the pstore callbacks through psi->data.
+ */
+struct virtio_pstore {
+ struct virtio_device *vdev;
+ /* vq[0]: open/close/read/erase requests; vq[1]: write requests */
+ struct virtqueue *vq[2];
+ struct pstore_info pstore;
+ /* request/response slots handed out by virt_pstore_get_reqs() */
+ struct virtio_pstore_req req[VIRT_PSTORE_NR_REQ];
+ struct virtio_pstore_res res[VIRT_PSTORE_NR_REQ];
+ /* running counter; the slot used is req_id % VIRT_PSTORE_NR_REQ */
+ unsigned int req_id;
+
+ /* Waiting for host to ack */
+ wait_queue_head_t acked;
+ /* set by virtpstore_check(); once set, virt_pstore_write() bails out */
+ int failed;
+};
+
+#define TYPE_TABLE_ENTRY(_entry) \
+ { PSTORE_TYPE_##_entry, VIRTIO_PSTORE_TYPE_##_entry }
+
+/*
+ * Mapping between pstore record types and virtio pstore type values (in
+ * CPU byte order). The table is only read, and only from this file, so it
+ * is static const rather than a writable global symbol.
+ */
+static const struct type_table {
+	int pstore;
+	u16 virtio;
+} type_table[] = {
+	TYPE_TABLE_ENTRY(DMESG),
+};
+
+#undef TYPE_TABLE_ENTRY
+
+
+/*
+ * Convert a pstore record type to the virtio pstore type, already in the
+ * byte order expected by the device. Types that have no type_table entry
+ * are reported as VIRTIO_PSTORE_TYPE_UNKNOWN.
+ */
+static u16 to_virtio_type(struct virtio_pstore *vps, enum pstore_type_id type)
+{
+	u16 vtype = VIRTIO_PSTORE_TYPE_UNKNOWN;
+	unsigned int n;
+
+	for (n = 0; n < ARRAY_SIZE(type_table); n++) {
+		if (type_table[n].pstore == type) {
+			vtype = type_table[n].virtio;
+			break;
+		}
+	}
+
+	return cpu_to_virtio16(vps->vdev, vtype);
+}
+
+/*
+ * Convert a device-endian virtio pstore type to the matching pstore record
+ * type, or PSTORE_TYPE_UNKNOWN when type_table has no entry for it.
+ */
+static enum pstore_type_id from_virtio_type(struct virtio_pstore *vps, u16 type)
+{
+	const u16 wanted = virtio16_to_cpu(vps->vdev, type);
+	unsigned int n = 0;
+
+	while (n < ARRAY_SIZE(type_table)) {
+		if (type_table[n].virtio == wanted)
+			return type_table[n].pstore;
+		n++;
+	}
+
+	return PSTORE_TYPE_UNKNOWN;
+}
+
+/*
+ * Callback of the first virtqueue (vq[0]): wake whoever sleeps on ->acked.
+ * The sleeper itself collects the completed buffer.
+ */
+static void virtpstore_ack(struct virtqueue *vq)
+{
+	struct virtio_pstore *state = vq->vdev->priv;
+	wait_queue_head_t *waiters = &state->acked;
+
+	wake_up(waiters);
+}
+
+/*
+ * Callback of the second virtqueue (vq[1]): reap finished write requests
+ * and latch ->failed when the host reports an error.
+ *
+ * One callback may cover several completions, so every available buffer is
+ * drained. The token returned by virtqueue_get_buf() is whatever the
+ * submitter passed as 'data'; it is only interpreted as a response when it
+ * really points into vps->res[], so any other token is skipped instead of
+ * being misread. The host result is a signed value carried in an unsigned
+ * 32-bit field, hence the cast before the sign test.
+ */
+static void virtpstore_check(struct virtqueue *vq)
+{
+	struct virtio_pstore *vps = vq->vdev->priv;
+	struct virtio_pstore_res *first = &vps->res[0];
+	struct virtio_pstore_res *last = &vps->res[VIRT_PSTORE_NR_REQ - 1];
+	struct virtio_pstore_res *res;
+	unsigned int len;
+	void *token;
+
+	while ((token = virtqueue_get_buf(vq, &len)) != NULL) {
+		if (token < (void *)first || token > (void *)last)
+			continue;
+
+		res = token;
+		if ((int)virtio32_to_cpu(vq->vdev, res->ret) < 0)
+			vps->failed = 1;
+	}
+}
+
+/*
+ * Hand out the next request/response slot pair. Slots are used round-robin
+ * (req_id modulo VIRT_PSTORE_NR_REQ) and are zeroed before being returned
+ * through @preq and @pres.
+ */
+static void virt_pstore_get_reqs(struct virtio_pstore *vps,
+				 struct virtio_pstore_req **preq,
+				 struct virtio_pstore_res **pres)
+{
+	unsigned int slot = vps->req_id % VIRT_PSTORE_NR_REQ;
+	struct virtio_pstore_req *rq = &vps->req[slot];
+	struct virtio_pstore_res *rs = &vps->res[slot];
+
+	vps->req_id++;
+
+	memset(rq, 0, sizeof(*rq));
+	memset(rs, 0, sizeof(*rs));
+
+	*preq = rq;
+	*pres = rs;
+}
+
+/*
+ * pstore ->open callback: send VIRTIO_PSTORE_CMD_OPEN on vq[0] and sleep
+ * until the host has answered.
+ *
+ * Returns the result reported by the host, or the error from
+ * virtqueue_add_sgs() when the request could not be queued. Without that
+ * check a failed add would leave wait_event() sleeping forever, because no
+ * buffer would ever come back.
+ */
+static int virt_pstore_open(struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[1], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	int err;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_OPEN);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_one(sgi, res, sizeof(*res));
+	err = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	return virtio32_to_cpu(vps->vdev, res->ret);
+}
+
+/*
+ * pstore ->close callback: send VIRTIO_PSTORE_CMD_CLOSE on vq[0] and sleep
+ * until the host has answered.
+ *
+ * req/res are obtained only from virt_pstore_get_reqs(). They are not
+ * pre-initialized from vps->req_id, because that counter is not reduced
+ * modulo VIRT_PSTORE_NR_REQ and would index past the arrays.
+ *
+ * Returns the result reported by the host, or the error from
+ * virtqueue_add_sgs() when the request could not be queued (otherwise the
+ * wait below would never finish).
+ */
+static int virt_pstore_close(struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[1], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	int err;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_CLOSE);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_one(sgi, res, sizeof(*res));
+	err = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	return virtio32_to_cpu(vps->vdev, res->ret);
+}
+
+/*
+ * pstore ->read callback: fetch the next record from the host.
+ *
+ * The host fills in a response header, a virtio_pstore_fileinfo and the
+ * record data (into psi->buf). On success the metadata is decoded into the
+ * output parameters and the data is copied into a freshly kmalloc'ed
+ * buffer returned through @buf.
+ *
+ * Returns the record length, the negative result reported by the host,
+ * -ENOMEM, -EIO when the host claims a length larger than the buffer it
+ * was given, the virtqueue_add_sgs() error, or -1 when the host returned
+ * fewer bytes than the two headers.
+ *
+ * NOTE(review): 'info' lives on the stack and is handed to the device via a
+ * scatterlist; confirm this is acceptable on configurations with
+ * virtually-mapped kernel stacks.
+ */
+static ssize_t virt_pstore_read(u64 *id, enum pstore_type_id *type,
+				int *count, struct timespec *time,
+				char **buf, bool *compressed,
+				ssize_t *ecc_notice_size,
+				struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct virtio_pstore_fileinfo info;
+	struct scatterlist sgo[1], sgi[3];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	unsigned int flags;
+	int ret;
+	void *bf;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_READ);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_table(sgi, 3);
+	sg_set_buf(&sgi[0], res, sizeof(*res));
+	sg_set_buf(&sgi[1], &info, sizeof(info));
+	sg_set_buf(&sgi[2], psi->buf, psi->bufsize);
+	ret = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (ret)
+		return ret;	/* nothing queued: waiting would never end */
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	if (len < sizeof(*res) + sizeof(info))
+		return -1;
+
+	ret = virtio32_to_cpu(vps->vdev, res->ret);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * info.len comes from the host. The data was received into psi->buf,
+	 * so a length beyond psi->bufsize cannot be valid and must not be
+	 * used for the copy below.
+	 */
+	len = virtio32_to_cpu(vps->vdev, info.len);
+	if (len > psi->bufsize)
+		return -EIO;
+
+	bf = kmalloc(len, GFP_KERNEL);
+	if (bf == NULL)
+		return -ENOMEM;
+
+	*id = virtio64_to_cpu(vps->vdev, info.id);
+	*type = from_virtio_type(vps, info.type);
+	*count = virtio32_to_cpu(vps->vdev, info.count);
+
+	flags = virtio32_to_cpu(vps->vdev, info.flags);
+	*compressed = flags & VIRTIO_PSTORE_FL_COMPRESSED;
+
+	time->tv_sec = virtio64_to_cpu(vps->vdev, info.time_sec);
+	time->tv_nsec = virtio32_to_cpu(vps->vdev, info.time_nsec);
+
+	memcpy(bf, psi->buf, len);
+	*buf = bf;
+
+	return len;
+}
+
+/*
+ * pstore ->write callback: queue the first @size bytes of psi->buf as a
+ * VIRTIO_PSTORE_CMD_WRITE request on vq[1]. The request is not waited for;
+ * its completion is handled by the vq[1] callback.
+ *
+ * The response slot is passed as the virtqueue token because the vq[1]
+ * callback interprets the pointer returned by virtqueue_get_buf() as a
+ * struct virtio_pstore_res.
+ *
+ * Returns 0 when the request was queued, -1 when an earlier write has
+ * failed, or the error from virtqueue_add_sgs().
+ */
+static int notrace virt_pstore_write(enum pstore_type_id type,
+				     enum kmsg_dump_reason reason,
+				     u64 *id, unsigned int part, int count,
+				     bool compressed, size_t size,
+				     struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[2], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int flags = compressed ? VIRTIO_PSTORE_FL_COMPRESSED : 0;
+	int err;
+
+	if (vps->failed)
+		return -1;
+
+	*id = vps->req_id;
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_WRITE);
+	req->type = to_virtio_type(vps, type);
+	req->flags = cpu_to_virtio32(vps->vdev, flags);
+
+	sg_init_table(sgo, 2);
+	sg_set_buf(&sgo[0], req, sizeof(*req));
+	sg_set_buf(&sgo[1], psi->buf, size);
+	sg_init_one(sgi, res, sizeof(*res));
+	err = virtqueue_add_sgs(vps->vq[1], sgs, 1, 1, res, GFP_ATOMIC);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[1]);
+
+	return 0;
+}
+
+/*
+ * pstore ->erase callback: ask the host to remove the record identified by
+ * @type, @id and @count, and sleep until the host has answered.
+ *
+ * Returns the result reported by the host, or the error from
+ * virtqueue_add_sgs() when the request could not be queued (otherwise the
+ * wait below would never finish).
+ */
+static int virt_pstore_erase(enum pstore_type_id type, u64 id, int count,
+			     struct timespec time, struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[1], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	int err;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_ERASE);
+	req->type = to_virtio_type(vps, type);
+	req->id = cpu_to_virtio64(vps->vdev, id);
+	req->count = cpu_to_virtio32(vps->vdev, count);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_one(sgi, res, sizeof(*res));
+	err = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	return virtio32_to_cpu(vps->vdev, res->ret);
+}
+
+/*
+ * Allocate the pstore record buffer, fill in the pstore_info callbacks and
+ * register with the pstore core.
+ *
+ * A bufsize of 0 selects VIRT_PSTORE_BUFSIZE. Returns 0 on success,
+ * -ENOMEM when the buffer cannot be allocated, or the pstore_register()
+ * error, in which case the buffer is released again.
+ */
+static int virt_pstore_init(struct virtio_pstore *vps)
+{
+	struct pstore_info *psinfo = &vps->pstore;
+	int err;
+
+	if (!psinfo->bufsize)
+		psinfo->bufsize = VIRT_PSTORE_BUFSIZE;
+
+	psinfo->buf = alloc_pages_exact(psinfo->bufsize, GFP_KERNEL);
+	if (!psinfo->buf) {
+		pr_err("cannot allocate pstore buffer\n");
+		return -ENOMEM;
+	}
+
+	psinfo->owner = THIS_MODULE;
+	psinfo->name = "virtio";
+	psinfo->open = virt_pstore_open;
+	psinfo->close = virt_pstore_close;
+	psinfo->read = virt_pstore_read;
+	psinfo->erase = virt_pstore_erase;
+	psinfo->write = virt_pstore_write;
+	psinfo->flags = PSTORE_FLAGS_FRAGILE;
+
+	psinfo->data = vps;
+	spin_lock_init(&psinfo->buf_lock);
+
+	err = pstore_register(psinfo);
+	if (err) {
+		/* the buffer came from alloc_pages_exact(), not kmalloc() */
+		free_pages_exact(psinfo->buf, psinfo->bufsize);
+		psinfo->buf = NULL;
+	}
+
+	return err;
+}
+
+/*
+ * Undo virt_pstore_init(): unregister from pstore, then release the record
+ * buffer and clear the buffer fields. Always returns 0.
+ */
+static int virt_pstore_exit(struct virtio_pstore *vps)
+{
+	struct pstore_info *info = &vps->pstore;
+	void *record_buf;
+	size_t record_size;
+
+	pstore_unregister(info);
+
+	record_buf = info->buf;
+	record_size = info->bufsize;
+	free_pages_exact(record_buf, record_size);
+
+	info->bufsize = 0;
+	info->buf = NULL;
+
+	return 0;
+}
+
+/*
+ * Look up the two virtqueues: "pstore_read" (completions handled by
+ * virtpstore_ack) and "pstore_write" (completions handled by
+ * virtpstore_check). Returns the find_vqs() result.
+ */
+static int virtpstore_init_vqs(struct virtio_pstore *vps)
+{
+	struct virtio_device *dev = vps->vdev;
+	const char *vq_names[] = { "pstore_read", "pstore_write" };
+	vq_callback_t *vq_handlers[] = { virtpstore_ack, virtpstore_check };
+	int rc;
+
+	rc = dev->config->find_vqs(dev, 2, vps->vq, vq_handlers, vq_names);
+
+	return rc;
+}
+
+/*
+ * Read the buffer size from the device configuration space and store it,
+ * rounded up to a whole number of pages, as the pstore buffer size.
+ */
+static void virtpstore_init_config(struct virtio_pstore *vps)
+{
+	struct pstore_info *info = &vps->pstore;
+	u32 dev_bufsize;
+
+	virtio_cread(vps->vdev, struct virtio_pstore_config, bufsize,
+		     &dev_bufsize);
+
+	info->bufsize = PAGE_ALIGN(dev_bufsize);
+}
+
+/*
+ * Write the buffer size the driver actually uses back into the device
+ * configuration space.
+ */
+static void virtpstore_confirm_config(struct virtio_pstore *vps)
+{
+	u32 used_bufsize;
+
+	used_bufsize = vps->pstore.bufsize;
+	virtio_cwrite(vps->vdev, struct virtio_pstore_config, bufsize,
+		      &used_bufsize);
+}
+
+/*
+ * Bind to a virtio pstore device: allocate the driver state, set up the
+ * virtqueues, read the device configuration, register with pstore and mark
+ * the device ready.
+ *
+ * The wait queue is initialized before the virtqueues are looked up and
+ * before pstore registration, because the vq[0] callback calls wake_up()
+ * on it and the pstore callbacks sleep on it; both become reachable as
+ * soon as they are registered.
+ *
+ * NOTE(review): virtio_device_ready() still runs after pstore registration;
+ * confirm that no pstore callback can reach the device before that point.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int virtpstore_probe(struct virtio_device *vdev)
+{
+	struct virtio_pstore *vps;
+	int err;
+
+	if (!vdev->config->get) {
+		dev_err(&vdev->dev, "driver init: config access disabled\n");
+		return -EINVAL;
+	}
+
+	vdev->priv = vps = kzalloc(sizeof(*vps), GFP_KERNEL);
+	if (!vps) {
+		err = -ENOMEM;
+		goto out;
+	}
+	vps->vdev = vdev;
+	init_waitqueue_head(&vps->acked);
+
+	err = virtpstore_init_vqs(vps);
+	if (err < 0)
+		goto out_free;
+
+	virtpstore_init_config(vps);
+
+	err = virt_pstore_init(vps);
+	if (err)
+		goto out_del_vq;
+
+	virtpstore_confirm_config(vps);
+
+	virtio_device_ready(vdev);
+
+	/* pstore_info.bufsize is a size_t, hence %zu */
+	dev_info(&vdev->dev, "driver init: ok (bufsize = %zuK, flags = %x)\n",
+		 vps->pstore.bufsize >> 10, vps->pstore.flags);
+
+	return 0;
+
+out_del_vq:
+	vdev->config->del_vqs(vdev);
+out_free:
+	kfree(vps);
+out:
+	dev_err(&vdev->dev, "driver init: failed with %d\n", err);
+	return err;
+}
+
+/*
+ * Unbind from the device: unregister from pstore, reset the device, delete
+ * the virtqueues and free the driver state, in that order.
+ */
+static void virtpstore_remove(struct virtio_device *vdev)
+{
+	struct virtio_pstore *state = vdev->priv;
+
+	virt_pstore_exit(state);
+
+	/* The device is reset first so that the queues can be cleaned up. */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+
+	kfree(state);
+}
+
+/* Feature table: intentionally empty. */
+static unsigned int features[] = {
+};
+
+/* Match virtio pstore devices from any vendor. */
+static struct virtio_device_id id_table[] = {
+	{ .device = VIRTIO_ID_PSTORE, .vendor = VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+/* Driver registration record used by module_virtio_driver(). */
+static struct virtio_driver virtio_pstore_driver = {
+	.driver = {
+		.name	= KBUILD_MODNAME,
+		.owner	= THIS_MODULE,
+	},
+	.id_table		= id_table,
+	.feature_table		= features,
+	.feature_table_size	= ARRAY_SIZE(features),
+	.probe			= virtpstore_probe,
+	.remove			= virtpstore_remove,
+};
+
+module_virtio_driver(virtio_pstore_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Namhyung Kim <namhyung@kernel.org>");
+MODULE_DESCRIPTION("Virtio pstore driver");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6d4e92ccdc91..9bbb1554d8b2 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -449,6 +449,7 @@ header-y += virtio_ids.h
header-y += virtio_input.h
header-y += virtio_net.h
header-y += virtio_pci.h
+header-y += virtio_pstore.h
header-y += virtio_ring.h
header-y += virtio_rng.h
header-y += virtio_scsi.h
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 77925f587b15..c72a9ab588c0 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_PSTORE 22 /* virtio pstore */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_pstore.h b/include/uapi/linux/virtio_pstore.h
new file mode 100644
index 000000000000..f4b0d204d8ae
--- /dev/null
+++ b/include/uapi/linux/virtio_pstore.h
@@ -0,0 +1,74 @@
+#ifndef _LINUX_VIRTIO_PSTORE_H
+#define _LINUX_VIRTIO_PSTORE_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+#include <linux/types.h>
+#include <linux/virtio_types.h>
+
+#define VIRTIO_PSTORE_CMD_NULL 0
+#define VIRTIO_PSTORE_CMD_OPEN 1
+#define VIRTIO_PSTORE_CMD_READ 2
+#define VIRTIO_PSTORE_CMD_WRITE 3
+#define VIRTIO_PSTORE_CMD_ERASE 4
+#define VIRTIO_PSTORE_CMD_CLOSE 5
+
+#define VIRTIO_PSTORE_TYPE_UNKNOWN 0
+#define VIRTIO_PSTORE_TYPE_DMESG 1
+
+#define VIRTIO_PSTORE_FL_COMPRESSED 1
+
+/* Request header sent from the driver to the device. */
+struct virtio_pstore_req {
+ /* one of VIRTIO_PSTORE_CMD_* */
+ __virtio16 cmd;
+ /* one of VIRTIO_PSTORE_TYPE_* */
+ __virtio16 type;
+ /* VIRTIO_PSTORE_FL_* bits */
+ __virtio32 flags;
+ __virtio64 id;
+ __virtio32 count;
+ __virtio32 reserved;
+};
+
+/* Response header written by the device for every request. */
+struct virtio_pstore_res {
+ __virtio16 cmd;
+ __virtio16 type;
+ /* result of the command; the Linux driver reads it as a signed value */
+ __virtio32 ret;
+};
+
+/* Record metadata returned by the device in response to a READ command. */
+struct virtio_pstore_fileinfo {
+ __virtio64 id;
+ __virtio32 count;
+ /* one of VIRTIO_PSTORE_TYPE_* */
+ __virtio16 type;
+ __virtio16 unused;
+ /* VIRTIO_PSTORE_FL_* bits */
+ __virtio32 flags;
+ /* number of data bytes that follow this structure */
+ __virtio32 len;
+ __virtio64 time_sec;
+ __virtio32 time_nsec;
+ __virtio32 reserved;
+};
+
+/* Device configuration space layout. */
+struct virtio_pstore_config {
+ /* size of the pstore buffer, in bytes */
+ __virtio32 bufsize;
+};
+
+#endif /* _LINUX_VIRTIO_PSTORE_H */
--
2.9.3
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply related
* [PATCH 2/3] qemu: Implement virtio-pstore device
From: Namhyung Kim @ 2016-08-31 8:08 UTC (permalink / raw)
To: virtio-dev, virtualization, kvm, qemu-devel
Cc: Anton Vorontsov, Daniel P . Berrange, Kees Cook,
Michael S. Tsirkin, Namhyung Kim, Radim Krčmář,
LKML, Steven Rostedt, Minchan Kim, Tony Luck, Anthony Liguori,
Colin Cross, Paolo Bonzini, Ingo Molnar
In-Reply-To: <20160831080802.13408-1-namhyung@kernel.org>
From: Namhyung Kim <namhyung@gmail.com>
Add a virtio pstore device to allow kernel log files to be saved on the host.
It saves the log files in the directory given by the pstore device's
'directory' option.
$ qemu-system-x86_64 -device virtio-pstore,directory=dir-xx ...
(guest) # echo c > /proc/sysrq-trigger
$ ls dir-xx
dmesg-1.enc.z dmesg-2.enc.z
The log files are usually compressed using zlib. Users can see the log
messages directly on the host or on the guest (using pstore filesystem).
The 'directory' property is required for virtio-pstore device to work.
It also adds a 'bufsize' property to set the size of the pstore buffer.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Anthony Liguori <aliguori@amazon.com>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Daniel P. Berrange <berrange@redhat.com>
Cc: kvm@vger.kernel.org
Cc: qemu-devel@nongnu.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
---
hw/virtio/Makefile.objs | 2 +-
hw/virtio/virtio-pci.c | 52 ++
hw/virtio/virtio-pci.h | 14 +
hw/virtio/virtio-pstore.c | 689 +++++++++++++++++++++++++
include/hw/pci/pci.h | 1 +
include/hw/virtio/virtio-pstore.h | 36 ++
include/standard-headers/linux/virtio_ids.h | 1 +
include/standard-headers/linux/virtio_pstore.h | 76 +++
qdev-monitor.c | 1 +
9 files changed, 871 insertions(+), 1 deletion(-)
create mode 100644 hw/virtio/virtio-pstore.c
create mode 100644 include/hw/virtio/virtio-pstore.h
create mode 100644 include/standard-headers/linux/virtio_pstore.h
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index 3e2b175..aae7082 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -4,4 +4,4 @@ common-obj-y += virtio-bus.o
common-obj-y += virtio-mmio.o
obj-y += virtio.o virtio-balloon.o
-obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o
+obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o virtio-pstore.o
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 755f921..c184823 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2416,6 +2416,57 @@ static const TypeInfo virtio_host_pci_info = {
};
#endif
+/* virtio-pstore-pci */
+
+/*
+ * Realize the PCI proxy: attach the embedded virtio-pstore device to the
+ * proxy's virtio bus and realize it. Any failure is reported through errp.
+ */
+static void virtio_pstore_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+    VirtIOPstorePCI *proxy = VIRTIO_PSTORE_PCI(vpci_dev);
+    DeviceState *child = DEVICE(&proxy->vdev);
+    Error *local_err = NULL;
+
+    qdev_set_parent_bus(child, BUS(&vpci_dev->bus));
+    object_property_set_bool(OBJECT(child), true, "realized", &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+    }
+}
+
+/*
+ * Class initializer: install the realize hook, set the PCI identification
+ * of the device and put it into the "misc" device category.
+ */
+static void virtio_pstore_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    VirtioPCIClass *vpci_class = VIRTIO_PCI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    vpci_class->realize = virtio_pstore_pci_realize;
+    set_bit(DEVICE_CATEGORY_MISC, dev_class->categories);
+
+    pci_class->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+    pci_class->device_id = PCI_DEVICE_ID_VIRTIO_PSTORE;
+    pci_class->revision = VIRTIO_PCI_ABI_VERSION;
+    pci_class->class_id = PCI_CLASS_OTHERS;
+}
+
+/*
+ * Instance initializer: set up the embedded virtio-pstore device and expose
+ * its "directory" and "bufsize" properties on the PCI proxy as aliases.
+ */
+static void virtio_pstore_pci_instance_init(Object *obj)
+{
+    VirtIOPstorePCI *proxy = VIRTIO_PSTORE_PCI(obj);
+    Object *child = OBJECT(&proxy->vdev);
+
+    virtio_instance_init_common(obj, &proxy->vdev, sizeof(proxy->vdev),
+                                TYPE_VIRTIO_PSTORE);
+
+    object_property_add_alias(obj, "directory", child, "directory",
+                              &error_abort);
+    object_property_add_alias(obj, "bufsize", child, "bufsize",
+                              &error_abort);
+}
+
+/* QOM type description of the virtio-pstore PCI proxy. */
+static const TypeInfo virtio_pstore_pci_info = {
+    .name          = TYPE_VIRTIO_PSTORE_PCI,
+    .parent        = TYPE_VIRTIO_PCI,
+    .class_init    = virtio_pstore_pci_class_init,
+    .instance_init = virtio_pstore_pci_instance_init,
+    .instance_size = sizeof(VirtIOPstorePCI),
+};
+
/* virtio-pci-bus */
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
@@ -2485,6 +2536,7 @@ static void virtio_pci_register_types(void)
#ifdef CONFIG_VHOST_SCSI
type_register_static(&vhost_scsi_pci_info);
#endif
+ type_register_static(&virtio_pstore_pci_info);
}
type_init(virtio_pci_register_types)
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 25fbf8a..354b2b7 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -31,6 +31,7 @@
#ifdef CONFIG_VHOST_SCSI
#include "hw/virtio/vhost-scsi.h"
#endif
+#include "hw/virtio/virtio-pstore.h"
typedef struct VirtIOPCIProxy VirtIOPCIProxy;
typedef struct VirtIOBlkPCI VirtIOBlkPCI;
@@ -44,6 +45,7 @@ typedef struct VirtIOInputPCI VirtIOInputPCI;
typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI;
typedef struct VirtIOInputHostPCI VirtIOInputHostPCI;
typedef struct VirtIOGPUPCI VirtIOGPUPCI;
+typedef struct VirtIOPstorePCI VirtIOPstorePCI;
/* virtio-pci-bus */
@@ -324,6 +326,18 @@ struct VirtIOGPUPCI {
VirtIOGPU vdev;
};
+/*
+ * virtio-pstore-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_PSTORE_PCI "virtio-pstore-pci"
+#define VIRTIO_PSTORE_PCI(obj) \
+ OBJECT_CHECK(VirtIOPstorePCI, (obj), TYPE_VIRTIO_PSTORE_PCI)
+
+struct VirtIOPstorePCI {
+ /* PCI proxy base object */
+ VirtIOPCIProxy parent_obj;
+ /* embedded virtio-pstore device, initialized in the instance init hook */
+ VirtIOPstore vdev;
+};
+
/* Virtio ABI version, if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION 0
diff --git a/hw/virtio/virtio-pstore.c b/hw/virtio/virtio-pstore.c
new file mode 100644
index 0000000..08d1063
--- /dev/null
+++ b/hw/virtio/virtio-pstore.c
@@ -0,0 +1,689 @@
+/*
+ * Virtio Pstore Device
+ *
+ * Copyright (C) 2016 LG Electronics
+ *
+ * Authors:
+ * Namhyung Kim <namhyung@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <stdio.h>
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "qapi/visitor.h"
+#include "qapi-event.h"
+#include "io/channel-file.h"
+#include "trace.h"
+
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-pstore.h"
+
+#define PSTORE_DEFAULT_BUFSIZE (16 * 1024)
+#define PSTORE_DEFAULT_FILE_MAX 5
+
+/* the index should match to the type value */
+/*
+ * File name prefix for each record type, indexed by the type value.
+ * Designated initializers tie each entry to its type constant, and the
+ * pointers themselves are const because the table is never modified.
+ */
+static const char *const virtio_pstore_file_prefix[] = {
+    [VIRTIO_PSTORE_TYPE_UNKNOWN] = "unknown-",
+    [VIRTIO_PSTORE_TYPE_DMESG]   = "dmesg-",
+};
+
+/*
+ * Build the host path "<directory>/<prefix><id><suffix>" for a request.
+ *
+ * The prefix is chosen by the request type ("unknown-" for out-of-range
+ * types); the suffix is ".enc.z" when the compressed flag is set.
+ *
+ * For an erase request the id is taken from the request itself, since it
+ * names an existing record. Any other request allocates the next id from
+ * s->id. Previously erase also consumed a fresh id, so it could never name
+ * the record the guest asked to remove.
+ *
+ * NOTE(review): the suffix still depends on req->flags; confirm that the
+ * guest sets the compressed flag in erase requests for compressed records.
+ *
+ * Returns a newly allocated string that the caller must g_free().
+ */
+static char *virtio_pstore_to_filename(VirtIOPstore *s,
+                                       struct virtio_pstore_req *req)
+{
+    const char *basename = "unknown-";
+    const char *suffix = "";
+    unsigned long long id;
+    unsigned int type = le16_to_cpu(req->type);
+    unsigned int flags = le32_to_cpu(req->flags);
+
+    if (type < ARRAY_SIZE(virtio_pstore_file_prefix)) {
+        basename = virtio_pstore_file_prefix[type];
+    }
+
+    if (flags & VIRTIO_PSTORE_FL_COMPRESSED) {
+        suffix = ".enc.z";
+    }
+
+    if (le16_to_cpu(req->cmd) == VIRTIO_PSTORE_CMD_ERASE) {
+        id = le64_to_cpu(req->id);
+    } else {
+        id = s->id++;
+    }
+
+    return g_strdup_printf("%s/%s%llu%s", s->directory, basename, id, suffix);
+}
+
+/*
+ * Derive record metadata from a file name found in the pstore directory.
+ *
+ * The type is the index of the first matching entry of
+ * virtio_pstore_file_prefix, the id is parsed from the text following the
+ * prefix, and the compressed flag is set when the name ends in ".enc.z".
+ *
+ * Returns the newly allocated full path (caller frees with g_free()), or
+ * NULL when the name matches no known prefix.
+ */
+static char *virtio_pstore_from_filename(VirtIOPstore *s, char *name,
+                                         struct virtio_pstore_fileinfo *info)
+{
+    char *path = g_strdup_printf("%s/%s", s->directory, name);
+    const char *rest = NULL;
+    unsigned int t;
+
+    if (path == NULL) {
+        return NULL;
+    }
+
+    for (t = 0; t < ARRAY_SIZE(virtio_pstore_file_prefix); t++) {
+        const char *prefix = virtio_pstore_file_prefix[t];
+
+        if (g_str_has_prefix(name, prefix)) {
+            info->type = t;
+            rest = name + strlen(prefix);
+            break;
+        }
+    }
+
+    if (rest == NULL) {
+        /* no known prefix matched */
+        g_free(path);
+        return NULL;
+    }
+
+    qemu_strtoull(rest, NULL, 0, &info->id);
+
+    if (g_str_has_suffix(rest, ".enc.z")) {
+        info->flags = VIRTIO_PSTORE_FL_COMPRESSED;
+    } else {
+        info->flags = 0;
+    }
+
+    return path;
+}
+
+/*
+ * State shared with the scandir() callbacks below, which receive no user
+ * argument: filter_pstore() accepts the prefix_count prefixes starting at
+ * index prefix_idx, and sort_pstore() skips prefix_len characters before
+ * parsing the id.
+ */
+static int prefix_idx;
+static int prefix_count;
+static int prefix_len;
+
+/*
+ * scandir() filter: accept a directory entry when its name starts with one
+ * of the prefix_count prefixes beginning at index prefix_idx.
+ */
+static int filter_pstore(const struct dirent *de)
+{
+    int n = 0;
+
+    while (n < prefix_count) {
+        const char *candidate = virtio_pstore_file_prefix[prefix_idx + n];
+
+        if (g_str_has_prefix(de->d_name, candidate)) {
+            return 1;
+        }
+        n++;
+    }
+
+    return 0;
+}
+
+/*
+ * scandir() comparator: order entries by the numeric id that follows the
+ * prefix_len-character prefix of each name, smallest id first.
+ *
+ * The ids are compared explicitly. Returning their 64-bit difference
+ * converted to int can yield the wrong sign, or 0 for different ids.
+ */
+static int sort_pstore(const struct dirent **a, const struct dirent **b)
+{
+    uint64_t id_a, id_b;
+
+    qemu_strtoull((*a)->d_name + prefix_len, NULL, 0, &id_a);
+    qemu_strtoull((*b)->d_name + prefix_len, NULL, 0, &id_b);
+
+    if (id_a < id_b) {
+        return -1;
+    }
+    return id_a > id_b;
+}
+
+/*
+ * Remove the record of the given type with the smallest id once the
+ * directory holds at least s->file_max records of that type.
+ *
+ * Out-of-range types are treated as VIRTIO_PSTORE_TYPE_UNKNOWN.
+ *
+ * Returns 0 when nothing had to be removed, the unlink() result when a
+ * file was removed, or a negative value when the directory scan failed.
+ */
+static int delete_old_pstore_file(VirtIOPstore *s, unsigned short type)
+{
+    int ret = 0;
+    int i, num;
+    char *filename;
+    struct dirent **files;
+
+    if (type >= ARRAY_SIZE(virtio_pstore_file_prefix)) {
+        type = VIRTIO_PSTORE_TYPE_UNKNOWN;
+    }
+
+    prefix_idx = type;
+    prefix_len = strlen(virtio_pstore_file_prefix[type]);
+    prefix_count = 1;  /* only scan current type */
+
+    /* delete the oldest file in the same type */
+    num = scandir(s->directory, &files, filter_pstore, sort_pstore);
+    if (num < 0) {
+        return num;
+    }
+    /* num == 0 must not reach files[0], even when file_max is 0 */
+    if (num == 0 || num < (int)s->file_max) {
+        goto out;
+    }
+
+    filename = g_strdup_printf("%s/%s", s->directory, files[0]->d_name);
+    if (filename == NULL) {
+        ret = -1;
+        goto out;
+    }
+
+    ret = unlink(filename);
+    g_free(filename);  /* the path is only needed for the unlink() call */
+
+out:
+    for (i = 0; i < num; i++) {
+        g_free(files[i]);
+    }
+    g_free(files);
+
+    return ret;
+}
+
+/*
+ * Handle an OPEN command: scan the pstore directory for files with any
+ * known prefix, store the alphabetically sorted list in s->files and reset
+ * the read position. Returns 0 on success, -1 when the scan failed.
+ */
+static ssize_t virtio_pstore_do_open(VirtIOPstore *s)
+{
+    /* let the filter accept every known prefix */
+    prefix_count = ARRAY_SIZE(virtio_pstore_file_prefix);
+    prefix_idx = 0;
+
+    s->file_idx = 0;
+    s->num_file = scandir(s->directory, &s->files, filter_pstore, alphasort);
+
+    if (s->num_file >= 0) {
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Handle a CLOSE command: release the file list stored in s->files and
+ * reset the list state. Always returns 0.
+ */
+static ssize_t virtio_pstore_do_close(VirtIOPstore *s)
+{
+    int n = 0;
+
+    while (n < s->num_file) {
+        g_free(s->files[n]);
+        n++;
+    }
+
+    g_free(s->files);
+    s->files = NULL;
+    s->num_file = 0;
+
+    return 0;
+}
+
+/*
+ * Handle an ERASE command: unlink the file whose name is derived from the
+ * request. Returns the unlink() result, or -1 when no name could be built.
+ */
+static ssize_t virtio_pstore_do_erase(VirtIOPstore *s,
+                                      struct virtio_pstore_req *req)
+{
+    char *path = virtio_pstore_to_filename(s, req);
+    int rc;
+
+    if (path == NULL) {
+        return -1;
+    }
+
+    rc = unlink(path);
+    g_free(path);
+
+    return rc;
+}
+
+/* Context of one asynchronous READ, passed to pstore_async_read_fn(). */
+struct pstore_read_arg {
+ VirtIOPstore *vps;
+ /* request element; freed in free_rarg_fn() */
+ VirtQueueElement *elem;
+ /* metadata copied to the guest after the response header */
+ struct virtio_pstore_fileinfo info;
+ /* channel of the file being read; closed in free_rarg_fn() */
+ QIOChannel *ioc;
+};
+
+/*
+ * Watch callback for an asynchronous READ: read the file contents into the
+ * guest buffers that follow the response header and the fileinfo, then
+ * fill in both headers and complete the request on the read virtqueue.
+ *
+ * Failure is detected from the return value of qio_channel_readv() only.
+ * In that case the Error (if any) is reported and freed, no data bytes are
+ * accounted for, and info->len is set to 0 so that no uninitialized value
+ * is copied to the guest.
+ *
+ * Always returns FALSE so that the watch is removed after one invocation.
+ */
+static gboolean pstore_async_read_fn(QIOChannel *ioc, GIOCondition condition,
+                                     gpointer data)
+{
+    struct pstore_read_arg *rarg = data;
+    struct virtio_pstore_fileinfo *info = &rarg->info;
+    VirtIOPstore *vps = rarg->vps;
+    VirtQueueElement *elem = rarg->elem;
+    struct virtio_pstore_res res;
+    size_t offset = sizeof(res) + sizeof(*info);
+    struct iovec *sg = elem->in_sg;
+    unsigned int sg_num = elem->in_num;
+    Error *err = NULL;
+    ssize_t len;
+    int ret;
+
+    /* skip res and fileinfo */
+    iov_discard_front(&sg, &sg_num, offset);
+
+    len = qio_channel_readv(rarg->ioc, sg, sg_num, &err);
+    if (len < 0) {
+        if (err) {
+            error_report_err(err);
+        }
+        len = 0;
+        ret = -1;
+    } else {
+        ret = 0;
+    }
+    info->len = cpu_to_le32(len);
+
+    res.cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_READ);
+    res.type = cpu_to_le16(VIRTIO_PSTORE_TYPE_UNKNOWN);
+    res.ret = cpu_to_le32(ret);
+
+    /* now copy res and fileinfo */
+    iov_from_buf(elem->in_sg, elem->in_num, 0, &res, sizeof(res));
+    iov_from_buf(elem->in_sg, elem->in_num, sizeof(res), info, sizeof(*info));
+
+    virtqueue_push(vps->rvq, elem, offset + len);
+    virtio_notify(VIRTIO_DEVICE(vps), vps->rvq);
+
+    return FALSE;
+}
+
+/*
+ * Destroy notifier of the read watch: close the channel, drop the
+ * reference obtained when the channel was created, and free the request
+ * element and the context. Without the unref the channel object itself is
+ * never released.
+ */
+static void free_rarg_fn(gpointer data)
+{
+    struct pstore_read_arg *rarg = data;
+
+    qio_channel_close(rarg->ioc, NULL);
+    object_unref(OBJECT(rarg->ioc));
+
+    g_free(rarg->elem);
+    g_free(rarg);
+}
+
+/*
+ * Handle a READ command: open the next file of the list built by the OPEN
+ * command and start an asynchronous read into the guest buffers of @elem.
+ *
+ * Returns 1 when the read was started (the request is then completed by
+ * pstore_async_read_fn()), 0 when there are no more files, and -1 on
+ * failure.
+ *
+ * The context is zero-allocated because its fileinfo is copied to the
+ * guest in full and not every field is assigned here. The channel is
+ * released again when fstat() fails.
+ */
+static ssize_t virtio_pstore_do_read(VirtIOPstore *s, VirtQueueElement *elem)
+{
+    char *filename = NULL;
+    int idx;
+    struct stat stbuf;
+    struct pstore_read_arg *rarg = NULL;
+    QIOChannelFile *iocf;
+    Error *err = NULL;
+    int ret = -1;
+
+    if (s->file_idx >= s->num_file) {
+        return 0;
+    }
+
+    rarg = g_malloc0(sizeof(*rarg));
+
+    idx = s->file_idx++;
+    filename = virtio_pstore_from_filename(s, s->files[idx]->d_name,
+                                           &rarg->info);
+    if (filename == NULL) {
+        goto out;
+    }
+
+    iocf = qio_channel_file_new_path(filename, O_RDONLY, 0644, &err);
+    if (iocf == NULL) {
+        error_reportf_err(err, "cannot create io channel: ");
+        goto out;
+    }
+
+    if (fstat(iocf->fd, &stbuf) < 0) {
+        object_unref(OBJECT(iocf));
+        goto out;
+    }
+
+    rarg->vps = s;
+    rarg->elem = elem;
+    rarg->ioc = QIO_CHANNEL(iocf);
+    rarg->info.id = cpu_to_le64(rarg->info.id);
+    rarg->info.type = cpu_to_le16(rarg->info.type);
+    rarg->info.flags = cpu_to_le32(rarg->info.flags);
+    rarg->info.time_sec = cpu_to_le64(stbuf.st_ctim.tv_sec);
+    rarg->info.time_nsec = cpu_to_le32(stbuf.st_ctim.tv_nsec);
+
+    /* a failure to switch modes is ignored; NULL avoids leaking an Error */
+    qio_channel_set_blocking(rarg->ioc, false, NULL);
+    qio_channel_add_watch(rarg->ioc, G_IO_IN, pstore_async_read_fn, rarg,
+                          free_rarg_fn);
+    g_free(filename);
+    return 1;
+
+out:
+    g_free(filename);
+    g_free(rarg);
+
+    return ret;
+}
+
+/* Context of one asynchronous WRITE, passed to pstore_async_write_fn(). */
+struct pstore_write_arg {
+ VirtIOPstore *vps;
+ /* request element; freed in free_warg_fn() */
+ VirtQueueElement *elem;
+ /*
+  * Request header as passed to virtio_pstore_do_write(); it is read again
+  * in pstore_async_write_fn().
+  * NOTE(review): confirm the pointed-to request outlives the async callback.
+  */
+ struct virtio_pstore_req *req;
+ /* channel of the file being written; closed in free_warg_fn() */
+ QIOChannel *ioc;
+};
+
+/*
+ * Watch callback for an asynchronous WRITE: write the guest data that
+ * follows the request header to the file, then report the result to the
+ * guest on the write virtqueue.
+ *
+ * On failure the Error (if any) is reported and freed instead of leaked.
+ *
+ * Always returns FALSE so that the watch is removed after one invocation.
+ */
+static gboolean pstore_async_write_fn(QIOChannel *ioc, GIOCondition condition,
+                                      gpointer data)
+{
+    struct pstore_write_arg *warg = data;
+    VirtIOPstore *vps = warg->vps;
+    VirtQueueElement *elem = warg->elem;
+    struct iovec *sg = elem->out_sg;
+    unsigned int sg_num = elem->out_num;
+    struct virtio_pstore_res res;
+    Error *err = NULL;
+    ssize_t len;
+    int ret = 0;
+
+    /* we already consumed the req */
+    iov_discard_front(&sg, &sg_num, sizeof(*warg->req));
+
+    len = qio_channel_writev(warg->ioc, sg, sg_num, &err);
+    if (len < 0) {
+        if (err) {
+            error_report_err(err);
+        }
+        ret = -1;
+    }
+
+    res.cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_WRITE);
+    res.type = warg->req->type;
+    res.ret = cpu_to_le32(ret);
+
+    /* tell the result to guest */
+    iov_from_buf(elem->in_sg, elem->in_num, 0, &res, sizeof(res));
+
+    virtqueue_push(vps->wvq, elem, sizeof(res));
+    virtio_notify(VIRTIO_DEVICE(vps), vps->wvq);
+
+    return FALSE;
+}
+
+/*
+ * Destroy notifier of the write watch: closes the channel and releases
+ * everything owned by the struct pstore_write_arg passed as @data.
+ */
+static void free_warg_fn(gpointer data)
+{
+    struct pstore_write_arg *warg = data;
+
+    qio_channel_close(warg->ioc, NULL);
+    /*
+     * Drop the reference obtained from qio_channel_file_new_path(); the
+     * watch source keeps its own reference, so without this the channel
+     * object is never finalized.
+     */
+    object_unref(OBJECT(warg->ioc));
+
+    g_free(warg->elem);
+    g_free(warg);
+}
+
+/*
+ * Start an asynchronous WRITE request.
+ *
+ * Opens (creating or truncating) the file named after @req and registers
+ * pstore_async_write_fn() as a G_IO_OUT watch on it; the watch completes
+ * the request and free_warg_fn() releases @elem.
+ *
+ * Returns 1 when the request was queued (the caller must not touch @elem
+ * any more) and -1 on failure.
+ */
+static ssize_t virtio_pstore_do_write(VirtIOPstore *s, VirtQueueElement *elem,
+ struct virtio_pstore_req *req)
+{
+ unsigned short type = le16_to_cpu(req->type);
+ char *filename = NULL;
+ int flags = O_WRONLY | O_CREAT | O_TRUNC;
+ struct pstore_write_arg *warg = NULL;
+ QIOChannelFile *iocf;
+ Error *err = NULL;
+ int ret = -1;
+
+ /* do not keep same type of files more than 'file-max' */
+ delete_old_pstore_file(s, type);
+
+ filename = virtio_pstore_to_filename(s, req);
+ if (filename == NULL) {
+ return -1;
+ }
+
+ iocf = qio_channel_file_new_path(filename, flags, 0644, &err);
+ if (err) {
+ error_reportf_err(err, "cannot create io channel: ");
+ goto out;
+ }
+
+ warg = g_malloc(sizeof(*warg));
+ if (warg == NULL) {
+ goto out;
+ }
+
+ warg->vps = s;
+ warg->elem = elem;
+ /*
+ * NOTE(review): only the pointer is stored.  virtio_pstore_handle_io()
+ * passes the address of one of its locals, so the pointee does not
+ * outlive that call.
+ */
+ warg->req = req;
+ warg->ioc = QIO_CHANNEL(iocf);
+
+ /* NOTE(review): an error reported through err here is not checked */
+ qio_channel_set_blocking(warg->ioc, false, &err);
+ qio_channel_add_watch(warg->ioc, G_IO_OUT, pstore_async_write_fn, warg,
+ free_warg_fn);
+ g_free(filename);
+ return 1;
+
+out:
+ g_free(filename);
+ g_free(warg);
+ return ret;
+}
+
+/*
+ * Virtqueue handler installed on both queues.
+ *
+ * Pops elements until the queue is empty, decodes the request header and
+ * dispatches on the command.  OPEN, CLOSE and ERASE are completed here;
+ * READ and WRITE return 1 when they were handed to an asynchronous watch
+ * that completes the element later.  A malformed element terminates the
+ * process; a failing command stops processing after its response has
+ * been pushed.
+ */
+static void virtio_pstore_handle_io(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOPstore *s = VIRTIO_PSTORE(vdev);
+    VirtQueueElement *elem;
+    struct virtio_pstore_req req;
+    struct virtio_pstore_res res;
+    ssize_t len;
+    int ret;
+
+    for (;;) {
+        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            return;
+        }
+
+        if (elem->out_num < 1 || elem->in_num < 1) {
+            error_report("request or response buffer is missing");
+            exit(1);
+        }
+
+        if (elem->out_num > 2 || elem->in_num > 3) {
+            error_report("invalid number of input/output buffer");
+            exit(1);
+        }
+
+        len = iov_to_buf(elem->out_sg, elem->out_num, 0, &req, sizeof(req));
+        if (len != (ssize_t)sizeof(req)) {
+            error_report("invalid request size: %ld", (long)len);
+            exit(1);
+        }
+        /* echoed verbatim, so no byte swapping is needed */
+        res.cmd = req.cmd;
+        res.type = req.type;
+
+        switch (le16_to_cpu(req.cmd)) {
+        case VIRTIO_PSTORE_CMD_OPEN:
+            ret = virtio_pstore_do_open(s);
+            break;
+        case VIRTIO_PSTORE_CMD_CLOSE:
+            ret = virtio_pstore_do_close(s);
+            break;
+        case VIRTIO_PSTORE_CMD_ERASE:
+            ret = virtio_pstore_do_erase(s, &req);
+            break;
+        case VIRTIO_PSTORE_CMD_READ:
+            ret = virtio_pstore_do_read(s, elem);
+            if (ret == 1) {
+                /* async channel io */
+                continue;
+            }
+            break;
+        case VIRTIO_PSTORE_CMD_WRITE:
+            ret = virtio_pstore_do_write(s, elem, &req);
+            if (ret == 1) {
+                /* async channel io */
+                continue;
+            }
+            break;
+        default:
+            ret = -1;
+            break;
+        }
+
+        /* the response is little-endian, as in pstore_async_write_fn() */
+        res.ret = cpu_to_le32(ret);
+
+        iov_from_buf(elem->in_sg, elem->in_num, 0, &res, sizeof(res));
+        /*
+         * Only the response header was written to the in buffers; len
+         * holds the size of the request that was read and must not be
+         * counted as used length.
+         */
+        virtqueue_push(vq, elem, sizeof(res));
+
+        virtio_notify(vdev, vq);
+        g_free(elem);
+
+        if (ret < 0) {
+            return;
+        }
+    }
+}
+
+/*
+ * Realize hook: initialises the virtio device, substitutes the built-in
+ * defaults for properties left at zero and creates the two virtqueues.
+ */
+static void virtio_pstore_device_realize(DeviceState *dev, Error **errp)
+{
+    VirtIOPstore *s = VIRTIO_PSTORE(dev);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    virtio_init(vdev, "virtio-pstore", VIRTIO_ID_PSTORE,
+                sizeof(struct virtio_pstore_config));
+
+    s->id = 1;
+
+    if (s->bufsize == 0) {
+        s->bufsize = PSTORE_DEFAULT_BUFSIZE;
+    }
+    if (s->file_max == 0) {
+        s->file_max = PSTORE_DEFAULT_FILE_MAX;
+    }
+
+    /* both queues share one handler */
+    s->rvq = virtio_add_queue(vdev, 128, virtio_pstore_handle_io);
+    s->wvq = virtio_add_queue(vdev, 128, virtio_pstore_handle_io);
+}
+
+/* Unrealize hook: undoes virtio_init() for this device. */
+static void virtio_pstore_device_unrealize(DeviceState *dev, Error **errp)
+{
+    virtio_cleanup(VIRTIO_DEVICE(dev));
+}
+
+/*
+ * get_config hook: copies the device configuration, with bufsize stored
+ * as a little-endian 32-bit value, into @config_data.
+ */
+static void virtio_pstore_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+    VirtIOPstore *s = VIRTIO_PSTORE(vdev);
+    struct virtio_pstore_config cfg;
+
+    cfg.bufsize = cpu_to_le32(s->bufsize);
+    memcpy(config_data, &cfg, sizeof(cfg));
+}
+
+/*
+ * set_config hook: reads the configuration from @config_data and stores
+ * its little-endian bufsize field in the device state.
+ */
+static void virtio_pstore_set_config(VirtIODevice *vdev,
+                                     const uint8_t *config_data)
+{
+    VirtIOPstore *s = VIRTIO_PSTORE(vdev);
+    struct virtio_pstore_config cfg;
+
+    memcpy(&cfg, config_data, sizeof(cfg));
+    s->bufsize = le32_to_cpu(cfg.bufsize);
+}
+
+/* get_features hook: returns the feature bits @f unchanged. */
+static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp)
+{
+ return f;
+}
+
+/* Getter of the "directory" property: visits s->directory as a string. */
+static void pstore_get_directory(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ VirtIOPstore *s = opaque;
+
+ visit_type_str(v, name, &s->directory, errp);
+}
+
+/*
+ * Setter of the "directory" property.  On success the previous string is
+ * freed and replaced by the visited one; on failure the error is
+ * propagated to @errp and the current value is kept.
+ */
+static void pstore_set_directory(Object *obj, Visitor *v,
+                                 const char *name, void *opaque,
+                                 Error **errp)
+{
+    VirtIOPstore *s = opaque;
+    Error *local_err = NULL;
+    char *value;
+
+    visit_type_str(v, name, &value, &local_err);
+    if (!local_err) {
+        g_free(s->directory);
+        s->directory = value;
+        return;
+    }
+
+    error_propagate(errp, local_err);
+}
+
+/* Release hook of the "directory" property: frees the stored string. */
+static void pstore_release_directory(Object *obj, const char *name,
+ void *opaque)
+{
+ VirtIOPstore *s = opaque;
+
+ g_free(s->directory);
+ /* cleared so that a later g_free() of the field is harmless */
+ s->directory = NULL;
+}
+
+/* Getter of the "bufsize" property: visits a copy of s->bufsize as a size. */
+static void pstore_get_bufsize(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ VirtIOPstore *s = opaque;
+ uint64_t value = s->bufsize;
+
+ visit_type_size(v, name, &value, errp);
+}
+
+/*
+ * Setter of the "bufsize" property.
+ *
+ * Rejects values below 4096 and values that do not fit the 32-bit
+ * bufsize field of struct virtio_pstore_config, which would otherwise
+ * be truncated silently when the configuration is exported.  On any
+ * error @errp is set and the current value is kept.
+ */
+static void pstore_set_bufsize(Object *obj, Visitor *v,
+                               const char *name, void *opaque,
+                               Error **errp)
+{
+    VirtIOPstore *s = opaque;
+    Error *error = NULL;
+    uint64_t value;
+
+    visit_type_size(v, name, &value, &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+
+    if (value < 4096) {
+        error_setg(errp, "Warning: too small buffer size: %"PRIu64, value);
+        return;
+    }
+
+    if (value > UINT32_MAX) {
+        error_setg(errp, "too large buffer size: %"PRIu64, value);
+        return;
+    }
+
+    s->bufsize = value;
+}
+
+/* Getter of the "file-max" property: visits a copy of s->file_max as an int. */
+static void pstore_get_file_max(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ VirtIOPstore *s = opaque;
+ int64_t value = s->file_max;
+
+ visit_type_int(v, name, &value, errp);
+}
+
+/*
+ * Setter of the "file-max" property.
+ *
+ * The value is visited as a signed integer but stored in an unsigned
+ * field, so negative input is rejected instead of being turned into a
+ * huge limit.  Zero is accepted; realize replaces it with the default.
+ */
+static void pstore_set_file_max(Object *obj, Visitor *v,
+                                const char *name, void *opaque,
+                                Error **errp)
+{
+    VirtIOPstore *s = opaque;
+    Error *error = NULL;
+    int64_t value;
+
+    visit_type_int(v, name, &value, &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+
+    if (value < 0) {
+        error_setg(errp, "invalid file-max: %"PRId64, value);
+        return;
+    }
+
+    s->file_max = value;
+}
+
+/* Empty qdev property list; the options are added in virtio_pstore_instance_init(). */
+static Property virtio_pstore_properties[] = {
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Instance initialiser: registers the "directory", "bufsize" and
+ * "file-max" object properties with their accessor callbacks.
+ */
+static void virtio_pstore_instance_init(Object *obj)
+{
+ VirtIOPstore *s = VIRTIO_PSTORE(obj);
+
+ /* only "directory" owns heap memory and therefore has a release hook */
+ object_property_add(obj, "directory", "str",
+ pstore_get_directory, pstore_set_directory,
+ pstore_release_directory, s, NULL);
+ object_property_add(obj, "bufsize", "size",
+ pstore_get_bufsize, pstore_set_bufsize, NULL, s, NULL);
+ object_property_add(obj, "file-max", "int",
+ pstore_get_file_max, pstore_set_file_max, NULL, s, NULL);
+}
+
+/* Class initialiser: installs the device category and the virtio callbacks. */
+static void virtio_pstore_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+ dc->props = virtio_pstore_properties;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ vdc->realize = virtio_pstore_device_realize;
+ vdc->unrealize = virtio_pstore_device_unrealize;
+ vdc->get_config = virtio_pstore_get_config;
+ vdc->set_config = virtio_pstore_set_config;
+ vdc->get_features = get_features;
+}
+
+/* QOM type description of the virtio-pstore device. */
+static const TypeInfo virtio_pstore_info = {
+ .name = TYPE_VIRTIO_PSTORE,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VirtIOPstore),
+ .instance_init = virtio_pstore_instance_init,
+ .class_init = virtio_pstore_class_init,
+};
+
+/* Registers the virtio-pstore type; hooked up through type_init() below. */
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_pstore_info);
+}
+
+type_init(virtio_register_types)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 929ec2f..b31774a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -79,6 +79,7 @@
#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004
#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
+#define PCI_DEVICE_ID_VIRTIO_PSTORE 0x100a
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001
diff --git a/include/hw/virtio/virtio-pstore.h b/include/hw/virtio/virtio-pstore.h
new file mode 100644
index 0000000..85b1828
--- /dev/null
+++ b/include/hw/virtio/virtio-pstore.h
@@ -0,0 +1,36 @@
+/*
+ * Virtio Pstore Support
+ *
+ * Authors:
+ * Namhyung Kim <namhyung@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_VIRTIO_PSTORE_H
+#define _QEMU_VIRTIO_PSTORE_H
+
+#include "standard-headers/linux/virtio_pstore.h"
+#include "hw/virtio/virtio.h"
+#include "hw/pci/pci.h"
+
+#define TYPE_VIRTIO_PSTORE "virtio-pstore-device"
+#define VIRTIO_PSTORE(obj) \
+ OBJECT_CHECK(VirtIOPstore, (obj), TYPE_VIRTIO_PSTORE)
+
+/* Device state of the virtio-pstore device. */
+typedef struct VirtIOPstore {
+ VirtIODevice parent_obj;
+ VirtQueue *rvq; /* first queue created in realize */
+ VirtQueue *wvq; /* second queue; async write completions are pushed here */
+ char *directory; /* value of the "directory" property */
+ int file_idx; /* index of the next files[] entry handed to a READ */
+ int num_file; /* READ reports no more data once file_idx reaches this */
+ struct dirent **files; /* directory entries consumed by READ requests */
+ uint64_t id; /* set to 1 in realize */
+ uint64_t bufsize; /* "bufsize" property, exported in config space */
+ uint64_t file_max; /* value of the "file-max" property */
+} VirtIOPstore;
+
+#endif
diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
index 77925f5..c72a9ab 100644
--- a/include/standard-headers/linux/virtio_ids.h
+++ b/include/standard-headers/linux/virtio_ids.h
@@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_PSTORE 22 /* virtio pstore */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/standard-headers/linux/virtio_pstore.h b/include/standard-headers/linux/virtio_pstore.h
new file mode 100644
index 0000000..2f91839
--- /dev/null
+++ b/include/standard-headers/linux/virtio_pstore.h
@@ -0,0 +1,76 @@
+#ifndef _LINUX_VIRTIO_PSTORE_H
+#define _LINUX_VIRTIO_PSTORE_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+#include "standard-headers/linux/types.h"
+#include "standard-headers/linux/virtio_types.h"
+#include "standard-headers/linux/virtio_ids.h"
+#include "standard-headers/linux/virtio_config.h"
+
+#define VIRTIO_PSTORE_CMD_NULL 0
+#define VIRTIO_PSTORE_CMD_OPEN 1
+#define VIRTIO_PSTORE_CMD_READ 2
+#define VIRTIO_PSTORE_CMD_WRITE 3
+#define VIRTIO_PSTORE_CMD_ERASE 4
+#define VIRTIO_PSTORE_CMD_CLOSE 5
+
+#define VIRTIO_PSTORE_TYPE_UNKNOWN 0
+#define VIRTIO_PSTORE_TYPE_DMESG 1
+
+#define VIRTIO_PSTORE_FL_COMPRESSED 1
+
+/* Request header placed by the driver at the start of the out buffers. */
+struct virtio_pstore_req {
+ __virtio16 cmd; /* one of VIRTIO_PSTORE_CMD_* */
+ __virtio16 type; /* one of VIRTIO_PSTORE_TYPE_* */
+ __virtio32 flags; /* VIRTIO_PSTORE_FL_* bits */
+ __virtio64 id;
+ __virtio32 count;
+ __virtio32 reserved;
+};
+
+/* Response header written by the device at the start of the in buffers. */
+struct virtio_pstore_res {
+ __virtio16 cmd; /* command being answered */
+ __virtio16 type; /* type copied from the request */
+ __virtio32 ret; /* result of the command */
+};
+
+/* Metadata describing one stored file, filled in by the device for READ. */
+struct virtio_pstore_fileinfo {
+ __virtio64 id;
+ __virtio32 count;
+ __virtio16 type; /* one of VIRTIO_PSTORE_TYPE_* */
+ __virtio16 unused;
+ __virtio32 flags; /* VIRTIO_PSTORE_FL_* bits */
+ __virtio32 len;
+ __virtio64 time_sec; /* seconds part of the file timestamp */
+ __virtio32 time_nsec; /* nanoseconds part of the file timestamp */
+ __virtio32 reserved;
+};
+
+/* Device configuration space layout. */
+struct virtio_pstore_config {
+ __virtio32 bufsize;
+};
+
+#endif /* _LINUX_VIRTIO_PSTORE_H */
diff --git a/qdev-monitor.c b/qdev-monitor.c
index e19617f..e1df5a9 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -73,6 +73,7 @@ static const QDevAlias qdev_alias_table[] = {
{ "virtio-serial-pci", "virtio-serial", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
{ "virtio-tablet-ccw", "virtio-tablet", QEMU_ARCH_S390X },
{ "virtio-tablet-pci", "virtio-tablet", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
+ { "virtio-pstore-pci", "virtio-pstore" },
{ }
};
--
2.9.3
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply related
* [PATCH 3/3] kvmtool: Implement virtio-pstore device
From: Namhyung Kim @ 2016-08-31 8:08 UTC (permalink / raw)
To: virtio-dev, virtualization, kvm, qemu-devel
Cc: Namhyung Kim, Will Deacon, LKML
In-Reply-To: <20160831080802.13408-1-namhyung@kernel.org>
From: Namhyung Kim <namhyung@gmail.com>
Add a virtio pstore device to allow kernel log messages to be saved on
the host. With this patch, the log files are saved under the directory
given by the --pstore option.
$ lkvm run --pstore=dir-xx
(guest) # echo c > /proc/sysrq-trigger
$ ls dir-xx
dmesg-1.enc.z dmesg-2.enc.z
The log files are usually compressed using zlib. User can easily see
the messages on the host or on the guest (using pstore filesystem).
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
---
Makefile | 1 +
builtin-run.c | 2 +
include/kvm/kvm-config.h | 1 +
include/kvm/virtio-pci-dev.h | 2 +
include/kvm/virtio-pstore.h | 53 +++++
include/linux/virtio_ids.h | 1 +
virtio/pstore.c | 447 +++++++++++++++++++++++++++++++++++++++++++
7 files changed, 507 insertions(+)
create mode 100644 include/kvm/virtio-pstore.h
create mode 100644 virtio/pstore.c
diff --git a/Makefile b/Makefile
index 1f0196f..d7462b9 100644
--- a/Makefile
+++ b/Makefile
@@ -67,6 +67,7 @@ OBJS += virtio/net.o
OBJS += virtio/rng.o
OBJS += virtio/balloon.o
OBJS += virtio/pci.o
+OBJS += virtio/pstore.o
OBJS += disk/blk.o
OBJS += disk/qcow.o
OBJS += disk/raw.o
diff --git a/builtin-run.c b/builtin-run.c
index 72b878d..08c12dd 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -128,6 +128,8 @@ void kvm_run_set_wrapper_sandbox(void)
" rootfs"), \
OPT_STRING('\0', "hugetlbfs", &(cfg)->hugetlbfs_path, "path", \
"Hugetlbfs path"), \
+ OPT_STRING('\0', "pstore", &(cfg)->pstore_path, "path", \
+ "pstore data path"), \
\
OPT_GROUP("Kernel options:"), \
OPT_STRING('k', "kernel", &(cfg)->kernel_filename, "kernel", \
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index 386fa8c..42b7651 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -45,6 +45,7 @@ struct kvm_config {
const char *hugetlbfs_path;
const char *custom_rootfs_name;
const char *real_cmdline;
+ const char *pstore_path;
struct virtio_net_params *net_params;
bool single_step;
bool vnc;
diff --git a/include/kvm/virtio-pci-dev.h b/include/kvm/virtio-pci-dev.h
index 48ae018..4339d94 100644
--- a/include/kvm/virtio-pci-dev.h
+++ b/include/kvm/virtio-pci-dev.h
@@ -15,6 +15,7 @@
#define PCI_DEVICE_ID_VIRTIO_BLN 0x1005
#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1008
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
+#define PCI_DEVICE_ID_VIRTIO_PSTORE 0x100a
#define PCI_DEVICE_ID_VESA 0x2000
#define PCI_DEVICE_ID_PCI_SHMEM 0x0001
@@ -34,5 +35,6 @@
#define PCI_CLASS_RNG 0xff0000
#define PCI_CLASS_BLN 0xff0000
#define PCI_CLASS_9P 0xff0000
+#define PCI_CLASS_PSTORE 0xff0000
#endif /* VIRTIO_PCI_DEV_H_ */
diff --git a/include/kvm/virtio-pstore.h b/include/kvm/virtio-pstore.h
new file mode 100644
index 0000000..9f52ffd
--- /dev/null
+++ b/include/kvm/virtio-pstore.h
@@ -0,0 +1,53 @@
+#ifndef KVM__PSTORE_VIRTIO_H
+#define KVM__PSTORE_VIRTIO_H
+
+#include <kvm/virtio.h>
+#include <sys/types.h>
+
+#define VIRTIO_PSTORE_CMD_NULL 0
+#define VIRTIO_PSTORE_CMD_OPEN 1
+#define VIRTIO_PSTORE_CMD_READ 2
+#define VIRTIO_PSTORE_CMD_WRITE 3
+#define VIRTIO_PSTORE_CMD_ERASE 4
+#define VIRTIO_PSTORE_CMD_CLOSE 5
+
+#define VIRTIO_PSTORE_TYPE_UNKNOWN 0
+#define VIRTIO_PSTORE_TYPE_DMESG 1
+
+#define VIRTIO_PSTORE_FL_COMPRESSED 1
+
+/* Request header found in the first out buffer of every request. */
+struct virtio_pstore_req {
+ __virtio16 cmd; /* one of VIRTIO_PSTORE_CMD_* */
+ __virtio16 type; /* one of VIRTIO_PSTORE_TYPE_* */
+ __virtio32 flags; /* VIRTIO_PSTORE_FL_* bits */
+ __virtio64 id;
+ __virtio32 count;
+ __virtio32 reserved;
+};
+
+/* Response header written to the first in buffer of every request. */
+struct virtio_pstore_res {
+ __virtio16 cmd; /* command copied from the request */
+ __virtio16 type; /* type copied from the request */
+ __virtio32 ret; /* result of the command */
+};
+
+/* Metadata describing one stored file, filled in for READ requests. */
+struct virtio_pstore_fileinfo {
+ __virtio64 id; /* number parsed from the file name */
+ __virtio32 count;
+ __virtio16 type; /* one of VIRTIO_PSTORE_TYPE_* */
+ __virtio16 unused;
+ __virtio32 flags; /* VIRTIO_PSTORE_FL_* bits */
+ __virtio32 len; /* number of data bytes returned */
+ __virtio64 time_sec; /* st_ctim seconds of the file */
+ __virtio32 time_nsec; /* st_ctim nanoseconds of the file */
+ __virtio32 reserved;
+};
+
+/* Device configuration space layout. */
+struct virtio_pstore_config {
+ __virtio32 bufsize;
+};
+
+int virtio_pstore__init(struct kvm *kvm);
+int virtio_pstore__exit(struct kvm *kvm);
+
+#endif /* KVM__PSTORE_VIRTIO_H */
diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h
index 5f60aa4..40eabf7 100644
--- a/include/linux/virtio_ids.h
+++ b/include/linux/virtio_ids.h
@@ -40,5 +40,6 @@
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_PSTORE 22 /* virtio pstore */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/virtio/pstore.c b/virtio/pstore.c
new file mode 100644
index 0000000..fb9806f
--- /dev/null
+++ b/virtio/pstore.c
@@ -0,0 +1,447 @@
+#include "kvm/virtio-pstore.h"
+
+#include "kvm/virtio-pci-dev.h"
+
+#include "kvm/virtio.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/threadpool.h"
+#include "kvm/guest_compat.h"
+
+#include <linux/virtio_ring.h>
+
+#include <linux/list.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <linux/kernel.h>
+#include <sys/eventfd.h>
+
+/* Number of virtqueues of the device and the ring size reported for each. */
+#define NUM_VIRT_QUEUES 2
+#define VIRTIO_PSTORE_QUEUE_SIZE 128
+
+/* Argument of virtio_pstore_io_thread(); freed by the thread when its loop ends. */
+struct io_thread_arg {
+ struct kvm *kvm;
+ struct pstore_dev *pdev;
+};
+
+/* State of one virtio-pstore device instance. */
+struct pstore_dev {
+ struct list_head list; /* link in the pdevs list */
+ struct virtio_device vdev;
+ pthread_t io_thread; /* thread running virtio_pstore_io_thread() */
+ int io_efd; /* eventfd written by notify_vq() to wake the thread */
+ int done; /* the I/O thread loops while this is zero */
+
+ struct virtio_pstore_config *config;
+
+ int fd;
+ DIR *dir; /* directory stream opened by OPEN, NULL otherwise */
+ u64 id; /* next id used when naming a dmesg file */
+
+ /* virtio queue */
+ struct virt_queue vqs[NUM_VIRT_QUEUES];
+};
+
+/* All registered devices, and the id of the pending compat message (-1: none). */
+static LIST_HEAD(pdevs);
+static int compat_id = -1;
+
+/* virtio_ops.get_config: returns the device's configuration as raw bytes. */
+static u8 *get_config(struct kvm *kvm, void *dev)
+{
+ struct pstore_dev *pdev = dev;
+
+ return (u8*)pdev->config;
+}
+
+/* virtio_ops.get_host_features: the device offers no feature bits. */
+static u32 get_host_features(struct kvm *kvm, void *dev)
+{
+ /* Unused */
+ return 0;
+}
+
+/* virtio_ops.set_guest_features: the features acked by the guest are ignored. */
+static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
+{
+ /* Unused */
+}
+
+/*
+ * Build the path of a new file for @req into @buf (at most @sz bytes).
+ *
+ * The name is "<pstore_path>/<basename>-<id>", followed by ".enc.z" when
+ * the request carries VIRTIO_PSTORE_FL_COMPRESSED.  For dmesg records the
+ * id is taken from, and advances, the device's running counter; every
+ * other type is named "unknown-0".
+ */
+static void virtio_pstore_to_filename(struct kvm *kvm, struct pstore_dev *pdev,
+				      char *buf, size_t sz,
+				      struct virtio_pstore_req *req)
+{
+	const char *basename;
+	unsigned long long id = 0;
+	/* request fields are in guest byte order: flags is 32 bit, type 16 bit */
+	unsigned int flags = virtio_guest_to_host_u32(pdev->vqs, req->flags);
+	unsigned int type = virtio_guest_to_host_u16(pdev->vqs, req->type);
+
+	switch (type) {
+	case VIRTIO_PSTORE_TYPE_DMESG:
+		basename = "dmesg";
+		id = pdev->id++;
+		break;
+	default:
+		basename = "unknown";
+		break;
+	}
+
+	snprintf(buf, sz, "%s/%s-%llu%s", kvm->cfg.pstore_path, basename, id,
+		 flags & VIRTIO_PSTORE_FL_COMPRESSED ? ".enc.z" : "");
+}
+
+/*
+ * Translate a directory entry @name into a full path and file metadata.
+ *
+ * Writes "<pstore_path>/<name>" into @buf (at most @sz bytes) and sets
+ * @info->flags, @info->type and @info->id, all in host byte order, from
+ * the pieces of the name.  Names with an unrecognised prefix are
+ * reported as VIRTIO_PSTORE_TYPE_UNKNOWN rather than leaving the type
+ * field untouched.
+ */
+static void virtio_pstore_from_filename(struct kvm *kvm, char *name,
+					char *buf, size_t sz,
+					struct virtio_pstore_fileinfo *info)
+{
+	size_t len = strlen(name);
+
+	snprintf(buf, sz, "%s/%s", kvm->cfg.pstore_path, name);
+
+	info->flags = 0;
+	if (len > 6 && !strncmp(name + len - 6, ".enc.z", 6))
+		info->flags |= VIRTIO_PSTORE_FL_COMPRESSED;
+
+	info->type = VIRTIO_PSTORE_TYPE_UNKNOWN;
+	if (!strncmp(name, "dmesg-", 6)) {
+		info->type = VIRTIO_PSTORE_TYPE_DMESG;
+		name += strlen("dmesg-");
+	} else if (!strncmp(name, "unknown-", 8)) {
+		name += strlen("unknown-");
+	}
+
+	/* the id field is 64 bit wide even where unsigned long is not */
+	info->id = strtoull(name, NULL, 0);
+}
+
+/*
+ * Handle OPEN: open the pstore directory for a subsequent series of
+ * READ requests.
+ *
+ * A stream left over from an earlier OPEN without a matching CLOSE is
+ * closed first so that it is not leaked.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int virtio_pstore_do_open(struct kvm *kvm, struct pstore_dev *pdev,
+				 struct virtio_pstore_req *req,
+				 struct iovec *iov)
+{
+	if (pdev->dir != NULL)
+		closedir(pdev->dir);
+
+	pdev->dir = opendir(kvm->cfg.pstore_path);
+	if (pdev->dir == NULL)
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Handle CLOSE: release the directory stream opened by OPEN.
+ *
+ * Returns 0 on success and -1 when no directory is open.
+ */
+static int virtio_pstore_do_close(struct kvm *kvm, struct pstore_dev *pdev,
+				  struct virtio_pstore_req *req,
+				  struct iovec *iov)
+{
+	DIR *dir = pdev->dir;
+
+	if (!dir)
+		return -1;
+
+	pdev->dir = NULL;
+	closedir(dir);
+
+	return 0;
+}
+
+/*
+ * Handle READ: return the next directory entry whose name does not start
+ * with '.'.
+ *
+ * The file contents are read into iov[3] and its metadata is stored in
+ * @info in guest byte order.
+ *
+ * Returns the number of bytes produced (data plus sizeof(*info)), 0 when
+ * no directory is open or no entry is left, and -1 on error.
+ */
+static ssize_t virtio_pstore_do_read(struct kvm *kvm, struct pstore_dev *pdev,
+				     struct virtio_pstore_req *req,
+				     struct iovec *iov,
+				     struct virtio_pstore_fileinfo *info)
+{
+	char path[PATH_MAX];
+	FILE *fp;
+	ssize_t len = -1;
+	size_t nread;
+	struct stat stbuf;
+	struct dirent *dent;
+
+	if (pdev->dir == NULL)
+		return 0;
+
+	do {
+		dent = readdir(pdev->dir);
+	} while (dent && dent->d_name[0] == '.');
+
+	if (dent == NULL)
+		return 0;
+
+	virtio_pstore_from_filename(kvm, dent->d_name, path, sizeof(path), info);
+	fp = fopen(path, "r");
+	if (fp == NULL)
+		return -1;
+
+	/* every exit below goes through fclose() */
+	if (fstat(fileno(fp), &stbuf) < 0)
+		goto out;
+
+	/* fread() never returns a negative value; errors show up in ferror() */
+	nread = fread(iov[3].iov_base, 1, iov[3].iov_len, fp);
+	if (nread == 0 && ferror(fp))
+		goto out;
+
+	info->id = virtio_host_to_guest_u64(pdev->vqs, info->id);
+	/* type is a 16-bit field */
+	info->type = virtio_host_to_guest_u16(pdev->vqs, info->type);
+	info->flags = virtio_host_to_guest_u32(pdev->vqs, info->flags);
+	info->len = virtio_host_to_guest_u32(pdev->vqs, nread);
+
+	info->time_sec = virtio_host_to_guest_u64(pdev->vqs, stbuf.st_ctim.tv_sec);
+	info->time_nsec = virtio_host_to_guest_u32(pdev->vqs, stbuf.st_ctim.tv_nsec);
+
+	len = nread + sizeof(*info);
+
+out:
+	fclose(fp);
+	return len;
+}
+
+/*
+ * Handle WRITE: store the payload found in iov[1] in a new file named
+ * by virtio_pstore_to_filename().
+ *
+ * Returns 0 when the whole payload was written and flushed, -1 otherwise.
+ */
+static ssize_t virtio_pstore_do_write(struct kvm *kvm, struct pstore_dev *pdev,
+				      struct virtio_pstore_req *req,
+				      struct iovec *iov)
+{
+	char path[PATH_MAX];
+	FILE *fp;
+	size_t written;
+	ssize_t ret = 0;
+
+	virtio_pstore_to_filename(kvm, pdev, path, sizeof(path), req);
+
+	fp = fopen(path, "a");
+	if (fp == NULL)
+		return -1;
+
+	/* fwrite() reports failure as a short count, never as a negative value */
+	written = fwrite(iov[1].iov_base, 1, iov[1].iov_len, fp);
+	if (written != iov[1].iov_len)
+		ret = -1;
+
+	/* buffered data is flushed by fclose(), so its result matters as well */
+	if (fclose(fp) != 0)
+		ret = -1;
+
+	return ret;
+}
+
+/*
+ * Handle ERASE: remove the file identified by the type, id and flags of
+ * @req.
+ *
+ * The path is built from the id carried in the request.  It must not go
+ * through virtio_pstore_to_filename(), which hands out a fresh id from
+ * the device counter and would therefore name a file that does not
+ * exist yet.
+ *
+ * Returns the result of unlink(): 0 on success, -1 on failure.
+ */
+static ssize_t virtio_pstore_do_erase(struct kvm *kvm, struct pstore_dev *pdev,
+				      struct virtio_pstore_req *req,
+				      struct iovec *iov)
+{
+	char path[PATH_MAX];
+	const char *basename;
+	unsigned long long id = virtio_guest_to_host_u64(pdev->vqs, req->id);
+	unsigned int flags = virtio_guest_to_host_u32(pdev->vqs, req->flags);
+
+	switch (virtio_guest_to_host_u16(pdev->vqs, req->type)) {
+	case VIRTIO_PSTORE_TYPE_DMESG:
+		basename = "dmesg";
+		break;
+	default:
+		basename = "unknown";
+		break;
+	}
+
+	snprintf(path, sizeof(path), "%s/%s-%llu%s", kvm->cfg.pstore_path,
+		 basename, id,
+		 flags & VIRTIO_PSTORE_FL_COMPRESSED ? ".enc.z" : "");
+
+	return unlink(path);
+}
+
+/*
+ * Process one request from @vq.
+ *
+ * Expected descriptor layout: iov[0] holds the request header, iov[out]
+ * the response header.  READ additionally needs the file info in
+ * iov[out + 1] and a data buffer in iov[out + 2]; WRITE needs a payload
+ * in iov[1].  The descriptor counts come from the guest, so they are
+ * checked before any of these slots is used.
+ *
+ * Returns true when the request completed successfully, false when it
+ * was malformed, unknown or failed.
+ */
+static bool virtio_pstore_do_io_request(struct kvm *kvm, struct pstore_dev *pdev,
+					struct virt_queue *vq)
+{
+	struct iovec iov[VIRTIO_PSTORE_QUEUE_SIZE];
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct virtio_pstore_fileinfo *info;
+	ssize_t len = 0;
+	u16 out, in, head;
+	int ret = 0;
+
+	head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
+
+	/* iov[0] and iov[out] are only filled in when both directions exist */
+	if (out < 1 || in < 1)
+		return false;
+
+	if (iov[0].iov_len != sizeof(*req) || iov[out].iov_len != sizeof(*res)) {
+		return false;
+	}
+
+	req = iov[0].iov_base;
+	res = iov[out].iov_base;
+
+	switch (virtio_guest_to_host_u16(vq, req->cmd)) {
+	case VIRTIO_PSTORE_CMD_OPEN:
+		ret = virtio_pstore_do_open(kvm, pdev, req, iov);
+		break;
+	case VIRTIO_PSTORE_CMD_READ:
+		/* do_read() stores the data in iov[3], i.e. iov[out + 2] */
+		if (out != 1 || in < 3 ||
+		    iov[out + 1].iov_len < sizeof(*info)) {
+			ret = -1;
+			break;
+		}
+		info = iov[out + 1].iov_base;
+		ret = virtio_pstore_do_read(kvm, pdev, req, iov, info);
+		if (ret > 0) {
+			len = ret;
+			ret = 0;
+		}
+		break;
+	case VIRTIO_PSTORE_CMD_WRITE:
+		/* do_write() takes the payload from iov[1], an out buffer */
+		if (out < 2) {
+			ret = -1;
+			break;
+		}
+		ret = virtio_pstore_do_write(kvm, pdev, req, iov);
+		break;
+	case VIRTIO_PSTORE_CMD_CLOSE:
+		ret = virtio_pstore_do_close(kvm, pdev, req, iov);
+		break;
+	case VIRTIO_PSTORE_CMD_ERASE:
+		ret = virtio_pstore_do_erase(kvm, pdev, req, iov);
+		break;
+	default:
+		return false;
+	}
+
+	res->cmd = req->cmd;
+	res->type = req->type;
+	res->ret = virtio_host_to_guest_u32(vq, ret);
+
+	virt_queue__set_used_elem(vq, head, sizeof(*res) + len);
+
+	return ret == 0;
+}
+
+/*
+ * Drain @vq: process requests for as long as the queue reports available
+ * buffers, then signal the guest once if at least one was handled.
+ */
+static void virtio_pstore_do_io(struct kvm *kvm, struct pstore_dev *pdev,
+				struct virt_queue *vq)
+{
+	bool handled = false;
+
+	for (; virt_queue__available(vq); handled = true)
+		virtio_pstore_do_io_request(kvm, pdev, vq);
+
+	if (!handled)
+		return;
+
+	/* the queue index is the offset of vq within pdev->vqs */
+	pdev->vdev.ops->signal_vq(kvm, &pdev->vdev, vq - pdev->vqs);
+}
+
+/*
+ * Body of the device's I/O thread.
+ *
+ * Blocks on the eventfd written by notify_vq() and, after each wakeup,
+ * services both virtqueues.  @arg is a heap-allocated struct
+ * io_thread_arg which is freed here once the loop ends.
+ */
+static void *virtio_pstore_io_thread(void *arg)
+{
+ struct io_thread_arg *io_arg = arg;
+ struct pstore_dev *pdev = io_arg->pdev;
+ struct kvm *kvm = io_arg->kvm;
+ u64 data;
+ int r;
+
+ kvm__set_thread_name("virtio-pstore-io");
+
+ /* NOTE(review): nothing in this file sets pdev->done - confirm how the loop is meant to end */
+ while (!pdev->done) {
+ r = read(pdev->io_efd, &data, sizeof(u64));
+ if (r < 0)
+ continue;
+
+ /* the eventfd does not say which queue was kicked, so check both */
+ virtio_pstore_do_io(kvm, pdev, &pdev->vqs[0]);
+ virtio_pstore_do_io(kvm, pdev, &pdev->vqs[1]);
+ }
+ free(io_arg);
+
+ pthread_exit(NULL);
+ return NULL;
+}
+
+/*
+ * virtio_ops.init_vq: record the guest page frame number of queue @vq
+ * and set up its vring on the memory that frame refers to.  Also removes
+ * the compat message registered at init time.  Always returns 0.
+ */
+static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align,
+		   u32 pfn)
+{
+	struct pstore_dev *pdev = dev;
+	struct virt_queue *queue = &pdev->vqs[vq];
+	void *ring;
+
+	compat__remove_message(compat_id);
+
+	queue->pfn = pfn;
+	ring = virtio_get_vq(kvm, queue->pfn, page_size);
+	vring_init(&queue->vring, VIRTIO_PSTORE_QUEUE_SIZE, ring, align);
+
+	return 0;
+}
+
+/*
+ * virtio_ops.notify_vq: wake the I/O thread by writing to its eventfd.
+ * Returns 0 on success or the negative result of write().
+ */
+static int notify_vq(struct kvm *kvm, void *dev, u32 vq)
+{
+	struct pstore_dev *pdev = dev;
+	u64 data = 1;
+	int rc = write(pdev->io_efd, &data, sizeof(data));
+
+	return rc < 0 ? rc : 0;
+}
+
+/* virtio_ops.get_pfn_vq: returns the page frame number stored by init_vq(). */
+static int get_pfn_vq(struct kvm *kvm, void *dev, u32 vq)
+{
+ struct pstore_dev *pdev = dev;
+
+ return pdev->vqs[vq].pfn;
+}
+
+/* virtio_ops.get_size_vq: every queue has the same fixed size. */
+static int get_size_vq(struct kvm *kvm, void *dev, u32 vq)
+{
+ return VIRTIO_PSTORE_QUEUE_SIZE;
+}
+
+/* virtio_ops.set_size_vq: the size is not stored; @size is returned as is. */
+static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size)
+{
+ /* FIXME: dynamic */
+ return size;
+}
+
+/* Callbacks handed to virtio_init() for the pstore device. */
+static struct virtio_ops pstore_dev_virtio_ops = {
+ .get_config = get_config,
+ .get_host_features = get_host_features,
+ .set_guest_features = set_guest_features,
+ .init_vq = init_vq,
+ .notify_vq = notify_vq,
+ .get_pfn_vq = get_pfn_vq,
+ .get_size_vq = get_size_vq,
+ .set_size_vq = set_size_vq,
+};
+
+/*
+ * Create the virtio-pstore device when a --pstore path was given.
+ *
+ * Allocates the device state, creates the eventfd and the I/O thread
+ * that waits on it, and registers the device with the virtio core.
+ *
+ * Returns 0 on success (also when no pstore path is configured) or a
+ * negative error code; on failure everything acquired so far is
+ * released again.
+ */
+int virtio_pstore__init(struct kvm *kvm)
+{
+	struct pstore_dev *pdev;
+	struct io_thread_arg *io_arg = NULL;
+	int r;
+
+	if (!kvm->cfg.pstore_path)
+		return 0;
+
+	pdev = calloc(1, sizeof(*pdev));
+	if (pdev == NULL)
+		return -ENOMEM;
+
+	/* lets the cleanup path tell whether the eventfd exists */
+	pdev->io_efd = -1;
+
+	pdev->config = calloc(1, sizeof(*pdev->config));
+	if (pdev->config == NULL) {
+		r = -ENOMEM;
+		goto cleanup;
+	}
+
+	pdev->id = 1;
+
+	io_arg = malloc(sizeof(*io_arg));
+	if (io_arg == NULL) {
+		r = -ENOMEM;
+		goto cleanup;
+	}
+
+	pdev->io_efd = eventfd(0, 0);
+	if (pdev->io_efd < 0) {
+		r = -errno;
+		goto cleanup;
+	}
+
+	*io_arg = (struct io_thread_arg) {
+		.pdev	= pdev,
+		.kvm	= kvm,
+	};
+	/* pthread_create() returns a positive error number, not -1 */
+	r = pthread_create(&pdev->io_thread, NULL,
+			   virtio_pstore_io_thread, io_arg);
+	if (r != 0) {
+		r = -r;
+		goto cleanup;
+	}
+
+	r = virtio_init(kvm, pdev, &pdev->vdev, &pstore_dev_virtio_ops,
+			VIRTIO_DEFAULT_TRANS(kvm), PCI_DEVICE_ID_VIRTIO_PSTORE,
+			VIRTIO_ID_PSTORE, PCI_CLASS_PSTORE);
+	if (r < 0) {
+		/*
+		 * The thread uses pdev and io_arg, so it has to be gone
+		 * before they are freed.  It can only be waiting in read(),
+		 * which is a cancellation point.
+		 */
+		pthread_cancel(pdev->io_thread);
+		pthread_join(pdev->io_thread, NULL);
+		goto cleanup;
+	}
+
+	list_add_tail(&pdev->list, &pdevs);
+
+	if (compat_id == -1)
+		compat_id = virtio_compat_add_message("virtio-pstore", "CONFIG_VIRTIO_PSTORE");
+	return 0;
+
+cleanup:
+	if (pdev->io_efd >= 0)
+		close(pdev->io_efd);
+	free(io_arg);
+	free(pdev->config);
+	free(pdev);
+
+	return r;
+}
+virtio_dev_init(virtio_pstore__init);
+
+/*
+ * Tear down every registered virtio-pstore device: unlink it from the
+ * device list, close its eventfd, let the transport clean up and free
+ * the device state including its configuration block.  Always returns 0.
+ */
+int virtio_pstore__exit(struct kvm *kvm)
+{
+	struct pstore_dev *pdev, *tmp;
+
+	list_for_each_entry_safe(pdev, tmp, &pdevs, list) {
+		list_del(&pdev->list);
+		/* NOTE(review): the I/O thread is not stopped or joined here - confirm this is intended */
+		close(pdev->io_efd);
+		pdev->vdev.ops->exit(kvm, &pdev->vdev);
+		/* allocated separately in virtio_pstore__init() */
+		free(pdev->config);
+		free(pdev);
+	}
+
+	return 0;
+}
+virtio_dev_exit(virtio_pstore__exit);
--
2.9.3
^ permalink raw reply related
* Re: [PATCH v8 01/18] remoteproc: st_slim_rproc: add a slimcore rproc driver
From: Peter Griffin @ 2016-08-31 11:11 UTC (permalink / raw)
To: Bjorn Andersson
Cc: devicetree, kernel, vinod.koul, linux-remoteproc, patrice.chotard,
dri-devel, linux-kernel, airlied, dmaengine, dan.j.williams,
virtualization, Lee Jones, linux-arm-kernel
In-Reply-To: <20160830165409.GM15161@tuxbot>
Hi Bjorn,
On Tue, 30 Aug 2016, Bjorn Andersson wrote:
> On Tue 30 Aug 05:34 PDT 2016, Lee Jones wrote:
>
> Thanks for your review Lee.
>
> > On Fri, 26 Aug 2016, Peter Griffin wrote:
> [..]
> > > diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> > > index 1a8bf76a..06765e0 100644
> > > --- a/drivers/remoteproc/Kconfig
> > > +++ b/drivers/remoteproc/Kconfig
> > > @@ -100,4 +100,12 @@ config ST_REMOTEPROC
> > > processor framework.
> > > This can be either built-in or a loadable module.
> > >
> > > +config ST_SLIM_REMOTEPROC
> > > + tristate "ST Slim remoteproc support"
> > > + select REMOTEPROC
> > > + help
> > > + Say y here to support firmware loading on IP based around
> > > + the Slim core.
> > > + If unsure say N.
>
> Saw one more thing when browsing through...
>
> As this piece of code doesn't do anything on its own and is going to be
> selected by the "function driver" I don't think this should be
> user-selectable.
Apologies, I believe you pointed this out in a previous review, but it seems to
have slipped through the net. Will fix in the next version.
Regards,
Peter.
^ permalink raw reply
* Re: [PATCH v8 01/18] remoteproc: st_slim_rproc: add a slimcore rproc driver
From: Lee Jones @ 2016-08-31 11:24 UTC (permalink / raw)
To: Peter Griffin
Cc: devicetree, kernel, vinod.koul, linux-remoteproc, patrice.chotard,
dri-devel, linux-kernel, airlied, dmaengine, dan.j.williams,
bjorn.andersson, virtualization, linux-arm-kernel
In-Reply-To: <20160830154451.GB22514@griffinp-ThinkPad-X1-Carbon-2nd>
On Tue, 30 Aug 2016, Peter Griffin wrote:
> On Tue, 30 Aug 2016, Lee Jones wrote:
> > On Fri, 26 Aug 2016, Peter Griffin wrote:
> >
> > > slim core is used as a basis for many IPs in the STi
> > > chipsets such as fdma and demux. To avoid duplicating
> > > the elf loading code in each device driver a slim
> > > rproc driver has been created.
> > >
> > > This driver is designed to be used by other device drivers
> > > such as fdma, or demux whose IP is based around a slim core.
> > > The device driver can call slim_rproc_alloc() to allocate
> > > a slim rproc and slim_rproc_put() when finished.
> > >
> > > This driver takes care of ioremapping the slim
> > > registers (dmem, imem, slimcore, peripherals), whose offsets
> > > and sizes can change between IP's. It also obtains and enables
> > > any clocks used by the device. This approach avoids having
> > > a double mapping of the registers as slim_rproc does not register
> > > its own platform device. It also maps well to device tree
> > > abstraction as it allows us to have one dt node for the whole
> > > device.
> > >
> > > All of the generic rproc elf loading code can be reused, and
> > > we provide start() stop() hooks to start and stop the slim
> > > core once the firmware has been loaded. This has been tested
> > > successfully with fdma driver.
> >
> > Nit. It would be good to use a constant line-wrap.
> >
> > 'M-x post-mode' will help with this.
>
> Can you provide the magic which makes this happen for GIT commit messages?
I tend to do it manually. However a 3 second Google search produced
[0], which looks like it could be fun/useful.
[0] https://www.emacswiki.org/emacs/Git
[...]
> > > + * License terms: GNU General Public License (GPL), version 2
> >
> > Are you sure ST are okay with the shortened version of the GPL?
>
> Do you mean the banner should be like this?
>
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> * the Free Software Foundation; either version 2 of the License, or
> * (at your option) any later version.
Yes, exactly.
[...]
> > > +/* slimcore registers */
> >
> > What's it called? slimcore, slim core, ST Slim?
>
> It is usually referred to as SLIM core, or SLIM CPU in the various functional
> specifications.
>
> >
> > Please be consistent. Use the name from the datasheet.
>
> OK. The datasheet isn't consistent either, so we will settle on SLIM core and
> SLIM CPU.
Perfect.
--
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH v8 15/18] ARM: STi: DT: STiH407: Add uniperif reader dt nodes
From: Lee Jones @ 2016-08-31 11:28 UTC (permalink / raw)
To: Peter Griffin
Cc: devicetree, kernel, vinod.koul, Arnaud Pouliquen,
linux-remoteproc, patrice.chotard, dri-devel, linux-kernel,
airlied, dmaengine, dan.j.williams, bjorn.andersson,
virtualization, linux-arm-kernel
In-Reply-To: <20160830142112.GA22514@griffinp-ThinkPad-X1-Carbon-2nd>
On Tue, 30 Aug 2016, Peter Griffin wrote:
> Thanks for reviewing and your very valuable feedback.
> On Tue, 30 Aug 2016, Lee Jones wrote:
> > On Fri, 26 Aug 2016, Peter Griffin wrote:
> >
> > > This patch adds the DT node for the uniperif reader
> > > IP block found on STiH407 family silicon.
> > >
> > > Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@st.com>
> > > Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
> > > ---
> > > arch/arm/boot/dts/stih407-family.dtsi | 26 ++++++++++++++++++++++++++
> > > 1 file changed, 26 insertions(+)
> > >
> > > diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
> > > index d263c96..bdddf2c 100644
> > > --- a/arch/arm/boot/dts/stih407-family.dtsi
> > > +++ b/arch/arm/boot/dts/stih407-family.dtsi
> > > @@ -956,5 +956,31 @@
> > > st,version = <5>;
> > > st,mode = "SPDIF";
> > > };
> > > +
> > > + sti_uni_reader0: sti-uni-reader@0 {
> > > + compatible = "st,sti-uni-reader";
> > > + status = "disabled";
> >
> > I find it's normally nicer to place the status of the node at the
> > bottom, separated by a '\n'.
>
> Ok I'll add a superfluous '\n' in the next version.
Everyone loves a smart arse!
In this case I believe the '\n' to be a functional separator and not
superfluous at all.
> > > + dai-name = "Uni Reader #0 (PCM IN)";
> >
> > Oooo, not seen something like this before.
> >
> > If it does not already have one, it would require a DT Ack.
>
> No idea, the driver got merged 1 year ago.
>
> Arnaud did you get a DT ack when you merged this driver & binding?
> >
> > > + st,version = <3>;
> >
> > This will likely need a DT Ack too. We usually encode this sort of
> > information in the compatible string.
>
> See 05c1b4480e86a871b18030d6f3d532dc0ecdf38c
Well Rob's the boss. We certainly never used to take 'device ID' or
'version' attributes. I guess something must have changed.
--
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH v2] virtio_ring: Make interrupt suppression spec compliant
From: Ladi Prosek @ 2016-08-31 12:00 UTC (permalink / raw)
To: virtualization; +Cc: lprosek, stable, mst
According to the spec, if the VIRTIO_RING_F_EVENT_IDX feature bit is
negotiated the driver MUST set flags to 0. Not dirtying the available
ring in virtqueue_disable_cb also has a minor positive performance
impact, improving L1 dcache load misses by ~0.5% in vring_bench.
Writes to the used event field (vring_used_event) are still unconditional.
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: <stable@vger.kernel.org> # f277ec4 virtio_ring: shadow available
Cc: <stable@vger.kernel.org>
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
---
v1->v2:
* fixed coding style
* perf measurement results added to commit message
* patch sent to virtualization@lists.linux-foundation.org (was kvm@vger.kernel.org)
drivers/virtio/virtio_ring.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e383ecd..926ecb7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -732,7 +732,8 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
}
@@ -764,7 +765,8 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
* entry. Always do both to keep code simple. */
if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
END_USE(vq);
@@ -832,10 +834,11 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
* more to do. */
/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
- * entry. Always do both to keep code simple. */
+ * entry. Always update the event index to keep code simple. */
if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
- vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
/* TODO: tune this threshold */
bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
@@ -953,7 +956,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
- vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+ if (!vq->event)
+ vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
}
/* Put everything in free lists. */
--
2.5.5
^ permalink raw reply related
* Re: [PATCH 1/3] virtio: Basic implementation of virtio pstore driver
From: Michael S. Tsirkin @ 2016-08-31 14:54 UTC (permalink / raw)
To: Namhyung Kim
Cc: virtio-dev, Tony Luck, Kees Cook, kvm,
Radim Krčmář, Anton Vorontsov, Will Deacon, LKML,
Steven Rostedt, qemu-devel, Minchan Kim, Anthony Liguori,
Colin Cross, Paolo Bonzini, virtualization, Ingo Molnar
In-Reply-To: <20160831080802.13408-2-namhyung@kernel.org>
On Wed, Aug 31, 2016 at 05:08:00PM +0900, Namhyung Kim wrote:
> The virtio pstore driver provides interface to the pstore subsystem so
> that the guest kernel's log/dump message can be saved on the host
> machine. Users can access the log file directly on the host, or on the
> guest at the next boot using pstore filesystem. It currently deals with
> kernel log (printk) buffer only, but we can extend it to have other
> information (like ftrace dump) later.
>
> It supports legacy PCI device using single order-2 page buffer. It uses
> two virtqueues - one for (sync) read and another for (async) write.
> Since it cannot wait for write finished, it supports up to 128
> concurrent IO. The buffer size is configurable now.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Anthony Liguori <aliguori@amazon.com>
> Cc: Anton Vorontsov <anton@enomsg.org>
> Cc: Colin Cross <ccross@android.com>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Minchan Kim <minchan@kernel.org>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: kvm@vger.kernel.org
> Cc: qemu-devel@nongnu.org
> Cc: virtualization@lists.linux-foundation.org
> Cc: virtio-dev@lists.oasis-open.org
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
> drivers/virtio/Kconfig | 10 +
> drivers/virtio/Makefile | 1 +
> drivers/virtio/virtio_pstore.c | 417 +++++++++++++++++++++++++++++++++++++
> include/uapi/linux/Kbuild | 1 +
> include/uapi/linux/virtio_ids.h | 1 +
> include/uapi/linux/virtio_pstore.h | 74 +++++++
> 6 files changed, 504 insertions(+)
> create mode 100644 drivers/virtio/virtio_pstore.c
> create mode 100644 include/uapi/linux/virtio_pstore.h
>
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index 77590320d44c..8f0e6c796c12 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -58,6 +58,16 @@ config VIRTIO_INPUT
>
> If unsure, say M.
>
> +config VIRTIO_PSTORE
> + tristate "Virtio pstore driver"
> + depends on VIRTIO
> + depends on PSTORE
> + ---help---
> + This driver supports virtio pstore devices to save/restore
> + panic and oops messages on the host.
> +
> + If unsure, say M.
> +
> config VIRTIO_MMIO
> tristate "Platform bus driver for memory mapped virtio devices"
> depends on HAS_IOMEM && HAS_DMA
> diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> index 41e30e3dc842..bee68cb26d48 100644
> --- a/drivers/virtio/Makefile
> +++ b/drivers/virtio/Makefile
> @@ -5,3 +5,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
> obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
> obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
> +obj-$(CONFIG_VIRTIO_PSTORE) += virtio_pstore.o
> diff --git a/drivers/virtio/virtio_pstore.c b/drivers/virtio/virtio_pstore.c
> new file mode 100644
> index 000000000000..ec41f0d2f0b7
> --- /dev/null
> +++ b/drivers/virtio/virtio_pstore.c
> @@ -0,0 +1,417 @@
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/pstore.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <uapi/linux/virtio_ids.h>
> +#include <uapi/linux/virtio_pstore.h>
> +
> +#define VIRT_PSTORE_ORDER 2
> +#define VIRT_PSTORE_BUFSIZE (4096 << VIRT_PSTORE_ORDER)
> +#define VIRT_PSTORE_NR_REQ 128
> +
> +struct virtio_pstore {
> + struct virtio_device *vdev;
> + struct virtqueue *vq[2];
> + struct pstore_info pstore;
> + struct virtio_pstore_req req[VIRT_PSTORE_NR_REQ];
> + struct virtio_pstore_res res[VIRT_PSTORE_NR_REQ];
> + unsigned int req_id;
> +
> + /* Waiting for host to ack */
> + wait_queue_head_t acked;
> + int failed;
> +};
> +
> +#define TYPE_TABLE_ENTRY(_entry) \
> + { PSTORE_TYPE_##_entry, VIRTIO_PSTORE_TYPE_##_entry }
> +
> +struct type_table {
> + int pstore;
> + u16 virtio;
> +} type_table[] = {
> + TYPE_TABLE_ENTRY(DMESG),
> +};
> +
> +#undef TYPE_TABLE_ENTRY
> +
> +
> +static u16 to_virtio_type(struct virtio_pstore *vps, enum pstore_type_id type)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < ARRAY_SIZE(type_table); i++) {
> + if (type == type_table[i].pstore)
> + return cpu_to_virtio16(vps->vdev, type_table[i].virtio);
Does this pass sparse checks? If yes I'm surprised - this clearly
returns a virtio16 type.
> + }
> +
> + return cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_TYPE_UNKNOWN);
> +}
> +
> +static enum pstore_type_id from_virtio_type(struct virtio_pstore *vps, u16 type)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < ARRAY_SIZE(type_table); i++) {
> + if (virtio16_to_cpu(vps->vdev, type) == type_table[i].virtio)
> + return type_table[i].pstore;
> + }
> +
> + return PSTORE_TYPE_UNKNOWN;
> +}
> +
> +static void virtpstore_ack(struct virtqueue *vq)
> +{
> + struct virtio_pstore *vps = vq->vdev->priv;
> +
> + wake_up(&vps->acked);
> +}
> +
> +static void virtpstore_check(struct virtqueue *vq)
> +{
> + struct virtio_pstore *vps = vq->vdev->priv;
> + struct virtio_pstore_res *res;
> + unsigned int len;
> +
> + res = virtqueue_get_buf(vq, &len);
> + if (res == NULL)
> + return;
> +
> + if (virtio32_to_cpu(vq->vdev, res->ret) < 0)
> + vps->failed = 1;
> +}
> +
> +static void virt_pstore_get_reqs(struct virtio_pstore *vps,
> + struct virtio_pstore_req **preq,
> + struct virtio_pstore_res **pres)
> +{
> + unsigned int idx = vps->req_id++ % VIRT_PSTORE_NR_REQ;
> +
> + *preq = &vps->req[idx];
> + *pres = &vps->res[idx];
> +
> + memset(*preq, 0, sizeof(**preq));
> + memset(*pres, 0, sizeof(**pres));
> +}
> +
> +static int virt_pstore_open(struct pstore_info *psi)
> +{
> + struct virtio_pstore *vps = psi->data;
> + struct virtio_pstore_req *req;
> + struct virtio_pstore_res *res;
> + struct scatterlist sgo[1], sgi[1];
> + struct scatterlist *sgs[2] = { sgo, sgi };
> + unsigned int len;
> +
> + virt_pstore_get_reqs(vps, &req, &res);
> +
> + req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_OPEN);
> +
> + sg_init_one(sgo, req, sizeof(*req));
> + sg_init_one(sgi, res, sizeof(*res));
> + virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
> + virtqueue_kick(vps->vq[0]);
> +
> + wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
> + return virtio32_to_cpu(vps->vdev, res->ret);
> +}
> +
> +static int virt_pstore_close(struct pstore_info *psi)
> +{
> + struct virtio_pstore *vps = psi->data;
> + struct virtio_pstore_req *req = &vps->req[vps->req_id];
> + struct virtio_pstore_res *res = &vps->res[vps->req_id];
> + struct scatterlist sgo[1], sgi[1];
> + struct scatterlist *sgs[2] = { sgo, sgi };
> + unsigned int len;
> +
> + virt_pstore_get_reqs(vps, &req, &res);
> +
> + req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_CLOSE);
> +
> + sg_init_one(sgo, req, sizeof(*req));
> + sg_init_one(sgi, res, sizeof(*res));
> + virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
> + virtqueue_kick(vps->vq[0]);
> +
> + wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
> + return virtio32_to_cpu(vps->vdev, res->ret);
> +}
> +
> +static ssize_t virt_pstore_read(u64 *id, enum pstore_type_id *type,
> + int *count, struct timespec *time,
> + char **buf, bool *compressed,
> + ssize_t *ecc_notice_size,
> + struct pstore_info *psi)
> +{
> + struct virtio_pstore *vps = psi->data;
> + struct virtio_pstore_req *req;
> + struct virtio_pstore_res *res;
> + struct virtio_pstore_fileinfo info;
> + struct scatterlist sgo[1], sgi[3];
> + struct scatterlist *sgs[2] = { sgo, sgi };
> + unsigned int len;
> + unsigned int flags;
> + int ret;
> + void *bf;
> +
> + virt_pstore_get_reqs(vps, &req, &res);
> +
> + req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_READ);
> +
> + sg_init_one(sgo, req, sizeof(*req));
> + sg_init_table(sgi, 3);
> + sg_set_buf(&sgi[0], res, sizeof(*res));
> + sg_set_buf(&sgi[1], &info, sizeof(info));
> + sg_set_buf(&sgi[2], psi->buf, psi->bufsize);
> + virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
> + virtqueue_kick(vps->vq[0]);
> +
> + wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
> + if (len < sizeof(*res) + sizeof(info))
> + return -1;
> +
> + ret = virtio32_to_cpu(vps->vdev, res->ret);
> + if (ret < 0)
> + return ret;
> +
> + len = virtio32_to_cpu(vps->vdev, info.len);
> +
> + bf = kmalloc(len, GFP_KERNEL);
> + if (bf == NULL)
> + return -ENOMEM;
> +
> + *id = virtio64_to_cpu(vps->vdev, info.id);
> + *type = from_virtio_type(vps, info.type);
> + *count = virtio32_to_cpu(vps->vdev, info.count);
> +
> + flags = virtio32_to_cpu(vps->vdev, info.flags);
> + *compressed = flags & VIRTIO_PSTORE_FL_COMPRESSED;
> +
> + time->tv_sec = virtio64_to_cpu(vps->vdev, info.time_sec);
> + time->tv_nsec = virtio32_to_cpu(vps->vdev, info.time_nsec);
> +
> + memcpy(bf, psi->buf, len);
> + *buf = bf;
> +
> + return len;
> +}
> +
> +static int notrace virt_pstore_write(enum pstore_type_id type,
> + enum kmsg_dump_reason reason,
> + u64 *id, unsigned int part, int count,
> + bool compressed, size_t size,
> + struct pstore_info *psi)
> +{
> + struct virtio_pstore *vps = psi->data;
> + struct virtio_pstore_req *req;
> + struct virtio_pstore_res *res;
> + struct scatterlist sgo[2], sgi[1];
> + struct scatterlist *sgs[2] = { sgo, sgi };
> + unsigned int flags = compressed ? VIRTIO_PSTORE_FL_COMPRESSED : 0;
> +
> + if (vps->failed)
> + return -1;
> +
> + *id = vps->req_id;
> + virt_pstore_get_reqs(vps, &req, &res);
> +
> + req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_WRITE);
> + req->type = to_virtio_type(vps, type);
> + req->flags = cpu_to_virtio32(vps->vdev, flags);
> +
> + sg_init_table(sgo, 2);
> + sg_set_buf(&sgo[0], req, sizeof(*req));
> + sg_set_buf(&sgo[1], psi->buf, size);
> + sg_init_one(sgi, res, sizeof(*res));
> + virtqueue_add_sgs(vps->vq[1], sgs, 1, 1, vps, GFP_ATOMIC);
> + virtqueue_kick(vps->vq[1]);
> +
> + return 0;
> +}
> +
> +static int virt_pstore_erase(enum pstore_type_id type, u64 id, int count,
> + struct timespec time, struct pstore_info *psi)
> +{
> + struct virtio_pstore *vps = psi->data;
> + struct virtio_pstore_req *req;
> + struct virtio_pstore_res *res;
> + struct scatterlist sgo[1], sgi[1];
> + struct scatterlist *sgs[2] = { sgo, sgi };
> + unsigned int len;
> +
> + virt_pstore_get_reqs(vps, &req, &res);
> +
> + req->cmd = cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_CMD_ERASE);
> + req->type = to_virtio_type(vps, type);
> + req->id = cpu_to_virtio64(vps->vdev, id);
> + req->count = cpu_to_virtio32(vps->vdev, count);
> +
> + sg_init_one(sgo, req, sizeof(*req));
> + sg_init_one(sgi, res, sizeof(*res));
> + virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
> + virtqueue_kick(vps->vq[0]);
> +
> + wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
> + return virtio32_to_cpu(vps->vdev, res->ret);
> +}
> +
> +static int virt_pstore_init(struct virtio_pstore *vps)
> +{
> + struct pstore_info *psinfo = &vps->pstore;
> + int err;
> +
> + if (!psinfo->bufsize)
> + psinfo->bufsize = VIRT_PSTORE_BUFSIZE;
> +
> + psinfo->buf = alloc_pages_exact(psinfo->bufsize, GFP_KERNEL);
> + if (!psinfo->buf) {
> + pr_err("cannot allocate pstore buffer\n");
> + return -ENOMEM;
> + }
> +
> + psinfo->owner = THIS_MODULE;
> + psinfo->name = "virtio";
> + psinfo->open = virt_pstore_open;
> + psinfo->close = virt_pstore_close;
> + psinfo->read = virt_pstore_read;
> + psinfo->erase = virt_pstore_erase;
> + psinfo->write = virt_pstore_write;
> + psinfo->flags = PSTORE_FLAGS_FRAGILE;
> +
> + psinfo->data = vps;
> + spin_lock_init(&psinfo->buf_lock);
> +
> + err = pstore_register(psinfo);
> + if (err)
> + kfree(psinfo->buf);
> +
> + return err;
> +}
> +
> +static int virt_pstore_exit(struct virtio_pstore *vps)
> +{
> + struct pstore_info *psinfo = &vps->pstore;
> +
> + pstore_unregister(psinfo);
> +
> + free_pages_exact(psinfo->buf, psinfo->bufsize);
> + psinfo->buf = NULL;
> + psinfo->bufsize = 0;
> +
> + return 0;
> +}
> +
> +static int virtpstore_init_vqs(struct virtio_pstore *vps)
> +{
> + vq_callback_t *callbacks[] = { virtpstore_ack, virtpstore_check };
> + const char *names[] = { "pstore_read", "pstore_write" };
> +
> + return vps->vdev->config->find_vqs(vps->vdev, 2, vps->vq,
> + callbacks, names);
> +}
> +
> +static void virtpstore_init_config(struct virtio_pstore *vps)
> +{
> + u32 bufsize;
> +
> + virtio_cread(vps->vdev, struct virtio_pstore_config, bufsize, &bufsize);
> +
> + vps->pstore.bufsize = PAGE_ALIGN(bufsize);
> +}
> +
> +static void virtpstore_confirm_config(struct virtio_pstore *vps)
> +{
> + u32 bufsize = vps->pstore.bufsize;
> +
> + virtio_cwrite(vps->vdev, struct virtio_pstore_config, bufsize,
> + &bufsize);
> +}
> +
> +static int virtpstore_probe(struct virtio_device *vdev)
> +{
> + struct virtio_pstore *vps;
> + int err;
> +
> + if (!vdev->config->get) {
> + dev_err(&vdev->dev, "driver init: config access disabled\n");
> + return -EINVAL;
> + }
> +
> + vdev->priv = vps = kzalloc(sizeof(*vps), GFP_KERNEL);
> + if (!vps) {
> + err = -ENOMEM;
> + goto out;
> + }
> + vps->vdev = vdev;
> +
> + err = virtpstore_init_vqs(vps);
> + if (err < 0)
> + goto out_free;
> +
> + virtpstore_init_config(vps);
> +
> + err = virt_pstore_init(vps);
> + if (err)
> + goto out_del_vq;
> +
> + virtpstore_confirm_config(vps);
> +
> + init_waitqueue_head(&vps->acked);
> +
> + virtio_device_ready(vdev);
> +
> + dev_info(&vdev->dev, "driver init: ok (bufsize = %luK, flags = %x)\n",
> + vps->pstore.bufsize >> 10, vps->pstore.flags);
> +
> + return 0;
> +
> +out_del_vq:
> + vdev->config->del_vqs(vdev);
> +out_free:
> + kfree(vps);
> +out:
> + dev_err(&vdev->dev, "driver init: failed with %d\n", err);
> + return err;
> +}
> +
> +static void virtpstore_remove(struct virtio_device *vdev)
> +{
> + struct virtio_pstore *vps = vdev->priv;
> +
> + virt_pstore_exit(vps);
> +
> + /* Now we reset the device so we can clean up the queues. */
> + vdev->config->reset(vdev);
> +
> + vdev->config->del_vqs(vdev);
> +
> + kfree(vps);
> +}
> +
> +static unsigned int features[] = {
> +};
> +
> +static struct virtio_device_id id_table[] = {
> + { VIRTIO_ID_PSTORE, VIRTIO_DEV_ANY_ID },
> + { 0 },
> +};
> +
> +static struct virtio_driver virtio_pstore_driver = {
> + .driver.name = KBUILD_MODNAME,
> + .driver.owner = THIS_MODULE,
> + .feature_table = features,
> + .feature_table_size = ARRAY_SIZE(features),
> + .id_table = id_table,
> + .probe = virtpstore_probe,
> + .remove = virtpstore_remove,
> +};
> +
> +module_virtio_driver(virtio_pstore_driver);
> +MODULE_DEVICE_TABLE(virtio, id_table);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Namhyung Kim <namhyung@kernel.org>");
> +MODULE_DESCRIPTION("Virtio pstore driver");
> diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
> index 6d4e92ccdc91..9bbb1554d8b2 100644
> --- a/include/uapi/linux/Kbuild
> +++ b/include/uapi/linux/Kbuild
> @@ -449,6 +449,7 @@ header-y += virtio_ids.h
> header-y += virtio_input.h
> header-y += virtio_net.h
> header-y += virtio_pci.h
> +header-y += virtio_pstore.h
> header-y += virtio_ring.h
> header-y += virtio_rng.h
> header-y += virtio_scsi.h
> diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
> index 77925f587b15..c72a9ab588c0 100644
> --- a/include/uapi/linux/virtio_ids.h
> +++ b/include/uapi/linux/virtio_ids.h
> @@ -41,5 +41,6 @@
> #define VIRTIO_ID_CAIF 12 /* Virtio caif */
> #define VIRTIO_ID_GPU 16 /* virtio GPU */
> #define VIRTIO_ID_INPUT 18 /* virtio input */
> +#define VIRTIO_ID_PSTORE 22 /* virtio pstore */
>
> #endif /* _LINUX_VIRTIO_IDS_H */
> diff --git a/include/uapi/linux/virtio_pstore.h b/include/uapi/linux/virtio_pstore.h
> new file mode 100644
> index 000000000000..f4b0d204d8ae
> --- /dev/null
> +++ b/include/uapi/linux/virtio_pstore.h
> @@ -0,0 +1,74 @@
> +#ifndef _LINUX_VIRTIO_PSTORE_H
> +#define _LINUX_VIRTIO_PSTORE_H
> +/* This header is BSD licensed so anyone can use the definitions to implement
> + * compatible drivers/servers.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + * 3. Neither the name of IBM nor the names of its contributors
> + * may be used to endorse or promote products derived from this software
> + * without specific prior written permission.
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE. */
> +#include <linux/types.h>
> +#include <linux/virtio_types.h>
> +
> +#define VIRTIO_PSTORE_CMD_NULL 0
> +#define VIRTIO_PSTORE_CMD_OPEN 1
> +#define VIRTIO_PSTORE_CMD_READ 2
> +#define VIRTIO_PSTORE_CMD_WRITE 3
> +#define VIRTIO_PSTORE_CMD_ERASE 4
> +#define VIRTIO_PSTORE_CMD_CLOSE 5
> +
> +#define VIRTIO_PSTORE_TYPE_UNKNOWN 0
> +#define VIRTIO_PSTORE_TYPE_DMESG 1
> +
> +#define VIRTIO_PSTORE_FL_COMPRESSED 1
> +
> +struct virtio_pstore_req {
> + __virtio16 cmd;
> + __virtio16 type;
> + __virtio32 flags;
> + __virtio64 id;
> + __virtio32 count;
> + __virtio32 reserved;
> +};
> +
> +struct virtio_pstore_res {
> + __virtio16 cmd;
> + __virtio16 type;
> + __virtio32 ret;
> +};
Is there a reason to support legacy endian-ness?
If not, you can just use __le formats.
> +struct virtio_pstore_fileinfo {
> + __virtio64 id;
> + __virtio32 count;
> + __virtio16 type;
> + __virtio16 unused;
> + __virtio32 flags;
> + __virtio32 len;
> + __virtio64 time_sec;
> + __virtio32 time_nsec;
> + __virtio32 reserved;
> +};
> +
> +struct virtio_pstore_config {
> + __virtio32 bufsize;
> +};
> +
> +#endif /* _LINUX_VIRTIO_PSTORE_H */
> --
> 2.9.3
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH 1/2] drm/virtio: drop virtio_gpu_execbuffer_ioctl() wrapping
From: Gustavo Padovan @ 2016-08-31 16:26 UTC (permalink / raw)
To: David Airlie
Cc: open list, Gustavo Padovan, open list:VIRTIO GPU DRIVER,
open list:VIRTIO GPU DRIVER
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Instead of wrapping virtio_gpu_execbuffer() to execute the ioctl
just execute it directly.
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
drivers/gpu/drm/virtio/virtgpu_ioctl.c | 24 ++++++++----------------
1 file changed, 8 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index c046903..e0613a9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -89,10 +89,16 @@ static void virtio_gpu_unref_list(struct list_head *head)
}
}
-static int virtio_gpu_execbuffer(struct drm_device *dev,
- struct drm_virtgpu_execbuffer *exbuf,
+/*
+ * Usage of execbuffer:
+ * Relocations need to take into account the full VIRTIO_GPUDrawable size.
+ * However, the command as passed from user space must *not* contain the initial
+ * VIRTIO_GPUReleaseInfo struct (first XXX bytes)
+ */
+static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
struct drm_file *drm_file)
{
+ struct drm_virtgpu_execbuffer *exbuf = data;
struct virtio_gpu_device *vgdev = dev->dev_private;
struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv;
struct drm_gem_object *gobj;
@@ -182,20 +188,6 @@ out_free:
return ret;
}
-/*
- * Usage of execbuffer:
- * Relocations need to take into account the full VIRTIO_GPUDrawable size.
- * However, the command as passed from user space must *not* contain the initial
- * VIRTIO_GPUReleaseInfo struct (first XXX bytes)
- */
-static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
- struct drm_file *file_priv)
-{
- struct drm_virtgpu_execbuffer *execbuffer = data;
- return virtio_gpu_execbuffer(dev, execbuffer, file_priv);
-}
-
-
static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
--
2.5.5
^ permalink raw reply related
* [PATCH 2/2] drm/virtio: add real fence context and seqno
From: Gustavo Padovan @ 2016-08-31 16:26 UTC (permalink / raw)
To: David Airlie
Cc: open list, Gustavo Padovan, open list:VIRTIO GPU DRIVER,
open list:VIRTIO GPU DRIVER
In-Reply-To: <1472660813-28219-1-git-send-email-gustavo@padovan.org>
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
virtio fences were created with no fence context, which would make them
clash with an allocated fence context.
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
drivers/gpu/drm/virtio/virtgpu_drv.h | 1 +
drivers/gpu/drm/virtio/virtgpu_fence.c | 2 +-
drivers/gpu/drm/virtio/virtgpu_kms.c | 1 +
3 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index b18ef31..06ad923 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -75,6 +75,7 @@ typedef void (*virtio_gpu_resp_cb)(struct virtio_gpu_device *vgdev,
struct virtio_gpu_fence_driver {
atomic64_t last_seq;
uint64_t sync_seq;
+ uint64_t context;
struct list_head fences;
spinlock_t lock;
};
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index cf44187..f3f70fa 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -89,7 +89,7 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
(*fence)->drv = drv;
(*fence)->seq = ++drv->sync_seq;
fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
- 0, (*fence)->seq);
+ drv->context, (*fence)->seq);
fence_get(&(*fence)->f);
list_add_tail(&(*fence)->node, &drv->fences);
spin_unlock_irqrestore(&drv->lock, irq_flags);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 4150873..036b0fb 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -159,6 +159,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func);
virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func);
+ vgdev->fence_drv.context = fence_context_alloc(1);
spin_lock_init(&vgdev->fence_drv.lock);
INIT_LIST_HEAD(&vgdev->fence_drv.fences);
INIT_LIST_HEAD(&vgdev->cap_cache);
--
2.5.5
^ permalink raw reply related
* Re: [PATCH 1/3] virtio: Basic implementation of virtio pstore driver
From: Namhyung Kim @ 2016-09-01 0:03 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtio-dev, Tony Luck, Kees Cook, kvm,
Radim Krčmář, Anton Vorontsov, Will Deacon, LKML,
Steven Rostedt, qemu-devel, Minchan Kim, Anthony Liguori,
Colin Cross, Paolo Bonzini, virtualization, Ingo Molnar
In-Reply-To: <20160831175007-mutt-send-email-mst@kernel.org>
Hi Michael,
On Wed, Aug 31, 2016 at 05:54:04PM +0300, Michael S. Tsirkin wrote:
> On Wed, Aug 31, 2016 at 05:08:00PM +0900, Namhyung Kim wrote:
> > The virtio pstore driver provides interface to the pstore subsystem so
> > that the guest kernel's log/dump message can be saved on the host
> > machine. Users can access the log file directly on the host, or on the
> > guest at the next boot using pstore filesystem. It currently deals with
> > kernel log (printk) buffer only, but we can extend it to have other
> > information (like ftrace dump) later.
> >
> > It supports legacy PCI device using single order-2 page buffer. It uses
> > two virtqueues - one for (sync) read and another for (async) write.
> > Since it cannot wait for write finished, it supports up to 128
> > concurrent IO. The buffer size is configurable now.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: "Michael S. Tsirkin" <mst@redhat.com>
> > Cc: Anthony Liguori <aliguori@amazon.com>
> > Cc: Anton Vorontsov <anton@enomsg.org>
> > Cc: Colin Cross <ccross@android.com>
> > Cc: Kees Cook <keescook@chromium.org>
> > Cc: Tony Luck <tony.luck@intel.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: Ingo Molnar <mingo@kernel.org>
> > Cc: Minchan Kim <minchan@kernel.org>
> > Cc: Will Deacon <will.deacon@arm.com>
> > Cc: kvm@vger.kernel.org
> > Cc: qemu-devel@nongnu.org
> > Cc: virtualization@lists.linux-foundation.org
> > Cc: virtio-dev@lists.oasis-open.org
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
[SNIP]
> > +#define TYPE_TABLE_ENTRY(_entry) \
> > + { PSTORE_TYPE_##_entry, VIRTIO_PSTORE_TYPE_##_entry }
> > +
> > +struct type_table {
> > + int pstore;
> > + u16 virtio;
> > +} type_table[] = {
> > + TYPE_TABLE_ENTRY(DMESG),
> > +};
> > +
> > +#undef TYPE_TABLE_ENTRY
> > +
> > +
> > +static u16 to_virtio_type(struct virtio_pstore *vps, enum pstore_type_id type)
> > +{
> > + unsigned int i;
> > +
> > + for (i = 0; i < ARRAY_SIZE(type_table); i++) {
> > + if (type == type_table[i].pstore)
> > + return cpu_to_virtio16(vps->vdev, type_table[i].virtio);
>
> Does this pass sparse checks? If yes I'm surprised - this clearly
> returns a virtio16 type.
Ah, didn't run sparse. Will change it to return a __le16 type
(according to your comment below).
>
>
> > + }
> > +
> > + return cpu_to_virtio16(vps->vdev, VIRTIO_PSTORE_TYPE_UNKNOWN);
> > +}
> > +
> > +static enum pstore_type_id from_virtio_type(struct virtio_pstore *vps, u16 type)
This one should be '__le16 type' as well.
> > +{
> > + unsigned int i;
> > +
> > + for (i = 0; i < ARRAY_SIZE(type_table); i++) {
> > + if (virtio16_to_cpu(vps->vdev, type) == type_table[i].virtio)
> > + return type_table[i].pstore;
> > + }
> > +
> > + return PSTORE_TYPE_UNKNOWN;
> > +}
> > +
[SNIP]
> > +
> > +struct virtio_pstore_req {
> > + __virtio16 cmd;
> > + __virtio16 type;
> > + __virtio32 flags;
> > + __virtio64 id;
> > + __virtio32 count;
> > + __virtio32 reserved;
> > +};
> > +
> > +struct virtio_pstore_res {
> > + __virtio16 cmd;
> > + __virtio16 type;
> > + __virtio32 ret;
> > +};
>
> Is there a reason to support legacy endian-ness?
> If not, you can just use __le formats.
I just didn't know what's the preferred type. Will change!
Thanks,
Namhyung
>
>
> > +struct virtio_pstore_fileinfo {
> > + __virtio64 id;
> > + __virtio32 count;
> > + __virtio16 type;
> > + __virtio16 unused;
> > + __virtio32 flags;
> > + __virtio32 len;
> > + __virtio64 time_sec;
> > + __virtio32 time_nsec;
> > + __virtio32 reserved;
> > +};
> > +
> > +struct virtio_pstore_config {
> > + __virtio32 bufsize;
> > +};
> > +
> > +#endif /* _LINUX_VIRTIO_PSTORE_H */
> > --
> > 2.9.3
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration
From: Wanpeng Li @ 2016-09-01 4:30 UTC (permalink / raw)
To: Liang Li
Cc: virtio-dev, kvm, dave.hansen, qemu-devel@nongnu.org Developers,
linux-kernel@vger.kernel.org, linux-mm, virtualization, dgilbert
In-Reply-To: <1470638134-24149-1-git-send-email-liang.z.li@intel.com>
2016-08-08 14:35 GMT+08:00 Liang Li <liang.z.li@intel.com>:
> This patch set contains two parts of changes to the virtio-balloon.
>
> One is the change for speeding up the inflating & deflating process,
> the main idea of this optimization is to use bitmap to send the page
> information to host instead of the PFNs, to reduce the overhead of
> virtio data transmission, address translation and madvise(). This can
> help to improve the performance by about 85%.
>
> Another change is for speeding up live migration. By skipping process
> guest's free pages in the first round of data copy, to reduce needless
> data processing, this can help to save quite a lot of CPU cycles and
> network bandwidth. We put guest's free page information in bitmap and
> send it to host with the virt queue of virtio-balloon. For an idle 8GB
> guest, this can help to shorten the total live migration time from 2Sec
> to about 500ms in the 10Gbps network environment.
I just read the slides on this feature from the recent KVM Forum. Cloud
providers care more about live migration downtime (to avoid customers'
perception) than about total time. However, this feature increases
downtime in exchange for the benefit of reduced total time; maybe it
would be more acceptable if there were no downside for downtime.
Regards,
Wanpeng Li
^ permalink raw reply
* RE: [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration
From: Li, Liang Z @ 2016-09-01 5:46 UTC (permalink / raw)
To: Wanpeng Li
Cc: virtio-dev@lists.oasis-open.org, kvm, Hansen, Dave,
qemu-devel@nongnu.org Developers, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, virtualization@lists.linux-foundation.org,
dgilbert@redhat.com
In-Reply-To: <CANRm+Cy=p8PKg8HqRp7apU0D9X=gpnrahtXRq+S+5Gq863VO8g@mail.gmail.com>
> Subject: Re: [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating
> & fast live migration
>
> 2016-08-08 14:35 GMT+08:00 Liang Li <liang.z.li@intel.com>:
> > This patch set contains two parts of changes to the virtio-balloon.
> >
> > One is the change for speeding up the inflating & deflating process,
> > the main idea of this optimization is to use bitmap to send the page
> > information to host instead of the PFNs, to reduce the overhead of
> > virtio data transmission, address translation and madvise(). This can
> > help to improve the performance by about 85%.
> >
> > Another change is for speeding up live migration. By skipping process
> > guest's free pages in the first round of data copy, to reduce needless
> > data processing, this can help to save quite a lot of CPU cycles and
> > network bandwidth. We put guest's free page information in bitmap and
> > send it to host with the virt queue of virtio-balloon. For an idle 8GB
> > guest, this can help to shorten the total live migration time from
> > 2Sec to about 500ms in the 10Gbps network environment.
>
> I just read the slides of this feature for recent kvm forum, the cloud
> providers more care about live migration downtime to avoid customers'
> perception than total time, however, this feature will increase downtime
> when acquire the benefit of reducing total time, maybe it will be more
> acceptable if there is no downside for downtime.
>
> Regards,
> Wanpeng Li
In theory, there is no factor that will increase the downtime. There is no additional operation
and no more data copy during the stop and copy stage. But in the test, the downtime increases
and this can be reproduced. I think the busy network line may be the reason for this. With this
optimization, a huge amount of data is written to the socket in a shorter time, so some of the write
operations may need to wait. Without this optimization, zero page checking takes more time,
the network is not so busy.
If the guest is not an idle one, I think the gap of the downtime will not be so obvious. Anyway, the
downtime is still less than the max_down_time set by the user.
Thanks!
Liang
^ permalink raw reply
* Re: [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating & fast live migration
From: Wanpeng Li @ 2016-09-01 6:01 UTC (permalink / raw)
To: Li, Liang Z
Cc: virtio-dev@lists.oasis-open.org, kvm, Hansen, Dave,
qemu-devel@nongnu.org Developers, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, virtualization@lists.linux-foundation.org,
dgilbert@redhat.com
In-Reply-To: <F2CBF3009FA73547804AE4C663CAB28E3A01C59B@shsmsx102.ccr.corp.intel.com>
2016-09-01 13:46 GMT+08:00 Li, Liang Z <liang.z.li@intel.com>:
>> Subject: Re: [PATCH v3 kernel 0/7] Extend virtio-balloon for fast (de)inflating
>> & fast live migration
>>
>> 2016-08-08 14:35 GMT+08:00 Liang Li <liang.z.li@intel.com>:
>> > This patch set contains two parts of changes to the virtio-balloon.
>> >
>> > One is the change for speeding up the inflating & deflating process,
>> > the main idea of this optimization is to use bitmap to send the page
>> > information to host instead of the PFNs, to reduce the overhead of
>> > virtio data transmission, address translation and madvise(). This can
>> > help to improve the performance by about 85%.
>> >
>> > Another change is for speeding up live migration. By skipping process
>> > guest's free pages in the first round of data copy, to reduce needless
>> > data processing, this can help to save quite a lot of CPU cycles and
>> > network bandwidth. We put guest's free page information in bitmap and
>> > send it to host with the virt queue of virtio-balloon. For an idle 8GB
>> > guest, this can help to shorten the total live migration time from
>> > 2Sec to about 500ms in the 10Gbps network environment.
>>
>> I just read the slides of this feature for recent kvm forum, the cloud
>> providers more care about live migration downtime to avoid customers'
>> perception than total time, however, this feature will increase downtime
>> when acquire the benefit of reducing total time, maybe it will be more
>> acceptable if there is no downside for downtime.
>>
>> Regards,
>> Wanpeng Li
>
> In theory, there is no factor that will increase the downtime. There is no additional operation
> and no more data copy during the stop and copy stage. But in the test, the downtime increases
> and this can be reproduced. I think the busy network line maybe the reason for this. With this
> optimization, a huge amount of data is written to the socket in a shorter time, so some of the write
> operation may need to wait. Without this optimization, zero page checking takes more time,
> the network is not so busy.
>
> If the guest is not an idle one, I think the gap of the downtime will not so obvious. Anyway, the
http://www.linux-kvm.org/images/c/c3/03x06B-Liang_Li-Real_Time_and_Fast_Live_Migration_Update_for_NFV.pdf
The slides show almost the same percentage for the idle and the
non-idle guests; both show a ~50% increase in downtime.
Regards,
Wanpeng Li
^ permalink raw reply
* Re: [PATCH v8 05/18] dmaengine: st_fdma: Add STMicroelectronics FDMA engine driver support
From: Peter Griffin @ 2016-09-01 10:06 UTC (permalink / raw)
To: Vinod Koul
Cc: devicetree, kernel, airlied, linux-remoteproc, virtualization,
patrice.chotard, dri-devel, linux-kernel, Ludovic Barre,
dmaengine, dan.j.williams, bjorn.andersson, lee.jones,
linux-arm-kernel
In-Reply-To: <20160830163019.GM9355@localhost>
Hi Vinod,
Thanks for reviewing.
On Tue, 30 Aug 2016, Vinod Koul wrote:
> On Fri, Aug 26, 2016 at 03:56:40PM +0100, Peter Griffin wrote:
>
> > config STM32_DMA
> > bool "STMicroelectronics STM32 DMA support"
> > depends on ARCH_STM32
> > @@ -567,7 +580,6 @@ config ZX_DMA
> > help
> > Support the DMA engine for ZTE ZX296702 platform devices.
> >
> > -
>
> unrelated change?
OK will remove and send a separate patch.
>
> > + fdev->chans = devm_kzalloc(&pdev->dev,
> > + fdev->nr_channels
> > + * sizeof(struct st_fdma_chan), GFP_KERNEL);
>
> devm_kcalloc()
Will fix in next version.
>
> > + if (!fdev->chans)
> > + return -ENOMEM;
> > +
> > + fdev->dev = &pdev->dev;
> > + fdev->drvdata = drvdata;
> > + platform_set_drvdata(pdev, fdev);
> > +
> > + fdev->irq = platform_get_irq(pdev, 0);
> > + if (fdev->irq < 0) {
> > + dev_err(&pdev->dev, "Failed to get irq resource\n");
> > + return -EINVAL;
> > + }
> > +
> > + ret = devm_request_irq(&pdev->dev, fdev->irq, st_fdma_irq_handler, 0,
> > + dev_name(&pdev->dev), fdev);
> > + if (ret) {
> > + dev_err(&pdev->dev, "Failed to request irq (%d)\n", ret);
> > + goto err;
> > + }
> > +
> > + fdev->slim_rproc = st_slim_rproc_alloc(pdev, fdev->fw_name);
> > + if (!fdev->slim_rproc) {
> > + ret = PTR_ERR(fdev->slim_rproc);
> > + dev_err(&pdev->dev, "slim_rproc_alloc failed (%d)\n", ret);
> > + goto err;
> > + }
> > +
> > + /* Initialise list of FDMA channels */
> > + INIT_LIST_HEAD(&fdev->dma_device.channels);
> > + for (i = 0; i < fdev->nr_channels; i++) {
> > + struct st_fdma_chan *fchan = &fdev->chans[i];
> > +
> > + fchan->fdev = fdev;
> > + fchan->vchan.desc_free = st_fdma_free_desc;
> > + vchan_init(&fchan->vchan, &fdev->dma_device);
>
> this initialized a tasklet
>
> > +static int st_fdma_remove(struct platform_device *pdev)
> > +{
> > + struct st_fdma_dev *fdev = platform_get_drvdata(pdev);
> > +
> > + devm_free_irq(&pdev->dev, fdev->irq, fdev);
> > + st_slim_rproc_put(fdev->slim_rproc);
> > + of_dma_controller_free(pdev->dev.of_node);
> > + dma_async_device_unregister(&fdev->dma_device);
>
> and that vchan tasklet is not quiesced here :(
Eeek, good spot. Will fix in next version.
>
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver");
> > +MODULE_AUTHOR("Ludovic.barre <Ludovic.barre@st.com>");
> > +MODULE_AUTHOR("Peter Griffin <peter.griffin@linaro.org>");
>
> No MODULE_ALIAS?
Will add in next version.
regards,
Peter.
^ permalink raw reply
* [PATCH] virtio: mark vring_dma_dev() static
From: Baoyou Xie @ 2016-09-01 11:02 UTC (permalink / raw)
To: mst; +Cc: xie.baoyou, baoyou.xie, linux-kernel, arnd, virtualization
We get 1 warning when building kernel with W=1:
drivers/virtio/virtio_ring.c:170:16: warning: no previous prototype for 'vring_dma_dev' [-Wmissing-prototypes]
In fact, this function is only used in the file in which it is
declared and doesn't need a declaration, but can be made static.
So this patch marks this function with 'static'.
Signed-off-by: Baoyou Xie <baoyou.xie@linaro.org>
---
drivers/virtio/virtio_ring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 114a0c8..13b58db 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -167,7 +167,7 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
* making all of the arch DMA ops work on the vring device itself
* is a mess. For now, we use the parent device for DMA ops.
*/
-struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
return vq->vq.vdev->dev.parent;
}
--
2.7.4
^ permalink raw reply related
* Re: [PATCH] virtio: mark vring_dma_dev() static
From: Arnd Bergmann @ 2016-09-01 11:16 UTC (permalink / raw)
To: Baoyou Xie; +Cc: virtualization, linux-kernel, xie.baoyou, mst
In-Reply-To: <1472727777-4695-1-git-send-email-baoyou.xie@linaro.org>
On Thursday, September 1, 2016 7:02:57 PM CEST Baoyou Xie wrote:
> We get 1 warning when building kernel with W=1:
> drivers/virtio/virtio_ring.c:170:16: warning: no previous prototype for 'vring_dma_dev' [-Wmissing-prototypes]
>
> In fact, this function is only used in the file in which it is
> declared and doesn't need a declaration, but can be made static.
> So this patch marks this function with 'static'.
>
> Signed-off-by: Baoyou Xie <baoyou.xie@linaro.org>
>
Acked-by: Arnd Bergmann <arnd@arndb.de>
^ permalink raw reply
* Re: [PATCH] x86/paravirt: Do not trace _paravirt_ident_*() functions
From: Steven Rostedt @ 2016-09-02 14:46 UTC (permalink / raw)
To: LKML
Cc: Jeremy Fitzhardinge, Łukasz Daniluk, Andrew Morton,
virtualization, Chris Wright, Thomas Gleixner, H. Peter Anvin,
Alok Kataria, Linus Torvalds, Ingo Molnar
In-Reply-To: <20160525134726.6362a601@gandalf.local.home>
I just spent half a day bisecting function tracing because I tripped
over this again. I thought this was merged, but I guess it was missed
again.
Can someone please pull this in. And mark it for stable, it goes
probably as far back as 2.6.32.
Thanks!
-- Steve
On Wed, 25 May 2016 13:47:26 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:
> Łukasz Daniluk reported that on a RHEL kernel that his machine would lock up
> after enabling function tracer. I asked him to bisect the functions within
> available_filter_functions, which he did and it came down to three:
>
> _paravirt_nop(), _paravirt_ident_32() and _paravirt_ident_64()
>
> It was found that this is only an issue when noreplace-paravirt is added to
> the kernel command line.
>
> This means that those functions are most likely called within critical
> sections of the function tracer, and must not be traced.
>
> In newer kernels _paravirt_nop() is defined within gcc asm(), and is no
> longer an issue. But both _paravirt_ident_{32,64}() cause the following
> splat when they are traced:
>
> mm/pgtable-generic.c:33: bad pmd ffff8800d2435150(0000000001d00054)
> mm/pgtable-generic.c:33: bad pmd ffff8800d3624190(0000000001d00070)
> mm/pgtable-generic.c:33: bad pmd ffff8800d36a5110(0000000001d00054)
> mm/pgtable-generic.c:33: bad pmd ffff880118eb1450(0000000001d00054)
> NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [systemd-journal:469]
> Modules linked in: e1000e
> CPU: 2 PID: 469 Comm: systemd-journal Not tainted 4.6.0-rc4-test+ #513
> Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
> task: ffff880118f740c0 ti: ffff8800d4aec000 task.ti: ffff8800d4aec000
> RIP: 0010:[<ffffffff81134148>] [<ffffffff81134148>] queued_spin_lock_slowpath+0x118/0x1a0
> RSP: 0018:ffff8800d4aefb90 EFLAGS: 00000246
> RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011eb16d40
> RDX: ffffffff82485760 RSI: 000000001f288820 RDI: ffffea0000008030
> RBP: ffff8800d4aefb90 R08: 00000000000c0000 R09: 0000000000000000
> R10: ffffffff821c8e0e R11: 0000000000000000 R12: ffff880000200fb8
> R13: 00007f7a4e3f7000 R14: ffffea000303f600 R15: ffff8800d4b562e0
> FS: 00007f7a4e3d7840(0000) GS:ffff88011eb00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f7a4e3f7000 CR3: 00000000d3e71000 CR4: 00000000001406e0
> Stack:
> ffff8800d4aefba0 ffffffff81cc5f47 ffff8800d4aefc60 ffffffff8122c15b
> ffff8800d4aefcb0 ffff8800d4aefbd0 ffffffff811bf4cb 0000000000000002
> 0000000000000015 ffff8800d2276050 80000000c0fd8867 ffffea0000008030
> Call Trace:
> [<ffffffff81cc5f47>] _raw_spin_lock+0x27/0x30
> [<ffffffff8122c15b>] handle_pte_fault+0x13db/0x16b0
> [<ffffffff811bf4cb>] ? function_trace_call+0x15b/0x180
> [<ffffffff8122ad85>] ? handle_pte_fault+0x5/0x16b0
> [<ffffffff8122e322>] handle_mm_fault+0x312/0x670
> [<ffffffff81231068>] ? find_vma+0x68/0x70
> [<ffffffff810ab741>] __do_page_fault+0x1b1/0x4e0
> [<ffffffff810aba92>] do_page_fault+0x22/0x30
> [<ffffffff81cc7f68>] page_fault+0x28/0x30
> [<ffffffff81574af5>] ? copy_user_enhanced_fast_string+0x5/0x10
> [<ffffffff8129dec5>] ? seq_read+0x305/0x370
> [<ffffffff81279668>] __vfs_read+0x28/0xe0
> [<ffffffff81279645>] ? __vfs_read+0x5/0xe0
> [<ffffffff81279645>] ? __vfs_read+0x5/0xe0
> [<ffffffff81279df6>] vfs_read+0x86/0x130
> [<ffffffff8127b216>] SyS_read+0x46/0xa0
> [<ffffffff81cc6176>] entry_SYSCALL_64_fastpath+0x1e/0xa8
> Code: 12 48 c1 ea 0c 83 e8 01 83 e2 30 48 98 48 81 c2 40 6d 01 00 48 03 14
> c5 80 6a 5d 82 48 89 0a 8b 41 08 85 c0 75 09 f3 90 8b 41 08 <85> c0 74 f7
> 4c 8b 09 4d 85 c9 74 08 41 0f 18 09 eb 02 f3 90 8b
>
> Reported-by: Łukasz Daniluk <lukasz.daniluk@intel.com>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
>
> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
> index f08ac28b8136..f975d226be6e 100644
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -55,12 +55,12 @@ asm (".pushsection .entry.text, \"ax\"\n"
> ".popsection");
>
> /* identity function, which can be inlined */
> -u32 _paravirt_ident_32(u32 x)
> +u32 notrace _paravirt_ident_32(u32 x)
> {
> return x;
> }
>
> -u64 _paravirt_ident_64(u64 x)
> +u64 notrace _paravirt_ident_64(u64 x)
> {
> return x;
> }
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH] x86/paravirt: Do not trace _paravirt_ident_*() functions
From: Linus Torvalds @ 2016-09-02 16:41 UTC (permalink / raw)
To: Steven Rostedt
Cc: Jeremy Fitzhardinge, Łukasz Daniluk, Andrew Morton, LKML,
virtualization, Chris Wright, Thomas Gleixner, H. Peter Anvin,
Alok Kataria, Ingo Molnar
In-Reply-To: <20160902104630.4b004e09@gandalf.local.home>
On Fri, Sep 2, 2016 at 7:46 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
>
> Can someone please pull this in. And mark it for stable, it goes
> probably as far back as 2.6.32.
Applied.
Linus
^ permalink raw reply
* Re: [PATCH] virtio-blk: Generate uevent after attribute available
From: Michael S. Tsirkin @ 2016-09-02 22:56 UTC (permalink / raw)
To: Fam Zheng; +Cc: Christoph Hellwig, linux-kernel, virtualization
In-Reply-To: <20160629012414.GA23296@ad.usersys.redhat.com>
On Wed, Jun 29, 2016 at 09:24:15AM +0800, Fam Zheng wrote:
> On Tue, 06/28 04:45, Christoph Hellwig wrote:
> > On Tue, Jun 28, 2016 at 10:39:15AM +0800, Fam Zheng wrote:
> > > Userspace listens to the KOBJ_ADD uevent generated in add_disk. At that
> > > point we haven't created the serial attribute file, therefore depending
> > > on how fast udev reacts, the /dev/disk/by-id/ entry doesn't always get
> > > created.
> > >
> > > This race condition can be easily reproduced by hot plugging a number of
> > > virtio-blk disks.
> > >
> > > Also in systemd, there used to be a related workaround in udev rules
> > > called 'WAIT_FOR="serial"', but it is removed in later versions.
> > >
> > > Now let's generate a KOBJ_CHANGE event after the attributes are ready.
> >
> > The same race is present in other drivers as well, e.g. nvme. Please
> > find a way to make this work properly without needing to hack every
> > driver to send events manually.
>
> OK, I'll take a look today!
>
> Fam
Was this fixed in the generic code?
--
MST
^ permalink raw reply
* Re: [PATCH] virtio-blk: Generate uevent after attribute available
From: Fam Zheng @ 2016-09-03 7:08 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: Christoph Hellwig, linux-kernel, virtualization
In-Reply-To: <20160903015630-mutt-send-email-mst@kernel.org>
On Sat, 09/03 01:56, Michael S. Tsirkin wrote:
> On Wed, Jun 29, 2016 at 09:24:15AM +0800, Fam Zheng wrote:
> > On Tue, 06/28 04:45, Christoph Hellwig wrote:
> > > On Tue, Jun 28, 2016 at 10:39:15AM +0800, Fam Zheng wrote:
> > > > Userspace listens to the KOBJ_ADD uevent generated in add_disk. At that
> > > > point we haven't created the serial attribute file, therefore depending
> > > > on how fast udev reacts, the /dev/disk/by-id/ entry doesn't always get
> > > > created.
> > > >
> > > > This race condition can be easily reproduced by hot plugging a number of
> > > > virtio-blk disks.
> > > >
> > > > Also in systemd, there used to be a related workaround in udev rules
> > > > called 'WAIT_FOR="serial"', but it is removed in later versions.
> > > >
> > > > Now let's generate a KOBJ_CHANGE event after the attributes are ready.
> > >
> > > The same race is present in other drivers as well, e.g. nvme. Please
> > > find a way to make this work properly without needing to hack every
> > > driver to send events manually.
> >
> > OK, I'll take a look today!
> >
> > Fam
>
> Was this fixed in the generic code?
A proposed fix is:
https://lkml.org/lkml/2016/8/17/81
Fam
^ permalink raw reply
* Re: [PATCH 09/15] virtio-blk: Pass attribute group to device_add_disk
From: Michael S. Tsirkin @ 2016-09-04 4:18 UTC (permalink / raw)
To: Fam Zheng
Cc: Jens Axboe, linux-block, Sergey Senozhatsky, Michael Ellerman,
virtualization, linux-kernel, linux-nvme, Ed L. Cashin,
Keith Busch, Minchan Kim, Paul Mackerras, Benjamin Herrenschmidt,
linux-mtd, Brian Norris, linuxppc-dev, David Woodhouse,
Nitin Gupta
In-Reply-To: <1471418115-3654-10-git-send-email-famz@redhat.com>
On Wed, Aug 17, 2016 at 03:15:09PM +0800, Fam Zheng wrote:
> Previously after device_add_disk returns, the KOBJ_ADD uevent is already
> emitted. Adding attributes after that is a poor usage of kobject, and
> in practice may result in race conditions with userspace, for
> example udev checks availability of certain attributes and initializes
> /dev entries conditionally.
>
> device_add_disk can handle adding attribute group better, so use it.
>
> Meanwhile, handle error of device_add_disk.
>
> Signed-off-by: Fam Zheng <famz@redhat.com>
Feel free to merge this with the rest of patchset
Acked-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/block/virtio_blk.c | 38 +++++++++++++++++++++++++++-----------
> 1 file changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 4564df5..ff60d82 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -522,10 +522,10 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
> return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
> }
>
> -static const struct device_attribute dev_attr_cache_type_ro =
> +static struct device_attribute dev_attr_cache_type_ro =
> __ATTR(cache_type, S_IRUGO,
> virtblk_cache_type_show, NULL);
> -static const struct device_attribute dev_attr_cache_type_rw =
> +static struct device_attribute dev_attr_cache_type_rw =
> __ATTR(cache_type, S_IRUGO|S_IWUSR,
> virtblk_cache_type_show, virtblk_cache_type_store);
>
> @@ -550,6 +550,26 @@ static struct blk_mq_ops virtio_mq_ops = {
> static unsigned int virtblk_queue_depth;
> module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
>
> +static struct attribute *virtblk_attrs_ro[] = {
> + &dev_attr_serial.attr,
> + &dev_attr_cache_type_ro.attr,
> + NULL
> +};
> +
> +static struct attribute *virtblk_attrs_rw[] = {
> + &dev_attr_serial.attr,
> + &dev_attr_cache_type_rw.attr,
> + NULL
> +};
> +
> +static struct attribute_group virtblk_attr_group_ro = {
> + .attrs = virtblk_attrs_ro,
> +};
> +
> +static struct attribute_group virtblk_attr_group_rw = {
> + .attrs = virtblk_attrs_rw,
> +};
> +
> static int virtblk_probe(struct virtio_device *vdev)
> {
> struct virtio_blk *vblk;
> @@ -560,6 +580,7 @@ static int virtblk_probe(struct virtio_device *vdev)
> u32 v, blk_size, sg_elems, opt_io_size;
> u16 min_io_size;
> u8 physical_block_exp, alignment_offset;
> + struct attribute_group *attr_group;
>
> if (!vdev->config->get) {
> dev_err(&vdev->dev, "%s failure: config access disabled\n",
> @@ -719,19 +740,14 @@ static int virtblk_probe(struct virtio_device *vdev)
>
> virtio_device_ready(vdev);
>
> - device_add_disk(&vdev->dev, vblk->disk, NULL);
> - err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
> - if (err)
> - goto out_del_disk;
> -
> if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
> - err = device_create_file(disk_to_dev(vblk->disk),
> - &dev_attr_cache_type_rw);
> + attr_group = &virtblk_attr_group_rw;
> else
> - err = device_create_file(disk_to_dev(vblk->disk),
> - &dev_attr_cache_type_ro);
> + attr_group = &virtblk_attr_group_ro;
> + err = device_add_disk(&vdev->dev, vblk->disk, attr_group);
> if (err)
> goto out_del_disk;
> +
> return 0;
>
> out_del_disk:
> --
> 2.7.4
^ permalink raw reply
* Re: [PATCH] vhost: Add polling mode
From: Razya Ladelsky @ 2016-09-04 8:45 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Yossi_Kuperman1/Haifa/IBM%IBMIL, kvm, Joel_Nider/Haifa/IBM%IBMIL,
netdev, linux-kernel, abel.gordon, Alex_Glikson/Haifa/IBM%IBMIL,
Eran_Raichstein/Haifa/IBM%IBMIL, virtualization
In-Reply-To: <20140810194559.GA4344@redhat.com>
"Michael S. Tsirkin" <mst@redhat.com> wrote on 10/08/2014 10:45:59 PM:
> From: "Michael S. Tsirkin" <mst@redhat.com>
> To: Razya Ladelsky/Haifa/IBM@IBMIL,
> Cc: kvm@vger.kernel.org, Alex Glikson/Haifa/IBM@IBMIL, Eran
> Raichstein/Haifa/IBM@IBMIL, Yossi Kuperman1/Haifa/IBM@IBMIL, Joel
> Nider/Haifa/IBM@IBMIL, abel.gordon@gmail.com,
> linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
> virtualization@lists.linux-foundation.org
> Date: 10/08/2014 10:45 PM
> Subject: Re: [PATCH] vhost: Add polling mode
>
> On Sun, Aug 10, 2014 at 11:30:35AM +0300, Razya Ladelsky wrote:
> > From: Razya Ladelsky <razya@il.ibm.com>
> > Date: Thu, 31 Jul 2014 09:47:20 +0300
> > Subject: [PATCH] vhost: Add polling mode
> >
> > When vhost is waiting for buffers from the guest driver (e.g., more packets to
> > send in vhost-net's transmit queue), it normally goes to sleep and waits for the
> > guest to "kick" it. This kick involves a PIO in the guest, and therefore an exit
> > (and possibly userspace involvement in translating this PIO exit into a file
> > descriptor event), all of which hurts performance.
> >
> > If the system is under-utilized (has cpu time to spare), vhost can continuously
> > poll the virtqueues for new buffers, and avoid asking the guest to kick us.
> > This patch adds an optional polling mode to vhost, that can be enabled via a
> > kernel module parameter, "poll_start_rate".
> >
> > When polling is active for a virtqueue, the guest is asked to disable
> > notification (kicks), and the worker thread continuously checks for new buffers.
> > When it does discover new buffers, it simulates a "kick" by invoking the
> > underlying backend driver (such as vhost-net), which thinks it got a real kick
> > from the guest, and acts accordingly. If the underlying driver asks not to be
> > kicked, we disable polling on this virtqueue.
> >
> > We start polling on a virtqueue when we notice it has work to do. Polling on
> > this virtqueue is later disabled after 3 seconds of polling turning up no new
> > work, as in this case we are better off returning to the exit-based notification
> > mechanism. The default timeout of 3 seconds can be changed with the
> > "poll_stop_idle" kernel module parameter.
> >
> > This polling approach makes a lot of sense for new HW with posted-interrupts for
> > which we have exitless host-to-guest notifications. But even with support for
> > posted interrupts, guest-to-host communication still causes exits. Polling adds
> > the missing part.
> >
> > When systems are overloaded, there won't be enough cpu time for the various
> > vhost threads to poll their guests' devices. For these scenarios, we plan to add
> > support for vhost threads that can be shared by multiple devices, even of
> > multiple vms.
> > Our ultimate goal is to implement the I/O acceleration features described in:
> > KVM Forum 2013: Efficient and Scalable Virtio (by Abel Gordon)
> > https://www.youtube.com/watch?v=9EyweibHfEs
> > and
> > https://www.mail-archive.com/kvm@vger.kernel.org/msg98179.html
> >
> > I ran some experiments with TCP stream netperf and filebench
> (having 2 threads
> > performing random reads) benchmarks on an IBM System x3650 M4.
> > I have two machines, A and B. A hosts the vms, B runs the netserver.
> > The vms (on A) run netperf, its destination server is running on B.
> > All runs loaded the guests in a way that they were (cpu)
> saturated. For example,
> > I ran netperf with 64B messages, which is heavily loading the vm
> (which is why
> > its throughput is low).
> > The idea was to get it 100% loaded, so we can see that the polling
> is getting it
> > to produce higher throughput.
>
> And, did your tests actually produce 100% load on both host CPUs?
>
The vm indeed utilized 100% cpu, whether polling was enabled or not.
The vhost thread utilized less than 100% (of the other cpu) when polling
was disabled.
Enabling polling increased its utilization to 100% (in which case both
cpus were 100% utilized).
> > The system had two cores per guest, as to allow for both the vcpu
> and the vhost
> > thread to run concurrently for maximum throughput (but I didn't
> pin the threads
> > to specific cores).
> > My experiments were fair in a sense that for both cases, with or
without
> > polling, I run both threads, vcpu and vhost, on 2 cores (set their
> affinity that
> > way). The only difference was whether polling was enabled/disabled.
> >
> > Results:
> >
> > Netperf, 1 vm:
> > The polling patch improved throughput by ~33% (1516 MB/sec -> 2046
MB/sec).
> > Number of exits/sec decreased 6x.
> > The same improvement was shown when I tested with 3 vms running
netperf
> > (4086 MB/sec -> 5545 MB/sec).
> >
> > filebench, 1 vm:
> > ops/sec improved by 13% with the polling patch. Number of exits
> was reduced by
> > 31%.
> > The same experiment with 3 vms running filebench showed similar
numbers.
> >
> > Signed-off-by: Razya Ladelsky <razya@il.ibm.com>
> > ---
> > drivers/vhost/net.c | 6 +-
> > drivers/vhost/scsi.c | 6 +-
> > drivers/vhost/vhost.c | 245 ++++++++++++++++++++++++++++++++++++
> +++++++++++--
> > drivers/vhost/vhost.h | 38 +++++++-
> > 4 files changed, 277 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > index 971a760..558aecb 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -742,8 +742,10 @@ static int vhost_net_open(struct inode
> *inode, struct file *f)
> > }
> > vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
> >
> > - vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT,
dev);
> > - vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN,
dev);
> > + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT,
> > + vqs[VHOST_NET_VQ_TX]);
> > + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN,
> > + vqs[VHOST_NET_VQ_RX]);
> >
> > f->private_data = n;
> >
> > diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> > index 4f4ffa4..665eeeb 100644
> > --- a/drivers/vhost/scsi.c
> > +++ b/drivers/vhost/scsi.c
> > @@ -1528,9 +1528,9 @@ static int vhost_scsi_open(struct inode
> *inode, struct file *f)
> > if (!vqs)
> > goto err_vqs;
> >
> > - vhost_work_init(&vs->vs_completion_work,
vhost_scsi_complete_cmd_work);
> > - vhost_work_init(&vs->vs_event_work, tcm_vhost_evt_work);
> > -
> > + vhost_work_init(&vs->vs_completion_work, NULL,
> > + vhost_scsi_complete_cmd_work);
> > + vhost_work_init(&vs->vs_event_work, NULL, tcm_vhost_evt_work);
> > vs->vs_events_nr = 0;
> > vs->vs_events_missed = false;
> >
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index c90f437..fbe8174 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -24,9 +24,17 @@
> > #include <linux/slab.h>
> > #include <linux/kthread.h>
> > #include <linux/cgroup.h>
> > +#include <linux/jiffies.h>
> > #include <linux/module.h>
> >
> > #include "vhost.h"
> > +static int poll_start_rate = 0;
> > +module_param(poll_start_rate, int, S_IRUGO|S_IWUSR);
> > +MODULE_PARM_DESC(poll_start_rate, "Start continuous polling of
> virtqueue when rate of events is at least this number per jiffy. If
> 0, never start polling.");
> > +
> > +static int poll_stop_idle = 3*HZ; /* 3 seconds */
> > +module_param(poll_stop_idle, int, S_IRUGO|S_IWUSR);
> > +MODULE_PARM_DESC(poll_stop_idle, "Stop continuous polling of
> virtqueue after this many jiffies of no work.");
> >
> > enum {
> > VHOST_MEMORY_MAX_NREGIONS = 64,
> > @@ -58,27 +66,28 @@ static int vhost_poll_wakeup(wait_queue_t
> *wait, unsigned mode, int sync,
> > return 0;
> > }
> >
> > -void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
> > +void vhost_work_init(struct vhost_work *work, struct vhost_virtqueue
*vq,
> > + vhost_work_fn_t fn)
> > {
> > INIT_LIST_HEAD(&work->node);
> > work->fn = fn;
> > init_waitqueue_head(&work->done);
> > work->flushing = 0;
> > work->queue_seq = work->done_seq = 0;
> > + work->vq = vq;
> > }
> > EXPORT_SYMBOL_GPL(vhost_work_init);
> >
> > /* Init poll structure */
> > void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
> > - unsigned long mask, struct vhost_dev *dev)
> > + unsigned long mask, struct vhost_virtqueue *vq)
> > {
> > init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
> > init_poll_funcptr(&poll->table, vhost_poll_func);
> > poll->mask = mask;
> > - poll->dev = dev;
> > + poll->dev = vq->dev;
> > poll->wqh = NULL;
> > -
> > - vhost_work_init(&poll->work, fn);
> > + vhost_work_init(&poll->work, vq, fn);
> > }
> > EXPORT_SYMBOL_GPL(vhost_poll_init);
> >
> > @@ -174,6 +183,86 @@ void vhost_poll_queue(struct vhost_poll *poll)
> > }
> > EXPORT_SYMBOL_GPL(vhost_poll_queue);
> >
> > +/* Enable or disable virtqueue polling (vqpoll.enabled) for a
virtqueue.
> > + *
> > + * Enabling this mode it tells the guest not to notify ("kick") us
when its
> > + * has made more work available on this virtqueue; Rather, we
> will continuously
> > + * poll this virtqueue in the worker thread. If multiple
> virtqueues are polled,
> > + * the worker thread polls them all, e.g., in a round-robin fashion.
> > + * Note that vqpoll.enabled doesn't always mean that this virtqueue
is
> > + * actually being polled: The backend (e.g., net.c) may
> temporarily disable it
> > + * using vhost_disable/enable_notify(), while vqpoll.enabled is
unchanged.
> > + *
> > + * It is assumed that these functions are called relatively
> rarely, when vhost
> > + * notices that this virtqueue's usage pattern significantly
> changed in a way
> > + * that makes polling more efficient than notification, or vice
versa.
> > + * Also, we assume that vhost_vq_disable_vqpoll() is always called on
vq
> > + * cleanup, so any allocations done by vhost_vq_enable_vqpoll() can
be
> > + * reclaimed.
> > + */
> > +static void vhost_vq_enable_vqpoll(struct vhost_virtqueue *vq)
> > +{
> > + if (vq->vqpoll.enabled)
> > + return; /* already enabled, nothing to do */
> > + if (!vq->handle_kick)
> > + return; /* polling will be a waste of time if no callback! */
> > + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) {
> > + /* vq has guest notifications enabled. Disable them,
> > + and instead add vq to the polling list */
> > + vhost_disable_notify(vq->dev, vq);
> > + list_add_tail(&vq->vqpoll.link, &vq->dev->vqpoll_list);
> > + }
> > + vq->vqpoll.jiffies_last_kick = jiffies;
> > + __get_user(vq->avail_idx, &vq->avail->idx);
> > + vq->vqpoll.enabled = true;
> > +
> > + /* Map userspace's vq->avail to the kernel's memory space. */
> > + if (get_user_pages_fast((unsigned long)vq->avail, 1, 0,
> > + &vq->vqpoll.avail_page) != 1) {
> > + /* TODO: can this happen, as we check access
> > + to vq->avail in advance? */
> > + BUG();
> > + }
> > + vq->vqpoll.avail_mapped = (struct vring_avail *) (
> > + (unsigned long)kmap(vq->vqpoll.avail_page) |
> > + ((unsigned long)vq->avail & ~PAGE_MASK));
> > +}
> > +
> > +/*
> > + * This function doesn't always succeed in changing the mode.
Sometimes
> > + * a temporary race condition prevents turning on guest
notifications, so
> > + * vq should be polled next time again.
> > + */
> > +static void vhost_vq_disable_vqpoll(struct vhost_virtqueue *vq)
> > +{
> > + if (!vq->vqpoll.enabled)
> > + return; /* already disabled, nothing to do */
> > +
> > + vq->vqpoll.enabled = false;
> > +
> > + if (!list_empty(&vq->vqpoll.link)) {
> > + /* vq is on the polling list, remove it from this list and
> > + * instead enable guest notifications. */
> > + list_del_init(&vq->vqpoll.link);
> > + if (unlikely(vhost_enable_notify(vq->dev, vq))
> > + && !vq->vqpoll.shutdown) {
> > + /* Race condition: guest wrote before we enabled
> > + * notification, so we'll never get a notification for
> > + * this work - so continue polling mode for a while. */
> > + vhost_disable_notify(vq->dev, vq);
> > + vq->vqpoll.enabled = true;
> > + vhost_enable_notify(vq->dev, vq);
> > + return;
> > + }
> > + }
> > +
> > + if (vq->vqpoll.avail_mapped) {
> > + kunmap(vq->vqpoll.avail_page);
> > + put_page(vq->vqpoll.avail_page);
> > + vq->vqpoll.avail_mapped = 0;
> > + }
> > +}
> > +
> > static void vhost_vq_reset(struct vhost_dev *dev,
> > struct vhost_virtqueue *vq)
> > {
> > @@ -199,6 +288,48 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > vq->call = NULL;
> > vq->log_ctx = NULL;
> > vq->memory = NULL;
> > + INIT_LIST_HEAD(&vq->vqpoll.link);
> > + vq->vqpoll.enabled = false;
> > + vq->vqpoll.shutdown = false;
> > + vq->vqpoll.avail_mapped = NULL;
> > +}
> > +
> > +/* roundrobin_poll() takes worker->vqpoll_list, and returns one of
the
> > + * virtqueues which the caller should kick, or NULL in case none
should be
> > + * kicked. roundrobin_poll() also disables polling on a
virtqueuewhich has
> > + * been polled for too long without success.
> > + *
> > + * This current implementation (the "round-robin" implementation)
only
> > + * polls the first vq in the list, returning it or NULL as
appropriate, and
> > + * moves this vq to the end of the list, so next time a different one
is
> > + * polled.
> > + */
> > +static struct vhost_virtqueue *roundrobin_poll(struct list_head
*list)
> > +{
> > + struct vhost_virtqueue *vq;
> > + u16 avail_idx;
> > +
> > + if (list_empty(list))
> > + return NULL;
> > +
> > + vq = list_first_entry(list, struct vhost_virtqueue, vqpoll.link);
> > + WARN_ON(!vq->vqpoll.enabled);
> > + list_move_tail(&vq->vqpoll.link, list);
> > +
> > + /* See if there is any new work available from the guest. */
> > + /* TODO: can check the optional idx feature, and if we haven't
> > + * reached that idx yet, don't kick... */
> > + avail_idx = vq->vqpoll.avail_mapped->idx;
> > + if (avail_idx != vq->last_avail_idx)
> > + return vq;
> > +
> > + if (jiffies > vq->vqpoll.jiffies_last_kick + poll_stop_idle) {
> > + /* We've been polling this virtqueue for a long time with no
> > + * results, so switch back to guest notification
> > + */
> > + vhost_vq_disable_vqpoll(vq);
> > + }
> > + return NULL;
> > }
> >
> > static int vhost_worker(void *data)
> > @@ -237,12 +368,62 @@ static int vhost_worker(void *data)
> > spin_unlock_irq(&dev->work_lock);
> >
> > if (work) {
> > + struct vhost_virtqueue *vq = work->vq;
> > __set_current_state(TASK_RUNNING);
> > work->fn(work);
> > + /* Keep track of the work rate, for deciding when to
> > + * enable polling */
> > + if (vq) {
> > + if (vq->vqpoll.jiffies_last_work != jiffies) {
> > + vq->vqpoll.jiffies_last_work = jiffies;
> > + vq->vqpoll.work_this_jiffy = 0;
> > + }
> > + vq->vqpoll.work_this_jiffy++;
> > + }
> > + /* If vq is in the round-robin list of virtqueues being
> > + * constantly checked by this thread, move vq the end
> > + * of the queue, because it had its fair chance now.
> > + */
> > + if (vq && !list_empty(&vq->vqpoll.link)) {
> > + list_move_tail(&vq->vqpoll.link,
> > + &dev->vqpoll_list);
> > + }
> > + /* Otherwise, if this vq is looking for notifications
> > + * but vq polling is not enabled for it, do it now.
> > + */
> > + else if (poll_start_rate && vq && vq->handle_kick &&
> > + !vq->vqpoll.enabled &&
> > + !vq->vqpoll.shutdown &&
> > + !(vq->used_flags & VRING_USED_F_NO_NOTIFY) &&
> > + vq->vqpoll.work_this_jiffy >=
> > + poll_start_rate) {
> > + vhost_vq_enable_vqpoll(vq);
> > + }
> > + }
> > + /* Check one virtqueue from the round-robin list */
> > + if (!list_empty(&dev->vqpoll_list)) {
> > + struct vhost_virtqueue *vq;
> > +
> > + vq = roundrobin_poll(&dev->vqpoll_list);
> > +
> > + if (vq) {
> > + vq->handle_kick(&vq->poll.work);
> > + vq->vqpoll.jiffies_last_kick = jiffies;
> > + }
> > +
> > + /* If our polling list isn't empty, ask to continue
> > + * running this thread, don't yield.
> > + */
> > + __set_current_state(TASK_RUNNING);
> > if (need_resched())
> > schedule();
> > - } else
> > - schedule();
> > + } else {
> > + if (work) {
> > + if (need_resched())
> > + schedule();
> > + } else
> > + schedule();
> > + }
> >
> > }
> > unuse_mm(dev->mm);
> > @@ -306,6 +487,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> > dev->mm = NULL;
> > spin_lock_init(&dev->work_lock);
> > INIT_LIST_HEAD(&dev->work_list);
> > + INIT_LIST_HEAD(&dev->vqpoll_list);
> > dev->worker = NULL;
> >
> > for (i = 0; i < dev->nvqs; ++i) {
> > @@ -318,7 +500,7 @@ void vhost_dev_init(struct vhost_dev *dev,
> > vhost_vq_reset(dev, vq);
> > if (vq->handle_kick)
> > vhost_poll_init(&vq->poll, vq->handle_kick,
> > - POLLIN, dev);
> > + POLLIN, vq);
> > }
> > }
> > EXPORT_SYMBOL_GPL(vhost_dev_init);
> > @@ -350,7 +532,7 @@ static int vhost_attach_cgroups(struct vhost_dev
*dev)
> > struct vhost_attach_cgroups_struct attach;
> >
> > attach.owner = current;
> > - vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> > + vhost_work_init(&attach.work, NULL, vhost_attach_cgroups_work);
> > vhost_work_queue(dev, &attach.work);
> > vhost_work_flush(dev, &attach.work);
> > return attach.ret;
> > @@ -444,6 +626,26 @@ void vhost_dev_stop(struct vhost_dev *dev)
> > }
> > EXPORT_SYMBOL_GPL(vhost_dev_stop);
> >
> > +/* shutdown_vqpoll() asks the worker thread to shut down virtqueue
polling
> > + * mode for a given virtqueue which is itself being shut down. We ask
the
> > + * worker thread to do this rather than doing it directly, so that we
don't
> > + * race with the worker thread's use of the queue.
> > + */
> > +static void shutdown_vqpoll_work(struct vhost_work *work)
> > +{
> > + work->vq->vqpoll.shutdown = true;
> > + vhost_vq_disable_vqpoll(work->vq);
> > + WARN_ON(work->vq->vqpoll.avail_mapped);
> > +}
> > +
> > +static void shutdown_vqpoll(struct vhost_virtqueue *vq)
> > +{
> > + struct vhost_work work;
> > +
> > + vhost_work_init(&work, vq, shutdown_vqpoll_work);
> > + vhost_work_queue(vq->dev, &work);
> > + vhost_work_flush(vq->dev, &work);
> > +}
> > /* Caller should have device mutex if and only if locked is set */
> > void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
> > {
> > @@ -460,6 +662,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev,
> bool locked)
> > eventfd_ctx_put(dev->vqs[i]->call_ctx);
> > if (dev->vqs[i]->call)
> > fput(dev->vqs[i]->call);
> > + shutdown_vqpoll(dev->vqs[i]);
> > vhost_vq_reset(dev, dev->vqs[i]);
> > }
> > vhost_dev_free_iovecs(dev);
> > @@ -1491,6 +1694,19 @@ bool vhost_enable_notify(struct vhost_dev
> *dev, struct vhost_virtqueue *vq)
> > u16 avail_idx;
> > int r;
> >
> > + /* In polling mode, when the backend (e.g., net.c) asks to enable
> > + * notifications, we don't enable guest notifications. Instead,
start
> > + * polling on this vq by adding it to the round-robin list.
> > + */
> > + if (vq->vqpoll.enabled) {
> > + if (list_empty(&vq->vqpoll.link)) {
> > + list_add_tail(&vq->vqpoll.link,
> > + &vq->dev->vqpoll_list);
> > + vq->vqpoll.jiffies_last_kick = jiffies;
> > + }
> > + return false;
> > + }
> > +
> > if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
> > return false;
> > vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
> > @@ -1528,6 +1744,17 @@ void vhost_disable_notify(struct vhost_dev
> *dev, struct vhost_virtqueue *vq)
> > {
> > int r;
> >
> > + /* If this virtqueue is vqpoll.enabled, and on the polling list,
it
> > + * will generate notifications even if the guest is asked not to
send
> > + * them. So we must remove it from the round-robin polling list.
> > + * Note that vqpoll.enabled remains set.
> > + */
> > + if (vq->vqpoll.enabled) {
> > + if (!list_empty(&vq->vqpoll.link))
> > + list_del_init(&vq->vqpoll.link);
> > + return;
> > + }
> > +
> > if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
> > return;
> > vq->used_flags |= VRING_USED_F_NO_NOTIFY;
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index 3eda654..11aaaf4 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -24,6 +24,7 @@ struct vhost_work {
> > int flushing;
> > unsigned queue_seq;
> > unsigned done_seq;
> > + struct vhost_virtqueue *vq;
> > };
> >
> > /* Poll a file (eventfd or socket) */
> > @@ -37,11 +38,12 @@ struct vhost_poll {
> > struct vhost_dev *dev;
> > };
> >
> > -void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
> > +void vhost_work_init(struct vhost_work *work, struct vhost_virtqueue
*vq,
> > + vhost_work_fn_t fn);
> > void vhost_work_queue(struct vhost_dev *dev, struct vhost_work
*work);
> >
> > void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
> > - unsigned long mask, struct vhost_dev *dev);
> > + unsigned long mask, struct vhost_virtqueue *vq);
> > int vhost_poll_start(struct vhost_poll *poll, struct file *file);
> > void vhost_poll_stop(struct vhost_poll *poll);
> > void vhost_poll_flush(struct vhost_poll *poll);
> > @@ -54,8 +56,6 @@ struct vhost_log {
> > u64 len;
> > };
> >
> > -struct vhost_virtqueue;
> > -
> > /* The virtqueue structure describes a queue attached to a device. */
> > struct vhost_virtqueue {
> > struct vhost_dev *dev;
> > @@ -110,6 +110,35 @@ struct vhost_virtqueue {
> > /* Log write descriptors */
> > void __user *log_base;
> > struct vhost_log *log;
> > + struct {
> > + /* When a virtqueue is in vqpoll.enabled mode, it declares
> > + * that instead of using guest notifications (kicks) to
> > + * discover new work, we prefer to continuously poll this
> > + * virtqueue in the worker thread.
> > + * If !enabled, the rest of the fields below are undefined.
> > + */
> > + bool enabled;
> > + /* vqpoll.enabled doesn't always mean that this virtqueue is
> > + * actually being polled: The backend (e.g., net.c) may
> > + * temporarily disable it using vhost_disable/enable_notify().
> > + * vqpoll.link is used to maintain the thread's round-robin
> > + * list of virtqueues that actually need to be polled.
> > + * Note list_empty(link) means this virtqueue isn't polled.
> > + */
> > + struct list_head link;
> > + /* If this flag is true, the virtqueue is being shut down,
> > + * so vqpoll should not be re-enabled.
> > + */
> > + bool shutdown;
> > + /* Various counters used to decide when to enter polling mode
> > + * or leave it and return to notification mode.
> > + */
> > + unsigned long jiffies_last_kick;
> > + unsigned long jiffies_last_work;
> > + int work_this_jiffy;
> > + struct page *avail_page;
> > + volatile struct vring_avail *avail_mapped;
> > + } vqpoll;
> > };
> >
> > struct vhost_dev {
> > @@ -123,6 +152,7 @@ struct vhost_dev {
> > spinlock_t work_lock;
> > struct list_head work_list;
> > struct task_struct *worker;
> > + struct list_head vqpoll_list;
> > };
> >
> > void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue
> **vqs, int nvqs);
> > --
> > 1.7.9.5
>
^ permalink raw reply
* [RFC/PATCHSET 0/3] virtio: Implement virtio pstore device (v5)
From: Namhyung Kim @ 2016-09-04 14:38 UTC (permalink / raw)
To: virtio-dev
Cc: Anton Vorontsov, Kees Cook, kvm, Radim Krčmář,
qemu-devel, Michael S. Tsirkin, Will Deacon, LKML, Steven Rostedt,
virtualization, Minchan Kim, Tony Luck, Anthony Liguori,
Colin Cross, Paolo Bonzini, Ingo Molnar
Hello,
This is another iteration of the virtio-pstore work. I've addressed
comments from Michael S. Tsirkin on the kernel code.
* changes in v5)
- convert __virtioXX to __leXX (Michael)
* changes in v4)
- use qio_channel_file_new_path() (Daniel)
- rename to delete_old_pstore_file (Daniel)
- convert G_REMOVE_SOURCE to FALSE (Daniel)
* changes in v3)
- use QIOChannel API (Stefan, Daniel)
- add bound check for malicious guests (Daniel)
- drop support PSTORE_TYPE_CONSOLE for now
- update license to allow GPL v2 or later (Michael)
- limit number of pstore files on qemu
* changes in v2)
- update VIRTIO_ID_PSTORE to 22 (Cornelia, Stefan)
- make buffer size configurable (Cornelia)
- support PSTORE_TYPE_CONSOLE (Kees)
- use separate virtqueues for read and write
- support concurrent async write
- manage pstore (file) id in device side
- fix various mistakes in qemu device (Stefan)
It started from the fact that dumping ftrace buffer at kernel
oops/panic takes too much time. Although there's a way to reduce the
size of the original data, sometimes I want to have as much information
as possible. Maybe kexec/kdump can solve this problem but it
consumes some portion of guest memory so I'd like to avoid it. And I
know the qemu + crashtool can dump and analyze the whole guest memory
including the ftrace buffer without wasting guest memory, but it adds
one more layer and has some limitation as an out-of-tree tool like not
being in sync with the kernel changes.
So I think it'd be great using the pstore interface to dump guest
kernel data on the host. One can read the data on the host directly
or on the guest (at the next boot) using pstore filesystem as usual.
While this patchset only implements dumping kernel log buffer, it can
be extended to have ftrace buffer and probably some more..
The patch 0001 implements the virtio pstore driver. It has two virtqueues
for (sync) read and (async) write, pstore buffer and io request and
response structure. The virtio_pstore_req struct is to give
information about the current pstore operation. The result will be
written to the virtio_pstore_res struct. For read operation it also
uses virtio_pstore_fileinfo struct.
The patch 0002 and 0003 implement virtio-pstore legacy PCI device on
qemu-kvm and kvmtool respectively. I referenced virtio-balloon and
virtio-rng implementations and I don't know whether kvmtool supports
modern virtio 1.0+ spec. Other transports might be supported later.
For example, using virtio-pstore on qemu looks like below:
$ qemu-system-x86_64 -enable-kvm -device virtio-pstore,directory=xxx
When the guest kernel panics, the log messages will be saved under the
xxx directory.
$ ls xxx
dmesg-1.enc.z dmesg-2.enc.z
As you can see the pstore subsystem compresses the log data using zlib
(now supports lzo and lz4 too). The data can be extracted with the
following command:
$ cat xxx/dmesg-1.enc.z | \
> python -c 'import sys, zlib; print(zlib.decompress(sys.stdin.read()))'
Oops#1 Part1
<5>[ 0.000000] Linux version 4.6.0kvm+ (namhyung@danjae) (gcc version 5.3.0 (GCC) ) #145 SMP Mon Jul 18 10:22:45 KST 2016
<6>[ 0.000000] Command line: root=/dev/vda console=ttyS0
<6>[ 0.000000] x86/fpu: Legacy x87 FPU detected.
<6>[ 0.000000] x86/fpu: Using 'eager' FPU context switches.
<6>[ 0.000000] e820: BIOS-provided physical RAM map:
<6>[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
<6>[ 0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
<6>[ 0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
<6>[ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x0000000007fddfff] usable
<6>[ 0.000000] BIOS-e820: [mem 0x0000000007fde000-0x0000000007ffffff] reserved
<6>[ 0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
<6>[ 0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
<6>[ 0.000000] NX (Execute Disable) protection: active
<6>[ 0.000000] SMBIOS 2.8 present.
<7>[ 0.000000] DMI: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
...
Namhyung Kim (3):
virtio: Basic implementation of virtio pstore driver
qemu: Implement virtio-pstore device
kvmtool: Implement virtio-pstore device
drivers/virtio/Kconfig | 10 +
drivers/virtio/Makefile | 1 +
drivers/virtio/virtio_pstore.c | 417 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/Kbuild | 1 +
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_pstore.h | 74 +++++++
6 files changed, 504 insertions(+)
create mode 100644 drivers/virtio/virtio_pstore.c
create mode 100644 include/uapi/linux/virtio_pstore.h
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Anthony Liguori <aliguori@amazon.com>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: kvm@vger.kernel.org
Cc: qemu-devel@nongnu.org
Cc: virtualization@lists.linux-foundation.org
Cc: virtio-dev@lists.oasis-open.org
Thanks,
Namhyung
--
2.9.3
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH 1/3] virtio: Basic implementation of virtio pstore driver
From: Namhyung Kim @ 2016-09-04 14:38 UTC (permalink / raw)
To: virtio-dev
Cc: Anton Vorontsov, Kees Cook, kvm, Radim Krčmář,
qemu-devel, Michael S. Tsirkin, Will Deacon, LKML, Steven Rostedt,
virtualization, Minchan Kim, Tony Luck, Anthony Liguori,
Colin Cross, Paolo Bonzini, Ingo Molnar
In-Reply-To: <20160904143900.14850-1-namhyung@kernel.org>
The virtio pstore driver provides interface to the pstore subsystem so
that the guest kernel's log/dump message can be saved on the host
machine. Users can access the log file directly on the host, or on the
guest at the next boot using pstore filesystem. It currently deals with
kernel log (printk) buffer only, but we can extend it to have other
information (like ftrace dump) later.
It supports legacy PCI device using a 16K buffer by default and it's
configurable. It uses two virtqueues - one for (sync) read and another
for (async) write. Since it cannot wait for writes to finish, it supports
up to 128 concurrent IO.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Anthony Liguori <aliguori@amazon.com>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: virtio-dev@lists.oasis-open.org
Cc: kvm@vger.kernel.org
Cc: qemu-devel@nongnu.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
drivers/virtio/Kconfig | 10 +
drivers/virtio/Makefile | 1 +
drivers/virtio/virtio_pstore.c | 417 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/Kbuild | 1 +
include/uapi/linux/virtio_ids.h | 1 +
include/uapi/linux/virtio_pstore.h | 74 +++++++
6 files changed, 504 insertions(+)
create mode 100644 drivers/virtio/virtio_pstore.c
create mode 100644 include/uapi/linux/virtio_pstore.h
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 77590320d44c..8f0e6c796c12 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -58,6 +58,16 @@ config VIRTIO_INPUT
If unsure, say M.
+config VIRTIO_PSTORE
+ tristate "Virtio pstore driver"
+ depends on VIRTIO
+ depends on PSTORE
+ ---help---
+ This driver supports virtio pstore devices to save/restore
+ panic and oops messages on the host.
+
+ If unsure, say M.
+
config VIRTIO_MMIO
tristate "Platform bus driver for memory mapped virtio devices"
depends on HAS_IOMEM && HAS_DMA
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 41e30e3dc842..bee68cb26d48 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -5,3 +5,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
+obj-$(CONFIG_VIRTIO_PSTORE) += virtio_pstore.o
diff --git a/drivers/virtio/virtio_pstore.c b/drivers/virtio/virtio_pstore.c
new file mode 100644
index 000000000000..c8fd8e39d1b8
--- /dev/null
+++ b/drivers/virtio/virtio_pstore.c
@@ -0,0 +1,417 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pstore.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_pstore.h>
+
+/* Shift applied to 4096 to obtain the default buffer size (2 -> 16KB). */
+#define VIRT_PSTORE_ORDER 2
+/* Default pstore buffer size in bytes, used when no size has been set. */
+#define VIRT_PSTORE_BUFSIZE (4096 << VIRT_PSTORE_ORDER)
+/* Number of request/response slots; slots are reused round-robin. */
+#define VIRT_PSTORE_NR_REQ 128
+
+/* Per-device driver state; the probe routine stores it in vdev->priv. */
+struct virtio_pstore {
+ struct virtio_device *vdev;
+ /* vq[0] is "pstore_read", vq[1] is "pstore_write" (see virtpstore_init_vqs) */
+ struct virtqueue *vq[2];
+ struct pstore_info pstore;
+ /* Request/response slots, handed out by virt_pstore_get_reqs() */
+ struct virtio_pstore_req req[VIRT_PSTORE_NR_REQ];
+ struct virtio_pstore_res res[VIRT_PSTORE_NR_REQ];
+ /* Ever-increasing counter; the slot used is req_id % VIRT_PSTORE_NR_REQ */
+ unsigned int req_id;
+
+ /* Waiting for host to ack */
+ wait_queue_head_t acked;
+ /* Set to 1 by virtpstore_check(); virt_pstore_write() then rejects writes */
+ int failed;
+};
+
+#define TYPE_TABLE_ENTRY(_entry) \
+	{ PSTORE_TYPE_##_entry, VIRTIO_PSTORE_TYPE_##_entry }
+
+/*
+ * Mapping between pstore record types and the type values used on the
+ * virtio wire.  The table is read-only and private to this file, hence
+ * static const: it must not leak a generic symbol name into the kernel's
+ * global namespace.
+ */
+static const struct type_table {
+	int pstore;	/* PSTORE_TYPE_* value */
+	u16 virtio;	/* VIRTIO_PSTORE_TYPE_* value (CPU byte order) */
+} type_table[] = {
+	TYPE_TABLE_ENTRY(DMESG),
+};
+
+#undef TYPE_TABLE_ENTRY
+
+
+/*
+ * Convert a pstore record type into its little-endian virtio encoding.
+ * Types missing from type_table map to VIRTIO_PSTORE_TYPE_UNKNOWN.
+ */
+static __le16 to_virtio_type(enum pstore_type_id type)
+{
+	u16 vtype = VIRTIO_PSTORE_TYPE_UNKNOWN;
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(type_table); idx++) {
+		if (type_table[idx].pstore != type)
+			continue;
+
+		vtype = type_table[idx].virtio;
+		break;
+	}
+
+	return cpu_to_le16(vtype);
+}
+
+/*
+ * Convert a little-endian virtio record type into the pstore type.
+ * Values missing from type_table map to PSTORE_TYPE_UNKNOWN.
+ */
+static enum pstore_type_id from_virtio_type(__le16 type)
+{
+	const u16 vtype = le16_to_cpu(type);
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(type_table); idx++) {
+		if (type_table[idx].virtio == vtype)
+			return type_table[idx].pstore;
+	}
+
+	return PSTORE_TYPE_UNKNOWN;
+}
+
+/* Virtqueue callback: wake up whoever sleeps on the device's ack queue. */
+static void virtpstore_ack(struct virtqueue *vq)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pstore *state = vdev->priv;
+
+	wake_up(&state->acked);
+}
+
+/*
+ * Virtqueue callback for completed (asynchronous) requests.
+ *
+ * Reaps every completed buffer and latches vps->failed as soon as one
+ * response carries a negative result code.
+ */
+static void virtpstore_check(struct virtqueue *vq)
+{
+	struct virtio_pstore *vps = vq->vdev->priv;
+	struct virtio_pstore_res *res;
+	unsigned int len;
+
+	/* A single callback may cover several completions: drain them all. */
+	while ((res = virtqueue_get_buf(vq, &len)) != NULL) {
+		/*
+		 * Only interpret tokens that really are one of our response
+		 * slots; anything else must not be read as a response.
+		 */
+		if (res < &vps->res[0] || res >= &vps->res[VIRT_PSTORE_NR_REQ])
+			continue;
+
+		/*
+		 * le32_to_cpu() yields an unsigned value, so it has to be
+		 * converted to a signed type before testing for an error.
+		 */
+		if ((s32)le32_to_cpu(res->ret) < 0)
+			vps->failed = 1;
+	}
+}
+
+/*
+ * Hand out the next request/response slot pair.
+ *
+ * The slot is selected by the current req_id modulo VIRT_PSTORE_NR_REQ,
+ * req_id is then advanced, and both slots are zeroed before being
+ * returned through @preq and @pres.
+ */
+static void virt_pstore_get_reqs(struct virtio_pstore *vps,
+				 struct virtio_pstore_req **preq,
+				 struct virtio_pstore_res **pres)
+{
+	unsigned int slot = vps->req_id % VIRT_PSTORE_NR_REQ;
+	struct virtio_pstore_req *req = &vps->req[slot];
+	struct virtio_pstore_res *res = &vps->res[slot];
+
+	vps->req_id++;
+
+	memset(req, 0, sizeof(*req));
+	memset(res, 0, sizeof(*res));
+
+	*preq = req;
+	*pres = res;
+}
+
+/*
+ * pstore ->open callback: send VIRTIO_PSTORE_CMD_OPEN on vq[0] and sleep
+ * until the host has answered.
+ *
+ * Returns the result code reported by the host, or a negative errno when
+ * the request could not be queued.
+ */
+static int virt_pstore_open(struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[1], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	int err;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_OPEN);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_one(sgi, res, sizeof(*res));
+
+	/* If nothing was queued no completion will ever arrive: do not wait. */
+	err = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	return le32_to_cpu(res->ret);
+}
+
+/*
+ * pstore ->close callback: send VIRTIO_PSTORE_CMD_CLOSE on vq[0] and sleep
+ * until the host has answered.
+ *
+ * Returns the result code reported by the host, or a negative errno when
+ * the request could not be queued.
+ */
+static int virt_pstore_close(struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	/*
+	 * The slots come from virt_pstore_get_reqs() only; indexing the
+	 * arrays with the raw (unwrapped) req_id would go out of bounds.
+	 */
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[1], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	int err;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_CLOSE);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_one(sgi, res, sizeof(*res));
+
+	/* If nothing was queued no completion will ever arrive: do not wait. */
+	err = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	return le32_to_cpu(res->ret);
+}
+
+/*
+ * pstore ->read callback: fetch one record from the host.
+ *
+ * Sends VIRTIO_PSTORE_CMD_READ on vq[0]; the host fills in the response
+ * header, a struct virtio_pstore_fileinfo and the record data (the latter
+ * lands in psi->buf).  On success the data is copied into a kmalloc()ed
+ * buffer returned through @buf, and @id, @type, @count, @time and
+ * @compressed are filled from the file info.  @ecc_notice_size is not
+ * touched.
+ *
+ * Returns the record length in bytes, the negative result code reported
+ * by the host, or a negative errno (-EIO for a malformed answer, -ENOMEM,
+ * or the error from queueing the request).
+ */
+static ssize_t virt_pstore_read(u64 *id, enum pstore_type_id *type,
+				int *count, struct timespec *time,
+				char **buf, bool *compressed,
+				ssize_t *ecc_notice_size,
+				struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	/*
+	 * NOTE(review): info lives on the stack while it is handed to the
+	 * device - confirm this is acceptable for every virtio transport.
+	 */
+	struct virtio_pstore_fileinfo info;
+	struct scatterlist sgo[1], sgi[3];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int used;
+	unsigned int len;
+	unsigned int flags;
+	int ret;
+	void *bf;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_READ);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_table(sgi, 3);
+	sg_set_buf(&sgi[0], res, sizeof(*res));
+	sg_set_buf(&sgi[1], &info, sizeof(info));
+	sg_set_buf(&sgi[2], psi->buf, psi->bufsize);
+
+	/* If nothing was queued no completion will ever arrive: do not wait. */
+	ret = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (ret)
+		return ret;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &used));
+	if (used < sizeof(*res) + sizeof(info))
+		return -EIO;
+
+	ret = le32_to_cpu(res->ret);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * The length is supplied by the host: never trust it beyond the
+	 * size of the buffer the data was received into.
+	 */
+	len = le32_to_cpu(info.len);
+	if (len > psi->bufsize)
+		return -EIO;
+
+	bf = kmalloc(len, GFP_KERNEL);
+	if (bf == NULL)
+		return -ENOMEM;
+
+	*id = le64_to_cpu(info.id);
+	*type = from_virtio_type(info.type);
+	*count = le32_to_cpu(info.count);
+
+	flags = le32_to_cpu(info.flags);
+	*compressed = flags & VIRTIO_PSTORE_FL_COMPRESSED;
+
+	time->tv_sec = le64_to_cpu(info.time_sec);
+	time->tv_nsec = le32_to_cpu(info.time_nsec);
+
+	memcpy(bf, psi->buf, len);
+	*buf = bf;
+
+	return len;
+}
+
+/*
+ * pstore ->write callback: queue @size bytes of psi->buf for the host.
+ *
+ * The request is sent on vq[1] and is not waited for; its completion is
+ * handled by the queue's callback.  @id receives the request id used for
+ * this write.  @reason, @part and @count are not used.
+ *
+ * Returns 0 when the request was queued, -1 when an earlier write has
+ * already failed, or a negative errno when the request could not be
+ * queued.
+ */
+static int notrace virt_pstore_write(enum pstore_type_id type,
+				     enum kmsg_dump_reason reason,
+				     u64 *id, unsigned int part, int count,
+				     bool compressed, size_t size,
+				     struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[2], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int flags = compressed ? VIRTIO_PSTORE_FL_COMPRESSED : 0;
+	int err;
+
+	if (vps->failed)
+		return -1;
+
+	*id = vps->req_id;
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_WRITE);
+	req->type = to_virtio_type(type);
+	req->flags = cpu_to_le32(flags);
+
+	sg_init_table(sgo, 2);
+	sg_set_buf(&sgo[0], req, sizeof(*req));
+	sg_set_buf(&sgo[1], psi->buf, size);
+	sg_init_one(sgi, res, sizeof(*res));
+
+	/*
+	 * The response slot is the token: virtpstore_check() interprets
+	 * whatever virtqueue_get_buf() returns as a struct virtio_pstore_res.
+	 */
+	err = virtqueue_add_sgs(vps->vq[1], sgs, 1, 1, res, GFP_ATOMIC);
+	if (err)
+		return err;	/* do not report success for a lost record */
+	virtqueue_kick(vps->vq[1]);
+
+	return 0;
+}
+
+/*
+ * pstore ->erase callback: ask the host to erase the record identified by
+ * @type, @id and @count, and sleep until the host has answered.  @time is
+ * not used.
+ *
+ * Returns the result code reported by the host, or a negative errno when
+ * the request could not be queued.
+ */
+static int virt_pstore_erase(enum pstore_type_id type, u64 id, int count,
+			     struct timespec time, struct pstore_info *psi)
+{
+	struct virtio_pstore *vps = psi->data;
+	struct virtio_pstore_req *req;
+	struct virtio_pstore_res *res;
+	struct scatterlist sgo[1], sgi[1];
+	struct scatterlist *sgs[2] = { sgo, sgi };
+	unsigned int len;
+	int err;
+
+	virt_pstore_get_reqs(vps, &req, &res);
+
+	req->cmd = cpu_to_le16(VIRTIO_PSTORE_CMD_ERASE);
+	req->type = to_virtio_type(type);
+	req->id = cpu_to_le64(id);
+	req->count = cpu_to_le32(count);
+
+	sg_init_one(sgo, req, sizeof(*req));
+	sg_init_one(sgi, res, sizeof(*res));
+
+	/* If nothing was queued no completion will ever arrive: do not wait. */
+	err = virtqueue_add_sgs(vps->vq[0], sgs, 1, 1, vps, GFP_KERNEL);
+	if (err)
+		return err;
+	virtqueue_kick(vps->vq[0]);
+
+	wait_event(vps->acked, virtqueue_get_buf(vps->vq[0], &len));
+	return le32_to_cpu(res->ret);
+}
+
+/*
+ * Allocate the record buffer, fill in the pstore_info callbacks and
+ * register with the pstore core.
+ *
+ * A bufsize of 0 selects VIRT_PSTORE_BUFSIZE.  Returns 0 on success,
+ * -ENOMEM when the buffer cannot be allocated, or the error returned by
+ * pstore_register(), in which case the buffer has been released again.
+ */
+static int virt_pstore_init(struct virtio_pstore *vps)
+{
+	struct pstore_info *psinfo = &vps->pstore;
+	int err;
+
+	if (!psinfo->bufsize)
+		psinfo->bufsize = VIRT_PSTORE_BUFSIZE;
+
+	psinfo->buf = alloc_pages_exact(psinfo->bufsize, GFP_KERNEL);
+	if (!psinfo->buf) {
+		pr_err("cannot allocate pstore buffer\n");
+		return -ENOMEM;
+	}
+
+	psinfo->owner = THIS_MODULE;
+	psinfo->name = "virtio";
+	psinfo->open = virt_pstore_open;
+	psinfo->close = virt_pstore_close;
+	psinfo->read = virt_pstore_read;
+	psinfo->erase = virt_pstore_erase;
+	psinfo->write = virt_pstore_write;
+	psinfo->flags = PSTORE_FLAGS_FRAGILE;
+
+	psinfo->data = vps;
+	spin_lock_init(&psinfo->buf_lock);
+
+	err = pstore_register(psinfo);
+	if (err) {
+		/*
+		 * The buffer comes from alloc_pages_exact(), so it must be
+		 * returned with free_pages_exact(), not kfree().
+		 */
+		free_pages_exact(psinfo->buf, psinfo->bufsize);
+		psinfo->buf = NULL;
+	}
+
+	return err;
+}
+
+/*
+ * Unregister from the pstore core, then release the record buffer and
+ * clear the buffer fields.  Always returns 0.
+ */
+static int virt_pstore_exit(struct virtio_pstore *vps)
+{
+	pstore_unregister(&vps->pstore);
+
+	free_pages_exact(vps->pstore.buf, vps->pstore.bufsize);
+	vps->pstore.buf = NULL;
+	vps->pstore.bufsize = 0;
+
+	return 0;
+}
+
+/*
+ * Look up the two virtqueues: "pstore_read" (completion handled by
+ * virtpstore_ack) and "pstore_write" (completion handled by
+ * virtpstore_check).  Returns whatever find_vqs() returns.
+ */
+static int virtpstore_init_vqs(struct virtio_pstore *vps)
+{
+	struct virtio_device *vdev = vps->vdev;
+	const char *names[] = { "pstore_read", "pstore_write" };
+	vq_callback_t *callbacks[] = { virtpstore_ack, virtpstore_check };
+	int err;
+
+	err = vdev->config->find_vqs(vdev, 2, vps->vq, callbacks, names);
+	return err;
+}
+
+/*
+ * Read the buffer size from the device config space and store it,
+ * rounded up to a page boundary, as the pstore buffer size.
+ */
+static void virtpstore_init_config(struct virtio_pstore *vps)
+{
+	u32 host_size;
+
+	virtio_cread(vps->vdev, struct virtio_pstore_config, bufsize,
+		     &host_size);
+
+	vps->pstore.bufsize = PAGE_ALIGN(host_size);
+}
+
+/* Write the buffer size actually in use back to the device config space. */
+static void virtpstore_confirm_config(struct virtio_pstore *vps)
+{
+	u32 final_size = vps->pstore.bufsize;
+
+	virtio_cwrite(vps->vdev, struct virtio_pstore_config, bufsize,
+		      &final_size);
+}
+
+/*
+ * Probe a virtio pstore device: allocate the driver state, set up the
+ * virtqueues, negotiate the buffer size, register with pstore and mark
+ * the device ready.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that
+ * was set up here is torn down again.
+ */
+static int virtpstore_probe(struct virtio_device *vdev)
+{
+	struct virtio_pstore *vps;
+	int err;
+
+	if (!vdev->config->get) {
+		dev_err(&vdev->dev, "driver init: config access disabled\n");
+		return -EINVAL;
+	}
+
+	vdev->priv = vps = kzalloc(sizeof(*vps), GFP_KERNEL);
+	if (!vps) {
+		err = -ENOMEM;
+		goto out;
+	}
+	vps->vdev = vdev;
+
+	/*
+	 * The wait queue is used by the virtqueue callback and by the
+	 * pstore callbacks, so it has to be valid before either of them
+	 * is installed.
+	 */
+	init_waitqueue_head(&vps->acked);
+
+	err = virtpstore_init_vqs(vps);
+	if (err < 0)
+		goto out_free;
+
+	virtpstore_init_config(vps);
+
+	/*
+	 * NOTE(review): pstore_register() may presumably invoke the
+	 * open/read callbacks before virtio_device_ready() below - confirm.
+	 */
+	err = virt_pstore_init(vps);
+	if (err)
+		goto out_del_vq;
+
+	virtpstore_confirm_config(vps);
+
+	virtio_device_ready(vdev);
+
+	/* bufsize is a size_t, hence %zu */
+	dev_info(&vdev->dev, "driver init: ok (bufsize = %zuK, flags = %x)\n",
+		 vps->pstore.bufsize >> 10, vps->pstore.flags);
+
+	return 0;
+
+out_del_vq:
+	vdev->config->del_vqs(vdev);
+out_free:
+	kfree(vps);
+out:
+	dev_err(&vdev->dev, "driver init: failed with %d\n", err);
+	return err;
+}
+
+/*
+ * Tear down a device: unregister from pstore, reset the device, delete
+ * its virtqueues and free the driver state.
+ */
+static void virtpstore_remove(struct virtio_device *vdev)
+{
+	struct virtio_pstore *state = vdev->priv;
+
+	virt_pstore_exit(state);
+
+	/* The reset comes first so that the queues can be cleaned up. */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+
+	kfree(state);
+}
+
+/* Feature table handed to the virtio core; this driver lists no features. */
+static unsigned int features[] = {
+};
+
+/*
+ * Devices handled by this driver: VIRTIO_ID_PSTORE paired with
+ * VIRTIO_DEV_ANY_ID.  The zeroed entry ends the table.
+ */
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_PSTORE, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+/* Driver description registered through module_virtio_driver() below. */
+static struct virtio_driver virtio_pstore_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .id_table = id_table,
+ .probe = virtpstore_probe,
+ .remove = virtpstore_remove,
+};
+
+module_virtio_driver(virtio_pstore_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Namhyung Kim <namhyung@kernel.org>");
+MODULE_DESCRIPTION("Virtio pstore driver");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6d4e92ccdc91..9bbb1554d8b2 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -449,6 +449,7 @@ header-y += virtio_ids.h
header-y += virtio_input.h
header-y += virtio_net.h
header-y += virtio_pci.h
+header-y += virtio_pstore.h
header-y += virtio_ring.h
header-y += virtio_rng.h
header-y += virtio_scsi.h
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 77925f587b15..c72a9ab588c0 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */
+#define VIRTIO_ID_PSTORE 22 /* virtio pstore */
#endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_pstore.h b/include/uapi/linux/virtio_pstore.h
new file mode 100644
index 000000000000..57c35327f53b
--- /dev/null
+++ b/include/uapi/linux/virtio_pstore.h
@@ -0,0 +1,74 @@
+#ifndef _LINUX_VIRTIO_PSTORE_H
+#define _LINUX_VIRTIO_PSTORE_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+#include <linux/types.h>
+#include <linux/virtio_types.h>
+
+/* Command codes carried in the cmd field of struct virtio_pstore_req */
+#define VIRTIO_PSTORE_CMD_NULL 0
+#define VIRTIO_PSTORE_CMD_OPEN 1
+#define VIRTIO_PSTORE_CMD_READ 2
+#define VIRTIO_PSTORE_CMD_WRITE 3
+#define VIRTIO_PSTORE_CMD_ERASE 4
+#define VIRTIO_PSTORE_CMD_CLOSE 5
+
+/* Record types carried in the type fields */
+#define VIRTIO_PSTORE_TYPE_UNKNOWN 0
+#define VIRTIO_PSTORE_TYPE_DMESG 1
+
+/* Flag bit for the flags fields: the record data is compressed */
+#define VIRTIO_PSTORE_FL_COMPRESSED 1
+
+/* Request header sent from the driver to the device (little-endian). */
+struct virtio_pstore_req {
+ __le16 cmd; /* VIRTIO_PSTORE_CMD_* */
+ __le16 type; /* VIRTIO_PSTORE_TYPE_* */
+ __le32 flags; /* VIRTIO_PSTORE_FL_* */
+ __le64 id; /* record id */
+ __le32 count; /* record count */
+ __le32 reserved;
+};
+
+/* Response header written by the device (little-endian). */
+struct virtio_pstore_res {
+ __le16 cmd;
+ __le16 type;
+ __le32 ret; /* result code; the driver treats a negative value as an error */
+};
+
+/*
+ * Record metadata written by the device, after the response header, in
+ * answer to VIRTIO_PSTORE_CMD_READ (little-endian).
+ */
+struct virtio_pstore_fileinfo {
+ __le64 id; /* record id */
+ __le32 count; /* record count */
+ __le16 type; /* VIRTIO_PSTORE_TYPE_* */
+ __le16 unused;
+ __le32 flags; /* VIRTIO_PSTORE_FL_* */
+ __le32 len; /* length of the record data in bytes */
+ __le64 time_sec; /* record timestamp, seconds */
+ __le32 time_nsec; /* record timestamp, nanoseconds */
+ __le32 reserved;
+};
+
+/* Device configuration space layout. */
+struct virtio_pstore_config {
+ __le32 bufsize; /* buffer size in bytes; read and written back by the driver */
+};
+
+#endif /* _LINUX_VIRTIO_PSTORE_H */
--
2.9.3
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox