* [PATCH net-next,v2 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
Add wrapper functions for HyperV PCIe read / write /
block_invalidate_register operations. This will be used as an
infrastructure in the downstream patch for software communication.
This will be enabled by default if CONFIG_PCI_HYPERV_INTERFACE is set.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
3 files changed, 87 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8b7edaa..247295b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..cf08d02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset, bool read)
+{
+ int rc = -EOPNOTSUPP;
+ int bytes_returned;
+ int block_id;
+
+ if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+ rc = read ?
+ hyperv_read_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+ &bytes_returned) :
+ hyperv_write_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+ /* Make sure len bytes were read successfully */
+ if (read)
+ rc |= !(len == bytes_returned);
+
+ if (rc) {
+ mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+ read ? "read" : "write", rc, len,
+ offset);
+ return rc;
+ }
+
+ return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ return hyperv_reg_block_invalidate(dev->pdev, context,
+ block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+ hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..f9a4557
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v2 1/6] PCI: hv: Add a paravirtual backchannel in software
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org, Dexuan Cui, Jake Oshins
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Dexuan Cui <decui@microsoft.com>
Windows SR-IOV provides a backchannel mechanism in software for communication
between a VF driver and a PF driver. These "configuration blocks" are
similar in concept to PCI configuration space, but instead of doing reads and
writes in 32-bit chunks through a very slow path, packets of up to 128 bytes
can be sent or received asynchronously.
Nearly every SR-IOV device contains just such a communications channel in
hardware, so using this one in software is usually optional. Using the
software channel, however, allows driver implementers to leverage software
tools that fuzz the communications channel looking for vulnerabilities.
The usage model for these packets puts the responsibility for reading or
writing on the VF driver. The VF driver sends a read or a write packet,
indicating which "block" is being referred to by number.
If the PF driver wishes to initiate communication, it can "invalidate" one or
more of the first 64 blocks. This invalidation is delivered via a callback
supplied by the VF driver by this driver.
No protocol is implied, except that supplied by the PF and VF drivers.
Signed-off-by: Jake Oshins <jakeo@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 15 ++
2 files changed, 317 insertions(+)
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b6254..57adeca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
struct tran_int_desc int_desc;
} __packed;
+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+ struct vmpacket_descriptor hdr;
+ u32 status;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 byte_count;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+ struct pci_incoming_message incoming;
+ union win_slot_encoding wslot;
+ u64 block_mask;
+} __packed;
+
struct pci_dev_incoming {
struct pci_incoming_message incoming;
union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
struct hv_pcibus_device *hbus;
struct work_struct wrk;
+ void (*block_invalidate)(void *context, u64 block_mask);
+ void *invalidate_context;
+
/*
* What would be observed if one wrote 0xFFFFFFFF to a BAR and then
* read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
.write = hv_pcifront_write_config,
};
+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver. These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional. Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver. The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks. This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+ struct hv_pci_compl comp_pkt;
+ void *buf;
+ unsigned int len;
+ unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context: Identifies the read config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_read_config_compl *comp = context;
+ struct pci_read_block_response *read_resp =
+ (struct pci_read_block_response *)resp;
+ unsigned int data_len, hdr_len;
+
+ hdr_len = offsetof(struct pci_read_block_response, bytes);
+ if (resp_packet_size < hdr_len) {
+ comp->comp_pkt.completion_status = -1;
+ goto out;
+ }
+
+ data_len = resp_packet_size - hdr_len;
+ if (data_len > 0 && read_resp->status == 0) {
+ comp->bytes_returned = min(comp->len, data_len);
+ memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+ } else {
+ comp->bytes_returned = 0;
+ }
+
+ comp->comp_pkt.completion_status = read_resp->status;
+out:
+ complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer into which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which has been requested.
+ * @bytes_returned: Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_read_block)];
+ } pkt;
+ struct hv_read_config_compl comp_pkt;
+ struct pci_read_block *read_blk;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.comp_pkt.host_event);
+ comp_pkt.buf = buf;
+ comp_pkt.len = len;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_read_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ read_blk = (struct pci_read_block *)&pkt.pkt.message;
+ read_blk->message_type.type = PCI_READ_BLOCK;
+ read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ read_blk->block_id = block_id;
+ read_blk->bytes_requested = len;
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+ sizeof(*read_blk), (unsigned long)&pkt.pkt,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.comp_pkt.completion_status != 0 ||
+ comp_pkt.bytes_returned == 0) {
+ dev_err(&hbus->hdev->device,
+ "Read Config Block failed: 0x%x, bytes_returned=%d\n",
+ comp_pkt.comp_pkt.completion_status,
+ comp_pkt.bytes_returned);
+ return -EIO;
+ }
+
+ *bytes_returned = comp_pkt.bytes_returned;
+ return 0;
+}
+EXPORT_SYMBOL(hv_read_config_block);
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context: Identifies the write config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_pci_compl *comp_pkt = context;
+
+ comp_pkt->completion_status = resp->status;
+ complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer from which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_write_block)];
+ u32 reserved;
+ } pkt;
+ struct hv_pci_compl comp_pkt;
+ struct pci_write_block *write_blk;
+ u32 pkt_size;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.host_event);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_write_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ write_blk = (struct pci_write_block *)&pkt.pkt.message;
+ write_blk->message_type.type = PCI_WRITE_BLOCK;
+ write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ write_blk->block_id = block_id;
+ write_blk->byte_count = len;
+ memcpy(write_blk->bytes, buf, len);
+ pkt_size = offsetof(struct pci_write_block, bytes) + len;
+ /*
+ * This quirk is required on some hosts shipped around 2018, because
+ * these hosts don't check the pkt_size correctly (new hosts have been
+ * fixed since early 2019). The quirk is also safe on very old hosts
+ * and new hosts, because, on them, what really matters is the length
+ * specified in write_blk->byte_count.
+ */
+ pkt_size += sizeof(pkt.reserved);
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+ (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.completion_status != 0) {
+ dev_err(&hbus->hdev->device,
+ "Write Config Block failed: 0x%x\n",
+ comp_pkt.completion_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(hv_write_config_block);
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev: The PCI driver's representation for this device.
+ * @context: Identifies the device.
+ * @block_invalidate: Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct hv_pci_dev *hpdev;
+
+ hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+ if (!hpdev)
+ return -ENODEV;
+
+ hpdev->block_invalidate = block_invalidate;
+ hpdev->invalidate_context = context;
+
+ put_pcichild(hpdev);
+ return 0;
+
+}
+EXPORT_SYMBOL(hv_register_block_invalidate);
+
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
struct pci_response *response;
struct pci_incoming_message *new_message;
struct pci_bus_relations *bus_rel;
+ struct pci_dev_inval_block *inval;
struct pci_dev_incoming *dev_message;
struct hv_pci_dev *hpdev;
@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
}
break;
+ case PCI_INVALIDATE_BLOCK:
+
+ inval = (struct pci_dev_inval_block *)buffer;
+ hpdev = get_pcichild_wslot(hbus,
+ inval->wslot.slot);
+ if (hpdev) {
+ if (hpdev->block_invalidate) {
+ hpdev->block_invalidate(
+ hpdev->invalidate_context,
+ inval->block_mask);
+ }
+ put_pcichild(hpdev);
+ }
+ break;
+
default:
dev_warn(&hbus->hdev->device,
"Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..9d37f8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ struct vmpacket_descriptor *
for (pkt = hv_pkt_iter_first(channel); pkt; \
pkt = hv_pkt_iter_next(channel, pkt))
+/*
+ * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * sends requests to read and write blocks. Each block must be 128 bytes or
+ * smaller. Optionally, the VF driver can register a callback function which
+ * will be invoked when the host says that one or more of the first 64 block
+ * IDs is "invalid" which means that the VF driver should reread them.
+ */
+#define HV_CONFIG_BLOCK_SIZE_MAX 128
+int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hv_register_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
#endif /* _HYPERV_H */
--
1.8.3.1
^ permalink raw reply related
* [PATCH] storvsc: setup 1:1 mapping between hardware queue and CPU queue
From: longli @ 2019-08-19 19:35 UTC (permalink / raw)
To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
James E.J. Bottomley, Martin K. Petersen, linux-hyperv,
linux-scsi, linux-kernel
Cc: Long Li
From: Long Li <longli@microsoft.com>
storvsc doesn't use a dedicated hardware queue for a given CPU queue. When
issuing I/O, it selects returning CPU (hardware queue) dynamically based on
vmbus channel usage across all channels.
This patch sets up a 1:1 mapping between hardware queue and CPU queue, thus
avoiding unnecessary locking at upper layer when issuing I/O.
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/scsi/storvsc_drv.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index b89269120a2d..26c16d40ec46 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1682,6 +1682,18 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
return 0;
}
+static int storvsc_map_queues(struct Scsi_Host *shost)
+{
+ unsigned int cpu;
+ struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+
+ for_each_possible_cpu(cpu) {
+ qmap->mq_map[cpu] = cpu;
+ }
+
+ return 0;
+}
+
static struct scsi_host_template scsi_driver = {
.module = THIS_MODULE,
.name = "storvsc_host_t",
@@ -1697,6 +1709,7 @@ static struct scsi_host_template scsi_driver = {
.this_id = -1,
/* Make sure we dont get a sg segment crosses a page boundary */
.dma_boundary = PAGE_SIZE-1,
+ .map_queues = storvsc_map_queues,
.no_write_same = 1,
.track_queue_depth = 1,
};
@@ -1836,8 +1849,7 @@ static int storvsc_probe(struct hv_device *device,
/*
* Set the number of HW queues we are supporting.
*/
- if (stor_device->num_sc != 0)
- host->nr_hw_queues = stor_device->num_sc + 1;
+ host->nr_hw_queues = num_possible_cpus();
/*
* Set the error handler work queue.
--
2.17.1
^ permalink raw reply related
* [PATCH v3 00/12] Enhance the hv_vmbus driver to support hibernation
From: Dexuan Cui @ 2019-08-20 1:51 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
Hi all,
The patchset is to enhance hv_vmbus to support hibernation when Linux VM
runs on Hyper-V. A second patchset to enhance the high-level VSC drivers
(hv_netvsc, hv_storvsc, etc.) for hibernation will be posted after this
patchset is acceped. If you want to test this hibernation feaure, all the
needed patches can be found on my github branch:
https://github.com/dcui/linux/commits/decui/hibernation/2019-0818/v5.3-rc5
This patchset is based on v5.3-rc5.
Please review.
Hi tglx,
I hope all the 12 patchset, including the below 3 patches,
[PATCH v3 01/12] x86/hyper-v: Suspend/resume the hypercall page for hibernation
[PATCH v3 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
[PATCH v3 03/12] clocksource/drivers: Suspend/resume Hyper-V
can go through Sasha's hyperv/linux.git tree, since all the changes belong
to the hv stuff. However, if you think it's better for these 3 patches to go
through the tip.git tree, it also works for me.
Hi Michael Kelley,
I added your Reviewed-by's for patch 1, 3, 4, 5, 7 and 9, since you reviewed
these patches. Please review the others.
Looking forward to your comments!
Thanks,
Dexuan
Changes in v2:
Patch 3: Improved the changelog and added a comment.
Patch 4: Remove the "hv_stimer_cleanup" in hv_synic_suspend(), because I
suppose https://lkml.org/lkml/2019/7/27/5 will be accepted. Also
improved changelog and the comment.
Patch 5: Fixed the third argument of print_hex_dump_debug(). Also improved
the changelog.
Patch 6: Improved the changelog and the comment. Added a check for the
'vmbus_proto_version' in vmbus_bus_resume().
Changes in v3:
Patch 2: Add a new API hv_is_hibernation_supported().
Patch 6: Add a new helper function is_sub_channel().
Patch 8: Find the old channels via Instance GUID rather than the RELID.
Patch 10: Add code to clean up hv_sock channels by force
Patch 11: Add code to wait in the suspend path: all the hv_sock channels
and sub-channels should be cleaned up first before Linux sends
the VMBUS UNLOAD message.
Patch 12: Add code to fix up the old primary channels before further
resuming.
Dexuan Cui (12):
x86/hyper-v: Suspend/resume the hypercall page for hibernation
x86/hyper-v: Implement hv_is_hibernation_supported()
clocksource/drivers: Suspend/resume Hyper-V clocksource for
hibernation
Drivers: hv: vmbus: Break out synic enable and disable operations
Drivers: hv: vmbus: Suspend/resume the synic for hibernation
Drivers: hv: vmbus: Add a helper function is_sub_channel()
Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for
hibernation
Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
Drivers: hv: vmbus: Resume after fixing up old primary channels
arch/x86/hyperv/hv_init.c | 41 ++++++
drivers/clocksource/hyperv_timer.c | 25 ++++
drivers/hv/channel_mgmt.c | 127 +++++++++++++++---
drivers/hv/connection.c | 35 ++++-
drivers/hv/hv.c | 66 ++++++----
drivers/hv/hyperv_vmbus.h | 33 +++++
drivers/hv/vmbus_drv.c | 262 +++++++++++++++++++++++++++++++++++++
include/asm-generic/mshyperv.h | 2 +
include/linux/hyperv.h | 16 ++-
9 files changed, 558 insertions(+), 49 deletions(-)
--
1.8.3.1
^ permalink raw reply
* [PATCH v3 01/12] x86/hyper-v: Suspend/resume the hypercall page for hibernation
From: Dexuan Cui @ 2019-08-20 1:51 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
This is needed for hibernation, e.g. when we resume the old kernel, we need
to disable the "current" kernel's hypercall page and then resume the old
kernel's.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
arch/x86/hyperv/hv_init.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0d25868..78e53d9 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -20,6 +20,7 @@
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
+#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
@@ -223,6 +224,34 @@ static int __init hv_pci_init(void)
return 1;
}
+static int hv_suspend(void)
+{
+ union hv_x64_msr_hypercall_contents hypercall_msr;
+
+ /* Reset the hypercall page */
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hypercall_msr.enable = 0;
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+ return 0;
+}
+
+static void hv_resume(void)
+{
+ union hv_x64_msr_hypercall_contents hypercall_msr;
+
+ /* Re-enable the hypercall page */
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hypercall_msr.enable = 1;
+ hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+}
+
+static struct syscore_ops hv_syscore_ops = {
+ .suspend = hv_suspend,
+ .resume = hv_resume,
+};
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -303,6 +332,9 @@ void __init hyperv_init(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
+
+ register_syscore_ops(&hv_syscore_ops);
+
return;
remove_cpuhp_state:
@@ -322,6 +354,8 @@ void hyperv_cleanup(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
+ unregister_syscore_ops(&hv_syscore_ops);
+
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 07/12] Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
The high-level VSC drivers will implement device-specific callbacks.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 3 +++
2 files changed, 49 insertions(+)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2ef375c..a30c70a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->suspend)
+ return -EOPNOTSUPP;
+
+ return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+ struct hv_driver *drv;
+ struct hv_device *dev = device_to_hv_device(child_device);
+
+ /* The device may not be attached yet */
+ if (!child_device->driver)
+ return 0;
+
+ drv = drv_to_hv_drv(child_device->driver);
+ if (!drv->resume)
+ return -EOPNOTSUPP;
+
+ return drv->resume(dev);
+}
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -926,6 +963,14 @@ static void vmbus_device_release(struct device *device)
kfree(hv_dev);
}
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
/* The one and only one */
static struct bus_type hv_bus = {
.name = "vmbus",
@@ -936,6 +981,7 @@ static void vmbus_device_release(struct device *device)
.uevent = vmbus_uevent,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
+ .pm = &vmbus_pm,
};
struct onmessage_work_context {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2d39248..8a60e77 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1157,6 +1157,9 @@ struct hv_driver {
int (*remove)(struct hv_device *);
void (*shutdown)(struct hv_device *);
+ int (*suspend)(struct hv_device *);
+ int (*resume)(struct hv_device *);
+
};
/* Base device object */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 09/12] Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
Before Linux enters hibernation, it sends the CHANNELMSG_UNLOAD message to
the host so all the offers are gone. After hibernation, Linux needs to
re-negotiate with the host using the same vmbus protocol version (which
was in use before hibernation), and ask the host to re-offer the vmbus
devices.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/hv/connection.c | 3 +--
drivers/hv/hyperv_vmbus.h | 2 ++
drivers/hv/vmbus_drv.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 6c7a983..701d9a8 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -59,8 +59,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
}
}
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
- __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
unsigned int cur_cpu;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index c42b46d..4610277 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -272,6 +272,8 @@ struct vmbus_msginfo {
extern struct vmbus_connection vmbus_connection;
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
static inline void vmbus_send_interrupt(u32 relid)
{
sync_set_bit(relid, vmbus_connection.send_int_page);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index a30c70a..ce9974b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2089,6 +2089,51 @@ static int vmbus_acpi_add(struct acpi_device *device)
return ret_val;
}
+static int vmbus_bus_suspend(struct device *dev)
+{
+ vmbus_initiate_unload(false);
+
+ vmbus_connection.conn_state = DISCONNECTED;
+
+ return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+ struct vmbus_channel_msginfo *msginfo;
+ size_t msgsize;
+ int ret;
+
+ /*
+ * We only use the 'vmbus_proto_version', which was in use before
+ * hibernation, to re-negotiate with the host.
+ */
+ if (vmbus_proto_version == VERSION_INVAL ||
+ vmbus_proto_version == 0) {
+ pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+ return -EINVAL;
+ }
+
+ msgsize = sizeof(*msginfo) +
+ sizeof(struct vmbus_channel_initiate_contact);
+
+ msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+ if (msginfo == NULL)
+ return -ENOMEM;
+
+ ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+ kfree(msginfo);
+
+ if (ret != 0)
+ return ret;
+
+ vmbus_request_offers();
+
+ return 0;
+}
+
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
@@ -2096,6 +2141,19 @@ static int vmbus_acpi_add(struct acpi_device *device)
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
static struct acpi_driver vmbus_acpi_driver = {
.name = "vmbus",
.ids = vmbus_acpi_device_ids,
@@ -2103,6 +2161,7 @@ static int vmbus_acpi_add(struct acpi_device *device)
.add = vmbus_acpi_add,
.remove = vmbus_acpi_remove,
},
+ .drv.pm = &vmbus_bus_pm,
};
static void hv_kexec_handler(void)
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 12/12] Drivers: hv: vmbus: Resume after fixing up old primary channels
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
When the host re-offers the primary channels upon resume, the host only
guarantees the Instance GUID doesn't change, so vmbus_bus_suspend()
should invalidate channel->offermsg.child_relid and figure out the
number of primary channels that need to be fixed up upon resume.
Upon resume, vmbus_onoffer() finds the old channel structs, and maps
the new offers to the old channels, and fixes up the old structs,
and finally the resume callbacks of the VSC drivers will re-open
the channels.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/channel_mgmt.c | 76 +++++++++++++++++++++++++++++++++++------------
drivers/hv/connection.c | 2 ++
drivers/hv/hyperv_vmbus.h | 14 +++++++++
drivers/hv/vmbus_drv.c | 17 +++++++++++
include/linux/hyperv.h | 3 ++
5 files changed, 93 insertions(+), 19 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 8491d1b..61b6288 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
cpumask_clear_cpu(channel->target_cpu,
&primary_channel->alloced_cpus_in_node);
- vmbus_release_relid(channel->offermsg.child_relid);
+ /*
+ * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+ * the relid is invalidated; after hibernation, when the user-space app
+ * destroys the channel, the relid is INVALID_RELID, and in this case
+ * it's unnecessary and unsafe to release the old relid, since the same
+ * relid can refer to a completely different channel now.
+ */
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ vmbus_release_relid(channel->offermsg.child_relid);
free_channel(channel);
}
@@ -853,6 +861,36 @@ void vmbus_initiate_unload(bool crash)
vmbus_wait_for_unload();
}
+static void check_ready_for_resume_event(void)
+{
+ /*
+ * If all the old primary channels have been fixed up, then it's safe
+ * to resume.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+ complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+ struct vmbus_channel_offer_channel *offer)
+{
+ /*
+ * Setup state for signalling the host.
+ */
+ channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+ if (vmbus_proto_version != VERSION_WS2008) {
+ channel->is_dedicated_interrupt =
+ (offer->is_dedicated_interrupt != 0);
+ channel->sig_event = offer->connection_id;
+ }
+
+ memcpy(&channel->offermsg, offer,
+ sizeof(struct vmbus_channel_offer_channel));
+ channel->monitor_grp = (u8)offer->monitorid / 32;
+ channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
/*
* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
*
@@ -875,12 +913,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
atomic_dec(&vmbus_connection.offer_in_progress);
/*
- * We're resuming from hibernation: we expect the host to send
- * exactly the same offers that we had before the hibernation.
+ * We're resuming from hibernation: all the sub-channel and
+ * hv_sock channels we had before the hibernation should have
+ * been cleaned up, and now we must be seeing a re-offered
+ * primary channel that we had before the hibernation.
*/
+
+ WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+ /* Fix up the relid. */
+ oldchannel->offermsg.child_relid = offer->child_relid;
+
offer_sz = sizeof(*offer);
- if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+ check_ready_for_resume_event();
return;
+ }
pr_debug("Mismatched offer from the host (relid=%d)\n",
offer->child_relid);
@@ -890,6 +937,11 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
false);
print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
16, 4, offer, offer_sz, false);
+
+ vmbus_setup_channel_state(oldchannel, offer);
+
+ check_ready_for_resume_event();
+
return;
}
@@ -902,21 +954,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
return;
}
- /*
- * Setup state for signalling the host.
- */
- newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
- if (vmbus_proto_version != VERSION_WS2008) {
- newchannel->is_dedicated_interrupt =
- (offer->is_dedicated_interrupt != 0);
- newchannel->sig_event = offer->connection_id;
- }
-
- memcpy(&newchannel->offermsg, offer,
- sizeof(struct vmbus_channel_offer_channel));
- newchannel->monitor_grp = (u8)offer->monitorid / 32;
- newchannel->monitor_bit = (u8)offer->monitorid % 32;
+ vmbus_setup_channel_state(newchannel, offer);
vmbus_process_offer(newchannel);
}
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index f15d3115..ebfa9aa 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -29,6 +29,8 @@ struct vmbus_connection vmbus_connection = {
.ready_for_suspend_event= COMPLETION_INITIALIZER(
vmbus_connection.ready_for_suspend_event),
+ .ready_for_resume_event = COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_resume_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 9f96e23..fb00ffd 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -270,6 +270,20 @@ struct vmbus_connection {
* drop to zero.
*/
struct completion ready_for_suspend_event;
+
+ /*
+ * The number of primary channels that should be "fixed up"
+ * upon resume: these channels are re-offered upon resume, and some
+ * fields of the channel offers (i.e. child_relid and connection_id)
+ * can change, so the old offermsg must be fixed up, before the resume
+ * callbacks of the VSC drivers start to further touch the channels.
+ */
+ atomic_t nr_chan_fixup_on_resume;
+ /*
+ * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+ * drop to zero.
+ */
+ struct completion ready_for_resume_event;
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0507157..edec984 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2161,9 +2161,17 @@ static int vmbus_bus_suspend(struct device *dev)
if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ /*
+ * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+ * up the field, and the other fields (if necessary).
+ */
+ channel->offermsg.child_relid = INVALID_RELID;
+
if (is_hvsock_channel(channel)) {
if (!channel->rescind) {
pr_err("hv_sock channel not rescinded!\n");
@@ -2178,6 +2186,8 @@ static int vmbus_bus_suspend(struct device *dev)
WARN_ON_ONCE(1);
}
spin_unlock_irqrestore(&channel->lock, flags);
+
+ atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
}
mutex_unlock(&vmbus_connection.channel_mutex);
@@ -2186,6 +2196,9 @@ static int vmbus_bus_suspend(struct device *dev)
vmbus_connection.conn_state = DISCONNECTED;
+ /* Reset the event for the next resume. */
+ reinit_completion(&vmbus_connection.ready_for_resume_event);
+
return 0;
}
@@ -2220,8 +2233,12 @@ static int vmbus_bus_resume(struct device *dev)
if (ret != 0)
return ret;
+ WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
vmbus_request_offers();
+ wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
/* Reset the event for the next suspend. */
reinit_completion(&vmbus_connection.ready_for_suspend_event);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 8a60e77..a3aa9e9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -426,6 +426,9 @@ enum vmbus_channel_message_type {
CHANNELMSG_COUNT
};
+/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
+#define INVALID_RELID U32_MAX
+
struct vmbus_channel_message_header {
enum vmbus_channel_message_type msgtype;
u32 padding;
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 11/12] Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
Before suspend, Linux must make sure all the hv_sock channels have been
properly cleaned up, because a hv_sock connection can not persist across
hibernation, and the user-space app must be properly notified of the
state change of the connection.
Before suspend, Linux also must make sure all the sub-channels have been
destroyed, i.e. the related channel structs of the sub-channels must be
properly removed, otherwise they would cause a conflict when the
sub-channels are recreated upon resume.
Add a counter to track such channels, and vmbus_bus_suspend() should wait
for the counter to drop to zero.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/channel_mgmt.c | 28 ++++++++++++++++++++++++++++
drivers/hv/connection.c | 3 +++
drivers/hv/hyperv_vmbus.h | 12 ++++++++++++
drivers/hv/vmbus_drv.c | 41 ++++++++++++++++++++++++++++++++++++++++-
4 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f7a1184..8491d1b 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -499,6 +499,8 @@ static void vmbus_add_channel_work(struct work_struct *work)
return;
err_deq_chan:
+ WARN_ON_ONCE(1);
+
mutex_lock(&vmbus_connection.channel_mutex);
/*
@@ -545,6 +547,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
mutex_lock(&vmbus_connection.channel_mutex);
+ /* Remember the channels that should be cleaned up upon suspend. */
+ if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+ atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
/*
* Now that we have acquired the channel_mutex,
* we can release the potentially racing rescind thread.
@@ -915,6 +921,16 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
vmbus_process_offer(newchannel);
}
+static void check_ready_for_suspend_event(void)
+{
+ /*
+ * If all the sub-channels or hv_sock channels have been cleaned up,
+ * then it's safe to suspend.
+ */
+ if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+ complete(&vmbus_connection.ready_for_suspend_event);
+}
+
/*
* vmbus_onoffer_rescind - Rescind offer handler.
*
@@ -925,6 +941,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
struct device *dev;
+ bool clean_up_chan_for_suspend;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
@@ -964,6 +981,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
return;
}
+ clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+ is_sub_channel(channel);
/*
* Before setting channel->rescind in vmbus_rescind_cleanup(), we
* should make sure the channel callback is not running any more.
@@ -989,6 +1008,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
+
return;
}
/*
@@ -1021,6 +1044,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
+
+ /* The "channel" may have been freed. Do not access it any longer. */
+
+ if (clean_up_chan_for_suspend)
+ check_ready_for_suspend_event();
}
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 701d9a8..f15d3115 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -26,6 +26,9 @@
struct vmbus_connection vmbus_connection = {
.conn_state = DISCONNECTED,
.next_gpadl_handle = ATOMIC_INIT(0xE1E10),
+
+ .ready_for_suspend_event= COMPLETION_INITIALIZER(
+ vmbus_connection.ready_for_suspend_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 4610277..9f96e23 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -258,6 +258,18 @@ struct vmbus_connection {
struct workqueue_struct *work_queue;
struct workqueue_struct *handle_primary_chan_wq;
struct workqueue_struct *handle_sub_chan_wq;
+
+ /*
+ * The number of sub-channels and hv_sock channels that should be
+ * cleaned up upon suspend: sub-channels will be re-created upon
+ * resume, and hv_sock channels should not survive suspend.
+ */
+ atomic_t nr_chan_close_on_suspend;
+ /*
+ * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+ * drop to zero.
+ */
+ struct completion ready_for_suspend_event;
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2bea669..0507157 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2127,7 +2127,8 @@ static int vmbus_acpi_add(struct acpi_device *device)
static int vmbus_bus_suspend(struct device *dev)
{
- struct vmbus_channel *channel;
+ struct vmbus_channel *channel, *sc;
+ unsigned long flags;
while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
/*
@@ -2146,6 +2147,41 @@ static int vmbus_bus_suspend(struct device *dev)
}
mutex_unlock(&vmbus_connection.channel_mutex);
+ /*
+ * Wait until all the sub-channels and hv_sock channels have been
+ * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+ * they would conflict with the new sub-channels that will be created
+ * in the resume path. hv_sock channels should also be destroyed, but
+ * a hv_sock channel of an established hv_sock connection can not be
+ * really destroyed since it may still be referenced by the userspace
+ * application, so we just force the hv_sock channel to be rescinded
+ * by vmbus_force_channel_rescinded(), and the userspace application
+ * will thoroughly destroy the channel after hibernation.
+ */
+ if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+ wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (is_hvsock_channel(channel)) {
+ if (!channel->rescind) {
+ pr_err("hv_sock channel not rescinded!\n");
+ WARN_ON_ONCE(1);
+ }
+ continue;
+ }
+
+ spin_lock_irqsave(&channel->lock, flags);
+ list_for_each_entry(sc, &channel->sc_list, sc_list) {
+ pr_err("Sub-channel not deleted!\n");
+ WARN_ON_ONCE(1);
+ }
+ spin_unlock_irqrestore(&channel->lock, flags);
+ }
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
vmbus_initiate_unload(false);
vmbus_connection.conn_state = DISCONNECTED;
@@ -2186,6 +2222,9 @@ static int vmbus_bus_resume(struct device *dev)
vmbus_request_offers();
+ /* Reset the event for the next suspend. */
+ reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
return 0;
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 10/12] Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
hibernation. There is no better method to clean up the channels since
some of the channels may still be referenced by the userspace apps when
hiberantin is triggered: in this case, the "rescind" fields of the
channels are set, and the apps will thoroughly destroy the channels
after hibernation.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/vmbus_drv.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ce9974b..2bea669 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -24,6 +24,7 @@
#include <linux/sched/task_stack.h>
#include <asm/mshyperv.h>
+#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
@@ -1069,6 +1070,41 @@ void vmbus_on_msg_dpc(unsigned long data)
vmbus_signal_eom(msg, message_type);
}
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+ struct onmessage_work_context *ctx;
+ struct vmbus_channel_rescind_offer *rescind;
+
+ WARN_ON(!is_hvsock_channel(channel));
+
+ /*
+ * sizeof(*ctx) is small and the allocation should really not fail,
+ * otherwise the state of the hv_sock connections ends up in limbo.
+ */
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+ /*
+ * So far, these are not really used by Linux. Just set them to the
+ * reasonable values conforming to the definitions of the fields.
+ */
+ ctx->msg.header.message_type = 1;
+ ctx->msg.header.payload_size = sizeof(*rescind);
+
+ /* These values are actually used by Linux. */
+ rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+ rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+ rescind->child_relid = channel->offermsg.child_relid;
+
+ INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+ queue_work_on(vmbus_connection.connect_cpu,
+ vmbus_connection.work_queue,
+ &ctx->work);
+}
/*
* Direct callback for channels using other deferred processing
@@ -2091,6 +2127,25 @@ static int vmbus_acpi_add(struct acpi_device *device)
static int vmbus_bus_suspend(struct device *dev)
{
+ struct vmbus_channel *channel;
+
+ while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+ /*
+ * We wait here until any channel offer is currently
+ * being processed.
+ */
+ msleep(1);
+ }
+
+ mutex_lock(&vmbus_connection.channel_mutex);
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ if (!is_hvsock_channel(channel))
+ continue;
+
+ vmbus_force_channel_rescinded(channel);
+ }
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
vmbus_initiate_unload(false);
vmbus_connection.conn_state = DISCONNECTED;
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 08/12] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
When the VM resumes, the host re-sends the offers. We should not add the
offers to the global vmbus_connection.chn_list again.
This patch assumes the RELIDs of the channels don't change across
hibernation. Actually this is not always true, especially in the case of
NIC SR-IOV the VF vmbus device's RELID sometimes can change. A later patch
will address this issue by mapping the new offers to the old channels and
fixing up the old channels, if necessary.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/channel_mgmt.c | 29 ++++++++++++++++++++++++++++-
drivers/hv/connection.c | 27 +++++++++++++++++++++++++++
drivers/hv/hyperv_vmbus.h | 3 +++
3 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index addcef5..f7a1184 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -854,12 +854,39 @@ void vmbus_initiate_unload(bool crash)
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_offer_channel *offer;
- struct vmbus_channel *newchannel;
+ struct vmbus_channel *oldchannel, *newchannel;
+ size_t offer_sz;
offer = (struct vmbus_channel_offer_channel *)hdr;
trace_vmbus_onoffer(offer);
+ mutex_lock(&vmbus_connection.channel_mutex);
+ oldchannel = find_primary_channel_by_offer(offer);
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ if (oldchannel != NULL) {
+ atomic_dec(&vmbus_connection.offer_in_progress);
+
+ /*
+ * We're resuming from hibernation: we expect the host to send
+ * exactly the same offers that we had before the hibernation.
+ */
+ offer_sz = sizeof(*offer);
+ if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
+ return;
+
+ pr_debug("Mismatched offer from the host (relid=%d)\n",
+ offer->child_relid);
+
+ print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, &oldchannel->offermsg, offer_sz,
+ false);
+ print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+ 16, 4, offer, offer_sz, false);
+ return;
+ }
+
/* Allocate the channel object and save this offer. */
newchannel = alloc_channel();
if (!newchannel) {
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e1..6c7a983 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -337,6 +337,33 @@ struct vmbus_channel *relid2channel(u32 relid)
}
/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+ struct vmbus_channel *channel;
+ const guid_t *inst1, *inst2;
+
+ WARN_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+
+ /* Ignore sub-channel offers. */
+ if (offer->offer.sub_channel_index != 0)
+ return NULL;
+
+ list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+ inst1 = &channel->offermsg.offer.if_instance;
+ inst2 = &offer->offer.if_instance;
+
+ if (guid_equal(inst1, inst2))
+ return channel;
+ }
+
+ return NULL;
+}
+
+/*
* vmbus_on_event - Process a channel event notification
*
* For batched channels (default) optimize host to guest signaling
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 9f7fb6d..c42b46d 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -310,6 +310,9 @@ int vmbus_add_channel_kobj(struct hv_device *device_obj,
struct vmbus_channel *relid2channel(u32 relid);
+struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer);
+
void vmbus_free_channels(void);
/* Connection interface */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 05/12] Drivers: hv: vmbus: Suspend/resume the synic for hibernation
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
This is needed when we resume the old kernel from the "current" kernel.
Note: when hv_synic_suspend() and hv_synic_resume() run, all the
non-boot CPUs have been offlined, and interrupts are disabled on CPU0.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc..2ef375c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -30,6 +30,7 @@
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
@@ -2086,6 +2087,47 @@ static void hv_crash_handler(struct pt_regs *regs)
hyperv_cleanup();
};
+static int hv_synic_suspend(void)
+{
+ /*
+ * When we reach here, all the non-boot CPUs have been offlined, and
+ * the stimers on them have been unbound in hv_synic_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device().
+ *
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+ * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+ * because the interrupts remain disabled between syscore_suspend()
+ * and syscore_resume(): see create_image() and resume_target_kernel();
+ * 2) the stimer on CPU0 is automatically disabled later by
+ * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+ * would be triggered if we call clockevents_unbind_device(), which
+ * may sleep, in an interrupts-disabled context. So, we intentionally
+ * don't call hv_stimer_cleanup(0) here.
+ */
+
+ hv_synic_disable_regs(0);
+
+ return 0;
+}
+
+static void hv_synic_resume(void)
+{
+ hv_synic_enable_regs(0);
+
+ /*
+ * Note: we don't need to call hv_stimer_init(0), because the timer
+ * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+ * automatically re-enabled in timekeeping_resume().
+ */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+ .suspend = hv_synic_suspend,
+ .resume = hv_synic_resume,
+};
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -2116,6 +2158,8 @@ static int __init hv_acpi_init(void)
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
+ register_syscore_ops(&hv_synic_syscore_ops);
+
return 0;
cleanup:
@@ -2128,6 +2172,8 @@ static void __exit vmbus_exit(void)
{
int cpu;
+ unregister_syscore_ops(&hv_synic_syscore_ops);
+
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 06/12] Drivers: hv: vmbus: Add a helper function is_sub_channel()
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
The existing method of telling if a channel is sub-channel in
vmbus_process_offer() is cumbersome. This new simple helper function
is preferred in future.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
include/linux/hyperv.h | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..2d39248 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
} pipe;
} u;
/*
- * The sub_channel_index is defined in win8.
+ * The sub_channel_index is defined in Win8: a value of zero means a
+ * primary channel and a value of non-zero means a sub-channel.
+ *
+ * Before Win8, the field is reserved, meaning it's always zero.
*/
u16 sub_channel_index;
u16 reserved3;
@@ -934,6 +937,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
}
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+ return c->offermsg.offer.sub_channel_index != 0;
+}
+
static inline void set_channel_affinity_state(struct vmbus_channel *c,
enum hv_numa_policy policy)
{
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 03/12] clocksource/drivers: Suspend/resume Hyper-V clocksource for hibernation
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
This is needed for hibernation, e.g. when we resume the old kernel, we need
to disable the "current" kernel's TSC page and then resume the old kernel's.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/clocksource/hyperv_timer.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 17b96f9..8f3422c 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -238,12 +238,37 @@ static u64 read_hv_clock_tsc(struct clocksource *arg)
return read_hv_sched_clock_tsc();
}
+static void suspend_hv_clock_tsc(struct clocksource *arg)
+{
+ u64 tsc_msr;
+
+ /* Disable the TSC page */
+ hv_get_reference_tsc(tsc_msr);
+ tsc_msr &= ~BIT_ULL(0);
+ hv_set_reference_tsc(tsc_msr);
+}
+
+
+static void resume_hv_clock_tsc(struct clocksource *arg)
+{
+ phys_addr_t phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
+ u64 tsc_msr;
+
+ /* Re-enable the TSC page */
+ hv_get_reference_tsc(tsc_msr);
+ tsc_msr &= GENMASK_ULL(11, 0);
+ tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
+ hv_set_reference_tsc(tsc_msr);
+}
+
static struct clocksource hyperv_cs_tsc = {
.name = "hyperv_clocksource_tsc_page",
.rating = 400,
.read = read_hv_clock_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .suspend= suspend_hv_clock_tsc,
+ .resume = resume_hv_clock_tsc,
};
#endif
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 04/12] Drivers: hv: vmbus: Break out synic enable and disable operations
From: Dexuan Cui @ 2019-08-20 1:52 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
Break out synic enable and disable operations into separate
hv_synic_disable_regs() and hv_synic_enable_regs() functions for use by a
later patch to support hibernation.
There is no functional change except the unnecessary check
"if (sctrl.enable != 1) return -EFAULT;" which is removed, because when
we're in hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
drivers/hv/hv.c | 66 ++++++++++++++++++++++++++---------------------
drivers/hv/hyperv_vmbus.h | 2 ++
2 files changed, 39 insertions(+), 29 deletions(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 6188fb7..fcc5279 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -154,7 +154,7 @@ void hv_synic_free(void)
* retrieve the initialized message and event pages. Otherwise, we create and
* initialize the message and event pages.
*/
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
{
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
sctrl.enable = 1;
hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+ hv_synic_enable_regs(cpu);
hv_stimer_init(cpu);
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
{
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
+
+ hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ shared_sint.masked = 1;
+
+ /* Need to correctly cleanup in the case of SMP!!! */
+ /* Disable the interrupt */
+ hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ hv_get_simp(simp.as_uint64);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;
+
+ hv_set_simp(simp.as_uint64);
+
+ hv_get_siefp(siefp.as_uint64);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;
+
+ hv_set_siefp(siefp.as_uint64);
+
+ /* Disable the global synic bit */
+ hv_get_synic_state(sctrl.as_uint64);
+ sctrl.enable = 0;
+ hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
struct vmbus_channel *channel, *sc;
bool channel_found = false;
unsigned long flags;
- hv_get_synic_state(sctrl.as_uint64);
- if (sctrl.enable != 1)
- return -EFAULT;
-
/*
* Search for channels which are bound to the CPU we're about to
* cleanup. In case we find one and vmbus is still connected we need to
@@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
hv_stimer_cleanup(cpu);
- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- shared_sint.masked = 1;
-
- /* Need to correctly cleanup in the case of SMP!!! */
- /* Disable the interrupt */
- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- hv_get_simp(simp.as_uint64);
- simp.simp_enabled = 0;
- simp.base_simp_gpa = 0;
-
- hv_set_simp(simp.as_uint64);
-
- hv_get_siefp(siefp.as_uint64);
- siefp.siefp_enabled = 0;
- siefp.base_siefp_gpa = 0;
-
- hv_set_siefp(siefp.as_uint64);
-
- /* Disable the global synic bit */
- sctrl.enable = 0;
- hv_set_synic_state(sctrl.as_uint64);
+ hv_synic_disable_regs(cpu);
return 0;
}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index fb16a62..9f7fb6d 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -169,8 +169,10 @@ extern int hv_post_message(union hv_connection_id connection_id,
extern void hv_synic_free(void);
+extern void hv_synic_enable_regs(unsigned int cpu);
extern int hv_synic_init(unsigned int cpu);
+extern void hv_synic_disable_regs(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);
/* Interface */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
From: Dexuan Cui @ 2019-08-20 1:51 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, tglx@linutronix.de
Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>
When a Linux VM runs on Hyper-V and hibernates, it must disable the
memory hot-add/remove and balloon up/down capabilities in the hv_balloon
driver.
By default, Hyper-V does not enable the virtual ACPI S4 state for a VM;
on recent Hyper-V hosts, the administrator is able to enable the virtual
ACPI S4 state for a VM, so we hope to use the presence of the virtual ACPI
S4 state as a hint for hv_balloon to disable the aforementioned
capabilities.
The new API will be used by hv_balloon.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
arch/x86/hyperv/hv_init.c | 7 +++++++
include/asm-generic/mshyperv.h | 2 ++
2 files changed, 9 insertions(+)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 78e53d9..6735e45 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -7,6 +7,7 @@
* Author : K. Y. Srinivasan <kys@microsoft.com>
*/
+#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/types.h>
#include <asm/apic.h>
@@ -453,3 +454,9 @@ bool hv_is_hyperv_initialized(void)
return hypercall_msr.enable;
}
EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
+
+bool hv_is_hibernation_supported(void)
+{
+ return acpi_sleep_state_supported(ACPI_STATE_S4);
+}
+EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0becb7d..1cb4001 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -166,9 +166,11 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
void hyperv_report_panic(struct pt_regs *regs, long err);
void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
bool hv_is_hyperv_initialized(void);
+bool hv_is_hibernation_supported(void);
void hyperv_cleanup(void);
#else /* CONFIG_HYPERV */
static inline bool hv_is_hyperv_initialized(void) { return false; }
+static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
#endif /* CONFIG_HYPERV */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 0/3] hv: vmbus: add fuzz testing to hv device
From: Branden Bonaby @ 2019-08-20 2:44 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
This patchset introduces a testing framework for Hyper-V drivers.
This framework allows us to introduce delays in the packet receive
path on a per-device basis. While the current code only supports
introducing arbitrary delays in the host/guest communication path,
we intend to expand this to support error injection in the future.
Changes in v2:
Patch 1: As per Vitaly's suggestion, wrapped the test code under an
#ifdef and updated the Kconfig file, so that the test code
will only be used when the config option is set to true.
(default is false).
Updated hyperv_vmbus header to contain new #ifdef with new
new functions for the test code.
Patch 2: Moved code from under sysfs to debugfs and wrapped it under
the new ifdef.
Updated MAINTAINERS file with new debugfs-hyperv file under
the section for hyperv.
Patch 3: Updated testing tool with new debugfs location.
Branden Bonaby (3):
drivers: hv: vmbus: Introduce latency testing
drivers: hv: vmbus: add fuzz test attributes to debugfs
tools: hv: add vmbus testing tool
Documentation/ABI/testing/debugfs-hyperv | 21 ++
MAINTAINERS | 1 +
drivers/hv/Kconfig | 7 +
drivers/hv/connection.c | 3 +
drivers/hv/hyperv_vmbus.h | 20 ++
drivers/hv/ring_buffer.c | 7 +
drivers/hv/vmbus_drv.c | 167 ++++++++++++
include/linux/hyperv.h | 21 ++
tools/hv/vmbus_testing | 334 +++++++++++++++++++++++
9 files changed, 581 insertions(+)
create mode 100644 Documentation/ABI/testing/debugfs-hyperv
create mode 100644 tools/hv/vmbus_testing
--
2.17.1
^ permalink raw reply
* [PATCH v2 1/3] drivers: hv: vmbus: Introduce latency testing
From: Branden Bonaby @ 2019-08-20 2:44 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
In-Reply-To: <cover.1566266609.git.brandonbonaby94@gmail.com>
Introduce user specified latency in the packet reception path.
Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
---
Changes in v2:
- Add #ifdef in Kconfig file so test code will not interfere
with non-test code.
- Move test code functions for delay to hyperv_vmbus header
file.
- Wrap test code under #ifdef statement.
drivers/hv/Kconfig | 7 +++++++
drivers/hv/connection.c | 3 +++
drivers/hv/hyperv_vmbus.h | 20 ++++++++++++++++++++
drivers/hv/ring_buffer.c | 7 +++++++
include/linux/hyperv.h | 21 +++++++++++++++++++++
5 files changed, 58 insertions(+)
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 9a59957922d4..d97437ba0626 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -29,4 +29,11 @@ config HYPERV_BALLOON
help
Select this option to enable Hyper-V Balloon driver.
+config HYPERV_TESTING
+ bool "Hyper-V testing"
+ default n
+ depends on HYPERV && DEBUG_FS
+ help
+ Select this option to enable Hyper-V vmbus testing.
+
endmenu
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e15d4a0..c9c63a4033cd 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -357,6 +357,9 @@ void vmbus_on_event(unsigned long data)
trace_vmbus_on_event(channel);
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_delay_test(channel, INTERRUPT_DELAY);
+#endif /* CONFIG_HYPERV_TESTING */
do {
void (*callback_fn)(void *);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 362e70e9d145..edf14f596d8c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -357,4 +357,24 @@ enum hvutil_device_state {
HVUTIL_DEVICE_DYING, /* driver unload is in progress */
};
+#ifdef CONFIG_HYPERV_TESTING
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#define TESTING "hyperv"
+
+enum delay {
+ INTERRUPT_DELAY = 0,
+ MESSAGE_DELAY = 1,
+};
+
+int hv_debug_delay_files(struct hv_device *dev, struct dentry *root);
+int hv_debug_add_dev_dir(struct hv_device *dev);
+void hv_debug_rm_dev_dir(struct hv_device *dev);
+void hv_debug_rm_all_dir(void);
+void hv_debug_set_dir_dentry(struct hv_device *dev, struct dentry *root);
+void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type);
+
+#endif /* CONFIG_HYPERV_TESTING */
+
#endif /* _HYPERV_VMBUS_H */
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 9a03b163cbbd..51adda23b398 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -396,6 +396,10 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
struct hv_ring_buffer_info *rbi = &channel->inbound;
struct vmpacket_descriptor *desc;
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_delay_test(channel, MESSAGE_DELAY);
+#endif /* CONFIG_HYPERV_TESTING */
+
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
@@ -421,6 +425,9 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
u32 packetlen = desc->len8 << 3;
u32 dsize = rbi->ring_datasize;
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_delay_test(channel, MESSAGE_DELAY);
+#endif /* CONFIG_HYPERV_TESTING */
/* bump offset to next potential packet */
rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
if (rbi->priv_read_index >= dsize)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc34c4a6..6bf8ef5c780c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -926,6 +926,21 @@ struct vmbus_channel {
* full outbound ring buffer.
*/
u64 out_full_first;
+
+#ifdef CONFIG_HYPERV_TESTING
+ /* enabling/disabling fuzz testing on the channel (default is false)*/
+ bool fuzz_testing_state;
+
+ /* Interrupt delay will delay the guest from emptying the ring buffer
+ * for a specific amount of time. The delay is in microseconds and will
+ * be between 1 to a maximum of 1000, its default is 0 (no delay).
+ * The Message delay will delay guest reading on a per message basis
+ * in microseconds between 1 to 1000 with the default being 0
+ * (no delay).
+ */
+ u32 fuzz_testing_interrupt_delay;
+ u32 fuzz_testing_message_delay;
+#endif /* CONFIG_HYPERV_TESTING */
};
static inline bool is_hvsock_channel(const struct vmbus_channel *c)
@@ -1166,6 +1181,12 @@ struct hv_device {
struct vmbus_channel *channel;
struct kset *channels_kset;
+
+#ifdef CONFIG_HYPERV_TESTING
+ /* place holder to keep track of the dir for hv device in debugfs */
+ struct dentry *debug_dir;
+#endif /* CONFIG_HYPERV_TESTING */
+
};
--
2.17.1
^ permalink raw reply related
* [PATCH v2 2/3] drivers: hv: vmbus: add test attributes to debugfs
From: Branden Bonaby @ 2019-08-20 2:44 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
In-Reply-To: <cover.1566266609.git.brandonbonaby94@gmail.com>
Expose the test parameters as part of the debugfs channel attributes.
We will control the testing state via these attributes.
Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
---
Changes in v2:
- Move test attributes to debugfs.
- Wrap test code under #ifdef statements.
- Add new documentation file under Documentation/ABI/testing.
- Make commit message reflect the change from from sysfs to debugfs.
Documentation/ABI/testing/debugfs-hyperv | 21 +++
MAINTAINERS | 1 +
drivers/hv/vmbus_drv.c | 167 +++++++++++++++++++++++
3 files changed, 189 insertions(+)
create mode 100644 Documentation/ABI/testing/debugfs-hyperv
diff --git a/Documentation/ABI/testing/debugfs-hyperv b/Documentation/ABI/testing/debugfs-hyperv
new file mode 100644
index 000000000000..b25f751fafa8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hyperv
@@ -0,0 +1,21 @@
+What: /sys/kernel/debug/hyperv/<UUID>/fuzz_test_state
+Date: August 2019
+KernelVersion: 5.3
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing status of a vmbus device, whether its in an ON
+ state or a OFF state
+Users: Debugging tools
+
+What: /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_buffer_interrupt_delay
+Date: August 2019
+KernelVersion: 5.3
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing buffer delay value between 0 - 1000
+Users: Debugging tools
+
+What: /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_message_delay
+Date: August 2019
+KernelVersion: 5.3
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing message delay value between 0 - 1000
+Users: Debugging tools
diff --git a/MAINTAINERS b/MAINTAINERS
index e81e60bd7c26..120284a8185f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7460,6 +7460,7 @@ F: include/uapi/linux/hyperv.h
F: include/asm-generic/mshyperv.h
F: tools/hv/
F: Documentation/ABI/stable/sysfs-bus-vmbus
+F: Documentation/ABI/testing/debugfs-hyperv
HYPERBUS SUPPORT
M: Vignesh Raghavendra <vigneshr@ti.com>
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc35290..b6d023ae9664 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -919,6 +919,10 @@ static void vmbus_device_release(struct device *device)
struct hv_device *hv_dev = device_to_hv_device(device);
struct vmbus_channel *channel = hv_dev->channel;
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_rm_dev_dir(hv_dev);
+#endif /* CONFIG_HYPERV_TESTING */
+
mutex_lock(&vmbus_connection.channel_mutex);
hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
@@ -1727,6 +1731,9 @@ int vmbus_device_register(struct hv_device *child_device_obj)
pr_err("Unable to register primary channeln");
goto err_kset_unregister;
}
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_add_dev_dir(child_device_obj);
+#endif /* CONFIG_HYPERV_TESTING */
return 0;
@@ -2086,6 +2093,159 @@ static void hv_crash_handler(struct pt_regs *regs)
hyperv_cleanup();
};
+#ifdef CONFIG_HYPERV_TESTING
+
+struct dentry *hv_root;
+
+static int hv_debugfs_delay_get(void *data, u64 *val)
+{
+ *val = *(u32 *)data;
+ return 0;
+}
+
+static int hv_debugfs_delay_set(void *data, u64 val)
+{
+ if (val >= 1 && val <= 1000)
+ *(u32 *)data = val;
+ /*Best to not use else statement here since we want
+ * the delay to remain the same if val > 1000
+ */
+ else if (val <= 0)
+ *(u32 *)data = 0;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(hv_debugfs_delay_fops, hv_debugfs_delay_get,
+ hv_debugfs_delay_set, "%llu\n");
+
+/* Setup delay files to store test values */
+int hv_debug_delay_files(struct hv_device *dev, struct dentry *root)
+{
+ struct vmbus_channel *channel = dev->channel;
+ char *buffer = "fuzz_test_buffer_interrupt_delay";
+ char *message = "fuzz_test_message_delay";
+ int *buffer_val = &channel->fuzz_testing_interrupt_delay;
+ int *message_val = &channel->fuzz_testing_message_delay;
+ struct dentry *buffer_file, *message_file;
+
+ buffer_file = debugfs_create_file(buffer, 0644, root,
+ buffer_val,
+ &hv_debugfs_delay_fops);
+ if (IS_ERR(buffer_file)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", buffer);
+ return PTR_ERR(buffer_file);
+ }
+
+ message_file = debugfs_create_file(message, 0644, root,
+ message_val,
+ &hv_debugfs_delay_fops);
+ if (IS_ERR(message_file)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", message);
+ return PTR_ERR(message_file);
+ }
+
+ return 0;
+}
+
+/* Setup test state value for vmbus device */
+int hv_debug_set_test_state(struct hv_device *dev, struct dentry *root)
+{
+ struct vmbus_channel *channel = dev->channel;
+ bool *state = &channel->fuzz_testing_state;
+ char *status = "fuzz_test_state";
+ struct dentry *test_state;
+
+ test_state = debugfs_create_bool(status, 0644, root, state);
+ if (IS_ERR(test_state)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", status);
+ return PTR_ERR(test_state);
+ }
+
+ return 0;
+}
+
+/* Bind hv device to a dentry for debugfs */
+void hv_debug_set_dir_dentry(struct hv_device *dev, struct dentry *root)
+{
+ if (hv_root)
+ dev->debug_dir = root;
+}
+
+/* Create all test dentry's and names for fuzz testing */
+int hv_debug_add_dev_dir(struct hv_device *dev)
+{
+ const char *device = dev_name(&dev->device);
+ char *delay_name = "delay";
+ struct dentry *delay, *dev_root;
+ int ret;
+
+ if (!IS_ERR(hv_root)) {
+ dev_root = debugfs_create_dir(device, hv_root);
+ if (IS_ERR_OR_NULL(dev_root)) {
+ pr_debug("debugfs_hyperv: %s/%s/ not created\n",
+ TESTING, device);
+ return PTR_ERR(dev_root);
+ }
+
+ hv_debug_set_test_state(dev, dev_root);
+ hv_debug_set_dir_dentry(dev, dev_root);
+ delay = debugfs_create_dir(delay_name, dev_root);
+
+ if (IS_ERR(delay)) {
+ pr_debug("debugfs_hyperv: %s/%s/%s/ not created\n",
+ TESTING, device, delay_name);
+ return PTR_ERR(delay);
+ }
+ ret = hv_debug_delay_files(dev, delay);
+
+ return ret;
+ }
+ pr_debug("debugfs_hyperv: %s/ not in root debugfs path\n", TESTING);
+ return PTR_ERR(hv_root);
+}
+
+/* Remove dentry associated with released hv device */
+void hv_debug_rm_dev_dir(struct hv_device *dev)
+{
+ if (!IS_ERR(hv_root))
+ debugfs_remove_recursive(dev->debug_dir);
+}
+
+/* Remove all dentrys associated with vmbus testing */
+void hv_debug_rm_all_dir(void)
+{
+ debugfs_remove_recursive(hv_root);
+}
+
+/* Delay buffer/message reads on a vmbus channel */
+void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type)
+{
+ struct vmbus_channel *test_channel = channel->primary_channel ?
+ channel->primary_channel :
+ channel;
+ bool state = test_channel->fuzz_testing_state;
+
+ if (state) {
+ if (delay_type == 0)
+ udelay(test_channel->fuzz_testing_interrupt_delay);
+ else
+ udelay(test_channel->fuzz_testing_message_delay);
+ }
+}
+
+/* Initialize top dentry for vmbus testing */
+int hv_debug_init(void)
+{
+ hv_root = debugfs_create_dir(TESTING, NULL);
+ if (IS_ERR(hv_root)) {
+ pr_debug("debugfs_hyperv: %s/ not created\n", TESTING);
+ return PTR_ERR(hv_root);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_HYPERV_TESTING */
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -2108,6 +2268,9 @@ static int __init hv_acpi_init(void)
ret = -ETIMEDOUT;
goto cleanup;
}
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_init();
+#endif /* CONFIG_HYPERV_TESTING */
ret = vmbus_bus_init();
if (ret)
@@ -2140,6 +2303,10 @@ static void __exit vmbus_exit(void)
tasklet_kill(&hv_cpu->msg_dpc);
}
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_rm_all_dir();
+#endif /* CONFIG_HYPERV_TESTING */
+
vmbus_free_channels();
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
--
2.17.1
^ permalink raw reply related
* [PATCH v2 3/3] tools: hv: add vmbus testing tool
From: Branden Bonaby @ 2019-08-20 2:45 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
In-Reply-To: <cover.1566266609.git.brandonbonaby94@gmail.com>
This is a userspace tool to drive the testing. Currently it supports
introducing user specified delay in the host to guest communication
path on a per-channel basis.
Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
---
Changes in v2:
- Move testing location to new location in debugfs.
tools/hv/vmbus_testing | 334 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 334 insertions(+)
create mode 100644 tools/hv/vmbus_testing
diff --git a/tools/hv/vmbus_testing b/tools/hv/vmbus_testing
new file mode 100644
index 000000000000..f615009b7393
--- /dev/null
+++ b/tools/hv/vmbus_testing
@@ -0,0 +1,334 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+#Program to allow users to fuzz test Hyper-V drivers
+#by interfacing with Hyper-V debugfs directories
+#author: Branden Bonaby
+
+import os
+import cmd
+import argparse
+from collections import defaultdict
+from argparse import RawDescriptionHelpFormatter
+
+#debugfs paths for vmbus must exist (same as in lsvmbus)
+debugfs_sys_path = '/sys/kernel/debug/hyperv'
+if not os.path.isdir(debugfs_sys_path):
+ print("{} doesn't exist/check permissions".format(debugfs_sys_path))
+ exit(-1)
+#Do not change unless, you change the debugfs attributes
+#in "/sys/kernel/debug/hyperv/<UUID>/". All fuzz testing
+#attributes will start with "fuzz_test".
+pathlen = len(debugfs_sys_path)
+fuzz_state_location = "fuzz_test_state"
+fuzz_states = {0 : "Disable", 1 : "Enable"}
+fuzz_methods ={1 : "Delay_testing"}
+fuzz_delay_types = {1 : "fuzz_test_buffer_interrupt_delay", 2 :"fuzz_test_message_delay"}
+
+def parse_args():
+ parser = argparse.ArgumentParser(description = "vmbus_testing "\
+ "[-s] [0|1] [-q] [-p] <debugfs-path>\n""vmbus_testing [-s]"\
+ " [0|1] [-q][-p] <debugfs-path> delay [-d] [val][val] [-E|-D]\n"
+ "vmbus_testing [-q] disable-all\n"
+ "vmbus_testing [-q] view [-v|-V]\n"\
+ "vmbus_testing --version",
+ epilog = "Current testing options {}".format(fuzz_methods),
+ prog = 'vmbus_testing',
+ formatter_class = RawDescriptionHelpFormatter)
+ subparsers = parser.add_subparsers(dest="action")
+ parser.add_argument('--version', action='version',\
+ version = '%(prog)s 1.0')
+ parser.add_argument("-q","--quiet",action = "store_true",\
+ help = "silence none important test messages")
+ parser.add_argument("-s","--state",metavar = "", type = int,\
+ choices = range(0,2),\
+ help = "Turn testing ON or OFF for a single device."\
+ " The value (1) will turn testing ON. The value"\
+ " of (0) will turn testing OFF with the default set"\
+ " to (0).")
+ parser.add_argument("-p","--path", metavar = "",\
+ help = "Refers to the debugfs path to a vmbus device."
+ " If the path is not a valid path to a vmbus device,"\
+ " the program will exit. The path must be the"\
+ " absolute path; use the lsvmbus command to find"\
+ " the path.")
+ parser_delay = subparsers.add_parser("delay",\
+ help = "Delay buffer/message reads in microseconds.",
+ description = "vmbus_testing -s [0|1] [-q] -p "\
+ "<debugfs-path> delay -d "\
+ "[buffer-delay-value] [message-delay-value]\n"
+ "vmbus_testing [-q] delay [buffer-delay-value] "\
+ "[message-delay-value] -E\n"
+ "vmbus_testing [-q] delay [buffer-delay-value] "\
+ "[message-delay-value] -D",
+ formatter_class = RawDescriptionHelpFormatter)
+ delay_group = parser_delay.add_mutually_exclusive_group()
+ delay_group.add_argument("-E","--en-all-delay", action = "store_true",\
+ help = "Enable Buffer/Message Delay testing on ALL"\
+ " devices. Use -d option with this to set the values"\
+ " for both the buffer delay and the message delay. No"\
+ " value can be (0) or less than (-1). If testing is"\
+ " disabled on a device prior to running this command,"\
+ " testing will be enabled on the device as a result"\
+ " of this command.")
+ delay_group.add_argument("-D","--dis-all-delay", action="store_true",\
+ help = "Disable Buffer/Message delay testing on ALL"\
+ " devices. A value equal to (-1) will keep the"\
+ " current delay value, and a value equal to (0) will"\
+ " remove delay testing for the specfied delay column."\
+ " only values (-1) and (0) will be accepted but at"\
+ " least one value must be a (0) or a (-1).")
+ parser_delay.add_argument("-d","--delay-time", metavar="", nargs=2,\
+ type = check_range, default =[0,0], required = (True),\
+ help = "Buffer/message delay time. A value of (0) will"\
+ "disable delay testing on the specified delay column,"\
+ " while a value of (-1) will ignore the specified"\
+ " delay column. The default values are [0] & [0]."\
+ " The first column represents the buffer delay value"\
+ " and the second represents the message delay value."\
+ " Value constraints: -1 <= value <= 1000.")
+ parser_dis_all = subparsers.add_parser("disable-all",\
+ help = "Disable ALL testing on all vmbus devices.",
+ description = "vmbus_testing disable-all",
+ formatter_class = RawDescriptionHelpFormatter)
+ parser_view = subparsers.add_parser("view",\
+ help = "View testing on vmbus devices.",
+ description = "vmbus_testing view -V\n"
+ "vmbus_testing -p <debugfs-path> view -v",
+ formatter_class = RawDescriptionHelpFormatter)
+ view_group = parser_view.add_mutually_exclusive_group()
+ view_group.add_argument("-V","--view-all-states",action = "store_true",\
+ help = "View the test status for all vmbus devices.")
+ view_group.add_argument("-v","--view-single-device",\
+ action = "store_true",help = "View test values for a"\
+ " single vmbus device.")
+
+ return parser.parse_args()
+
+#value checking for range checking input in parser
+def check_range(arg1):
+ try:
+ val = int(arg1)
+ except ValueError as err:
+ raise argparse.ArgumentTypeError(str(err))
+ if val < -1 or val > 1000:
+ message = ("\n\nError, Expected -1 <= value <= 1000, got value"\
+ " {}\n").format(val)
+ raise argparse.ArgumentTypeError(message)
+ return val
+
+def main():
+ try:
+ dev_list = []
+ for dir in os.listdir(debugfs_sys_path):\
+ dev_list.append(os.path.join(debugfs_sys_path,dir))
+ #key value, pairs
+ #key = debugfs device path
+ #value = list of fuzz testing attributes.
+ device_and_files = defaultdict(list)
+ for dev in dev_list:
+ path = os.path.join(dev,"delay")
+ for f in os.listdir(path):
+ if (f.startswith("fuzz_test")):
+ device_and_files[path].append(f)
+
+ device_and_files.default_factory = None
+ args = parse_args()
+ path = args.path
+ state = args.state
+ quiet = args.quiet
+ if (not quiet):
+ print("*** Use lsvmbus to get vmbus device type"\
+ " information.*** ")
+ if (state is not None and validate_args_path(path,dev_list)):
+ if (state is not get_test_state(path)):
+ change_test_state(path,quiet)
+ state = get_test_state(path)
+ if (state is 0 and path is not None):
+ disable_testing_single_device(path,0,quiet)
+ return
+ #Use subparsers as the key for different fuzz testing methods
+ if (args.action == "delay"):
+ delay = args.delay_time
+ if (validate_delay_values(args,delay)):
+ delay_test_all_devices(dev_list,delay,quiet)
+ elif (validate_args_path(path,dev_list)):
+ if(get_test_state(path) is 1):
+ delay_test_store(path,delay,quiet)
+ return
+ print("device testing OFF, use -s 1 to turn ON")
+ elif (args.action == "disable-all"):
+ disable_all_testing(dev_list,quiet)
+ elif (args.action == "view"):
+ if (args.view_all_states):
+ all_devices_test_status(dev_list)
+ elif (args.view_single_device):
+ if (validate_args_path(path,dev_list)):
+ device_test_values(device_and_files,\
+ path)
+ return
+ print("Error,(check path) usage: -p"\
+ " <debugfs device path> view -v")
+ except AttributeError:
+ print("check usage, 1 or more elements not provided")
+ exit(-1)
+
+#Validate delay values to make sure they are acceptable to
+#to either enable all delays on a device or disable all
+#delays on a device
+def validate_delay_values(args,delay):
+ if (args.en_all_delay):
+ for i in delay:
+ if (i < -1 or i == 0):
+ print("\nError, Values must be"\
+ " equal to -1 or be > 0, use"\
+ " -d option")
+ exit(-1)
+ return True
+ elif (args.dis_all_delay):
+ for i in delay:
+ if (i < -1 or i > 0):
+ print("\nError, at least 1 value"
+ " is not a (0) or a (-1)")
+ exit(-1)
+ return True
+ else:
+ return False
+
+
+#Validate argument path
+def validate_args_path(path,dev_list):
+ if (path in dev_list):
+ return True
+ else:
+ return False
+
+#display Testing status of single device
+def device_test_values(device_and_files,path):
+
+ delay_path = os.path.join(path,'delay')
+ for test in device_and_files.get(delay_path):
+ print("{}".format(test), end = '')
+ print((" value = {}")\
+ .format(read_test_files(os.path.join(delay_path,test))))
+
+#display Testing state of devices
+def all_devices_test_status(dev_list):
+ for device in dev_list:
+ if (get_test_state(device) is 1):
+ print("Testing = ON for: {}".format(device.split("/")[5]))
+ else:
+ print("Testing = OFF for: {}".format(device.split("/")[5]))
+
+#read the vmbus device files, path must be absolute path before calling
+def read_test_files(path):
+ try:
+ with open(path,"r") as f:
+ state = f.readline().strip()
+ if (state == 'N'):
+ state = 0
+ elif (state == 'Y'):
+ state = 1
+ return int(state)
+
+ except IOError as e:
+ errno, strerror = e.args
+ print("I/O error({0}): {1} on file {2}"\
+ .format(errno,strerror,path))
+ exit(-1)
+ except ValueError:
+ print ("Element to int conversion error in: \n{}".format(path))
+ exit(-1)
+
+#writing to vmbus device files, path must be absolute path before calling
+def write_test_files(path,value):
+ try:
+ with open(path,"w") as f:
+ f.write("{}".format(value))
+ except IOError as e:
+ errno, strerror = e.args
+ print("I/O error({0}): {1} on file {2}"\
+ .format(errno,strerror,path))
+ exit(-1)
+
+#change testing state of device
+def change_test_state(device,quiet):
+ state_path = os.path.join(device,fuzz_state_location)
+ if (get_test_state(device) is 0):
+ write_test_files(state_path,1)
+ if (not quiet):
+ print("Testing = ON for device: {}"\
+ .format(state_path.split("/")[5]))
+ else:
+ write_test_files(state_path,0)
+ if (not quiet):
+ print("Testing = OFF for device: {}"\
+ .format(state_path.split("/")[5]))
+
+#get testing state of device
+def get_test_state(device):
+ #state == 1 - test = ON
+ #state == 0 - test = OFF
+ return read_test_files(os.path.join(device,fuzz_state_location))
+
+#Enter 1 - 1000 microseconds, into a single device using the
+#fuzz_test_buffer_interrupt_delay and fuzz_test_message_delay
+#debugfs attributes
+def delay_test_store(device,delay_length,quiet):
+
+ try:
+ # delay[0]- buffer delay, delay[1]- message delay
+ buff_test = os.path.join(os.path.sep,device,'delay',
+ fuzz_delay_types.get(1))
+ mess_test = os.path.join(os.path.sep,device,'delay',
+ fuzz_delay_types.get(2))
+
+ if (delay_length[0] >= 0):
+ write_test_files(buff_test,delay_length[0])
+ if (delay_length[1] >= 0):
+ write_test_files(mess_test,delay_length[1])
+ if (not quiet):
+ print("Buffer delay testing = {} for: {}"\
+ .format(read_test_files(buff_test),\
+ buff_test.split("/")[5]))
+ print("Message delay testing = {} for: {}"\
+ .format(read_test_files(mess_test),\
+ mess_test.split("/")[5]))
+ except IOError as e:
+ errno, strerror = e.args
+ print("I/O error({0}): {1} on files {2}{3}"\
+ .format(errno,strerror,buff_test,mess_test))
+ exit(-1)
+
+#enabling/disabling delay testing on all devices
+def delay_test_all_devices(dev_list,delay,quiet):
+
+ for device in (dev_list):
+ if (get_test_state(device) is 0):
+ change_test_state(device,quiet)
+ delay_test_store(device,delay,quiet)
+
+#disabling testing on single device
+def disable_testing_single_device(device,test_type,quiet):
+
+ #test_type represents corresponding key
+ #delay method in delay_methods dict.
+ #special type 0 , used to disable all
+ #testing on SINGLE device.
+
+ if (test_type is 1 or test_type is 0):
+ #disable list [buffer,message]
+ disable_delay = [0,0]
+ if (get_test_state(device) is 1):
+ change_test_state(device,quiet)
+ delay_test_store(device,disable_delay,quiet)
+
+#disabling testing on ALL devices
+def disable_all_testing(dev_list,quiet):
+
+ #delay disable list [buffer,message]
+ for device in dev_list:
+ disable_testing_single_device(device,0,quiet)
+
+if __name__ == "__main__":
+ main()
--
2.17.1
^ permalink raw reply related
* [PATCH] HID: hyperv: Use in-place iterator API in the channel callback
From: Dexuan Cui @ 2019-08-20 2:56 UTC (permalink / raw)
To: jikos@kernel.org, benjamin.tissoires@redhat.com,
linux-input@vger.kernel.org, linux-hyperv@vger.kernel.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley
Cc: gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org,
Dexuan Cui
Simplify the ring buffer handling with the in-place API.
Also avoid the dynamic allocation and the memory leak in the channel
callback function.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
Hi Jiri, Benjamin, can this patch go through Sasha's hyperv tree:
https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
This is a purely Hyper-V specific change.
drivers/hid/hid-hyperv.c | 56 +++++++++---------------------------------------
1 file changed, 10 insertions(+), 46 deletions(-)
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 7795831..f363163 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -314,60 +314,24 @@ static void mousevsc_on_receive(struct hv_device *device,
static void mousevsc_on_channel_callback(void *context)
{
- const int packet_size = 0x100;
- int ret;
struct hv_device *device = context;
- u32 bytes_recvd;
- u64 req_id;
struct vmpacket_descriptor *desc;
- unsigned char *buffer;
- int bufferlen = packet_size;
-
- buffer = kmalloc(bufferlen, GFP_ATOMIC);
- if (!buffer)
- return;
-
- do {
- ret = vmbus_recvpacket_raw(device->channel, buffer,
- bufferlen, &bytes_recvd, &req_id);
-
- switch (ret) {
- case 0:
- if (bytes_recvd <= 0) {
- kfree(buffer);
- return;
- }
- desc = (struct vmpacket_descriptor *)buffer;
-
- switch (desc->type) {
- case VM_PKT_COMP:
- break;
-
- case VM_PKT_DATA_INBAND:
- mousevsc_on_receive(device, desc);
- break;
-
- default:
- pr_err("unhandled packet type %d, tid %llx len %d\n",
- desc->type, req_id, bytes_recvd);
- break;
- }
+ foreach_vmbus_pkt(desc, device->channel) {
+ switch (desc->type) {
+ case VM_PKT_COMP:
break;
- case -ENOBUFS:
- kfree(buffer);
- /* Handle large packet */
- bufferlen = bytes_recvd;
- buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
-
- if (!buffer)
- return;
+ case VM_PKT_DATA_INBAND:
+ mousevsc_on_receive(device, desc);
+ break;
+ default:
+ pr_err("Unhandled packet type %d, tid %llx len %d\n",
+ desc->type, desc->trans_id, desc->len8 * 8);
break;
}
- } while (1);
-
+ }
}
static int mousevsc_connect_to_vsp(struct hv_device *device)
--
1.8.3.1
^ permalink raw reply related
* [PATCH] Input: hyperv-keyboard: Use in-place iterator API in the channel callback
From: Dexuan Cui @ 2019-08-20 3:01 UTC (permalink / raw)
To: dmitry.torokhov@gmail.com, linux-input@vger.kernel.org,
linux-hyperv@vger.kernel.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan, Michael Kelley
Cc: gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org,
Dexuan Cui
Simplify the ring buffer handling with the in-place API.
Also avoid the dynamic allocation and the memory leak in the channel
callback function.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
Hi Dmitry, can this patch go through Sasha's hyperv tree:
https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
This is a purely Hyper-V specific change.
drivers/input/serio/hyperv-keyboard.c | 35 ++++++-----------------------------
1 file changed, 6 insertions(+), 29 deletions(-)
diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c
index 88ae7c2..e486a8a 100644
--- a/drivers/input/serio/hyperv-keyboard.c
+++ b/drivers/input/serio/hyperv-keyboard.c
@@ -237,40 +237,17 @@ static void hv_kbd_handle_received_packet(struct hv_device *hv_dev,
static void hv_kbd_on_channel_callback(void *context)
{
+ struct vmpacket_descriptor *desc;
struct hv_device *hv_dev = context;
- void *buffer;
- int bufferlen = 0x100; /* Start with sensible size */
u32 bytes_recvd;
u64 req_id;
- int error;
- buffer = kmalloc(bufferlen, GFP_ATOMIC);
- if (!buffer)
- return;
-
- while (1) {
- error = vmbus_recvpacket_raw(hv_dev->channel, buffer, bufferlen,
- &bytes_recvd, &req_id);
- switch (error) {
- case 0:
- if (bytes_recvd == 0) {
- kfree(buffer);
- return;
- }
-
- hv_kbd_handle_received_packet(hv_dev, buffer,
- bytes_recvd, req_id);
- break;
+ foreach_vmbus_pkt(desc, hv_dev->channel) {
+ bytes_recvd = desc->len8 * 8;
+ req_id = desc->trans_id;
- case -ENOBUFS:
- kfree(buffer);
- /* Handle large packet */
- bufferlen = bytes_recvd;
- buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
- if (!buffer)
- return;
- break;
- }
+ hv_kbd_handle_received_packet(hv_dev, desc, bytes_recvd,
+ req_id);
}
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH] Drivers: hv: vmbus: Remove the unused "tsc_page" from struct hv_context
From: Dexuan Cui @ 2019-08-20 3:06 UTC (permalink / raw)
To: linux-hyperv@vger.kernel.org, Stephen Hemminger, Sasha Levin,
sashal@kernel.org, Haiyang Zhang, KY Srinivasan, Michael Kelley
Cc: gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org,
Dexuan Cui
This field is no longer used after the commit
63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code")
, because it's replaced by the global variable
"struct ms_hyperv_tsc_page *tsc_pg;" (now, the variable is in
drivers/clocksource/hyperv_timer.c).
Fixes: 63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code")
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
drivers/hv/hyperv_vmbus.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 362e70e..fb16a62 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -146,8 +146,6 @@ struct hv_context {
*/
u64 guestid;
- void *tsc_page;
-
struct hv_per_cpu_context __percpu *cpu_context;
/*
--
1.8.3.1
^ permalink raw reply related
* [PATCH] vsock: Fix a lockdep warning in __vsock_release()
From: Dexuan Cui @ 2019-08-20 3:14 UTC (permalink / raw)
To: jhansen@vmware.com, davem@davemloft.net, stefanha@redhat.com,
sgarzare@redhat.com, netdev@vger.kernel.org, Stephen Hemminger,
Sasha Levin, sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
Michael Kelley
Cc: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
linux-kernel@vger.kernel.org, Dexuan Cui
Lockdep is unhappy if two locks from the same class are held.
Fix the below warning by making __vsock_release() non-recursive -- this
patch is kind of ugly, but it looks to me there is not a better way to
deal with the problem here.
============================================
WARNING: possible recursive locking detected
5.2.0+ #6 Not tainted
--------------------------------------------
a.out/1020 is trying to acquire lock:
0000000074731a98 (sk_lock-AF_VSOCK){+.+.}, at: hvs_release+0x10/0x120 [hv_sock]
but task is already holding lock:
0000000014ff8397 (sk_lock-AF_VSOCK){+.+.}, at: __vsock_release+0x2e/0xf0 [vsock]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(sk_lock-AF_VSOCK);
lock(sk_lock-AF_VSOCK);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by a.out/1020:
#0: 00000000f8bceaa7 (&sb->s_type->i_mutex_key#10){+.+.}, at: __sock_release+0x2d/0xa0
#1: 0000000014ff8397 (sk_lock-AF_VSOCK){+.+.}, at: __vsock_release+0x2e/0xf0 [vsock]
stack backtrace:
CPU: 7 PID: 1020 Comm: a.out Not tainted 5.2.0+ #6
Call Trace:
dump_stack+0x67/0x90
__lock_acquire.cold.66+0x14d/0x1f8
lock_acquire+0xb5/0x1c0
lock_sock_nested+0x6d/0x90
hvs_release+0x10/0x120 [hv_sock]
__vsock_release+0x24/0xf0 [vsock]
__vsock_release+0xa0/0xf0 [vsock]
vsock_release+0x12/0x30 [vsock]
__sock_release+0x37/0xa0
sock_close+0x14/0x20
__fput+0xc1/0x250
task_work_run+0x98/0xc0
do_exit+0x3dd/0xc60
do_group_exit+0x47/0xc0
get_signal+0x169/0xc60
do_signal+0x30/0x710
exit_to_usermode_loop+0x50/0xa0
do_syscall_64+0x1fc/0x220
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
net/vmw_vsock/af_vsock.c | 33 ++++++++++++++++++++++++++++++++-
net/vmw_vsock/hyperv_transport.c | 2 +-
2 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index ab47bf3..420f605 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -638,6 +638,37 @@ struct sock *__vsock_create(struct net *net,
}
EXPORT_SYMBOL_GPL(__vsock_create);
+static void __vsock_release2(struct sock *sk)
+{
+ if (sk) {
+ struct sk_buff *skb;
+ struct vsock_sock *vsk;
+
+ vsk = vsock_sk(sk);
+
+ /* The release call is supposed to use lock_sock_nested()
+ * rather than lock_sock(), if a lock should be acquired.
+ */
+ transport->release(vsk);
+
+ /* Use the nested version to avoid the warning
+ * "possible recursive locking detected".
+ */
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)))
+ kfree_skb(skb);
+
+ /* This sk can not be a listener, so it's unnecessary
+ * to call vsock_dequeue_accept().
+ */
+ release_sock(sk);
+ sock_put(sk);
+ }
+}
+
static void __vsock_release(struct sock *sk)
{
if (sk) {
@@ -659,7 +690,7 @@ static void __vsock_release(struct sock *sk)
/* Clean up any sockets that never were accepted. */
while ((pending = vsock_dequeue_accept(sk)) != NULL) {
- __vsock_release(pending);
+ __vsock_release2(pending);
sock_put(pending);
}
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 9d864eb..4b126b2 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -559,7 +559,7 @@ static void hvs_release(struct vsock_sock *vsk)
struct sock *sk = sk_vsock(vsk);
bool remove_sock;
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
remove_sock = hvs_close_lock_held(vsk);
release_sock(sk);
if (remove_sock)
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH] Input: hyperv-keyboard: Use in-place iterator API in the channel callback
From: dmitry.torokhov @ 2019-08-20 3:18 UTC (permalink / raw)
To: Dexuan Cui
Cc: linux-input@vger.kernel.org, linux-hyperv@vger.kernel.org,
Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
KY Srinivasan, Michael Kelley, gregkh@linuxfoundation.org,
linux-kernel@vger.kernel.org
In-Reply-To: <1566270066-27546-1-git-send-email-decui@microsoft.com>
On Tue, Aug 20, 2019 at 03:01:23AM +0000, Dexuan Cui wrote:
> Simplify the ring buffer handling with the in-place API.
>
> Also avoid the dynamic allocation and the memory leak in the channel
> callback function.
>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>
> Hi Dmitry, can this patch go through Sasha's hyperv tree:
> https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
>
> This is a purely Hyper-V specific change.
Sure, no problem.
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
>
> drivers/input/serio/hyperv-keyboard.c | 35 ++++++-----------------------------
> 1 file changed, 6 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c
> index 88ae7c2..e486a8a 100644
> --- a/drivers/input/serio/hyperv-keyboard.c
> +++ b/drivers/input/serio/hyperv-keyboard.c
> @@ -237,40 +237,17 @@ static void hv_kbd_handle_received_packet(struct hv_device *hv_dev,
>
> static void hv_kbd_on_channel_callback(void *context)
> {
> + struct vmpacket_descriptor *desc;
> struct hv_device *hv_dev = context;
> - void *buffer;
> - int bufferlen = 0x100; /* Start with sensible size */
> u32 bytes_recvd;
> u64 req_id;
> - int error;
>
> - buffer = kmalloc(bufferlen, GFP_ATOMIC);
> - if (!buffer)
> - return;
> -
> - while (1) {
> - error = vmbus_recvpacket_raw(hv_dev->channel, buffer, bufferlen,
> - &bytes_recvd, &req_id);
> - switch (error) {
> - case 0:
> - if (bytes_recvd == 0) {
> - kfree(buffer);
> - return;
> - }
> -
> - hv_kbd_handle_received_packet(hv_dev, buffer,
> - bytes_recvd, req_id);
> - break;
> + foreach_vmbus_pkt(desc, hv_dev->channel) {
> + bytes_recvd = desc->len8 * 8;
> + req_id = desc->trans_id;
>
> - case -ENOBUFS:
> - kfree(buffer);
> - /* Handle large packet */
> - bufferlen = bytes_recvd;
> - buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
> - if (!buffer)
> - return;
> - break;
> - }
> + hv_kbd_handle_received_packet(hv_dev, desc, bytes_recvd,
> + req_id);
> }
> }
>
> --
> 1.8.3.1
>
--
Dmitry
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox