LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] soc: fsl: qe: fix static checker warning
From: Maxim Kochetkov @ 2021-08-11  7:10 UTC (permalink / raw)
  To: Qiang Zhao, Li Yang, Maxim Kochetkov, Saravana Kannan
  Cc: linuxppc-dev, linux-arm-kernel, Dan Carpenter, linux-kernel

The patch be7ecbd240b2: "soc: fsl: qe: convert QE interrupt
controller to platform_device" from Aug 3, 2021, leads to the
following static checker warning:

	drivers/soc/fsl/qe/qe_ic.c:438 qe_ic_init()
	warn: unsigned 'qe_ic->virq_low' is never less than zero.

In old variant irq_of_parse_and_map() returns zero if failed so
unsigned int for virq_high/virq_low was ok.
In new variant platform_get_irq() returns negative error codes
if failed so we need to use int for virq_high/virq_low.

Also simplify high_handler checking and remove the curly braces
to make checkpatch happy.

Fixes: be7ecbd240b2 ("soc: fsl: qe: convert QE interrupt controller to platform_device")
Signed-off-by: Maxim Kochetkov <fido_max@inbox.ru>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
---
 drivers/soc/fsl/qe/qe_ic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/soc/fsl/qe/qe_ic.c
index e710d554425d..bff34ee2150a 100644
--- a/drivers/soc/fsl/qe/qe_ic.c
+++ b/drivers/soc/fsl/qe/qe_ic.c
@@ -54,8 +54,8 @@ struct qe_ic {
 	struct irq_chip hc_irq;
 
 	/* VIRQ numbers of QE high/low irqs */
-	unsigned int virq_high;
-	unsigned int virq_low;
+	int virq_high;
+	int virq_low;
 };
 
 /*
@@ -435,9 +435,8 @@ static int qe_ic_init(struct platform_device *pdev)
 	qe_ic->virq_high = platform_get_irq(pdev, 0);
 	qe_ic->virq_low = platform_get_irq(pdev, 1);
 
-	if (qe_ic->virq_low < 0) {
+	if (qe_ic->virq_low < 0)
 		return -ENODEV;
-	}
 
 	if (qe_ic->virq_high != qe_ic->virq_low) {
 		low_handler = qe_ic_cascade_low;
@@ -459,7 +458,7 @@ static int qe_ic_init(struct platform_device *pdev)
 	irq_set_handler_data(qe_ic->virq_low, qe_ic);
 	irq_set_chained_handler(qe_ic->virq_low, low_handler);
 
-	if (qe_ic->virq_high && qe_ic->virq_high != qe_ic->virq_low) {
+	if (high_handler) {
 		irq_set_handler_data(qe_ic->virq_high, qe_ic);
 		irq_set_chained_handler(qe_ic->virq_high, high_handler);
 	}
-- 
2.31.1


^ permalink raw reply related

* [PATCH v3 4/8] PCI: replace pci_dev::driver usage that gets the driver name
From: Uwe Kleine-König @ 2021-08-11  8:06 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: linux-pci, Alexander Duyck, oss-drivers, Paul Mackerras,
	Herbert Xu, Rafał Miłecki, Jesse Brandeburg,
	Ido Schimmel, Jakub Kicinski, Yisen Zhuang, Vadym Kochan,
	Michael Buesch, Jiri Pirko, Salil Mehta, Greg Kroah-Hartman,
	linux-wireless, linux-kernel, Taras Chornyi, Zhou Wang,
	linux-crypto, kernel, netdev, Simon Horman, Oliver O'Halloran,
	linuxppc-dev, David S. Miller
In-Reply-To: <20210811080637.2596434-1-u.kleine-koenig@pengutronix.de>

struct pci_dev::driver holds (apart from a constant offset) the same
data as struct pci_dev::dev->driver. With the goal to remove struct
pci_dev::driver to get rid of data duplication replace getting the
driver name by dev_driver_string() which implicitly makes use of struct
pci_dev::dev->driver.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/powerpc/include/asm/ppc-pci.h                   | 7 ++++++-
 drivers/bcma/host_pci.c                              | 7 ++++---
 drivers/crypto/hisilicon/qm.c                        | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c   | 2 +-
 drivers/net/ethernet/marvell/prestera/prestera_pci.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/pci.c            | 2 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 +-
 drivers/ssb/pcihost_wrapper.c                        | 8 +++++---
 8 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 2b9edbf6e929..3f89dda91e89 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -57,7 +57,12 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev);
 
 static inline const char *eeh_driver_name(struct pci_dev *pdev)
 {
-	return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
+	const char *drvstr = pdev ? dev_driver_string(&pdev->dev) : "";
+
+	if (*drvstr == '\0')
+		return "<null>";
+
+	return drvstr;
 }
 
 #endif /* CONFIG_EEH */
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 69c10a7b7c61..dc2ffa686964 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -175,9 +175,10 @@ static int bcma_host_pci_probe(struct pci_dev *dev,
 	if (err)
 		goto err_kfree_bus;
 
-	name = dev_name(&dev->dev);
-	if (dev->driver && dev->driver->name)
-		name = dev->driver->name;
+	name = dev_driver_string(&dev->dev);
+	if (*name == '\0')
+		name = dev_name(&dev->dev);
+
 	err = pci_request_regions(dev, name);
 	if (err)
 		goto err_pci_disable;
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 1d67f94a1d56..ddf8d0ea6a75 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3003,7 +3003,7 @@ static int qm_alloc_uacce(struct hisi_qm *qm)
 	};
 	int ret;
 
-	ret = strscpy(interface.name, pdev->driver->name,
+	ret = strscpy(interface.name, dev_driver_string(&pdev->dev),
 		      sizeof(interface.name));
 	if (ret < 0)
 		return -ENAMETOOLONG;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 82061ab6930f..e7a2c8d4558f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -593,7 +593,7 @@ static void hns3_get_drvinfo(struct net_device *netdev,
 		return;
 	}
 
-	strncpy(drvinfo->driver, h->pdev->driver->name,
+	strncpy(drvinfo->driver, dev_driver_string(&h->pdev->dev),
 		sizeof(drvinfo->driver));
 	drvinfo->driver[sizeof(drvinfo->driver) - 1] = '\0';
 
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index a250d394da38..a8f007f6dad2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -720,7 +720,7 @@ static int prestera_fw_load(struct prestera_fw *fw)
 static int prestera_pci_probe(struct pci_dev *pdev,
 			      const struct pci_device_id *id)
 {
-	const char *driver_name = pdev->driver->name;
+	const char *driver_name = dev_driver_string(&pdev->dev);
 	struct prestera_fw *fw;
 	int err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 13b0259f7ea6..8f306364f7bf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1876,7 +1876,7 @@ static void mlxsw_pci_cmd_fini(struct mlxsw_pci *mlxsw_pci)
 
 static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	const char *driver_name = pdev->driver->name;
+	const char *driver_name = dev_driver_string(&pdev->dev);
 	struct mlxsw_pci *mlxsw_pci;
 	int err;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 1b482446536d..1c0a9edcd005 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -202,7 +202,7 @@ nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
 {
 	char nsp_version[ETHTOOL_FWVERS_LEN] = {};
 
-	strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev), sizeof(drvinfo->driver));
 	nfp_net_get_nspinfo(app, nsp_version);
 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
 		 "%s %s %s %s", vnic_version, nsp_version,
diff --git a/drivers/ssb/pcihost_wrapper.c b/drivers/ssb/pcihost_wrapper.c
index 410215c16920..4938ed5cfae5 100644
--- a/drivers/ssb/pcihost_wrapper.c
+++ b/drivers/ssb/pcihost_wrapper.c
@@ -78,9 +78,11 @@ static int ssb_pcihost_probe(struct pci_dev *dev,
 	err = pci_enable_device(dev);
 	if (err)
 		goto err_kfree_ssb;
-	name = dev_name(&dev->dev);
-	if (dev->driver && dev->driver->name)
-		name = dev->driver->name;
+
+	name = dev_driver_string(&dev->dev);
+	if (*name == '\0')
+		name = dev_name(&dev->dev);
+
 	err = pci_request_regions(dev, name);
 	if (err)
 		goto err_pci_disable;
-- 
2.30.2


^ permalink raw reply related

* [PATCH v3 7/8] PCI: Replace pci_dev::driver usage by pci_dev::dev.driver
From: Uwe Kleine-König @ 2021-08-11  8:06 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Mark Rutland, Peter Zijlstra, H. Peter Anvin,
	Oliver O'Halloran, Jiri Olsa, Boris Ostrovsky,
	Stefano Stabellini, Mathias Nyman, x86, Alexander Shishkin,
	Ingo Molnar, linux-pci, xen-devel, Andrew Donnellan,
	Arnd Bergmann, Konrad Rzeszutek Wilk, Arnaldo Carvalho de Melo,
	Borislav Petkov, Bjorn Helgaas, Namhyung Kim, Thomas Gleixner,
	Juergen Gross, Greg Kroah-Hartman, linux-usb, linux-perf-users,
	kernel, Frederic Barrat, Paul Mackerras, linuxppc-dev
In-Reply-To: <20210811080637.2596434-1-u.kleine-koenig@pengutronix.de>

struct pci_dev::driver contains (apart from a constant offset) the same
data as struct pci_dev::dev->driver. Replace all remaining users of the
former pointer by the latter to allow removing the former.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 arch/powerpc/kernel/eeh_driver.c | 10 ++++-----
 arch/x86/events/intel/uncore.c   |  2 +-
 arch/x86/kernel/probe_roms.c     |  2 +-
 drivers/misc/cxl/guest.c         | 24 ++++++++++++---------
 drivers/misc/cxl/pci.c           | 30 ++++++++++++++++----------
 drivers/pci/iov.c                | 25 ++++++++++++++--------
 drivers/pci/pci-driver.c         | 25 +++++++++++-----------
 drivers/pci/pci.c                |  4 ++--
 drivers/pci/pcie/err.c           | 36 ++++++++++++++++++--------------
 drivers/pci/xen-pcifront.c       |  4 ++--
 drivers/usb/host/xhci-pci.c      |  2 +-
 11 files changed, 93 insertions(+), 71 deletions(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3eff6a4888e7..350dab18e137 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -104,13 +104,13 @@ static bool eeh_edev_actionable(struct eeh_dev *edev)
  */
 static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
 {
-	if (!pdev || !pdev->driver)
+	if (!pdev || !pdev->dev.driver)
 		return NULL;
 
-	if (!try_module_get(pdev->driver->driver.owner))
+	if (!try_module_get(pdev->dev.driver->owner))
 		return NULL;
 
-	return pdev->driver;
+	return to_pci_driver(pdev->dev.driver);
 }
 
 /**
@@ -122,10 +122,10 @@ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
  */
 static inline void eeh_pcid_put(struct pci_dev *pdev)
 {
-	if (!pdev || !pdev->driver)
+	if (!pdev || !pdev->dev.driver)
 		return;
 
-	module_put(pdev->driver->driver.owner);
+	module_put(pdev->dev.driver->owner);
 }
 
 /**
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9bf4dbbc26e2..2df95a483692 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1176,7 +1176,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	 * PCI slot and func to indicate the uncore box.
 	 */
 	if (id->driver_data & ~0xffff) {
-		struct pci_driver *pci_drv = pdev->driver;
+		struct pci_driver *pci_drv = to_pci_driver(pdev->dev.driver);
 
 		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
 		if (pmu == NULL)
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 9e1def3744f2..36e84d904260 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -80,7 +80,7 @@ static struct resource video_rom_resource = {
  */
 static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device)
 {
-	struct pci_driver *drv = pdev->driver;
+	struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
 	const struct pci_device_id *id;
 
 	if (pdev->vendor == vendor && pdev->device == device)
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 186308f1f8eb..d997c9c3ebb5 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -25,28 +25,32 @@ static void pci_error_handlers(struct cxl_afu *afu,
 		return;
 
 	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-		if (!afu_dev->driver)
+		struct pci_driver *afu_drv;
+
+		if (!afu_dev->dev.driver)
 			continue;
 
+		afu_drv = to_pci_driver(afu_dev->dev.driver);
+
 		switch (bus_error_event) {
 		case CXL_ERROR_DETECTED_EVENT:
 			afu_dev->error_state = state;
 
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->error_detected)
-				afu_dev->driver->err_handler->error_detected(afu_dev, state);
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->error_detected)
+				afu_drv->err_handler->error_detected(afu_dev, state);
 		break;
 		case CXL_SLOT_RESET_EVENT:
 			afu_dev->error_state = state;
 
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->slot_reset)
-				afu_dev->driver->err_handler->slot_reset(afu_dev);
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->slot_reset)
+				afu_drv->err_handler->slot_reset(afu_dev);
 		break;
 		case CXL_RESUME_EVENT:
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->resume)
-				afu_dev->driver->err_handler->resume(afu_dev);
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->resume)
+				afu_drv->err_handler->resume(afu_dev);
 		break;
 		}
 	}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 2ba899f5659f..7e7545d01e27 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1805,14 +1805,16 @@ static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
 		return result;
 
 	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-		if (!afu_dev->driver)
+		struct pci_driver *afu_drv;
+		if (!afu_dev->dev.driver)
 			continue;
 
+		afu_drv = to_pci_driver(afu_dev->dev.driver);
+
 		afu_dev->error_state = state;
 
-		if (afu_dev->driver->err_handler)
-			afu_result = afu_dev->driver->err_handler->error_detected(afu_dev,
-										  state);
+		if (afu_drv->err_handler)
+			afu_result = afu_drv->err_handler->error_detected(afu_dev, state);
 		/* Disconnect trumps all, NONE trumps NEED_RESET */
 		if (afu_result == PCI_ERS_RESULT_DISCONNECT)
 			result = PCI_ERS_RESULT_DISCONNECT;
@@ -2003,6 +2005,8 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
 			continue;
 
 		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+			struct pci_driver *afu_drv;
+
 			/* Reset the device context.
 			 * TODO: make this less disruptive
 			 */
@@ -2028,12 +2032,14 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
 			 * shouldn't start new work until we call
 			 * their resume function.
 			 */
-			if (!afu_dev->driver)
+			if (!afu_dev->dev.driver)
 				continue;
 
-			if (afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->slot_reset)
-				afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev);
+			afu_drv = to_pci_driver(afu_dev->dev.driver);
+
+			if (afu_drv->err_handler &&
+			    afu_drv->err_handler->slot_reset)
+				afu_result = afu_drv->err_handler->slot_reset(afu_dev);
 
 			if (afu_result == PCI_ERS_RESULT_DISCONNECT)
 				result = PCI_ERS_RESULT_DISCONNECT;
@@ -2074,9 +2080,11 @@ static void cxl_pci_resume(struct pci_dev *pdev)
 			continue;
 
 		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-			if (afu_dev->driver && afu_dev->driver->err_handler &&
-			    afu_dev->driver->err_handler->resume)
-				afu_dev->driver->err_handler->resume(afu_dev);
+			struct pci_driver *afu_drv;
+			if (afu_dev->dev.driver &&
+			    (afu_drv = to_pci_driver(afu_dev->dev.driver))->err_handler &&
+			    afu_drv->err_handler->resume)
+				afu_drv->err_handler->resume(afu_dev);
 		}
 	}
 	spin_unlock(&adapter->afu_list_lock);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index dafdc652fcd0..f94660927544 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -164,13 +164,15 @@ static ssize_t sriov_vf_total_msix_show(struct device *dev,
 					char *buf)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_driver *pdrv;
 	u32 vf_total_msix = 0;
 
 	device_lock(dev);
-	if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
+	pdrv = to_pci_driver(dev->driver);
+	if (!pdrv || !pdrv->sriov_get_vf_total_msix)
 		goto unlock;
 
-	vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
+	vf_total_msix = pdrv->sriov_get_vf_total_msix(pdev);
 unlock:
 	device_unlock(dev);
 	return sysfs_emit(buf, "%u\n", vf_total_msix);
@@ -183,6 +185,7 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev,
 {
 	struct pci_dev *vf_dev = to_pci_dev(dev);
 	struct pci_dev *pdev = pci_physfn(vf_dev);
+	struct pci_driver *pdrv;
 	int val, ret;
 
 	ret = kstrtoint(buf, 0, &val);
@@ -193,13 +196,14 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev,
 		return -EINVAL;
 
 	device_lock(&pdev->dev);
-	if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
+	pdrv = to_pci_driver(pdev->dev.driver);
+	if (!pdrv || !pdrv->sriov_set_msix_vec_count) {
 		ret = -EOPNOTSUPP;
 		goto err_pdev;
 	}
 
 	device_lock(&vf_dev->dev);
-	if (vf_dev->driver) {
+	if (vf_dev->dev.driver) {
 		/*
 		 * A driver is already attached to this VF and has configured
 		 * itself based on the current MSI-X vector count. Changing
@@ -209,7 +213,7 @@ static ssize_t sriov_vf_msix_count_store(struct device *dev,
 		goto err_dev;
 	}
 
-	ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);
+	ret = pdrv->sriov_set_msix_vec_count(vf_dev, val);
 
 err_dev:
 	device_unlock(&vf_dev->dev);
@@ -376,6 +380,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 				  const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_driver *pdrv;
 	int ret;
 	u16 num_vfs;
 
@@ -392,14 +397,16 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 		goto exit;
 
 	/* is PF driver loaded */
-	if (!pdev->driver) {
+	if (!pdev->dev.driver) {
 		pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n");
 		ret = -ENOENT;
 		goto exit;
 	}
 
+	pdrv = to_pci_driver(pdev->dev.driver);
+
 	/* is PF driver loaded w/callback */
-	if (!pdev->driver->sriov_configure) {
+	if (!pdrv->sriov_configure) {
 		pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n");
 		ret = -ENOENT;
 		goto exit;
@@ -407,7 +414,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 
 	if (num_vfs == 0) {
 		/* disable VFs */
-		ret = pdev->driver->sriov_configure(pdev, 0);
+		ret = pdrv->sriov_configure(pdev, 0);
 		goto exit;
 	}
 
@@ -419,7 +426,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
 		goto exit;
 	}
 
-	ret = pdev->driver->sriov_configure(pdev, num_vfs);
+	ret = pdrv->sriov_configure(pdev, num_vfs);
 	if (ret < 0)
 		goto exit;
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7dff574bb2fa..e4bab9d0d6d8 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -443,7 +443,7 @@ static int pci_device_probe(struct device *dev)
 static int pci_device_remove(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 
 	if (drv->remove) {
 		pm_runtime_get_sync(dev);
@@ -480,7 +480,7 @@ static int pci_device_remove(struct device *dev)
 static void pci_device_shutdown(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 
 	pm_runtime_resume(dev);
 
@@ -576,7 +576,7 @@ static int pci_pm_reenable_device(struct pci_dev *pci_dev)
 static int pci_legacy_suspend(struct device *dev, pm_message_t state)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(dev->driver);
 
 	if (drv && drv->suspend) {
 		pci_power_t prev = pci_dev->current_state;
@@ -617,7 +617,7 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state)
 static int pci_legacy_resume(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 
 	pci_fixup_device(pci_fixup_resume, pci_dev);
 
@@ -636,7 +636,7 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev)
 
 static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev)
 {
-	struct pci_driver *drv = pci_dev->driver;
+	struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
 	bool ret = drv && (drv->suspend || drv->resume);
 
 	/*
@@ -1229,11 +1229,11 @@ static int pci_pm_runtime_suspend(struct device *dev)
 	int error;
 
 	/*
-	 * If pci_dev->driver is not set (unbound), we leave the device in D0,
+	 * If pci_dev->dev.driver is not set (unbound), we leave the device in D0,
 	 * but it may go to D3cold when the bridge above it runtime suspends.
 	 * Save its config space in case that happens.
 	 */
-	if (!pci_dev->driver) {
+	if (!pci_dev->dev.driver) {
 		pci_save_state(pci_dev);
 		return 0;
 	}
@@ -1290,7 +1290,7 @@ static int pci_pm_runtime_resume(struct device *dev)
 	 */
 	pci_restore_standard_config(pci_dev);
 
-	if (!pci_dev->driver)
+	if (!dev->driver)
 		return 0;
 
 	pci_fixup_device(pci_fixup_resume_early, pci_dev);
@@ -1309,14 +1309,13 @@ static int pci_pm_runtime_resume(struct device *dev)
 
 static int pci_pm_runtime_idle(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
 	/*
-	 * If pci_dev->driver is not set (unbound), the device should
+	 * If dev->driver is not set (unbound), the device should
 	 * always remain in D0 regardless of the runtime PM status
 	 */
-	if (!pci_dev->driver)
+	if (!dev->driver)
 		return 0;
 
 	if (!pm)
@@ -1423,8 +1422,8 @@ static struct pci_driver pci_compat_driver = {
  */
 struct pci_driver *pci_dev_driver(const struct pci_dev *dev)
 {
-	if (dev->driver)
-		return dev->driver;
+	if (dev->dev.driver)
+		return to_pci_driver(dev->dev.driver);
 	else {
 		int i;
 		for (i = 0; i <= PCI_ROM_RESOURCE; i++)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aacf575c15cf..e21218a16098 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5070,7 +5070,7 @@ EXPORT_SYMBOL_GPL(pci_dev_unlock);
 static void pci_dev_save_and_disable(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
-			dev->driver ? dev->driver->err_handler : NULL;
+			dev->dev.driver ? to_pci_driver(dev->dev.driver)->err_handler : NULL;
 
 	/*
 	 * dev->driver->err_handler->reset_prepare() is protected against
@@ -5101,7 +5101,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 static void pci_dev_restore(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
-			dev->driver ? dev->driver->err_handler : NULL;
+			dev->dev.driver ? to_pci_driver(dev->dev.driver)->err_handler : NULL;
 
 	pci_restore_state(dev);
 
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index b576aa890c76..b314b54f7821 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -49,14 +49,15 @@ static int report_error_detected(struct pci_dev *dev,
 				 pci_channel_state_t state,
 				 enum pci_ers_result *result)
 {
+	struct pci_driver *pdrv;
 	pci_ers_result_t vote;
 	const struct pci_error_handlers *err_handler;
 
 	device_lock(&dev->dev);
 	if (!pci_dev_set_io_state(dev, state) ||
-		!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->error_detected) {
+		!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->error_detected) {
 		/*
 		 * If any device in the subtree does not have an error_detected
 		 * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
@@ -70,7 +71,7 @@ static int report_error_detected(struct pci_dev *dev,
 			vote = PCI_ERS_RESULT_NONE;
 		}
 	} else {
-		err_handler = dev->driver->err_handler;
+		err_handler = pdrv->err_handler;
 		vote = err_handler->error_detected(dev, state);
 	}
 	pci_uevent_ers(dev, vote);
@@ -92,15 +93,16 @@ static int report_normal_detected(struct pci_dev *dev, void *data)
 static int report_mmio_enabled(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote, *result = data;
+	struct pci_driver *pdrv;
 	const struct pci_error_handlers *err_handler;
 
 	device_lock(&dev->dev);
-	if (!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->mmio_enabled)
+	if (!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->mmio_enabled)
 		goto out;
 
-	err_handler = dev->driver->err_handler;
+	err_handler = pdrv->err_handler;
 	vote = err_handler->mmio_enabled(dev);
 	*result = merge_result(*result, vote);
 out:
@@ -112,14 +114,15 @@ static int report_slot_reset(struct pci_dev *dev, void *data)
 {
 	pci_ers_result_t vote, *result = data;
 	const struct pci_error_handlers *err_handler;
+	struct pci_driver *pdrv;
 
 	device_lock(&dev->dev);
-	if (!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->slot_reset)
+	if (!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->slot_reset)
 		goto out;
 
-	err_handler = dev->driver->err_handler;
+	err_handler = pdrv->err_handler;
 	vote = err_handler->slot_reset(dev);
 	*result = merge_result(*result, vote);
 out:
@@ -130,15 +133,16 @@ static int report_slot_reset(struct pci_dev *dev, void *data)
 static int report_resume(struct pci_dev *dev, void *data)
 {
 	const struct pci_error_handlers *err_handler;
+	struct pci_driver *pdrv;
 
 	device_lock(&dev->dev);
 	if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
-		!dev->driver ||
-		!dev->driver->err_handler ||
-		!dev->driver->err_handler->resume)
+		!dev->dev.driver ||
+		!(pdrv = to_pci_driver(dev->dev.driver))->err_handler ||
+		!pdrv->err_handler->resume)
 		goto out;
 
-	err_handler = dev->driver->err_handler;
+	err_handler = pdrv->err_handler;
 	err_handler->resume(dev);
 out:
 	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3c648e6cb8f8..2b2126dbc39b 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -598,12 +598,12 @@ static pci_ers_result_t pcifront_common_process(int cmd,
 	result = PCI_ERS_RESULT_NONE;
 
 	pcidev = pci_get_domain_bus_and_slot(domain, bus, devfn);
-	if (!pcidev || !pcidev->driver) {
+	if (!pcidev || !pcidev->dev.driver) {
 		dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
 		pci_dev_put(pcidev);
 		return result;
 	}
-	pdrv = pcidev->driver;
+	pdrv = to_pci_driver(pcidev->dev.driver);
 
 	if (pdrv->err_handler && pdrv->err_handler->error_detected) {
 		pci_dbg(pcidev, "trying to call AER service\n");
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 18c2bbddf080..3ead2127967f 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -103,7 +103,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	struct xhci_driver_data         *driver_data;
 	const struct pci_device_id      *id;
 
-	id = pci_match_id(pdev->driver->id_table, pdev);
+	id = pci_match_id(to_pci_driver(pdev->dev.driver)->id_table, pdev);
 
 	if (id && id->driver_data) {
 		driver_data = (struct xhci_driver_data *)id->driver_data;
-- 
2.30.2


^ permalink raw reply related

* [powerpc:merge] BUILD SUCCESS 6f3d46e4a4a9ca09288540d39c0a31c9802d2602
From: kernel test robot @ 2021-08-11  8:22 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: 6f3d46e4a4a9ca09288540d39c0a31c9802d2602  Automatic merge of 'fixes' into merge (2021-08-10 22:50)

elapsed time: 1044m

configs tested: 59
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
x86_64                            allnoconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
s390                             allmodconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20210810
x86_64               randconfig-a006-20210810
x86_64               randconfig-a003-20210810
x86_64               randconfig-a005-20210810
x86_64               randconfig-a002-20210810
x86_64               randconfig-a001-20210810
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
um                           x86_64_defconfig
um                             i386_defconfig
x86_64                           allyesconfig
x86_64                    rhel-8.3-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-c001-20210810

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v1 16/55] powerpc/64s: Implement PMU override command line option
From: Athira Rajeev @ 2021-08-11 10:54 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, kvm-ppc
In-Reply-To: <1628246339.762vtrxskz.astroid@bobo.none>



> On 06-Aug-2021, at 4:12 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> Excerpts from Athira Rajeev's message of August 6, 2021 7:28 pm:
>> 
>> 
>>> On 26-Jul-2021, at 9:19 AM, Nicholas Piggin <npiggin@gmail.com> wrote:
>>> 
>>> It can be useful in simulators (with very constrained environments)
>>> to allow some PMCs to run from boot so they can be sampled directly
>>> by a test harness, rather than having to run perf.
>>> 
>>> A previous change freezes counters at boot by default, so provide
>>> a boot time option to un-freeze (plus a bit more flexibility).
>>> 
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> .../admin-guide/kernel-parameters.txt         |  7 ++++
>>> arch/powerpc/perf/core-book3s.c               | 35 +++++++++++++++++++
>>> 2 files changed, 42 insertions(+)
>>> 
>>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>>> index bdb22006f713..96b7d0ebaa40 100644
>>> --- a/Documentation/admin-guide/kernel-parameters.txt
>>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>>> @@ -4089,6 +4089,13 @@
>>> 			Override pmtimer IOPort with a hex value.
>>> 			e.g. pmtmr=0x508
>>> 
>>> +	pmu=		[PPC] Manually enable the PMU.
>>> +			Enable the PMU by setting MMCR0 to 0 (clear FC bit).
>>> +			This option is implemented for Book3S processors.
>>> +			If a number is given, then MMCR1 is set to that number,
>>> +			otherwise (e.g., 'pmu=on'), it is left 0. The perf
>>> +			subsystem is disabled if this option is used.
>>> +
>>> 	pm_debug_messages	[SUSPEND,KNL]
>>> 			Enable suspend/resume debug messages during boot up.
>>> 
>>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>>> index 65795cadb475..e7cef4fe17d7 100644
>>> --- a/arch/powerpc/perf/core-book3s.c
>>> +++ b/arch/powerpc/perf/core-book3s.c
>>> @@ -2428,8 +2428,24 @@ int register_power_pmu(struct power_pmu *pmu)
>>> }
>>> 
>>> #ifdef CONFIG_PPC64
>>> +static bool pmu_override = false;
>>> +static unsigned long pmu_override_val;
>>> +static void do_pmu_override(void *data)
>>> +{
>>> +	ppc_set_pmu_inuse(1);
>>> +	if (pmu_override_val)
>>> +		mtspr(SPRN_MMCR1, pmu_override_val);
>>> +	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
>> 
>> Hi Nick
>> 
>> Here, we are not doing any validity check for the value used to set MMCR1. 
>> For advanced users, the option to pass value for MMCR1 is fine. But other cases, it could result in
>> invalid event getting used. Do we need to restrict this boot time option for only PMC5/6 ?
> 
> Depends what would be useful. We don't have to prevent the admin shooting 
> themselves in the foot with options like this, but if we can make it 
> safer without making it less useful then that's always a good option.

Hi Nick

I checked back on my comment and it will be difficult to add/maintain validity check for MMCR1 considering different platforms that we have.
We can go ahead with present approach you have in this patch. Changes looks good to me.

Reviewed-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>

> 
> Thanks,
> Nick


^ permalink raw reply

* Re: [PATCH 07/11] treewide: Replace the use of mem_encrypt_active() with prot_guest_has()
From: Kirill A. Shutemov @ 2021-08-11 12:19 UTC (permalink / raw)
  To: Tom Lendacky
  Cc: Kuppuswamy, Sathyanarayanan, linux-efi, Brijesh Singh, kvm,
	Peter Zijlstra, Dave Hansen, dri-devel, platform-driver-x86,
	Will Deacon, linux-s390, Andi Kleen, Baoquan He, Joerg Roedel,
	x86, amd-gfx, David Airlie, Ingo Molnar,
	linux-graphics-maintainer, Dave Young, Tianyu Lan,
	Thomas Zimmermann, Maarten Lankhorst, Maxime Ripard,
	Borislav Petkov, Andy Lutomirski, Thomas Gleixner, kexec,
	linux-kernel, iommu, Daniel Vetter, linux-fsdevel, linuxppc-dev
In-Reply-To: <4b885c52-f70a-147e-86bd-c71a8f4ef564@amd.com>

On Tue, Aug 10, 2021 at 02:48:54PM -0500, Tom Lendacky wrote:
> On 8/10/21 1:45 PM, Kuppuswamy, Sathyanarayanan wrote:
> > 
> > 
> > On 7/27/21 3:26 PM, Tom Lendacky wrote:
> >> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> >> index de01903c3735..cafed6456d45 100644
> >> --- a/arch/x86/kernel/head64.c
> >> +++ b/arch/x86/kernel/head64.c
> >> @@ -19,7 +19,7 @@
> >>   #include <linux/start_kernel.h>
> >>   #include <linux/io.h>
> >>   #include <linux/memblock.h>
> >> -#include <linux/mem_encrypt.h>
> >> +#include <linux/protected_guest.h>
> >>   #include <linux/pgtable.h>
> >>     #include <asm/processor.h>
> >> @@ -285,7 +285,7 @@ unsigned long __head __startup_64(unsigned long
> >> physaddr,
> >>        * there is no need to zero it after changing the memory encryption
> >>        * attribute.
> >>        */
> >> -    if (mem_encrypt_active()) {
> >> +    if (prot_guest_has(PATTR_MEM_ENCRYPT)) {
> >>           vaddr = (unsigned long)__start_bss_decrypted;
> >>           vaddr_end = (unsigned long)__end_bss_decrypted;
> > 
> > 
> > Since this change is specific to AMD, can you replace PATTR_MEM_ENCRYPT with
> > prot_guest_has(PATTR_SME) || prot_guest_has(PATTR_SEV). It is not used in
> > TDX.
> 
> This is a direct replacement for now.

With current implementation of prot_guest_has() for TDX it breaks boot for
me.

Looking at code agains, now I *think* the reason is accessing a global
variable from __startup_64() inside TDX version of prot_guest_has().

__startup_64() is special. If you access any global variable you need to
use fixup_pointer(). See comment before __startup_64().

I'm not sure how you get away with accessing sme_me_mask directly from
there. Any clues? Maybe just a luck and complier generates code just right
for your case, I donno.

A separate point is that TDX version of prot_guest_has() relies on
cpu_feature_enabled() which is not ready at this point.

I think __bss_decrypted fixup has to be done if sme_me_mask is non-zero.
Or just do it uncoditionally because it's NOP for sme_me_mask == 0.

> I think the change you're requesting
> should be done as part of the TDX support patches so it's clear why it is
> being changed.
> 
> But, wouldn't TDX still need to do something with this shared/unencrypted
> area, though? Or since it is shared, there's actually nothing you need to
> do (the bss decrpyted section exists even if CONFIG_AMD_MEM_ENCRYPT is not
> configured)?

AFAICS, only kvmclock uses __bss_decrypted. We don't enable kvmclock in
TDX at the moment. It may change in the future.

-- 
 Kirill A. Shutemov

^ permalink raw reply

* Re: [PATCH v1 16/55] powerpc/64s: Implement PMU override command line option
From: Madhavan Srinivasan @ 2021-08-11 12:46 UTC (permalink / raw)
  To: Nicholas Piggin, kvm-ppc; +Cc: linuxppc-dev
In-Reply-To: <1628245966.h9u2e2m21l.astroid@bobo.none>


On 8/6/21 4:08 PM, Nicholas Piggin wrote:
> Excerpts from Madhavan Srinivasan's message of August 6, 2021 5:33 pm:
>> On 7/26/21 9:19 AM, Nicholas Piggin wrote:
>>> It can be useful in simulators (with very constrained environments)
>>> to allow some PMCs to run from boot so they can be sampled directly
>>> by a test harness, rather than having to run perf.
>>>
>>> A previous change freezes counters at boot by default, so provide
>>> a boot time option to un-freeze (plus a bit more flexibility).
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>>    .../admin-guide/kernel-parameters.txt         |  7 ++++
>>>    arch/powerpc/perf/core-book3s.c               | 35 +++++++++++++++++++
>>>    2 files changed, 42 insertions(+)
>>>
>>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>>> index bdb22006f713..96b7d0ebaa40 100644
>>> --- a/Documentation/admin-guide/kernel-parameters.txt
>>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>>> @@ -4089,6 +4089,13 @@
>>>    			Override pmtimer IOPort with a hex value.
>>>    			e.g. pmtmr=0x508
>>>
>>> +	pmu=		[PPC] Manually enable the PMU.
>>
>> This is bit confusing, IIUC, we are manually disabling the perf
>> registration
>> with this option and not pmu.
>> If this option is used, we will unfreeze the
>> MMCR0_FC (only in the HV_mode) and not register perf subsystem.
> With the previous patch, this option un-freezes the PMU
> (and disables perf).
>
>> Since this option is valid only for HV_mode, canwe call it
>> kvm_disable_perf or kvm_dis_perf.
> It's only disabled for guests because it would require a bit
> of logic to set pmcregs_in_use when we register our lppaca. We could
> add that if needed, but the intention is for use on BML, not exactly
> KVM specific.
>
> I can add HV restriction to the help text. And we could rename the
> option. free_run_pmu= or something?


yeah having it a different name will be better. I am not sure
whether we should say "[PPC] Manually enable the PMU",
because IIUC, if we dont provide this option PMU and perf is
anyway enabled, but rest looks good to me.

Maddy


>
> Thanks,
> Nick
>
>>
>>> +			Enable the PMU by setting MMCR0 to 0 (clear FC bit).
>>> +			This option is implemented for Book3S processors.
>>> +			If a number is given, then MMCR1 is set to that number,
>>> +			otherwise (e.g., 'pmu=on'), it is left 0. The perf
>>> +			subsystem is disabled if this option is used.
>>> +
>>>    	pm_debug_messages	[SUSPEND,KNL]
>>>    			Enable suspend/resume debug messages during boot up.
>>>
>>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>>> index 65795cadb475..e7cef4fe17d7 100644
>>> --- a/arch/powerpc/perf/core-book3s.c
>>> +++ b/arch/powerpc/perf/core-book3s.c
>>> @@ -2428,8 +2428,24 @@ int register_power_pmu(struct power_pmu *pmu)
>>>    }
>>>
>>>    #ifdef CONFIG_PPC64
>>> +static bool pmu_override = false;
>>> +static unsigned long pmu_override_val;
>>> +static void do_pmu_override(void *data)
>>> +{
>>> +	ppc_set_pmu_inuse(1);
>>> +	if (pmu_override_val)
>>> +		mtspr(SPRN_MMCR1, pmu_override_val);
>>> +	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
>>> +}
>>> +
>>>    static int __init init_ppc64_pmu(void)
>>>    {
>>> +	if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
>>> +		printk(KERN_WARNING "perf: disabling perf due to pmu= command line option.\n");
>>> +		on_each_cpu(do_pmu_override, NULL, 1);
>>> +		return 0;
>>> +	}
>>> +
>>>    	/* run through all the pmu drivers one at a time */
>>>    	if (!init_power5_pmu())
>>>    		return 0;
>>> @@ -2451,4 +2467,23 @@ static int __init init_ppc64_pmu(void)
>>>    		return init_generic_compat_pmu();
>>>    }
>>>    early_initcall(init_ppc64_pmu);
>>> +
>>> +static int __init pmu_setup(char *str)
>>> +{
>>> +	unsigned long val;
>>> +
>>> +	if (!early_cpu_has_feature(CPU_FTR_HVMODE))
>>> +		return 0;
>>> +
>>> +	pmu_override = true;
>>> +
>>> +	if (kstrtoul(str, 0, &val))
>>> +		val = 0;
>>> +
>>> +	pmu_override_val = val;
>>> +
>>> +	return 1;
>>> +}
>>> +__setup("pmu=", pmu_setup);
>>> +
>>>    #endif

^ permalink raw reply

* [PATCH v3 0/8] PCI: Drop duplicated tracking of a pci_dev's bound driver
From: Uwe Kleine-König @ 2021-08-11  8:06 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Giovanni Cabiddu, Mark Rutland, Sathya Prakash,
	Alexander Shishkin, Alexander Duyck, x86, qat-linux, oss-drivers,
	Oliver O'Halloran, H. Peter Anvin, Jiri Olsa, Thomas Gleixner,
	Marco Chiappero, Stefano Stabellini, Arnd Bergmann, linux-scsi,
	Rafał Miłecki, Jesse Brandeburg, Peter Zijlstra,
	Ingo Molnar, linux-pci, linux-wireless, Jakub Kicinski,
	Yisen Zhuang, Suganath Prabu Subramani, Fiona Trahe,
	Andrew Donnellan, Uwe Kleine-König, Konrad Rzeszutek Wilk,
	Ido Schimmel, Simon Horman, linuxppc-dev,
	Arnaldo Carvalho de Melo, Jack Xu, Borislav Petkov,
	Michael Buesch, Jiri Pirko, Bjorn Helgaas, Namhyung Kim,
	Boris Ostrovsky, Andy Shevchenko, Herbert Xu, Juergen Gross,
	Salil Mehta, Sreekanth Reddy, xen-devel, Vadym Kochan,
	MPT-FusionLinux.pdl, Greg Kroah-Hartman, linux-usb,
	Wojciech Ziemba, linux-kernel, Mathias Nyman, Zhou Wang,
	linux-crypto, kernel, netdev, Frederic Barrat, Paul Mackerras,
	Tomaszx Kowalik, Taras Chornyi, David S. Miller, linux-perf-users

From: Uwe Kleine-König <uwe@kleine-koenig.org>

Hello,

Today the following is always true for a struct pci_dev *pdev:

	pdev->driver ==
		pdev->dev.driver ? to_pci_driver(pdev->dev.driver) : NULL

This series is about getting rid of struct pci_dev::driver. The first
three patches are unmodified compared to v2 (apart from an added
Reviewed-by tag) and are just minor cleanups.

Patch #4 replaces all usages of pci_dev::driver->name by
dev_driver_string().

Patch #5 simplifies struct mpt_pci_driver by dropping an unused
parameter from a function callback. The calculation of this parameter
made use of struct pci_dev::driver.

Patch #6 simplifies adf_enable_aer() and moves one assignment done in
that function to the initializer of the respective static data.

Patch #7 then modifies all remaining users of struct pci_dev::driver to
use to_pci_driver(pdev->dev.driver) instead and finally patch #8 gets
rid of the driver member.

Note this series is only build tested.

Theoretically patches #4 and #7 could be split by subsystem, there are
no dependencies, but I'd prefer that all patches go in together via the
pci tree to simplify the procedure. If you don't agree please speak up.

Best regards
Uwe

Uwe Kleine-König (8):
  PCI: Simplify pci_device_remove()
  PCI: Drop useless check from pci_device_probe()
  xen/pci: Drop some checks that are always true
  PCI: replace pci_dev::driver usage that gets the driver name
  scsi: message: fusion: Remove unused parameter of mpt_pci driver's
    probe()
  crypto: qat - simplify adf_enable_aer()
  PCI: Replace pci_dev::driver usage by pci_dev::dev.driver
  PCI: Drop duplicated tracking of a pci_dev's bound driver

 arch/powerpc/include/asm/ppc-pci.h            |  7 ++-
 arch/powerpc/kernel/eeh_driver.c              | 10 +--
 arch/x86/events/intel/uncore.c                |  2 +-
 arch/x86/kernel/probe_roms.c                  |  2 +-
 drivers/bcma/host_pci.c                       |  7 ++-
 drivers/crypto/hisilicon/qm.c                 |  2 +-
 drivers/crypto/qat/qat_4xxx/adf_drv.c         |  7 +--
 drivers/crypto/qat/qat_c3xxx/adf_drv.c        |  7 +--
 drivers/crypto/qat/qat_c62x/adf_drv.c         |  7 +--
 drivers/crypto/qat/qat_common/adf_aer.c       | 10 +--
 .../crypto/qat/qat_common/adf_common_drv.h    |  2 +-
 drivers/crypto/qat/qat_dh895xcc/adf_drv.c     |  7 +--
 drivers/message/fusion/mptbase.c              |  7 +--
 drivers/message/fusion/mptbase.h              |  2 +-
 drivers/message/fusion/mptctl.c               |  4 +-
 drivers/message/fusion/mptlan.c               |  2 +-
 drivers/misc/cxl/guest.c                      | 24 ++++---
 drivers/misc/cxl/pci.c                        | 30 +++++----
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    |  2 +-
 .../ethernet/marvell/prestera/prestera_pci.c  |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/pci.c     |  2 +-
 .../ethernet/netronome/nfp/nfp_net_ethtool.c  |  2 +-
 drivers/pci/iov.c                             | 25 +++++---
 drivers/pci/pci-driver.c                      | 45 ++++++-------
 drivers/pci/pci.c                             |  4 +-
 drivers/pci/pcie/err.c                        | 36 ++++++-----
 drivers/pci/xen-pcifront.c                    | 63 +++++++++----------
 drivers/ssb/pcihost_wrapper.c                 |  8 ++-
 drivers/usb/host/xhci-pci.c                   |  2 +-
 include/linux/pci.h                           |  1 -
 30 files changed, 164 insertions(+), 167 deletions(-)

base-commit: 2734d6c1b1a089fb593ef6a23d4b70903526fe0c
-- 
2.30.2


^ permalink raw reply

* Re: [PATCH v3 7/8] PCI: Replace pci_dev::driver usage by pci_dev::dev.driver
From: Boris Ostrovsky @ 2021-08-11 14:07 UTC (permalink / raw)
  To: Uwe Kleine-König, Bjorn Helgaas
  Cc: Mark Rutland, Peter Zijlstra, H. Peter Anvin,
	Oliver O'Halloran, Jiri Olsa, Stefano Stabellini,
	Mathias Nyman, x86, Alexander Shishkin, Ingo Molnar, linux-pci,
	xen-devel, Andrew Donnellan, Arnd Bergmann, Konrad Rzeszutek Wilk,
	Arnaldo Carvalho de Melo, Borislav Petkov, Bjorn Helgaas,
	Namhyung Kim, Thomas Gleixner, Juergen Gross, Greg Kroah-Hartman,
	linux-usb, linux-perf-users, kernel, Frederic Barrat,
	Paul Mackerras, linuxppc-dev
In-Reply-To: <20210811080637.2596434-8-u.kleine-koenig@pengutronix.de>


On 8/11/21 4:06 AM, Uwe Kleine-König wrote:
> struct pci_dev::driver contains (apart from a constant offset) the same
> data as struct pci_dev::dev->driver. Replace all remaining users of the
> former pointer by the latter to allow removing the former.
>
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>


Xen:


Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>



^ permalink raw reply

* Re: clang/ld.lld build fails with `can't create dynamic relocation R_PPC64_ADDR64 against local symbol in readonly segment`
From: Christophe Leroy @ 2021-08-11 14:10 UTC (permalink / raw)
  To: Paul Menzel, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras
  Cc: Derek Parker, Dmitrii Okunev, linuxppc-dev
In-Reply-To: <77a69755-5291-285c-45be-c1e42423fddc@molgen.mpg.de>



Le 10/08/2021 à 20:38, Paul Menzel a écrit :
> Dear Linux folks,
> 
> 
> Am 29.07.21 um 10:23 schrieb Paul Menzel:
> 
>> I just wanted to make you aware that building Linux for ppc64le with clang/lld.ld fails with [1]:
>>
>>      ld.lld: error: can't create dynamic relocation R_PPC64_ADDR64 against symbol: empty_zero_page 
>> in readonly segment; recompile object files with -fPIC or pass '-Wl,-z,notext' to allow text 
>> relocations in the output
>>      >>> defined in arch/powerpc/kernel/head_64.o
>>      >>> referenced by arch/powerpc/kernel/head_64.o:(___ksymtab+empty_zero_page+0x0)
>>
>> The patch below from one of the comments [2] fixes it.
>>
>> --- i/arch/powerpc/Makefile
>> +++ w/arch/powerpc/Makefile
>> @@ -122,7 +122,7 @@ cflags-$(CONFIG_STACKPROTECTOR)     += -mstack-protector-guard-reg=r2
>>   endif
>>
>>   LDFLAGS_vmlinux-y := -Bstatic
>> -LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
>> +LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie -z notext
>>   LDFLAGS_vmlinux        := $(LDFLAGS_vmlinux-y)
>>   LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn)
> 
> Any comments, if this is the right fix? Current Linux master branch still fails to build with 
> `LLVM=1` on Ubuntu 21.04 without this change.

Which kernel version are you building ?

Since https://github.com/linuxppc/linux/commit/45b30fafe528601f1a4449c9d68d8ebe7bbc39ad , 
empty_zero_page[] is in arch/powerpc/mm/mem.c not in arch/powerpc/kernel/head_64.o

Do you still have the issue with kernel 5.14 ?

Christophe

^ permalink raw reply

* Re: [PATCH 01/11] mm: Introduce a function to check for virtualization protection features
From: Kuppuswamy, Sathyanarayanan @ 2021-08-11 14:53 UTC (permalink / raw)
  To: Tom Lendacky, linux-kernel, x86, linuxppc-dev, linux-s390, iommu,
	kvm, linux-efi, platform-driver-x86, linux-graphics-maintainer,
	amd-gfx, dri-devel, kexec, linux-fsdevel
  Cc: Tianyu Lan, Brijesh Singh, Joerg Roedel, Andi Kleen,
	Borislav Petkov
In-Reply-To: <cbc875b1d2113225c2b44a2384d5b303d0453cf7.1627424774.git.thomas.lendacky@amd.com>



On 7/27/21 3:26 PM, Tom Lendacky wrote:
> diff --git a/include/linux/protected_guest.h b/include/linux/protected_guest.h
> new file mode 100644
> index 000000000000..f8ed7b72967b
> --- /dev/null
> +++ b/include/linux/protected_guest.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Protected Guest (and Host) Capability checks
> + *
> + * Copyright (C) 2021 Advanced Micro Devices, Inc.
> + *
> + * Author: Tom Lendacky<thomas.lendacky@amd.com>
> + */
> +
> +#ifndef _PROTECTED_GUEST_H
> +#define _PROTECTED_GUEST_H
> +
> +#ifndef __ASSEMBLY__

Can you include headers for bool type and false definition?

--- a/include/linux/protected_guest.h
+++ b/include/linux/protected_guest.h
@@ -12,6 +12,9 @@

  #ifndef __ASSEMBLY__

+#include <linux/types.h>
+#include <linux/stddef.h>

Otherwise, I see following errors in multi-config auto testing.

include/linux/protected_guest.h:40:15: error: unknown type name 'bool'
include/linux/protected_guest.h:40:63: error: 'false' undeclared (first use in this functi


> +
> +#define PATTR_MEM_ENCRYPT		0	/* Encrypted memory */
> +#define PATTR_HOST_MEM_ENCRYPT		1	/* Host encrypted memory */
> +#define PATTR_GUEST_MEM_ENCRYPT		2	/* Guest encrypted memory */
> +#define PATTR_GUEST_PROT_STATE		3	/* Guest encrypted state */
> +
> +#ifdef CONFIG_ARCH_HAS_PROTECTED_GUEST
> +
> +#include <asm/protected_guest.h>
> +
> +#else	/* !CONFIG_ARCH_HAS_PROTECTED_GUEST */
> +
> +static inline bool prot_guest_has(unsigned int attr) { return false; }
> +
> +#endif	/* CONFIG_ARCH_HAS_PROTECTED_GUEST */
> +
> +#endif	/* __ASSEMBLY__ */
> +
> +#endif	/* _PROTECTED_GUEST_H */

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer

^ permalink raw reply

* Re: [PATCH 01/11] mm: Introduce a function to check for virtualization protection features
From: Tom Lendacky @ 2021-08-11 15:39 UTC (permalink / raw)
  To: Kuppuswamy, Sathyanarayanan, linux-kernel, x86, linuxppc-dev,
	linux-s390, iommu, kvm, linux-efi, platform-driver-x86,
	linux-graphics-maintainer, amd-gfx, dri-devel, kexec,
	linux-fsdevel
  Cc: Tianyu Lan, Brijesh Singh, Joerg Roedel, Andi Kleen,
	Borislav Petkov
In-Reply-To: <805946e3-5138-3565-65eb-3cb8ac6f0b3e@linux.intel.com>

On 8/11/21 9:53 AM, Kuppuswamy, Sathyanarayanan wrote:
> On 7/27/21 3:26 PM, Tom Lendacky wrote:
>> diff --git a/include/linux/protected_guest.h
>> b/include/linux/protected_guest.h
>> new file mode 100644
>> index 000000000000..f8ed7b72967b
>> --- /dev/null
>> +++ b/include/linux/protected_guest.h
>> @@ -0,0 +1,32 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Protected Guest (and Host) Capability checks
>> + *
>> + * Copyright (C) 2021 Advanced Micro Devices, Inc.
>> + *
>> + * Author: Tom Lendacky<thomas.lendacky@amd.com>
>> + */
>> +
>> +#ifndef _PROTECTED_GUEST_H
>> +#define _PROTECTED_GUEST_H
>> +
>> +#ifndef __ASSEMBLY__
> 
> Can you include headers for bool type and false definition?

Can do.

Thanks,
Tom

> 
> --- a/include/linux/protected_guest.h
> +++ b/include/linux/protected_guest.h
> @@ -12,6 +12,9 @@
> 
>  #ifndef __ASSEMBLY__
> 
> +#include <linux/types.h>
> +#include <linux/stddef.h>
> 
> Otherwise, I see following errors in multi-config auto testing.
> 
> include/linux/protected_guest.h:40:15: error: unknown type name 'bool'
> include/linux/protected_guest.h:40:63: error: 'false' undeclared (first
> use in this functi
> 

^ permalink raw reply

* Re: [PATCH 07/11] treewide: Replace the use of mem_encrypt_active() with prot_guest_has()
From: Tom Lendacky @ 2021-08-11 15:52 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Kuppuswamy, Sathyanarayanan, linux-efi, Brijesh Singh, kvm,
	Peter Zijlstra, Dave Hansen, dri-devel, platform-driver-x86,
	Will Deacon, linux-s390, Andi Kleen, Baoquan He, Joerg Roedel,
	x86, amd-gfx, David Airlie, Ingo Molnar,
	linux-graphics-maintainer, Dave Young, Tianyu Lan,
	Thomas Zimmermann, Maarten Lankhorst, Maxime Ripard,
	Borislav Petkov, Andy Lutomirski, Thomas Gleixner, kexec,
	linux-kernel, iommu, Daniel Vetter, linux-fsdevel, linuxppc-dev
In-Reply-To: <20210811121917.ghxi7g4mctuybhbk@box.shutemov.name>

On 8/11/21 7:19 AM, Kirill A. Shutemov wrote:
> On Tue, Aug 10, 2021 at 02:48:54PM -0500, Tom Lendacky wrote:
>> On 8/10/21 1:45 PM, Kuppuswamy, Sathyanarayanan wrote:
>>>
>>>
>>> On 7/27/21 3:26 PM, Tom Lendacky wrote:
>>>> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
>>>> index de01903c3735..cafed6456d45 100644
>>>> --- a/arch/x86/kernel/head64.c
>>>> +++ b/arch/x86/kernel/head64.c
>>>> @@ -19,7 +19,7 @@
>>>>   #include <linux/start_kernel.h>
>>>>   #include <linux/io.h>
>>>>   #include <linux/memblock.h>
>>>> -#include <linux/mem_encrypt.h>
>>>> +#include <linux/protected_guest.h>
>>>>   #include <linux/pgtable.h>
>>>>     #include <asm/processor.h>
>>>> @@ -285,7 +285,7 @@ unsigned long __head __startup_64(unsigned long
>>>> physaddr,
>>>>        * there is no need to zero it after changing the memory encryption
>>>>        * attribute.
>>>>        */
>>>> -    if (mem_encrypt_active()) {
>>>> +    if (prot_guest_has(PATTR_MEM_ENCRYPT)) {
>>>>           vaddr = (unsigned long)__start_bss_decrypted;
>>>>           vaddr_end = (unsigned long)__end_bss_decrypted;
>>>
>>>
>>> Since this change is specific to AMD, can you replace PATTR_MEM_ENCRYPT with
>>> prot_guest_has(PATTR_SME) || prot_guest_has(PATTR_SEV). It is not used in
>>> TDX.
>>
>> This is a direct replacement for now.
> 
> With current implementation of prot_guest_has() for TDX it breaks boot for
> me.
> 
> Looking at code agains, now I *think* the reason is accessing a global
> variable from __startup_64() inside TDX version of prot_guest_has().
> 
> __startup_64() is special. If you access any global variable you need to
> use fixup_pointer(). See comment before __startup_64().
> 
> I'm not sure how you get away with accessing sme_me_mask directly from
> there. Any clues? Maybe just a luck and complier generates code just right
> for your case, I donno.

Hmm... yeah, could be that the compiler is using rip-relative addressing
for it because it lives in the .data section?

For the static variables in mem_encrypt_identity.c I did an assembler rip
relative LEA, but probably could have passed physaddr to sme_enable() and
used a fixup_pointer() style function, instead.

> 
> A separate point is that TDX version of prot_guest_has() relies on
> cpu_feature_enabled() which is not ready at this point.

Does TDX have to do anything special to make memory able to be shared with
the hypervisor?  You might have to use something that is available earlier
than cpu_feature_enabled() in that case (should you eventually support
kvmclock).

> 
> I think __bss_decrypted fixup has to be done if sme_me_mask is non-zero.
> Or just do it uncoditionally because it's NOP for sme_me_mask == 0.

For SNP, we'll have to additionally call the HV to update the RMP to make
the memory shared. But that could also be done unconditionally since the
early_snp_set_memory_shared() routine will check for SNP before doing
anything.

Thanks,
Tom

> 
>> I think the change you're requesting
>> should be done as part of the TDX support patches so it's clear why it is
>> being changed.
>>
>> But, wouldn't TDX still need to do something with this shared/unencrypted
>> area, though? Or since it is shared, there's actually nothing you need to
>> do (the bss decrpyted section exists even if CONFIG_AMD_MEM_ENCRYPT is not
>> configured)?
> 
> AFAICS, only kvmclock uses __bss_decrypted. We don't enable kvmclock in
> TDX at the moment. It may change in the future.
> 

^ permalink raw reply

* [PATCH v2 00/60] KVM: PPC: Book3S HV P9: entry/exit optimisations
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin

This reduces radix guest full entry/exit latency on POWER9 and POWER10
by 2x.

Nested HV guests should see smaller improvements in their L1 entry/exit,
but this is also combined with most L0 speedups also applying to nested
entry. nginx localhost throughput test in a SMP nested guest is improved
about 10% (in a direct guest it doesn't change much because it uses XIVE
for IPIs) when L0 and L1 are patched.

It does this in several main ways:

- Rearrange code to optimise SPR accesses. Mainly, avoid scoreboard
  stalls.

- Test SPR values to avoid mtSPRs where possible. mtSPRs are expensive.

- Reduce mftb. mftb is expensive.

- Demand fault certain facilities to avoid saving and/or restoring them
  (at the cost of fault when they are used, but this is mitigated over
  a number of entries, like the facilities when context switching 
  processes). PM, TM, and EBB so far.

- Defer some sequences that are made just in case a guest is interrupted
  in the middle of a critical section to the case where the guest is
  scheduled on a different CPU, rather than every time (at the cost of
  an extra IPI in this case). Namely the tlbsync sequence for radix with
  GTSE, which is very expensive.

- Reduce locking, barriers, atomics related to the vcpus-per-vcore > 1
  handling that the P9 path does not require.

Changes since v1:
- Verified DPDES changes still work with msgsndp SMT emulation.
- Fixed HMI handling bug.
- Split softpatch handling fixes into smaller pieces.
- Rebased with Fabiano's latest HV sanitising patches.
- Fix TM demand faulting bug causing nested guest TM tests to TM Bad
  Thing the host in rare cases.
- Re-name new "pmu=" command line option to "pmu_override=" and update
  documentation wording.
- Add default=y config option rather than unconditionally removing the
  L0 nested PMU workaround.
- Remove unnecessary MSR[RI] updates in entry/exit. Down to about 4700
  cycles now.
- Another bugfix from Alexey's testing.

Changes since RFC:
- Rebased with Fabiano's HV sanitising patches at the front.
- Several demand faulting bug fixes mostly relating to nested guests.
- Removed facility demand-faulting from L0 nested entry/exit handler.
  Demand faulting is still done in the L1, but not the L0. The reason
  is to reduce complexity (although it's only a small amount of
  complexity), reduce demand faulting overhead that may require several

Fabiano Rosas (3):
  KVM: PPC: Book3S HV Nested: Sanitise vcpu registers
  KVM: PPC: Book3S HV Nested: Stop forwarding all HFUs to L1
  KVM: PPC: Book3S HV Nested: save_hv_return_state does not require trap
    argument

Nicholas Piggin (57):
  KVM: PPC: Book3S HV: Initialise vcpu MSR with MSR_ME
  KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path
  KVM: PPC: Book3S HV P9: Fixes for TM softpatch interrupt NIP
  KVM: PPC: Book3S HV Nested: Fix TM softpatch HFAC interrupt emulation
  KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible
  KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest
    SPRs are live
  powerpc/64s: Remove WORT SPR from POWER9/10
  KMV: PPC: Book3S HV P9: Use set_dec to set decrementer to host
  KVM: PPC: Book3S HV P9: Use host timer accounting to avoid decrementer
    read
  KVM: PPC: Book3S HV P9: Use large decrementer for HDEC
  KVM: PPC: Book3S HV P9: Reduce mftb per guest entry/exit
  powerpc/time: add API for KVM to re-arm the host timer/decrementer
  KVM: PPC: Book3S HV: POWER10 enable HAIL when running radix guests
  powerpc/64s: Keep AMOR SPR a constant ~0 at runtime
  KVM: PPC: Book3S HV: Don't always save PMU for guest capable of
    nesting
  powerpc/64s: Always set PMU control registers to frozen/disabled when
    not in use
  powerpc/64s: Implement PMU override command line option
  KVM: PPC: Book3S HV P9: Implement PMU save/restore in C
  KVM: PPC: Book3S HV P9: Factor PMU save/load into context switch
    functions
  KVM: PPC: Book3S HV P9: Demand fault PMU SPRs when marked not inuse
  KVM: PPC: Book3S HV P9: Factor out yield_count increment
  KVM: PPC: Book3S HV: CTRL SPR does not require read-modify-write
  KVM: PPC: Book3S HV P9: Move SPRG restore to restore_p9_host_os_sprs
  KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save
    host SPRs
  KVM: PPC: Book3S HV P9: Improve mtmsrd scheduling by delaying MSR[EE]
    disable
  KVM: PPC: Book3S HV P9: Add kvmppc_stop_thread to match
    kvmppc_start_thread
  KVM: PPC: Book3S HV: Change dec_expires to be relative to guest
    timebase
  KVM: PPC: Book3S HV P9: Move TB updates
  KVM: PPC: Book3S HV P9: Optimise timebase reads
  KVM: PPC: Book3S HV P9: Avoid SPR scoreboard stalls
  KVM: PPC: Book3S HV P9: Only execute mtSPR if the value changed
  KVM: PPC: Book3S HV P9: Juggle SPR switching around
  KVM: PPC: Book3S HV P9: Move vcpu register save/restore into functions
  KVM: PPC: Book3S HV P9: Move host OS save/restore functions to
    built-in
  KVM: PPC: Book3S HV P9: Move nested guest entry into its own function
  KVM: PPC: Book3S HV P9: Move remaining SPR and MSR access into low
    level entry
  KVM: PPC: Book3S HV P9: Implement TM fastpath for guest entry/exit
  KVM: PPC: Book3S HV P9: Switch PMU to guest as late as possible
  KVM: PPC: Book3S HV P9: Restrict DSISR canary workaround to processors
    that require it
  KVM: PPC: Book3S HV P9: More SPR speed improvements
  KVM: PPC: Book3S HV P9: Demand fault EBB facility registers
  KVM: PPC: Book3S HV P9: Demand fault TM facility registers
  KVM: PPC: Book3S HV P9: Use Linux SPR save/restore to manage some host
    SPRs
  KVM: PPC: Book3S HV P9: Comment and fix MMU context switching code
  KVM: PPC: Book3S HV P9: Test dawr_enabled() before saving host DAWR
    SPRs
  KVM: PPC: Book3S HV P9: Don't restore PSSCR if not needed
  KVM: PPC: Book3S HV P9: Avoid tlbsync sequence on radix guest exit
  KVM: PPC: Book3S HV Nested: Avoid extra mftb() in nested entry
  KVM: PPC: Book3S HV P9: Improve mfmsr performance on entry
  KVM: PPC: Book3S HV P9: Optimise hash guest SLB saving
  KVM: PPC: Book3S HV P9: Avoid changing MSR[RI] in entry and exit
  KVM: PPC: Book3S HV P9: Add unlikely annotation for !mmu_ready
  KVM: PPC: Book3S HV P9: Avoid cpu_in_guest atomics on entry and exit
  KVM: PPC: Book3S HV P9: Remove most of the vcore logic
  KVM: PPC: Book3S HV P9: Tidy kvmppc_create_dtl_entry
  KVM: PPC: Book3S HV P9: Stop using vc->dpdes
  KVM: PPC: Book3S HV P9: Remove subcore HMI handling

 .../admin-guide/kernel-parameters.txt         |   8 +
 arch/powerpc/include/asm/asm-prototypes.h     |   5 -
 arch/powerpc/include/asm/kvm_asm.h            |   1 +
 arch/powerpc/include/asm/kvm_book3s.h         |   6 +
 arch/powerpc/include/asm/kvm_book3s_64.h      |   6 +-
 arch/powerpc/include/asm/kvm_host.h           |   7 +-
 arch/powerpc/include/asm/kvm_ppc.h            |   1 +
 arch/powerpc/include/asm/pmc.h                |   7 +
 arch/powerpc/include/asm/reg.h                |   3 +-
 arch/powerpc/include/asm/switch_to.h          |   2 +
 arch/powerpc/include/asm/time.h               |  19 +-
 arch/powerpc/kernel/cpu_setup_power.c         |  12 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c             |   8 +-
 arch/powerpc/kernel/process.c                 |  32 +
 arch/powerpc/kernel/time.c                    |  54 +-
 arch/powerpc/kvm/Kconfig                      |  15 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c        |   4 +
 arch/powerpc/kvm/book3s_hv.c                  | 890 ++++++++++--------
 arch/powerpc/kvm/book3s_hv.h                  |  41 +
 arch/powerpc/kvm/book3s_hv_builtin.c          |   2 +
 arch/powerpc/kvm/book3s_hv_hmi.c              |   7 +-
 arch/powerpc/kvm/book3s_hv_interrupts.S       |  13 +-
 arch/powerpc/kvm/book3s_hv_nested.c           | 109 ++-
 arch/powerpc/kvm/book3s_hv_p9_entry.c         | 817 +++++++++++++---
 arch/powerpc/kvm/book3s_hv_ras.c              |  54 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S       | 115 +--
 arch/powerpc/kvm/book3s_hv_tm.c               |  61 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c      |  15 -
 arch/powerpc/perf/core-book3s.c               |  35 +
 arch/powerpc/platforms/powernv/idle.c         |  10 +-
 30 files changed, 1589 insertions(+), 770 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv.h

-- 
2.23.0


^ permalink raw reply

* [PATCH v2 01/60] KVM: PPC: Book3S HV: Initialise vcpu MSR with MSR_ME
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: Alexey Kardashevskiy, linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

It is possible to create a VCPU without setting the MSR before running
it, which results in a warning in kvmhv_vcpu_entry_p9() that MSR_ME is
not set. This is pretty harmless because the MSR_ME bit is added to
HSRR1 before HRFID to guest, and a normal qemu guest doesn't hit it.

Initialise the vcpu MSR with MSR_ME set.

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ce7ff12cfc03..2afe7a95fc9c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2687,6 +2687,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	spin_lock_init(&vcpu->arch.vpa_update_lock);
 	spin_lock_init(&vcpu->arch.tbacct_lock);
 	vcpu->arch.busy_preempt = TB_NIL;
+	vcpu->arch.shregs.msr = MSR_ME;
 	vcpu->arch.intr_msr = MSR_SF | MSR_ME;
 
 	/*
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 02/60] KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

TM fake-suspend emulation is only used by POWER9. Remove it from the old
code path.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 42 -------------------------
 1 file changed, 42 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8dd437d7a2c6..75079397c2a5 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	beq	kvmppc_hisi
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	/* For softpatch interrupt, go off and do TM instruction emulation */
-	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-	beq	kvmppc_tm_emul
-#endif
-
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
@@ -1599,42 +1593,6 @@ maybe_reenter_guest:
 	blt	deliver_guest_interrupt
 	b	guest_exit_cont
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Softpatch interrupt for transactional memory emulation cases
- * on POWER9 DD2.2.  This is early in the guest exit path - we
- * haven't saved registers or done a treclaim yet.
- */
-kvmppc_tm_emul:
-	/* Save instruction image in HEIR */
-	mfspr	r3, SPRN_HEIR
-	stw	r3, VCPU_HEIR(r9)
-
-	/*
-	 * The cases we want to handle here are those where the guest
-	 * is in real suspend mode and is trying to transition to
-	 * transactional mode.
-	 */
-	lbz	r0, HSTATE_FAKE_SUSPEND(r13)
-	cmpwi	r0, 0		/* keep exiting guest if in fake suspend */
-	bne	guest_exit_cont
-	rldicl	r3, r11, 64 - MSR_TS_S_LG, 62
-	cmpwi	r3, 1		/* or if not in suspend state */
-	bne	guest_exit_cont
-
-	/* Call C code to do the emulation */
-	mr	r3, r9
-	bl	kvmhv_p9_tm_emulation_early
-	nop
-	ld	r9, HSTATE_KVM_VCPU(r13)
-	li	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-	cmpwi	r3, 0
-	beq	guest_exit_cont		/* continue exiting if not handled */
-	ld	r10, VCPU_PC(r9)
-	ld	r11, VCPU_MSR(r9)
-	b	fast_interrupt_c_return	/* go back to guest if handled */
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 03/60] KVM: PPC: Book3S HV P9: Fixes for TM softpatch interrupt NIP
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

The softpatch interrupt sets HSRR0 to the faulting instruction +4, so
it should subtract 4 for the faulting instruction address in the case
it is a TM softpatch interrupt (the instruction was not executed) and
it was not emulated.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_tm.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index cc90b8b82329..e7c36f8bf205 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -46,6 +46,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 	u64 newmsr, bescr;
 	int ra, rs;
 
+	/*
+	 * The TM softpatch interrupt sets NIP to the instruction following
+	 * the faulting instruction, which is not executed. Rewind nip to the
+	 * faulting instruction so it looks like a normal synchronous
+	 * interrupt, then update nip in the places where the instruction is
+	 * emulated.
+	 */
+	vcpu->arch.regs.nip -= 4;
+
 	/*
 	 * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
 	 * in these instructions, so masking bit 31 out doesn't change these
@@ -67,7 +76,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 			       (newmsr & MSR_TM)));
 		newmsr = sanitize_msr(newmsr);
 		vcpu->arch.shregs.msr = newmsr;
-		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
 		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
 		return RESUME_GUEST;
 
@@ -100,7 +109,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.bescr = bescr;
 		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
 		vcpu->arch.shregs.msr = msr;
-		vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
 		vcpu->arch.regs.nip = vcpu->arch.ebbrr;
 		return RESUME_GUEST;
 
@@ -116,6 +125,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
 		newmsr = sanitize_msr(newmsr);
 		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 
 	/* ignore bit 31, see comment above */
@@ -152,6 +162,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 				msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
 		}
 		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 
 	/* ignore bit 31, see comment above */
@@ -189,6 +200,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
 		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 
 	/* ignore bit 31, see comment above */
@@ -220,6 +232,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
 			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
 		vcpu->arch.shregs.msr = msr | MSR_TS_S;
+		vcpu->arch.regs.nip += 4;
 		return RESUME_GUEST;
 	}
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 04/60] KVM: PPC: Book3S HV Nested: Fix TM softpatch HFAC interrupt emulation
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

Have the TM softpatch emulation code set up the HFAC interrupt and
return -1 in case an instruction was executed with HFSCR bits clear,
and have the interrupt exit handler fall through to the HFAC handler.
When the L0 is running a nested guest, this ensures the HFAC interrupt
is correctly passed up to the L1.

The "direct guest" exit handler will turn these into PROGILL program
interrupts so functionality in practice will be unchanged. But it's
possible an L1 would want to handle these in a different way.

Also rearrange the FAC interrupt emulation code to match the HFAC format
while here (mainly, adding the FSCR_INTR_CAUSE mask).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/reg.h  |  3 ++-
 arch/powerpc/kvm/book3s_hv.c    | 35 ++++++++++++++++----------
 arch/powerpc/kvm/book3s_hv_tm.c | 44 ++++++++++++++++++---------------
 3 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index be85cf156a1f..e9d27265253b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -415,6 +415,7 @@
 #define   FSCR_TAR	__MASK(FSCR_TAR_LG)
 #define   FSCR_EBB	__MASK(FSCR_EBB_LG)
 #define   FSCR_DSCR	__MASK(FSCR_DSCR_LG)
+#define   FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
 #define SPRN_HFSCR	0xbe	/* HV=1 Facility Status & Control Register */
 #define   HFSCR_PREFIX	__MASK(FSCR_PREFIX_LG)
 #define   HFSCR_MSGP	__MASK(FSCR_MSGP_LG)
@@ -426,7 +427,7 @@
 #define   HFSCR_DSCR	__MASK(FSCR_DSCR_LG)
 #define   HFSCR_VECVSX	__MASK(FSCR_VECVSX_LG)
 #define   HFSCR_FP	__MASK(FSCR_FP_LG)
-#define   HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
+#define   HFSCR_INTR_CAUSE FSCR_INTR_CAUSE
 #define SPRN_TAR	0x32f	/* Target Address Register */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
 #define   LPCR_VPM0		ASM_CONST(0x8000000000000000)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2afe7a95fc9c..e79eedb65e6b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1682,6 +1682,21 @@ XXX benchmark guest exits
 			r = RESUME_GUEST;
 		}
 		break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		/*
+		 * This occurs for various TM-related instructions that
+		 * we need to emulate on POWER9 DD2.2.  We have already
+		 * handled the cases where the guest was in real-suspend
+		 * mode and was transitioning to transactional state.
+		 */
+		r = kvmhv_p9_tm_emulation(vcpu);
+		if (r != -1)
+			break;
+		fallthrough; /* go to facility unavailable handler */
+#endif
+
 	/*
 	 * This occurs if the guest (kernel or userspace), does something that
 	 * is prohibited by HFSCR.
@@ -1700,18 +1715,6 @@ XXX benchmark guest exits
 		}
 		break;
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
-		/*
-		 * This occurs for various TM-related instructions that
-		 * we need to emulate on POWER9 DD2.2.  We have already
-		 * handled the cases where the guest was in real-suspend
-		 * mode and was transitioning to transactional state.
-		 */
-		r = kvmhv_p9_tm_emulation(vcpu);
-		break;
-#endif
-
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		r = RESUME_PASSTHROUGH;
 		break;
@@ -1814,9 +1817,15 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		 * mode and was transitioning to transactional state.
 		 */
 		r = kvmhv_p9_tm_emulation(vcpu);
-		break;
+		if (r != -1)
+			break;
+		fallthrough; /* go to facility unavailable handler */
 #endif
 
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		r = RESUME_HOST;
+		break;
+
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		vcpu->arch.trap = 0;
 		r = RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index e7c36f8bf205..866cadd70094 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -88,14 +88,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		}
 		/* check EBB facility is available */
 		if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_EBB_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
 		}
@@ -138,14 +139,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		}
 		/* check for TM disabled in the HFSCR or MSR */
 		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if (!(msr & MSR_TM)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_TM_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu,
 						BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
@@ -169,14 +171,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 	case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
 		/* check for TM disabled in the HFSCR or MSR */
 		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if (!(msr & MSR_TM)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_TM_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu,
 						BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
@@ -208,14 +211,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
 		/* XXX do we need to check for PR=0 here? */
 		/* check for TM disabled in the HFSCR or MSR */
 		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
-			/* generate an illegal instruction interrupt */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-			return RESUME_GUEST;
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
 		}
 		if (!(msr & MSR_TM)) {
 			/* generate a facility unavailable interrupt */
-			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
-				((u64)FSCR_TM_LG << 56);
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
 			kvmppc_book3s_queue_irqprio(vcpu,
 						BOOK3S_INTERRUPT_FAC_UNAVAIL);
 			return RESUME_GUEST;
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 05/60] KVM: PPC: Book3S HV Nested: Sanitise vcpu registers
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin, Fabiano Rosas
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

From: Fabiano Rosas <farosas@linux.ibm.com>

As one of the arguments of the H_ENTER_NESTED hypercall, the nested
hypervisor (L1) prepares a structure containing the values of various
hypervisor-privileged registers with which it wants the nested guest
(L2) to run. Since the nested HV runs in supervisor mode it needs the
host to write to these registers.

To stop a nested HV manipulating this mechanism and using a nested
guest as a proxy to access a facility that has been made unavailable
to it, we have a routine that sanitises the values of the HV registers
before copying them into the nested guest's vcpu struct.

However, when coming out of the guest the values are copied as they
were back into L1 memory, which means that any sanitisation we did
during guest entry will be exposed to L1 after H_ENTER_NESTED returns.

This patch alters this sanitisation to have effect on the vcpu->arch
registers directly before entering and after exiting the guest,
leaving the structure that is copied back into L1 unchanged (except
when we really want L1 to access the value, e.g the Cause bits of
HFSCR).

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_nested.c | 94 ++++++++++++++---------------
 1 file changed, 46 insertions(+), 48 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 898f942eb198..1eb4e989edc7 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -105,7 +105,6 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	hr->dpdes = vc->dpdes;
-	hr->hfscr = vcpu->arch.hfscr;
 	hr->purr = vcpu->arch.purr;
 	hr->spurr = vcpu->arch.spurr;
 	hr->ic = vcpu->arch.ic;
@@ -128,55 +127,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
 		hr->asdr = vcpu->arch.fault_gpa;
 		break;
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
+			     (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
+		break;
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
 		hr->heir = vcpu->arch.emul_inst;
 		break;
 	}
 }
 
-/*
- * This can result in some L0 HV register state being leaked to an L1
- * hypervisor when the hv_guest_state is copied back to the guest after
- * being modified here.
- *
- * There is no known problem with such a leak, and in many cases these
- * register settings could be derived by the guest by observing behaviour
- * and timing, interrupts, etc., but it is an issue to consider.
- */
-static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
-{
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-	u64 mask;
-
-	/*
-	 * Don't let L1 change LPCR bits for the L2 except these:
-	 */
-	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-		LPCR_LPES | LPCR_MER;
-
-	/*
-	 * Additional filtering is required depending on hardware
-	 * and configuration.
-	 */
-	hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
-			(vc->lpcr & ~mask) | (hr->lpcr & mask));
-
-	/*
-	 * Don't let L1 enable features for L2 which we've disabled for L1,
-	 * but preserve the interrupt cause field.
-	 */
-	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
-
-	/* Don't let data address watchpoint match in hypervisor state */
-	hr->dawrx0 &= ~DAWRX_HYP;
-	hr->dawrx1 &= ~DAWRX_HYP;
-
-	/* Don't let completed instruction address breakpt match in HV state */
-	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
-		hr->ciabr &= ~CIABR_PRIV;
-}
-
-static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -288,6 +249,43 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
 				     sizeof(struct pt_regs));
 }
 
+static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+			    const struct hv_guest_state *l2_hv,
+			    const struct hv_guest_state *l1_hv, u64 *lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 mask;
+
+	restore_hv_regs(vcpu, l2_hv);
+
+	/*
+	 * Don't let L1 change LPCR bits for the L2 except these:
+	 */
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+		LPCR_LPES | LPCR_MER;
+
+	/*
+	 * Additional filtering is required depending on hardware
+	 * and configuration.
+	 */
+	*lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+				      (vc->lpcr & ~mask) | (*lpcr & mask));
+
+	/*
+	 * Don't let L1 enable features for L2 which we've disabled for L1,
+	 * but preserve the interrupt cause field.
+	 */
+	vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
+
+	/* Don't let data address watchpoint match in hypervisor state */
+	vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
+	vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
+
+	/* Don't let completed instruction address breakpt match in HV state */
+	if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+		vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
+}
+
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 {
 	long int err, r;
@@ -296,7 +294,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	struct hv_guest_state l2_hv = {0}, saved_l1_hv;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	u64 hv_ptr, regs_ptr;
-	u64 hdec_exp;
+	u64 hdec_exp, lpcr;
 	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
 
 	if (vcpu->kvm->arch.l1_ptcr == 0)
@@ -369,8 +367,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	/* Guest must always run with ME enabled, HV disabled. */
 	vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
 
-	sanitise_hv_regs(vcpu, &l2_hv);
-	restore_hv_regs(vcpu, &l2_hv);
+	lpcr = l2_hv.lpcr;
+	load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);
 
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
@@ -380,7 +378,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 			r = RESUME_HOST;
 			break;
 		}
-		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
+		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
 	} while (is_kvmppc_resume_guest(r));
 
 	/* save L2 state for return */
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 06/60] KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

When the L0 runs a nested L2, there are several permutations of HFSCR
that can be relevant. The HFSCR that the L1 vcpu L1 requested, the
HFSCR that the L1 vcpu may use, and the HFSCR that is actually being
used to run the L2.

The L1 requested HFSCR is not accessible outside the nested hcall
handler, so copy that into a new kvm_nested_guest.hfscr field.

The permitted HFSCR is taken from the HFSCR that the L1 runs with,
which is also not accessible while the hcall is being made. Move
this into a new kvm_vcpu_arch.hfscr_permitted field.

These will be used by the next patch to improve facility handling
for nested guests, and later by facility demand faulting patches.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 1 +
 arch/powerpc/include/asm/kvm_host.h      | 2 ++
 arch/powerpc/kvm/book3s_hv.c             | 2 ++
 arch/powerpc/kvm/book3s_hv_nested.c      | 5 +++--
 4 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index eaf3a562bf1e..19b6942c6969 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -39,6 +39,7 @@ struct kvm_nested_guest {
 	pgd_t *shadow_pgtable;		/* our page table for this guest */
 	u64 l1_gr_to_hr;		/* L1's addr of part'n-scoped table */
 	u64 process_table;		/* process table entry for this guest */
+	u64 hfscr;			/* HFSCR that the L1 requested for this nested guest */
 	long refcnt;			/* number of pointers to this struct */
 	struct mutex tlb_lock;		/* serialize page faults and tlbies */
 	struct kvm_nested_guest *next;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9f52f282b1aa..a779f7849cfb 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -811,6 +811,8 @@ struct kvm_vcpu_arch {
 
 	u32 online;
 
+	u64 hfscr_permitted;	/* A mask of permitted HFSCR facilities */
+
 	/* For support of nested guests */
 	struct kvm_nested_guest *nested;
 	u32 nested_vcpu_id;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e79eedb65e6b..c65bd8fa4368 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2718,6 +2718,8 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	if (cpu_has_feature(CPU_FTR_TM_COMP))
 		vcpu->arch.hfscr |= HFSCR_TM;
 
+	vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
+
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 1eb4e989edc7..5ad5014c6f68 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -272,10 +272,10 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
 				      (vc->lpcr & ~mask) | (*lpcr & mask));
 
 	/*
-	 * Don't let L1 enable features for L2 which we've disabled for L1,
+	 * Don't let L1 enable features for L2 which we don't allow for L1,
 	 * but preserve the interrupt cause field.
 	 */
-	vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
+	vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);
 
 	/* Don't let data address watchpoint match in hypervisor state */
 	vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
@@ -362,6 +362,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	/* set L1 state to L2 state */
 	vcpu->arch.nested = l2;
 	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+	l2->hfscr = l2_hv.hfscr;
 	vcpu->arch.regs = l2_regs;
 
 	/* Guest must always run with ME enabled, HV disabled. */
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 07/60] KVM: PPC: Book3S HV Nested: Stop forwarding all HFUs to L1
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin, Fabiano Rosas
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

From: Fabiano Rosas <farosas@linux.ibm.com>

If the nested hypervisor has no access to a facility because it has
been disabled by the host, it should also not be able to see the
Hypervisor Facility Unavailable that arises from one of its guests
trying to access the facility.

This patch turns a HFU that happened in L2 into a Hypervisor Emulation
Assistance interrupt and forwards it to L1 for handling. The ones that
happened because L1 explicitly disabled the facility for L2 are still
let through, along with the corresponding Cause bits in the HFSCR.

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
[np: move handling into kvmppc_handle_nested_exit]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c65bd8fa4368..7bc9d487bc1a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1733,6 +1733,7 @@ XXX benchmark guest exits
 
 static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 {
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	int r;
 	int srcu_idx;
 
@@ -1822,9 +1823,35 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		fallthrough; /* go to facility unavailable handler */
 #endif
 
-	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
-		r = RESUME_HOST;
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+		u64 cause = vcpu->arch.hfscr >> 56;
+
+		/*
+		 * Only pass HFU interrupts to the L1 if the facility is
+		 * permitted but disabled by the L1's HFSCR, otherwise
+		 * the interrupt does not make sense to the L1 so turn
+		 * it into a HEAI.
+		 */
+		if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+					(nested->hfscr & (1UL << cause))) {
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+
+			/*
+			 * If the fetch failed, return to guest and
+			 * try executing it again.
+			 */
+			r = kvmppc_get_last_inst(vcpu, INST_GENERIC,
+						 &vcpu->arch.emul_inst);
+			if (r != EMULATE_DONE)
+				r = RESUME_GUEST;
+			else
+				r = RESUME_HOST;
+		} else {
+			r = RESUME_HOST;
+		}
+
 		break;
+	}
 
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		vcpu->arch.trap = 0;
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 08/60] KVM: PPC: Book3S HV Nested: save_hv_return_state does not require trap argument
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin, Fabiano Rosas
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

From: Fabiano Rosas <farosas@linux.ibm.com>

vcpu is already anargument so vcpu->arch.trap can be used directly.

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_nested.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 5ad5014c6f68..ed8a2c9f5629 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -99,7 +99,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
 	hr->dawrx1 = swab64(hr->dawrx1);
 }
 
-static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
+static void save_hv_return_state(struct kvm_vcpu *vcpu,
 				 struct hv_guest_state *hr)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -118,7 +118,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
 	hr->pidr = vcpu->arch.pid;
 	hr->cfar = vcpu->arch.cfar;
 	hr->ppr = vcpu->arch.ppr;
-	switch (trap) {
+	switch (vcpu->arch.trap) {
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
 		hr->hdar = vcpu->arch.fault_dar;
 		hr->hdsisr = vcpu->arch.fault_dsisr;
@@ -389,7 +389,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
 	delta_ic = vcpu->arch.ic - l2_hv.ic;
 	delta_vtb = vc->vtb - l2_hv.vtb;
-	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
+	save_hv_return_state(vcpu, &l2_hv);
 
 	/* restore L1 state */
 	vcpu->arch.nested = NULL;
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 09/60] KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin, Fabiano Rosas
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

After the L1 saves its PMU SPRs but before loading the L2's PMU SPRs,
switch the pmcregs_in_use field in the L1 lppaca to the value advertised
by the L2 in its VPA. On the way out of the L2, set it back after saving
the L2 PMU registers (if they were in-use).

This transfers the PMU liveness indication between the L1 and L2 at the
points where the registers are not live.

This fixes the nested HV bug for which a workaround was added to the L0
HV by commit 63279eeb7f93a ("KVM: PPC: Book3S HV: Always save guest pmu
for guest capable of nesting"), which explains the problem in detail.
That workaround is no longer required for guests that include this bug
fix.

Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall")
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/pmc.h |  7 +++++++
 arch/powerpc/kvm/book3s_hv.c   | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index c6bbe9778d3c..3c09109e708e 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse)
 #endif
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static inline int ppc_get_pmu_inuse(void)
+{
+	return get_paca()->pmcregs_in_use;
+}
+#endif
+
 extern void power4_enable_pmcs(void);
 
 #else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7bc9d487bc1a..1a3ea0ea7514 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -59,6 +59,7 @@
 #include <asm/kvm_book3s.h>
 #include <asm/mmu_context.h>
 #include <asm/lppaca.h>
+#include <asm/pmc.h>
 #include <asm/processor.h>
 #include <asm/cputhreads.h>
 #include <asm/page.h>
@@ -3894,6 +3895,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 		kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries()) {
+		barrier();
+		if (vcpu->arch.vpa.pinned_addr) {
+			struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+			get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+		} else {
+			get_lppaca()->pmcregs_in_use = 1;
+		}
+		barrier();
+	}
+#endif
 	kvmhv_load_guest_pmu(vcpu);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
@@ -4028,6 +4041,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	save_pmu |= nesting_enabled(vcpu->kvm);
 
 	kvmhv_save_guest_pmu(vcpu, save_pmu);
+#ifdef CONFIG_PPC_PSERIES
+	if (kvmhv_on_pseries()) {
+		barrier();
+		get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+		barrier();
+	}
+#endif
 
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 10/60] powerpc/64s: Remove WORT SPR from POWER9/10
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin, Fabiano Rosas
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

This register is not architected and not implemented in POWER9 or 10,
it just reads back zeroes for compatibility.

Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c          | 3 ---
 arch/powerpc/platforms/powernv/idle.c | 2 --
 2 files changed, 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1a3ea0ea7514..3198f79572d8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3770,7 +3770,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
 	mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
 	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
 	mtspr(SPRN_BESCR, vcpu->arch.bescr);
-	mtspr(SPRN_WORT, vcpu->arch.wort);
 	mtspr(SPRN_TIDR, vcpu->arch.tid);
 	mtspr(SPRN_AMR, vcpu->arch.amr);
 	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@ -3797,7 +3796,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
 	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
 	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
 	vcpu->arch.bescr = mfspr(SPRN_BESCR);
-	vcpu->arch.wort = mfspr(SPRN_WORT);
 	vcpu->arch.tid = mfspr(SPRN_TIDR);
 	vcpu->arch.amr = mfspr(SPRN_AMR);
 	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
@@ -3829,7 +3827,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
 				    struct p9_host_os_sprs *host_os_sprs)
 {
 	mtspr(SPRN_PSPB, 0);
-	mtspr(SPRN_WORT, 0);
 	mtspr(SPRN_UAMOR, 0);
 
 	mtspr(SPRN_DSCR, host_os_sprs->dscr);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 1e908536890b..df19e2ff9d3c 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 		sprs.purr	= mfspr(SPRN_PURR);
 		sprs.spurr	= mfspr(SPRN_SPURR);
 		sprs.dscr	= mfspr(SPRN_DSCR);
-		sprs.wort	= mfspr(SPRN_WORT);
 		sprs.ciabr	= mfspr(SPRN_CIABR);
 
 		sprs.mmcra	= mfspr(SPRN_MMCRA);
@@ -785,7 +784,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 	mtspr(SPRN_PURR,	sprs.purr);
 	mtspr(SPRN_SPURR,	sprs.spurr);
 	mtspr(SPRN_DSCR,	sprs.dscr);
-	mtspr(SPRN_WORT,	sprs.wort);
 	mtspr(SPRN_CIABR,	sprs.ciabr);
 
 	mtspr(SPRN_MMCRA,	sprs.mmcra);
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2 11/60] KMV: PPC: Book3S HV P9: Use set_dec to set decrementer to host
From: Nicholas Piggin @ 2021-08-11 16:00 UTC (permalink / raw)
  To: kvm-ppc; +Cc: Alexey Kardashevskiy, linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210811160134.904987-1-npiggin@gmail.com>

The host Linux timer code arms the decrementer with the value
'decrementers_next_tb - current_tb' using set_dec(), which stores
val - 1 on Book3S-64, which is not quite the same as what KVM does
to re-arm the host decrementer when exiting the guest.

This shouldn't be a significant change, but it makes the logic match
and avoids this small extra change being brought into the next patch.

Suggested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3198f79572d8..b60a70177507 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4049,7 +4049,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->entry_exit_map = 0x101;
 	vc->in_guest = 0;
 
-	mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+	set_dec(local_paca->kvm_hstate.dec_expires - mftb());
 	/* We may have raced with new irq work */
 	if (test_irq_work_pending())
 		set_dec(1);
-- 
2.23.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox