* [PATCH v2 09/18] fpga: dfl: afu: add userclock sysfs interfaces.
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel
Cc: linux-api, Wu Hao, Ananda Ravuri, Russ Weight, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch introduces userclock sysfs interfaces for AFU, user
could use these interfaces for clock setting to AFU.
Please note that, this is only working for port header feature
with revision 0, for later revisions, userclock setting is moved
to a separated private feature, so one revision sysfs interface
is exposed to userspace application for this purpose too.
Signed-off-by: Ananda Ravuri <ananda.ravuri@intel.com>
Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
---
Documentation/ABI/testing/sysfs-platform-dfl-port | 35 +++++++
drivers/fpga/dfl-afu-main.c | 114 +++++++++++++++++++++-
drivers/fpga/dfl.h | 4 +
3 files changed, 152 insertions(+), 1 deletion(-)
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port b/Documentation/ABI/testing/sysfs-platform-dfl-port
index d7122a4..71f4892 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-port
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
@@ -44,3 +44,38 @@ Contact: Wu Hao <hao.wu@intel.com>
Description: Read-write. Read and set AFU latency tolerance reporting value.
Set ltr to 1 if the AFU can tolerate latency >= 40us or set it
to 0 if it is latency sensitive.
+
+What: /sys/bus/platform/devices/dfl-port.0/revision
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the revision of port header
+ feature.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqcmd
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Write-only. User writes command to this interface to set
+ userclock to AFU.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqsts
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the status of issued command
+ to userclck_freqcmd.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqcntrcmd
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Write-only. User writes command to this interface to set
+ userclock counter.
+
+What: /sys/bus/platform/devices/dfl-port.0/userclk_freqcntrsts
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the status of issued command
+ to userclck_freqcntrcmd.
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index 2ffec06..82fd80a 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -144,6 +144,17 @@ static int port_get_id(struct platform_device *pdev)
static DEVICE_ATTR_RO(id);
static ssize_t
+revision_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
+
+ return scnprintf(buf, PAGE_SIZE, "%x\n", dfl_feature_revision(base));
+}
+static DEVICE_ATTR_RO(revision);
+
+static ssize_t
ltr_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
@@ -282,6 +293,7 @@ static int port_get_id(struct platform_device *pdev)
static const struct attribute *port_hdr_attrs[] = {
&dev_attr_id.attr,
+ &dev_attr_revision.attr,
&dev_attr_ltr.attr,
&dev_attr_ap1_event.attr,
&dev_attr_ap2_event.attr,
@@ -289,14 +301,113 @@ static int port_get_id(struct platform_device *pdev)
NULL,
};
+static ssize_t
+userclk_freqcmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+ u64 userclk_freq_cmd;
+ void __iomem *base;
+
+ if (kstrtou64(buf, 0, &userclk_freq_cmd))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
+
+ mutex_lock(&pdata->lock);
+ writeq(userclk_freq_cmd, base + PORT_HDR_USRCLK_CMD0);
+ mutex_unlock(&pdata->lock);
+
+ return count;
+}
+static DEVICE_ATTR_WO(userclk_freqcmd);
+
+static ssize_t
+userclk_freqcntrcmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+ u64 userclk_freqcntr_cmd;
+ void __iomem *base;
+
+ if (kstrtou64(buf, 0, &userclk_freqcntr_cmd))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
+
+ mutex_lock(&pdata->lock);
+ writeq(userclk_freqcntr_cmd, base + PORT_HDR_USRCLK_CMD1);
+ mutex_unlock(&pdata->lock);
+
+ return count;
+}
+static DEVICE_ATTR_WO(userclk_freqcntrcmd);
+
+static ssize_t
+userclk_freqsts_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u64 userclk_freqsts;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
+
+ userclk_freqsts = readq(base + PORT_HDR_USRCLK_STS0);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)userclk_freqsts);
+}
+static DEVICE_ATTR_RO(userclk_freqsts);
+
+static ssize_t
+userclk_freqcntrsts_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ u64 userclk_freqcntrsts;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
+
+ userclk_freqcntrsts = readq(base + PORT_HDR_USRCLK_STS1);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)userclk_freqcntrsts);
+}
+static DEVICE_ATTR_RO(userclk_freqcntrsts);
+
+static const struct attribute *port_hdr_userclk_attrs[] = {
+ &dev_attr_userclk_freqcmd.attr,
+ &dev_attr_userclk_freqcntrcmd.attr,
+ &dev_attr_userclk_freqsts.attr,
+ &dev_attr_userclk_freqcntrsts.attr,
+ NULL,
+};
+
static int port_hdr_init(struct platform_device *pdev,
struct dfl_feature *feature)
{
+ int ret;
+
dev_dbg(&pdev->dev, "PORT HDR Init.\n");
port_reset(pdev);
- return sysfs_create_files(&pdev->dev.kobj, port_hdr_attrs);
+ ret = sysfs_create_files(&pdev->dev.kobj, port_hdr_attrs);
+ if (ret)
+ return ret;
+
+ /*
+ * if revision > 0, the userclock will be moved from port hdr register
+ * region to a separated private feature.
+ */
+ if (dfl_feature_revision(feature->ioaddr) > 0)
+ return 0;
+
+ ret = sysfs_create_files(&pdev->dev.kobj, port_hdr_userclk_attrs);
+ if (ret)
+ sysfs_remove_files(&pdev->dev.kobj, port_hdr_attrs);
+
+ return ret;
}
static void port_hdr_uinit(struct platform_device *pdev,
@@ -304,6 +415,7 @@ static void port_hdr_uinit(struct platform_device *pdev,
{
dev_dbg(&pdev->dev, "PORT HDR UInit.\n");
+ sysfs_remove_files(&pdev->dev.kobj, port_hdr_userclk_attrs);
sysfs_remove_files(&pdev->dev.kobj, port_hdr_attrs);
}
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index 1525098..3c5dc3a 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -120,6 +120,10 @@
#define PORT_HDR_CAP 0x30
#define PORT_HDR_CTRL 0x38
#define PORT_HDR_STS 0x40
+#define PORT_HDR_USRCLK_CMD0 0x50
+#define PORT_HDR_USRCLK_CMD1 0x58
+#define PORT_HDR_USRCLK_STS0 0x60
+#define PORT_HDR_USRCLK_STS1 0x68
/* Port Capability Register Bitfield */
#define PORT_CAP_PORT_NUM GENMASK_ULL(1, 0) /* ID of this port */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 10/18] fpga: dfl: add id_table for dfl private feature driver
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel; +Cc: linux-api, Wu Hao, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch adds id_table for each dfl private feature driver,
it allows to reuse same private feature driver to match and support
multiple dfl private features.
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Moritz Fischer <mdf@kernel.org>
Acked-by: Alan Tull <atull@kernel.org>
---
drivers/fpga/dfl-afu-main.c | 14 ++++++++++++--
drivers/fpga/dfl-fme-main.c | 11 ++++++++---
drivers/fpga/dfl-fme-pr.c | 7 ++++++-
drivers/fpga/dfl-fme.h | 3 ++-
drivers/fpga/dfl.c | 21 +++++++++++++++++++--
drivers/fpga/dfl.h | 21 +++++++++++++++------
6 files changed, 62 insertions(+), 15 deletions(-)
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index 82fd80a..2916876 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -440,6 +440,11 @@ static void port_hdr_uinit(struct platform_device *pdev,
return ret;
}
+static const struct dfl_feature_id port_hdr_id_table[] = {
+ {.id = PORT_FEATURE_ID_HEADER,},
+ {0,}
+};
+
static const struct dfl_feature_ops port_hdr_ops = {
.init = port_hdr_init,
.uinit = port_hdr_uinit,
@@ -500,6 +505,11 @@ static void port_afu_uinit(struct platform_device *pdev,
sysfs_remove_files(&pdev->dev.kobj, port_afu_attrs);
}
+static const struct dfl_feature_id port_afu_id_table[] = {
+ {.id = PORT_FEATURE_ID_AFU,},
+ {0,}
+};
+
static const struct dfl_feature_ops port_afu_ops = {
.init = port_afu_init,
.uinit = port_afu_uinit,
@@ -507,11 +517,11 @@ static void port_afu_uinit(struct platform_device *pdev,
static struct dfl_feature_driver port_feature_drvs[] = {
{
- .id = PORT_FEATURE_ID_HEADER,
+ .id_table = port_hdr_id_table,
.ops = &port_hdr_ops,
},
{
- .id = PORT_FEATURE_ID_AFU,
+ .id_table = port_afu_id_table,
.ops = &port_afu_ops,
},
{
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 8b2a337..38c6342 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -158,6 +158,11 @@ static long fme_hdr_ioctl(struct platform_device *pdev,
return -ENODEV;
}
+static const struct dfl_feature_id fme_hdr_id_table[] = {
+ {.id = FME_FEATURE_ID_HEADER,},
+ {0,}
+};
+
static const struct dfl_feature_ops fme_hdr_ops = {
.init = fme_hdr_init,
.uinit = fme_hdr_uinit,
@@ -166,12 +171,12 @@ static long fme_hdr_ioctl(struct platform_device *pdev,
static struct dfl_feature_driver fme_feature_drvs[] = {
{
- .id = FME_FEATURE_ID_HEADER,
+ .id_table = fme_hdr_id_table,
.ops = &fme_hdr_ops,
},
{
- .id = FME_FEATURE_ID_PR_MGMT,
- .ops = &pr_mgmt_ops,
+ .id_table = fme_pr_mgmt_id_table,
+ .ops = &fme_pr_mgmt_ops,
},
{
.ops = NULL,
diff --git a/drivers/fpga/dfl-fme-pr.c b/drivers/fpga/dfl-fme-pr.c
index cd94ba8..52f1745 100644
--- a/drivers/fpga/dfl-fme-pr.c
+++ b/drivers/fpga/dfl-fme-pr.c
@@ -483,7 +483,12 @@ static long fme_pr_ioctl(struct platform_device *pdev,
return ret;
}
-const struct dfl_feature_ops pr_mgmt_ops = {
+const struct dfl_feature_id fme_pr_mgmt_id_table[] = {
+ {.id = FME_FEATURE_ID_PR_MGMT,},
+ {0}
+};
+
+const struct dfl_feature_ops fme_pr_mgmt_ops = {
.init = pr_mgmt_init,
.uinit = pr_mgmt_uinit,
.ioctl = fme_pr_ioctl,
diff --git a/drivers/fpga/dfl-fme.h b/drivers/fpga/dfl-fme.h
index de20755..7a021c4 100644
--- a/drivers/fpga/dfl-fme.h
+++ b/drivers/fpga/dfl-fme.h
@@ -35,6 +35,7 @@ struct dfl_fme {
struct dfl_feature_platform_data *pdata;
};
-extern const struct dfl_feature_ops pr_mgmt_ops;
+extern const struct dfl_feature_ops fme_pr_mgmt_ops;
+extern const struct dfl_feature_id fme_pr_mgmt_id_table[];
#endif /* __DFL_FME_H */
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index c5aa287..65f91ef 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -14,6 +14,8 @@
#include "dfl.h"
+#define DRV_VERSION "0.8"
+
static DEFINE_MUTEX(dfl_id_mutex);
/*
@@ -274,6 +276,21 @@ static int dfl_feature_instance_init(struct platform_device *pdev,
return ret;
}
+static bool dfl_feature_drv_match(struct dfl_feature *feature,
+ struct dfl_feature_driver *driver)
+{
+ const struct dfl_feature_id *ids = driver->id_table;
+
+ if (ids) {
+ while (ids->id) {
+ if (ids->id == feature->id)
+ return true;
+ ids++;
+ }
+ }
+ return false;
+}
+
/**
* dfl_fpga_dev_feature_init - init for sub features of dfl feature device
* @pdev: feature device.
@@ -294,8 +311,7 @@ int dfl_fpga_dev_feature_init(struct platform_device *pdev,
while (drv->ops) {
dfl_fpga_dev_for_each_feature(pdata, feature) {
- /* match feature and drv using id */
- if (feature->id == drv->id) {
+ if (dfl_feature_drv_match(feature, drv)) {
ret = dfl_feature_instance_init(pdev, pdata,
feature, drv);
if (ret)
@@ -1164,3 +1180,4 @@ static void __exit dfl_fpga_exit(void)
MODULE_DESCRIPTION("FPGA Device Feature List (DFL) Support");
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index 3c5dc3a..fbc57f0 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -30,8 +30,8 @@
/* plus one for fme device */
#define MAX_DFL_FEATURE_DEV_NUM (MAX_DFL_FPGA_PORT_NUM + 1)
-/* Reserved 0x0 for Header Group Register and 0xff for AFU */
-#define FEATURE_ID_FIU_HEADER 0x0
+/* Reserved 0xfe for Header Group Register and 0xff for AFU */
+#define FEATURE_ID_FIU_HEADER 0xfe
#define FEATURE_ID_AFU 0xff
#define FME_FEATURE_ID_HEADER FEATURE_ID_FIU_HEADER
@@ -169,13 +169,22 @@ struct dfl_fpga_port_ops {
int dfl_fpga_check_port_id(struct platform_device *pdev, void *pport_id);
/**
- * struct dfl_feature_driver - sub feature's driver
+ * struct dfl_feature_id - dfl private feature id
*
- * @id: sub feature id.
- * @ops: ops of this sub feature.
+ * @id: unique dfl private feature id.
*/
-struct dfl_feature_driver {
+struct dfl_feature_id {
u64 id;
+};
+
+/**
+ * struct dfl_feature_driver - dfl private feature driver
+ *
+ * @id_table: id_table for dfl private features supported by this driver.
+ * @ops: ops of this dfl private feature driver.
+ */
+struct dfl_feature_driver {
+ const struct dfl_feature_id *id_table;
const struct dfl_feature_ops *ops;
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 11/18] fpga: dfl: afu: export __port_enable/disable function.
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel; +Cc: linux-api, Wu Hao, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
As these two functions are used by other private features. e.g.
in error reporting private feature, it requires to check port status
and reset port for error clearing.
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Moritz Fischer <mdf@kernel.org>
Acked-by: Alan Tull <atull@kernel.org>
---
drivers/fpga/dfl-afu-main.c | 25 ++++++++++++++-----------
drivers/fpga/dfl-afu.h | 3 +++
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index 2916876..e727d9b 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -24,14 +24,16 @@
#define DRV_VERSION "0.8"
/**
- * port_enable - enable a port
+ * __port_enable - enable a port
* @pdev: port platform device.
*
* Enable Port by clear the port soft reset bit, which is set by default.
* The AFU is unable to respond to any MMIO access while in reset.
- * port_enable function should only be used after port_disable function.
+ * __port_enable function should only be used after __port_disable function.
+ *
+ * The caller needs to hold lock for protection.
*/
-static void port_enable(struct platform_device *pdev)
+void __port_enable(struct platform_device *pdev)
{
struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
void __iomem *base;
@@ -54,13 +56,14 @@ static void port_enable(struct platform_device *pdev)
#define RST_POLL_TIMEOUT 1000 /* us */
/**
- * port_disable - disable a port
+ * __port_disable - disable a port
* @pdev: port platform device.
*
- * Disable Port by setting the port soft reset bit, it puts the port into
- * reset.
+ * Disable Port by setting the port soft reset bit, it puts the port into reset.
+ *
+ * The caller needs to hold lock for protection.
*/
-static int port_disable(struct platform_device *pdev)
+int __port_disable(struct platform_device *pdev)
{
struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
void __iomem *base;
@@ -106,9 +109,9 @@ static int __port_reset(struct platform_device *pdev)
{
int ret;
- ret = port_disable(pdev);
+ ret = __port_disable(pdev);
if (!ret)
- port_enable(pdev);
+ __port_enable(pdev);
return ret;
}
@@ -810,9 +813,9 @@ static int port_enable_set(struct platform_device *pdev, bool enable)
mutex_lock(&pdata->lock);
if (enable)
- port_enable(pdev);
+ __port_enable(pdev);
else
- ret = port_disable(pdev);
+ ret = __port_disable(pdev);
mutex_unlock(&pdata->lock);
return ret;
diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
index 0c7630a..35e60c5 100644
--- a/drivers/fpga/dfl-afu.h
+++ b/drivers/fpga/dfl-afu.h
@@ -79,6 +79,9 @@ struct dfl_afu {
struct dfl_feature_platform_data *pdata;
};
+void __port_enable(struct platform_device *pdev);
+int __port_disable(struct platform_device *pdev);
+
void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
int afu_mmio_region_add(struct dfl_feature_platform_data *pdata,
u32 region_index, u64 region_size, u64 phys, u32 flags);
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 12/18] fpga: dfl: afu: add error reporting support.
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel; +Cc: linux-api, Wu Hao, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
Error reporting is one important private feature, it reports error
detected on port and accelerated function unit (AFU). It introduces
several sysfs interfaces to allow userspace to check and clear
errors detected by hardware.
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
---
v2: add more error code description for error clear sysfs in doc.
return -EINVAL instead of -EBUSY when input error code doesn't
match in error clear sysfs.
---
Documentation/ABI/testing/sysfs-platform-dfl-port | 39 ++++
drivers/fpga/Makefile | 1 +
drivers/fpga/dfl-afu-error.c | 225 ++++++++++++++++++++++
drivers/fpga/dfl-afu-main.c | 4 +
drivers/fpga/dfl-afu.h | 4 +
5 files changed, 273 insertions(+)
create mode 100644 drivers/fpga/dfl-afu-error.c
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port b/Documentation/ABI/testing/sysfs-platform-dfl-port
index 71f4892..8e3eed5 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-port
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
@@ -79,3 +79,42 @@ KernelVersion: 5.2
Contact: Wu Hao <hao.wu@intel.com>
Description: Read-only. Read this file to get the status of issued command
to userclck_freqcntrcmd.
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/revision
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the revision of this error
+ reporting private feature.
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/errors
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get errors detected on port and
+ Accelerated Function Unit (AFU).
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/first_error
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/first_malformed_req
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first malformed request
+ captured by hardware.
+
+What: /sys/bus/platform/devices/dfl-port.0/errors/clear
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Write-only. Write error code to this file to clear errors.
+ Write fails with -EINVAL if input parsing fails or input error
+ code doesn't match.
+ Write fails with -EBUSY or -ETIMEDOUT if error can't be cleared
+ as hardware is in low power state (-EBUSY) or not responding
+ (-ETIMEDOUT).
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index c0dd4c8..f1f0af7 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o
dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
+dfl-afu-objs += dfl-afu-error.o
# Drivers for FPGAs which implement DFL
obj-$(CONFIG_FPGA_DFL_PCI) += dfl-pci.o
diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
new file mode 100644
index 0000000..ed2be1d
--- /dev/null
+++ b/drivers/fpga/dfl-afu-error.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Accelerated Function Unit (AFU) Error Reporting
+ *
+ * Copyright 2019 Intel Corporation, Inc.
+ *
+ * Authors:
+ * Wu Hao <hao.wu@linux.intel.com>
+ * Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ * Joseph Grecco <joe.grecco@intel.com>
+ * Enno Luebbers <enno.luebbers@intel.com>
+ * Tim Whisonant <tim.whisonant@intel.com>
+ * Ananda Ravuri <ananda.ravuri@intel.com>
+ * Mitchel Henry <henry.mitchel@intel.com>
+ */
+
+#include <linux/uaccess.h>
+
+#include "dfl-afu.h"
+
+#define PORT_ERROR_MASK 0x8
+#define PORT_ERROR 0x10
+#define PORT_FIRST_ERROR 0x18
+#define PORT_MALFORMED_REQ0 0x20
+#define PORT_MALFORMED_REQ1 0x28
+
+#define ERROR_MASK GENMASK_ULL(63, 0)
+
+/* mask or unmask port errors by the error mask register. */
+static void __port_err_mask(struct device *dev, bool mask)
+{
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
+
+ writeq(mask ? ERROR_MASK : 0, base + PORT_ERROR_MASK);
+}
+
+/* clear port errors. */
+static int __port_err_clear(struct device *dev, u64 err)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ void __iomem *base_err, *base_hdr;
+ int ret;
+ u64 v;
+
+ base_err = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
+ base_hdr = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
+
+ /*
+ * clear Port Errors
+ *
+ * - Check for AP6 State
+ * - Halt Port by keeping Port in reset
+ * - Set PORT Error mask to all 1 to mask errors
+ * - Clear all errors
+ * - Set Port mask to all 0 to enable errors
+ * - All errors start capturing new errors
+ * - Enable Port by pulling the port out of reset
+ */
+
+ /* if device is still in AP6 power state, can not clear any error. */
+ v = readq(base_hdr + PORT_HDR_STS);
+ if (FIELD_GET(PORT_STS_PWR_STATE, v) == PORT_STS_PWR_STATE_AP6) {
+ dev_err(dev, "Could not clear errors, device in AP6 state.\n");
+ return -EBUSY;
+ }
+
+ /* Halt Port by keeping Port in reset */
+ ret = __port_disable(pdev);
+ if (ret)
+ return ret;
+
+ /* Mask all errors */
+ __port_err_mask(dev, true);
+
+ /* Clear errors if err input matches with current port errors.*/
+ v = readq(base_err + PORT_ERROR);
+
+ if (v == err) {
+ writeq(v, base_err + PORT_ERROR);
+
+ v = readq(base_err + PORT_FIRST_ERROR);
+ writeq(v, base_err + PORT_FIRST_ERROR);
+ } else {
+ ret = -EINVAL;
+ }
+
+ /* Clear mask */
+ __port_err_mask(dev, false);
+
+ /* Enable the Port by clear the reset */
+ __port_enable(pdev);
+
+ return ret;
+}
+
+static ssize_t revision_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dfl_feature_revision(base));
+}
+static DEVICE_ATTR_RO(revision);
+
+static ssize_t errors_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+ void __iomem *base;
+ u64 error;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
+
+ mutex_lock(&pdata->lock);
+ error = readq(base + PORT_ERROR);
+ mutex_unlock(&pdata->lock);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)error);
+}
+static DEVICE_ATTR_RO(errors);
+
+static ssize_t first_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+ void __iomem *base;
+ u64 error;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
+
+ mutex_lock(&pdata->lock);
+ error = readq(base + PORT_FIRST_ERROR);
+ mutex_unlock(&pdata->lock);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)error);
+}
+static DEVICE_ATTR_RO(first_error);
+
+static ssize_t first_malformed_req_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+ void __iomem *base;
+ u64 req0, req1;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
+
+ mutex_lock(&pdata->lock);
+ req0 = readq(base + PORT_MALFORMED_REQ0);
+ req1 = readq(base + PORT_MALFORMED_REQ1);
+ mutex_unlock(&pdata->lock);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%016llx%016llx\n",
+ (unsigned long long)req1, (unsigned long long)req0);
+}
+static DEVICE_ATTR_RO(first_malformed_req);
+
+static ssize_t clear_store(struct device *dev, struct device_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+ u64 value;
+ int ret;
+
+ if (kstrtou64(buff, 0, &value))
+ return -EINVAL;
+
+ mutex_lock(&pdata->lock);
+ ret = __port_err_clear(dev, value);
+ mutex_unlock(&pdata->lock);
+
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_WO(clear);
+
+static struct attribute *port_err_attrs[] = {
+ &dev_attr_revision.attr,
+ &dev_attr_errors.attr,
+ &dev_attr_first_error.attr,
+ &dev_attr_first_malformed_req.attr,
+ &dev_attr_clear.attr,
+ NULL,
+};
+
+static struct attribute_group port_err_attr_group = {
+ .attrs = port_err_attrs,
+ .name = "errors",
+};
+
+static int port_err_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+
+ dev_dbg(&pdev->dev, "PORT ERR Init.\n");
+
+ mutex_lock(&pdata->lock);
+ __port_err_mask(&pdev->dev, false);
+ mutex_unlock(&pdata->lock);
+
+ return sysfs_create_group(&pdev->dev.kobj, &port_err_attr_group);
+}
+
+static void port_err_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ dev_dbg(&pdev->dev, "PORT ERR UInit.\n");
+
+ sysfs_remove_group(&pdev->dev.kobj, &port_err_attr_group);
+}
+
+const struct dfl_feature_id port_err_id_table[] = {
+ {.id = PORT_FEATURE_ID_ERROR,},
+ {0,}
+};
+
+const struct dfl_feature_ops port_err_ops = {
+ .init = port_err_init,
+ .uinit = port_err_uinit,
+};
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index e727d9b..754729e 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -528,6 +528,10 @@ static void port_afu_uinit(struct platform_device *pdev,
.ops = &port_afu_ops,
},
{
+ .id_table = port_err_id_table,
+ .ops = &port_err_ops,
+ },
+ {
.ops = NULL,
}
};
diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
index 35e60c5..c3182a2 100644
--- a/drivers/fpga/dfl-afu.h
+++ b/drivers/fpga/dfl-afu.h
@@ -100,4 +100,8 @@ int afu_dma_map_region(struct dfl_feature_platform_data *pdata,
struct dfl_afu_dma_region *
afu_dma_region_find(struct dfl_feature_platform_data *pdata,
u64 iova, u64 size);
+
+extern const struct dfl_feature_ops port_err_ops;
+extern const struct dfl_feature_id port_err_id_table[];
+
#endif /* __DFL_AFU_H */
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 13/18] fpga: dfl: afu: add STP (SignalTap) support
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel; +Cc: linux-api, Wu Hao, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
STP (SignalTap) is one of the private features under the port for
debugging. This patch adds private feature driver support for it
to allow userspace applications to mmap related mmio region and
provide STP service.
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Moritz Fischer <mdf@kernel.org>
Acked-by: Alan Tull <atull@kernel.org>
---
drivers/fpga/dfl-afu-main.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index 754729e..14970a4 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -518,6 +518,36 @@ static void port_afu_uinit(struct platform_device *pdev,
.uinit = port_afu_uinit,
};
+static int port_stp_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct resource *res = &pdev->resource[feature->resource_index];
+
+ dev_dbg(&pdev->dev, "PORT STP Init.\n");
+
+ return afu_mmio_region_add(dev_get_platdata(&pdev->dev),
+ DFL_PORT_REGION_INDEX_STP,
+ resource_size(res), res->start,
+ DFL_PORT_REGION_MMAP | DFL_PORT_REGION_READ |
+ DFL_PORT_REGION_WRITE);
+}
+
+static void port_stp_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ dev_dbg(&pdev->dev, "PORT STP UInit.\n");
+}
+
+static const struct dfl_feature_id port_stp_id_table[] = {
+ {.id = PORT_FEATURE_ID_STP,},
+ {0,}
+};
+
+static const struct dfl_feature_ops port_stp_ops = {
+ .init = port_stp_init,
+ .uinit = port_stp_uinit,
+};
+
static struct dfl_feature_driver port_feature_drvs[] = {
{
.id_table = port_hdr_id_table,
@@ -532,6 +562,10 @@ static void port_afu_uinit(struct platform_device *pdev,
.ops = &port_err_ops,
},
{
+ .id_table = port_stp_id_table,
+ .ops = &port_stp_ops,
+ },
+ {
.ops = NULL,
}
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 14/18] fpga: dfl: fme: add capability sysfs interfaces
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel
Cc: linux-api, Wu Hao, Luwei Kang, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch adds 3 read-only sysfs interfaces for FPGA Management Engine
(FME) block for capabilities including cache_size, fabric_version and
socket_id.
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
---
Documentation/ABI/testing/sysfs-platform-dfl-fme | 23 ++++++++++++
drivers/fpga/dfl-fme-main.c | 48 ++++++++++++++++++++++++
2 files changed, 71 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index 8fa4feb..d1aa375 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -21,3 +21,26 @@ Contact: Wu Hao <hao.wu@intel.com>
Description: Read-only. It returns Bitstream (static FPGA region) meta
data, which includes the synthesis date, seed and other
information of this static FPGA region.
+
+What: /sys/bus/platform/devices/dfl-fme.0/cache_size
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns cache size of this FPGA device.
+
+What: /sys/bus/platform/devices/dfl-fme.0/fabric_version
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns fabric version of this FPGA device.
+ Userspace applications need this information to select
+ best data channels per different fabric design.
+
+What: /sys/bus/platform/devices/dfl-fme.0/socket_id
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns socket_id to indicate which socket
+ this FPGA belongs to, only valid for integrated solution.
+ User only needs this information, in case standard numa node
+ can't provide correct information.
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 38c6342..8339ee8 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -75,10 +75,58 @@ static ssize_t bitstream_metadata_show(struct device *dev,
}
static DEVICE_ATTR_RO(bitstream_metadata);
+static ssize_t cache_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ void __iomem *base;
+ u64 v;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, FME_FEATURE_ID_HEADER);
+
+ v = readq(base + FME_HDR_CAP);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(FME_CAP_CACHE_SIZE, v));
+}
+static DEVICE_ATTR_RO(cache_size);
+
+static ssize_t fabric_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ void __iomem *base;
+ u64 v;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, FME_FEATURE_ID_HEADER);
+
+ v = readq(base + FME_HDR_CAP);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(FME_CAP_FABRIC_VERID, v));
+}
+static DEVICE_ATTR_RO(fabric_version);
+
+static ssize_t socket_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ void __iomem *base;
+ u64 v;
+
+ base = dfl_get_feature_ioaddr_by_id(dev, FME_FEATURE_ID_HEADER);
+
+ v = readq(base + FME_HDR_CAP);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(FME_CAP_SOCKET_ID, v));
+}
+static DEVICE_ATTR_RO(socket_id);
+
static const struct attribute *fme_hdr_attrs[] = {
&dev_attr_ports_num.attr,
&dev_attr_bitstream_id.attr,
&dev_attr_bitstream_metadata.attr,
+ &dev_attr_cache_size.attr,
+ &dev_attr_fabric_version.attr,
+ &dev_attr_socket_id.attr,
NULL,
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 15/18] fpga: dfl: fme: add thermal management support
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel
Cc: linux-api, Wu Hao, Luwei Kang, Russ Weight, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch adds support to thermal management private feature for DFL
FPGA Management Engine (FME). This private feature driver registers
a hwmon for thermal/temperature monitoring (hwmon temp1_input).
If hardware automatic throttling is supported by this hardware, then
driver also exposes sysfs interfaces under hwmon for thresholds
(temp1_alarm/ crit/ emergency), threshold status (temp1_alarm_status/
temp1_crit_status) and throttling policy (temp1_alarm_policy).
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
---
v2: create a dfl_fme_thermal hwmon to expose thermal information.
move all sysfs interfaces under hwmon
tempareture --> hwmon temp1_input
threshold1 --> hwmon temp1_alarm
threshold2 --> hwmon temp1_crit
trip_threshold --> hwmon temp1_emergency
threshold1_status --> hwmon temp1_alarm_status
threshold2_status --> hwmon temp1_crit_status
threshold1_policy --> hwmon temp1_alarm_policy
---
Documentation/ABI/testing/sysfs-platform-dfl-fme | 64 +++++++
drivers/fpga/Kconfig | 2 +-
drivers/fpga/dfl-fme-main.c | 212 +++++++++++++++++++++++
3 files changed, 277 insertions(+), 1 deletion(-)
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index d1aa375..dfbd315 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -44,3 +44,67 @@ Description: Read-only. It returns socket_id to indicate which socket
this FPGA belongs to, only valid for integrated solution.
User only needs this information, in case standard numa node
can't provide correct information.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/name
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read this file to get the name of hwmon device, it
+ supports values:
+ 'dfl_fme_thermal' - thermal hwmon device name
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_input
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns FPGA device temperature in millidegrees
+ Celsius.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_alarm
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns hardware threshold1 temperature in
+ millidegrees Celsius. If temperature rises at or above this
+ threshold, hardware starts 50% or 90% throttling (see
+ 'temp1_alarm_policy').
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns hardware threshold2 temperature in
+ millidegrees Celsius. If temperature rises at or above this
+ threshold, hardware starts 100% throttling.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_emergency
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns hardware trip threshold temperature in
+ millidegrees Celsius. If temperature rises at or above this
+ threshold, a fatal event will be triggered to board management
+ controller (BMC) to shutdown FPGA.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_alarm_status
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if temperature is currently at or above
+ hardware threshold1 (see 'temp1_alarm'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_crit_status
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if temperature is currently at or above
+ hardware threshold2 (see 'temp1_crit'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_alarm_policy
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read this file to get the policy of hardware threshold1
+ (see 'temp1_alarm'). It only supports two values (policies):
+ 0 - AP2 state (90% throttling)
+ 1 - AP1 state (50% throttling)
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index c20445b..a6d7588 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -154,7 +154,7 @@ config FPGA_DFL
config FPGA_DFL_FME
tristate "FPGA DFL FME Driver"
- depends on FPGA_DFL
+ depends on FPGA_DFL && HWMON
help
The FPGA Management Engine (FME) is a feature device implemented
under Device Feature List (DFL) framework. Select this option to
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 8339ee8..b9a68b8 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -14,6 +14,8 @@
* Henry Mitchel <henry.mitchel@intel.com>
*/
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/uaccess.h>
@@ -217,6 +219,212 @@ static long fme_hdr_ioctl(struct platform_device *pdev,
.ioctl = fme_hdr_ioctl,
};
+#define FME_THERM_THRESHOLD 0x8
+#define TEMP_THRESHOLD1 GENMASK_ULL(6, 0)
+#define TEMP_THRESHOLD1_EN BIT_ULL(7)
+#define TEMP_THRESHOLD2 GENMASK_ULL(14, 8)
+#define TEMP_THRESHOLD2_EN BIT_ULL(15)
+#define TRIP_THRESHOLD GENMASK_ULL(30, 24)
+#define TEMP_THRESHOLD1_STATUS BIT_ULL(32) /* threshold1 reached */
+#define TEMP_THRESHOLD2_STATUS BIT_ULL(33) /* threshold2 reached */
+/* threshold1 policy: 0 - AP2 (90% throttle) / 1 - AP1 (50% throttle) */
+#define TEMP_THRESHOLD1_POLICY BIT_ULL(44)
+
+#define FME_THERM_RDSENSOR_FMT1 0x10
+#define FPGA_TEMPERATURE GENMASK_ULL(6, 0)
+
+#define FME_THERM_CAP 0x20
+#define THERM_NO_THROTTLE BIT_ULL(0)
+
+#define MD_PRE_DEG
+
+static bool fme_thermal_throttle_support(void __iomem *base)
+{
+ u64 v = readq(base + FME_THERM_CAP);
+
+ return FIELD_GET(THERM_NO_THROTTLE, v) ? false : true;
+}
+
+static umode_t thermal_hwmon_attrs_visible(const void *drvdata,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct dfl_feature *feature = drvdata;
+
+ /* temperature is always supported, and check hardware cap for others */
+ if (attr == hwmon_temp_input)
+ return 0444;
+
+ return fme_thermal_throttle_support(feature->ioaddr) ? 0444 : 0;
+}
+
+static int thermal_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ v = readq(feature->ioaddr + FME_THERM_RDSENSOR_FMT1);
+ *val = (long)(FIELD_GET(FPGA_TEMPERATURE, v) * 1000);
+ break;
+ case hwmon_temp_alarm:
+ v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+ *val = (long)(FIELD_GET(TEMP_THRESHOLD1, v) * 1000);
+ break;
+ case hwmon_temp_crit:
+ v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+ *val = (long)(FIELD_GET(TEMP_THRESHOLD2, v) * 1000);
+ break;
+ case hwmon_temp_emergency:
+ v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+ *val = (long)(FIELD_GET(TRIP_THRESHOLD, v) * 1000);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const struct hwmon_ops thermal_hwmon_ops = {
+ .is_visible = thermal_hwmon_attrs_visible,
+ .read = thermal_hwmon_read,
+};
+
+static const u32 thermal_hwmon_temp_config[] = {
+ HWMON_T_INPUT | HWMON_T_ALARM | HWMON_T_CRIT | HWMON_T_EMERGENCY,
+ 0
+};
+
+static const struct hwmon_channel_info hwmon_temp_info = {
+ .type = hwmon_temp,
+ .config = thermal_hwmon_temp_config,
+};
+
+static const struct hwmon_channel_info *thermal_hwmon_info[] = {
+ &hwmon_temp_info,
+ NULL
+};
+
+static const struct hwmon_chip_info thermal_hwmon_chip_info = {
+ .ops = &thermal_hwmon_ops,
+ .info = thermal_hwmon_info,
+};
+
+static ssize_t temp1_alarm_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(TEMP_THRESHOLD1_STATUS, v));
+}
+
+static ssize_t temp1_crit_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(TEMP_THRESHOLD2_STATUS, v));
+}
+
+static ssize_t temp1_alarm_policy_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_THERM_THRESHOLD);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(TEMP_THRESHOLD1_POLICY, v));
+}
+
+static DEVICE_ATTR_RO(temp1_alarm_status);
+static DEVICE_ATTR_RO(temp1_crit_status);
+static DEVICE_ATTR_RO(temp1_alarm_policy);
+
+static struct attribute *thermal_extra_attrs[] = {
+ &dev_attr_temp1_alarm_status.attr,
+ &dev_attr_temp1_crit_status.attr,
+ &dev_attr_temp1_alarm_policy.attr,
+ NULL,
+};
+
+static umode_t thermal_extra_attrs_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+
+ return fme_thermal_throttle_support(feature->ioaddr) ? attr->mode : 0;
+}
+
+static const struct attribute_group thermal_extra_group = {
+ .attrs = thermal_extra_attrs,
+ .is_visible = thermal_extra_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(thermal_extra);
+
+static int fme_thermal_mgmt_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct device *hwmon;
+
+ dev_dbg(&pdev->dev, "FME Thermal Management Init.\n");
+
+ /*
+ * create hwmon to allow userspace monitoring temperature and other
+ * threshold information.
+ *
+ * temp1_alarm -> hardware threshold 1 -> 50% or 90% throttling
+ * temp1_crit -> hardware threshold 2 -> 100% throttling
+ * temp1_emergency -> hardware trip_threshold to shutdown FPGA
+ *
+ * create device specific sysfs interfaces, e.g. read temp1_alarm_policy
+ * to understand the actual hardware throttling action (50% vs 90%).
+ *
+ * If hardware doesn't support automatic throttling per thresholds,
+ * then all above sysfs interfaces are not visible except temp1_input
+ * for temperature.
+ */
+ hwmon = devm_hwmon_device_register_with_info(&pdev->dev,
+ "dfl_fme_thermal", feature,
+ &thermal_hwmon_chip_info,
+ thermal_extra_groups);
+ if (IS_ERR(hwmon)) {
+ dev_err(&pdev->dev, "Fail to register thermal hwmon\n");
+ return PTR_ERR(hwmon);
+ }
+
+ return 0;
+}
+
+static void fme_thermal_mgmt_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ dev_dbg(&pdev->dev, "FME Thermal Management UInit.\n");
+}
+
+static const struct dfl_feature_id fme_thermal_mgmt_id_table[] = {
+ {.id = FME_FEATURE_ID_THERMAL_MGMT,},
+ {0,}
+};
+
+static const struct dfl_feature_ops fme_thermal_mgmt_ops = {
+ .init = fme_thermal_mgmt_init,
+ .uinit = fme_thermal_mgmt_uinit,
+};
+
static struct dfl_feature_driver fme_feature_drvs[] = {
{
.id_table = fme_hdr_id_table,
@@ -227,6 +435,10 @@ static long fme_hdr_ioctl(struct platform_device *pdev,
.ops = &fme_pr_mgmt_ops,
},
{
+ .id_table = fme_thermal_mgmt_id_table,
+ .ops = &fme_thermal_mgmt_ops,
+ },
+ {
.ops = NULL,
},
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 16/18] fpga: dfl: fme: add power management support
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel
Cc: linux-api, Wu Hao, Luwei Kang, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch adds support for power management private feature under
FPGA Management Engine (FME). This private feature driver registers
a hwmon for power (power1_input), thresholds information, e.g.
(power1_cap / crit) and also read-only sysfs interfaces for other
power management information. For configuration, user could write
threshold values via above power1_cap / crit sysfs interface
under hwmon too.
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
---
v2: create a dfl_fme_power hwmon to expose power sysfs interfaces.
move all sysfs interfaces under hwmon
consumed --> hwmon power1_input
threshold1 --> hwmon power1_cap
threshold2 --> hwmon power1_crit
threshold1_status --> hwmon power1_cap_status
threshold2_status --> hwmon power1_crit_status
xeon_limit --> hwmon power1_xeon_limit
fpga_limit --> hwmon power1_fpga_limit
ltr --> hwmon power1_ltr
---
Documentation/ABI/testing/sysfs-platform-dfl-fme | 67 ++++++
drivers/fpga/dfl-fme-main.c | 247 +++++++++++++++++++++++
2 files changed, 314 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index dfbd315..e2ba92d 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -52,6 +52,7 @@ Contact: Wu Hao <hao.wu@intel.com>
Description: Read-Only. Read this file to get the name of hwmon device, it
supports values:
'dfl_fme_thermal' - thermal hwmon device name
+ 'dfl_fme_power' - power hwmon device name
What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_input
Date: April 2019
@@ -108,3 +109,69 @@ Description: Read-Only. Read this file to get the policy of hardware threshold1
(see 'temp1_alarm'). It only supports two values (policies):
0 - AP2 state (90% throttling)
1 - AP1 state (50% throttling)
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_input
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns current FPGA power consumption in uW.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_cap
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to get current hardware power
+ threshold1 in uW. If power consumption rises at or above
+ this threshold, hardware starts 50% throttling.
+ Write this file to set current hardware power threshold1 in uW.
+ As hardware only accepts values in Watts, so input value will
+ be round down per Watts (< 1 watts part will be discarded).
+ Write fails with -EINVAL if input parsing fails or input isn't
+ in the valid range (0 - 127000000 uW).
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to get current hardware power
+ threshold2 in uW. If power consumption rises at or above
+ this threshold, hardware starts 90% throttling.
+ Write this file to set current hardware power threshold2 in uW.
+ As hardware only accepts values in Watts, so input value will
+ be round down per Watts (< 1 watts part will be discarded).
+ Write fails with -EINVAL if input parsing fails or input isn't
+ in the valid range (0 - 127000000 uW).
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_cap_status
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if power consumption is currently at or
+ above hardware threshold1 (see 'power1_cap'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit_status
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns 1 if power consumption is currently at or
+ above hardware threshold2 (see 'power1_crit'), otherwise 0.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_xeon_limit
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns power limit for XEON in uW.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_fpga_limit
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. It returns power limit for FPGA in uW.
+
+What: /sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_ltr
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get current Latency Tolerance
+ Reporting (ltr) value. This ltr impacts the CPU low power
+ state in integrated solution.
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index b9a68b8..7005316 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -425,6 +425,249 @@ static void fme_thermal_mgmt_uinit(struct platform_device *pdev,
.uinit = fme_thermal_mgmt_uinit,
};
+#define FME_PWR_STATUS 0x8
+#define FME_LATENCY_TOLERANCE BIT_ULL(18)
+#define PWR_CONSUMED GENMASK_ULL(17, 0)
+
+#define FME_PWR_THRESHOLD 0x10
+#define PWR_THRESHOLD1 GENMASK_ULL(6, 0) /* in Watts */
+#define PWR_THRESHOLD2 GENMASK_ULL(14, 8) /* in Watts */
+#define PWR_THRESHOLD_MAX 0x7f /* in Watts */
+#define PWR_THRESHOLD1_STATUS BIT_ULL(16)
+#define PWR_THRESHOLD2_STATUS BIT_ULL(17)
+
+#define FME_PWR_XEON_LIMIT 0x18
+#define XEON_PWR_LIMIT GENMASK_ULL(14, 0) /* in 0.1 Watts */
+#define XEON_PWR_EN BIT_ULL(15)
+#define FME_PWR_FPGA_LIMIT 0x20
+#define FPGA_PWR_LIMIT GENMASK_ULL(14, 0) /* in 0.1 Watts */
+#define FPGA_PWR_EN BIT_ULL(15)
+
+#define PWR_THRESHOLD_MAX_IN_UW (PWR_THRESHOLD_MAX * 1000000)
+
+static int power_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ switch (attr) {
+ case hwmon_power_input:
+ v = readq(feature->ioaddr + FME_PWR_STATUS);
+ *val = (long)(FIELD_GET(PWR_CONSUMED, v) * 1000000);
+ break;
+ case hwmon_power_cap:
+ v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+ *val = (long)(FIELD_GET(PWR_THRESHOLD1, v) * 1000000);
+ break;
+ case hwmon_power_crit:
+ v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+ *val = (long)(FIELD_GET(PWR_THRESHOLD2, v) * 1000000);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int power_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ int ret = 0;
+ u64 v;
+
+ if (val < 0 || val > PWR_THRESHOLD_MAX_IN_UW)
+ return -EINVAL;
+
+ val = val / 1000000;
+
+ mutex_lock(&pdata->lock);
+
+ switch (attr) {
+ case hwmon_power_cap:
+ v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+ v &= ~PWR_THRESHOLD1;
+ v |= FIELD_PREP(PWR_THRESHOLD1, val);
+ writeq(v, feature->ioaddr + FME_PWR_THRESHOLD);
+ break;
+ case hwmon_power_crit:
+ v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+ v &= ~PWR_THRESHOLD2;
+ v |= FIELD_PREP(PWR_THRESHOLD2, val);
+ writeq(v, feature->ioaddr + FME_PWR_THRESHOLD);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ mutex_unlock(&pdata->lock);
+
+ return ret;
+}
+
+static umode_t power_hwmon_attrs_visible(const void *drvdata,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ switch (attr) {
+ case hwmon_power_input:
+ return 0444;
+ case hwmon_power_cap:
+ case hwmon_power_crit:
+ return 0644;
+ }
+
+ return 0;
+}
+
+static const u32 power_hwmon_config[] = {
+ HWMON_P_INPUT | HWMON_P_CAP | HWMON_P_CRIT,
+ 0
+};
+
+static const struct hwmon_channel_info hwmon_pwr_info = {
+ .type = hwmon_power,
+ .config = power_hwmon_config,
+};
+
+static const struct hwmon_channel_info *power_hwmon_info[] = {
+ &hwmon_pwr_info,
+ NULL
+};
+
+static const struct hwmon_ops power_hwmon_ops = {
+ .is_visible = power_hwmon_attrs_visible,
+ .read = power_hwmon_read,
+ .write = power_hwmon_write,
+};
+
+static const struct hwmon_chip_info power_hwmon_chip_info = {
+ .ops = &power_hwmon_ops,
+ .info = power_hwmon_info,
+};
+
+static ssize_t power1_cap_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(PWR_THRESHOLD1_STATUS, v));
+}
+
+static ssize_t power1_crit_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_PWR_THRESHOLD);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(PWR_THRESHOLD2_STATUS, v));
+}
+
+static ssize_t power1_xeon_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u16 xeon_limit = 0;
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_PWR_XEON_LIMIT);
+
+ if (FIELD_GET(XEON_PWR_EN, v))
+ xeon_limit = FIELD_GET(XEON_PWR_LIMIT, v);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", xeon_limit * 100000);
+}
+
+static ssize_t power1_fpga_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u16 fpga_limit = 0;
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_PWR_FPGA_LIMIT);
+
+ if (FIELD_GET(FPGA_PWR_EN, v))
+ fpga_limit = FIELD_GET(FPGA_PWR_LIMIT, v);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", fpga_limit * 100000);
+}
+
+static ssize_t power1_ltr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dfl_feature *feature = dev_get_drvdata(dev);
+ u64 v;
+
+ v = readq(feature->ioaddr + FME_PWR_STATUS);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(FME_LATENCY_TOLERANCE, v));
+}
+
+static DEVICE_ATTR_RO(power1_cap_status);
+static DEVICE_ATTR_RO(power1_crit_status);
+static DEVICE_ATTR_RO(power1_xeon_limit);
+static DEVICE_ATTR_RO(power1_fpga_limit);
+static DEVICE_ATTR_RO(power1_ltr);
+
+static struct attribute *power_extra_attrs[] = {
+ &dev_attr_power1_cap_status.attr,
+ &dev_attr_power1_crit_status.attr,
+ &dev_attr_power1_xeon_limit.attr,
+ &dev_attr_power1_fpga_limit.attr,
+ &dev_attr_power1_ltr.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(power_extra);
+
+static int fme_power_mgmt_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct device *hwmon;
+
+ dev_dbg(&pdev->dev, "FME Power Management Init.\n");
+
+ hwmon = devm_hwmon_device_register_with_info(&pdev->dev,
+ "dfl_fme_power", feature,
+ &power_hwmon_chip_info,
+ power_extra_groups);
+ if (IS_ERR(hwmon)) {
+ dev_err(&pdev->dev, "Fail to register power hwmon\n");
+ return PTR_ERR(hwmon);
+ }
+
+ return 0;
+}
+
+static void fme_power_mgmt_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ dev_dbg(&pdev->dev, "FME Power Management UInit.\n");
+}
+
+static const struct dfl_feature_id fme_power_mgmt_id_table[] = {
+ {.id = FME_FEATURE_ID_POWER_MGMT,},
+ {0,}
+};
+
+static const struct dfl_feature_ops fme_power_mgmt_ops = {
+ .init = fme_power_mgmt_init,
+ .uinit = fme_power_mgmt_uinit,
+};
+
static struct dfl_feature_driver fme_feature_drvs[] = {
{
.id_table = fme_hdr_id_table,
@@ -439,6 +682,10 @@ static void fme_thermal_mgmt_uinit(struct platform_device *pdev,
.ops = &fme_thermal_mgmt_ops,
},
{
+ .id_table = fme_power_mgmt_id_table,
+ .ops = &fme_power_mgmt_ops,
+ },
+ {
.ops = NULL,
},
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 17/18] fpga: dfl: fme: add global error reporting support
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel
Cc: linux-api, Wu Hao, Luwei Kang, Ananda Ravuri, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch adds support for global error reporting for FPGA
Management Engine (FME), it introduces sysfs interfaces to
report different error detected by the hardware, and allow
user to clear errors or inject error for testing purpose.
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Ananda Ravuri <ananda.ravuri@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
---
v2: fix issues found in sysfs doc.
fix returned error code issues for writable sysfs interfaces.
(use -EINVAL if input doesn't match error code)
reorder the sysfs groups in code.
---
Documentation/ABI/testing/sysfs-platform-dfl-fme | 75 +++++
drivers/fpga/Makefile | 2 +-
drivers/fpga/dfl-fme-error.c | 385 +++++++++++++++++++++++
drivers/fpga/dfl-fme-main.c | 4 +
drivers/fpga/dfl-fme.h | 2 +
drivers/fpga/dfl.h | 2 +
6 files changed, 469 insertions(+), 1 deletion(-)
create mode 100644 drivers/fpga/dfl-fme-error.c
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index e2ba92d..503984b 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -175,3 +175,78 @@ Contact: Wu Hao <hao.wu@intel.com>
Description: Read-only. Read this file to get current Latency Tolerance
Reporting (ltr) value. This ltr impacts the CPU low power
state in integrated solution.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/revision
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the revision of this global
+ error reporting private feature.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie0_errors
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for errors detected on pcie0 link.
+ Write this file to clear errors logged in pcie0_errors. Write
+ fails with -EINVAL if input parsing fails or input error code
+ doesn't match.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie1_errors
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for errors detected on pcie1 link.
+ Write this file to clear errors logged in pcie1_errors. Write
+ fails with -EINVAL if input parsing fails or input error code
+ doesn't match.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/nonfatal_errors
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns non-fatal errors detected.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/catfatal_errors
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. It returns catastrophic and fatal errors detected.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/inject_error
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file to check errors injected. Write this
+ file to inject errors for testing purpose. Write fails with
+ -EINVAL if input parsing fails or input inject error code isn't
+ supported.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/errors
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get errors detected by hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/first_error
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the first error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/next_error
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-only. Read this file to get the second error detected by
+ hardware.
+
+What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/clear
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Write-only. Write error code to this file to clear all errors
+ logged in errors, first_error and next_error. Write fails with
+ -EINVAL if input parsing fails or input error code doesn't
+ match.
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index f1f0af7..1a9fa3d 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_FPGA_DFL_FME_BRIDGE) += dfl-fme-br.o
obj-$(CONFIG_FPGA_DFL_FME_REGION) += dfl-fme-region.o
obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o
-dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
+dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o dfl-fme-error.o
dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
dfl-afu-objs += dfl-afu-error.o
diff --git a/drivers/fpga/dfl-fme-error.c b/drivers/fpga/dfl-fme-error.c
new file mode 100644
index 0000000..772b53e
--- /dev/null
+++ b/drivers/fpga/dfl-fme-error.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Management Engine Error Management
+ *
+ * Copyright 2019 Intel Corporation, Inc.
+ *
+ * Authors:
+ * Kang Luwei <luwei.kang@intel.com>
+ * Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ * Wu Hao <hao.wu@intel.com>
+ * Joseph Grecco <joe.grecco@intel.com>
+ * Enno Luebbers <enno.luebbers@intel.com>
+ * Tim Whisonant <tim.whisonant@intel.com>
+ * Ananda Ravuri <ananda.ravuri@intel.com>
+ * Mitchel, Henry <henry.mitchel@intel.com>
+ */
+
+#include <linux/uaccess.h>
+
+#include "dfl.h"
+#include "dfl-fme.h"
+
+#define FME_ERROR_MASK 0x8
+#define FME_ERROR 0x10
+#define MBP_ERROR BIT_ULL(6)
+#define PCIE0_ERROR_MASK 0x18
+#define PCIE0_ERROR 0x20
+#define PCIE1_ERROR_MASK 0x28
+#define PCIE1_ERROR 0x30
+#define FME_FIRST_ERROR 0x38
+#define FME_NEXT_ERROR 0x40
+#define RAS_NONFAT_ERROR_MASK 0x48
+#define RAS_NONFAT_ERROR 0x50
+#define RAS_CATFAT_ERROR_MASK 0x58
+#define RAS_CATFAT_ERROR 0x60
+#define RAS_ERROR_INJECT 0x68
+#define INJECT_ERROR_MASK GENMASK_ULL(2, 0)
+
+static ssize_t revision_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dfl_feature_revision(base));
+}
+static DEVICE_ATTR_RO(revision);
+
+static ssize_t pcie0_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + PCIE0_ERROR));
+}
+
+static ssize_t pcie0_errors_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ int ret = 0;
+ u64 v, val;
+
+ if (kstrtou64(buf, 0, &val))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
+ writeq(GENMASK_ULL(63, 0), base + PCIE0_ERROR_MASK);
+
+ v = readq(base + PCIE0_ERROR);
+ if (val == v)
+ writeq(v, base + PCIE0_ERROR);
+ else
+ ret = -EINVAL;
+
+ writeq(0ULL, base + PCIE0_ERROR_MASK);
+ mutex_unlock(&pdata->lock);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_RW(pcie0_errors);
+
+static ssize_t pcie1_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + PCIE1_ERROR));
+}
+
+static ssize_t pcie1_errors_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ int ret = 0;
+ u64 v, val;
+
+ if (kstrtou64(buf, 0, &val))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
+ writeq(GENMASK_ULL(63, 0), base + PCIE1_ERROR_MASK);
+
+ v = readq(base + PCIE1_ERROR);
+ if (val == v)
+ writeq(v, base + PCIE1_ERROR);
+ else
+ ret = -EINVAL;
+
+ writeq(0ULL, base + PCIE1_ERROR_MASK);
+ mutex_unlock(&pdata->lock);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_RW(pcie1_errors);
+
+static ssize_t nonfatal_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + RAS_NONFAT_ERROR));
+}
+static DEVICE_ATTR_RO(nonfatal_errors);
+
+static ssize_t catfatal_errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + RAS_CATFAT_ERROR));
+}
+static DEVICE_ATTR_RO(catfatal_errors);
+
+static ssize_t inject_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ u64 v;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ v = readq(base + RAS_ERROR_INJECT);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)FIELD_GET(INJECT_ERROR_MASK, v));
+}
+
+static ssize_t inject_error_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ u8 inject_error;
+ u64 v;
+
+ if (kstrtou8(buf, 0, &inject_error))
+ return -EINVAL;
+
+ if (inject_error & ~INJECT_ERROR_MASK)
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + RAS_ERROR_INJECT);
+ v &= ~INJECT_ERROR_MASK;
+ v |= FIELD_PREP(INJECT_ERROR_MASK, inject_error);
+ writeq(v, base + RAS_ERROR_INJECT);
+ mutex_unlock(&pdata->lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(inject_error);
+
+static struct attribute *errors_attrs[] = {
+ &dev_attr_revision.attr,
+ &dev_attr_pcie0_errors.attr,
+ &dev_attr_pcie1_errors.attr,
+ &dev_attr_nonfatal_errors.attr,
+ &dev_attr_catfatal_errors.attr,
+ &dev_attr_inject_error.attr,
+ NULL,
+};
+
+static struct attribute_group errors_attr_group = {
+ .attrs = errors_attrs,
+};
+
+static ssize_t errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + FME_ERROR));
+}
+static DEVICE_ATTR_RO(errors);
+
+static ssize_t first_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + FME_FIRST_ERROR));
+}
+static DEVICE_ATTR_RO(first_error);
+
+static ssize_t next_error_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + FME_NEXT_ERROR));
+}
+static DEVICE_ATTR_RO(next_error);
+
+static ssize_t clear_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
+ struct device *err_dev = dev->parent;
+ void __iomem *base;
+ u64 v, val;
+ int ret = 0;
+
+ if (kstrtou64(buf, 0, &val))
+ return -EINVAL;
+
+ base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR);
+
+ mutex_lock(&pdata->lock);
+ writeq(GENMASK_ULL(63, 0), base + FME_ERROR_MASK);
+
+ v = readq(base + FME_ERROR);
+ if (val == v) {
+ writeq(v, base + FME_ERROR);
+ v = readq(base + FME_FIRST_ERROR);
+ writeq(v, base + FME_FIRST_ERROR);
+ v = readq(base + FME_NEXT_ERROR);
+ writeq(v, base + FME_NEXT_ERROR);
+ } else {
+ ret = -EINVAL;
+ }
+
+ /* Workaround: disable MBP_ERROR if feature revision is 0 */
+ writeq(dfl_feature_revision(base) ? 0ULL : MBP_ERROR,
+ base + FME_ERROR_MASK);
+ mutex_unlock(&pdata->lock);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_WO(clear);
+
+static struct attribute *fme_errors_attrs[] = {
+ &dev_attr_errors.attr,
+ &dev_attr_first_error.attr,
+ &dev_attr_next_error.attr,
+ &dev_attr_clear.attr,
+ NULL,
+};
+
+static struct attribute_group fme_errors_attr_group = {
+ .attrs = fme_errors_attrs,
+ .name = "fme-errors",
+};
+
+static const struct attribute_group *error_groups[] = {
+ &fme_errors_attr_group,
+ &errors_attr_group,
+ NULL
+};
+
+static void fme_error_enable(struct dfl_feature *feature)
+{
+ void __iomem *base = feature->ioaddr;
+
+ /* Workaround: disable MBP_ERROR if revision is 0 */
+ writeq(dfl_feature_revision(feature->ioaddr) ? 0ULL : MBP_ERROR,
+ base + FME_ERROR_MASK);
+ writeq(0ULL, base + PCIE0_ERROR_MASK);
+ writeq(0ULL, base + PCIE1_ERROR_MASK);
+ writeq(0ULL, base + RAS_NONFAT_ERROR_MASK);
+ writeq(0ULL, base + RAS_CATFAT_ERROR_MASK);
+}
+
+static void err_dev_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+static int fme_global_err_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct device *dev;
+ int ret = 0;
+
+ dev_dbg(&pdev->dev, "FME Global Error Reporting Init.\n");
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->parent = &pdev->dev;
+ dev->release = err_dev_release;
+ dev_set_name(dev, "errors");
+
+ fme_error_enable(feature);
+
+ ret = device_register(dev);
+ if (ret) {
+ put_device(dev);
+ return ret;
+ }
+
+ ret = sysfs_create_groups(&dev->kobj, error_groups);
+ if (ret) {
+ device_unregister(dev);
+ return ret;
+ }
+
+ feature->priv = dev;
+
+ return ret;
+}
+
+static void fme_global_err_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct device *dev = feature->priv;
+
+ dev_dbg(&pdev->dev, "FME Global Error Reporting UInit.\n");
+
+ sysfs_remove_groups(&dev->kobj, error_groups);
+ device_unregister(dev);
+}
+
+const struct dfl_feature_id fme_global_err_id_table[] = {
+ {.id = FME_FEATURE_ID_GLOBAL_ERR,},
+ {0,}
+};
+
+const struct dfl_feature_ops fme_global_err_ops = {
+ .init = fme_global_err_init,
+ .uinit = fme_global_err_uinit,
+};
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 7005316..1986b32 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -686,6 +686,10 @@ static void fme_power_mgmt_uinit(struct platform_device *pdev,
.ops = &fme_power_mgmt_ops,
},
{
+ .id_table = fme_global_err_id_table,
+ .ops = &fme_global_err_ops,
+ },
+ {
.ops = NULL,
},
};
diff --git a/drivers/fpga/dfl-fme.h b/drivers/fpga/dfl-fme.h
index 7a021c4..5fbe3f5 100644
--- a/drivers/fpga/dfl-fme.h
+++ b/drivers/fpga/dfl-fme.h
@@ -37,5 +37,7 @@ struct dfl_fme {
extern const struct dfl_feature_ops fme_pr_mgmt_ops;
extern const struct dfl_feature_id fme_pr_mgmt_id_table[];
+extern const struct dfl_feature_ops fme_global_err_ops;
+extern const struct dfl_feature_id fme_global_err_id_table[];
#endif /* __DFL_FME_H */
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index fbc57f0..6c32080 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -197,12 +197,14 @@ struct dfl_feature_driver {
* feature dev (platform device)'s reources.
* @ioaddr: mapped mmio resource address.
* @ops: ops of this sub feature.
+ * @priv: priv data of this feature.
*/
struct dfl_feature {
u64 id;
int resource_index;
void __iomem *ioaddr;
const struct dfl_feature_ops *ops;
+ void *priv;
};
#define DEV_STATUS_IN_USE 0
--
1.8.3.1
^ permalink raw reply related
* [PATCH v2 18/18] fpga: dfl: fme: add performance reporting support
From: Wu Hao @ 2019-04-29 8:55 UTC (permalink / raw)
To: atull, mdf, linux-fpga, linux-kernel
Cc: linux-api, Wu Hao, Luwei Kang, Xu Yilun
In-Reply-To: <1556528151-17221-1-git-send-email-hao.wu@intel.com>
This patch adds support for performance reporting private feature
for FPGA Management Engine (FME). Actually it supports 4 categories
performance counters, 'clock', 'cache', 'iommu' and 'fabric', user
could read the performance counter via exposed sysfs interfaces.
Please refer to sysfs doc for more details.
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
---
v2: improve sysfs doc
---
Documentation/ABI/testing/sysfs-platform-dfl-fme | 93 +++
drivers/fpga/Makefile | 1 +
drivers/fpga/dfl-fme-main.c | 4 +
drivers/fpga/dfl-fme-perf.c | 950 +++++++++++++++++++++++
drivers/fpga/dfl-fme.h | 2 +
drivers/fpga/dfl.c | 1 +
drivers/fpga/dfl.h | 2 +
7 files changed, 1053 insertions(+)
create mode 100644 drivers/fpga/dfl-fme-perf.c
diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme
index 503984b..a7f7eb6 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-fme
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme
@@ -250,3 +250,96 @@ Description: Write-only. Write error code to this file to clear all errors
logged in errors, first_error and next_error. Write fails with
-EINVAL if input parsing fails or input error code doesn't
match.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/clock
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read for Accelerator Function Unit (AFU) clock
+ counter.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/cache/freeze
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for the current status of 'cache'
+ category performance counters, and Write '1' or '0' to freeze
+ or unfreeze 'cache' performance counters.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/cache/<counter>
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read 'cache' category performance counters:
+ read_hit, read_miss, write_hit, write_miss, hold_request,
+ data_write_port_contention, tag_write_port_contention,
+ tx_req_stall, rx_req_stall and rx_eviction.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/iommu/freeze
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for the current status of 'iommu'
+ category performance counters, and Write '1' or '0' to freeze
+ or unfreeze 'iommu' performance counters.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/iommu/<sip_counter>
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read 'iommu' category 'sip' sub category
+ performance counters: iotlb_4k_hit, iotlb_2m_hit,
+ iotlb_1g_hit, slpwc_l3_hit, slpwc_l4_hit, rcc_hit,
+ rcc_miss, iotlb_4k_miss, iotlb_2m_miss, iotlb_1g_miss,
+ slpwc_l3_miss and slpwc_l4_miss.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/iommu/afu0/<counter>
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read 'iommu' category 'afuX' sub category
+ performance counters: read_transaction, write_transaction,
+ devtlb_read_hit, devtlb_write_hit, devtlb_4k_fill,
+ devtlb_2m_fill and devtlb_1g_fill.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/fabric/freeze
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for the current status of 'fabric'
+ category performance counters, and Write '1' or '0' to freeze
+ or unfreeze 'fabric' performance counters.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/fabric/<counter>
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read 'fabric' category performance counters:
+ pcie0_read, pcie0_write, pcie1_read, pcie1_write,
+ upi_read, upi_write and mmio_read.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/fabric/enable
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for current status of device level
+ fabric counters. Write "1" to enable device level fabric
+ counters. Once device level fabric counters are enabled, port
+ level fabric counters will be disabled automatically.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/fabric/port0/<counter>
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Only. Read 'fabric' category "portX" sub category
+ performance counters: pcie0_read, pcie0_write, pcie1_read,
+ pcie1_write, upi_read, upi_write and mmio_read.
+
+What: /sys/bus/platform/devices/dfl-fme.0/perf/fabric/port0/enable
+Date: April 2019
+KernelVersion: 5.2
+Contact: Wu Hao <hao.wu@intel.com>
+Description: Read-Write. Read this file for current status of port level
+ fabric counters. Write "1" to enable port level fabric counters.
+ Once port level fabric counters are enabled, device level fabric
+ counters will be disabled automatically.
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 1a9fa3d..7df3971 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_FPGA_DFL_FME_REGION) += dfl-fme-region.o
obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o
dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o dfl-fme-error.o
+dfl-fme-objs += dfl-fme-perf.o
dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
dfl-afu-objs += dfl-afu-error.o
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 1986b32..221f4ec 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -690,6 +690,10 @@ static void fme_power_mgmt_uinit(struct platform_device *pdev,
.ops = &fme_global_err_ops,
},
{
+ .id_table = fme_perf_id_table,
+ .ops = &fme_perf_ops,
+ },
+ {
.ops = NULL,
},
};
diff --git a/drivers/fpga/dfl-fme-perf.c b/drivers/fpga/dfl-fme-perf.c
new file mode 100644
index 0000000..035bb68
--- /dev/null
+++ b/drivers/fpga/dfl-fme-perf.c
@@ -0,0 +1,950 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Management Engine (FME) Global Performance Reporting
+ *
+ * Copyright 2019 Intel Corporation, Inc.
+ *
+ * Authors:
+ * Kang Luwei <luwei.kang@intel.com>
+ * Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ * Wu Hao <hao.wu@intel.com>
+ * Joseph Grecco <joe.grecco@intel.com>
+ * Enno Luebbers <enno.luebbers@intel.com>
+ * Tim Whisonant <tim.whisonant@intel.com>
+ * Ananda Ravuri <ananda.ravuri@intel.com>
+ * Mitchel, Henry <henry.mitchel@intel.com>
+ */
+
+#include "dfl.h"
+#include "dfl-fme.h"
+
+/*
+ * Performance Counter Registers for Cache.
+ *
+ * Cache Events are listed below as CACHE_EVNT_*.
+ */
+#define CACHE_CTRL 0x8
+#define CACHE_RESET_CNTR BIT_ULL(0)
+#define CACHE_FREEZE_CNTR BIT_ULL(8)
+#define CACHE_CTRL_EVNT GENMASK_ULL(19, 16)
+#define CACHE_EVNT_RD_HIT 0x0
+#define CACHE_EVNT_WR_HIT 0x1
+#define CACHE_EVNT_RD_MISS 0x2
+#define CACHE_EVNT_WR_MISS 0x3
+#define CACHE_EVNT_RSVD 0x4
+#define CACHE_EVNT_HOLD_REQ 0x5
+#define CACHE_EVNT_DATA_WR_PORT_CONTEN 0x6
+#define CACHE_EVNT_TAG_WR_PORT_CONTEN 0x7
+#define CACHE_EVNT_TX_REQ_STALL 0x8
+#define CACHE_EVNT_RX_REQ_STALL 0x9
+#define CACHE_EVNT_EVICTIONS 0xa
+#define CACHE_EVNT_MAX CACHE_EVNT_EVICTIONS
+#define CACHE_CHANNEL_SEL BIT_ULL(20)
+#define CACHE_CHANNEL_RD 0
+#define CACHE_CHANNEL_WR 1
+#define CACHE_CHANNEL_MAX 2
+#define CACHE_CNTR0 0x10
+#define CACHE_CNTR1 0x18
+#define CACHE_CNTR_EVNT_CNTR GENMASK_ULL(47, 0)
+#define CACHE_CNTR_EVNT GENMASK_ULL(63, 60)
+
+/*
+ * Performance Counter Registers for Fabric.
+ *
+ * Fabric Events are listed below as FAB_EVNT_*
+ */
+#define FAB_CTRL 0x20
+#define FAB_RESET_CNTR BIT_ULL(0)
+#define FAB_FREEZE_CNTR BIT_ULL(8)
+#define FAB_CTRL_EVNT GENMASK_ULL(19, 16)
+#define FAB_EVNT_PCIE0_RD 0x0
+#define FAB_EVNT_PCIE0_WR 0x1
+#define FAB_EVNT_PCIE1_RD 0x2
+#define FAB_EVNT_PCIE1_WR 0x3
+#define FAB_EVNT_UPI_RD 0x4
+#define FAB_EVNT_UPI_WR 0x5
+#define FAB_EVNT_MMIO_RD 0x6
+#define FAB_EVNT_MMIO_WR 0x7
+#define FAB_EVNT_MAX FAB_EVNT_MMIO_WR
+#define FAB_PORT_ID GENMASK_ULL(21, 20)
+#define FAB_PORT_FILTER BIT_ULL(23)
+#define FAB_PORT_FILTER_DISABLE 0
+#define FAB_PORT_FILTER_ENABLE 1
+#define FAB_CNTR 0x28
+#define FAB_CNTR_EVNT_CNTR GENMASK_ULL(59, 0)
+#define FAB_CNTR_EVNT GENMASK_ULL(63, 60)
+
+/*
+ * Performance Counter Registers for Clock.
+ *
+ * Clock Counter can't be reset or frozen by SW.
+ */
+#define CLK_CNTR 0x30
+
+/*
+ * Performance Counter Registers for IOMMU / VT-D.
+ *
+ * VT-D Events are listed below as VTD_EVNT_* and VTD_SIP_EVNT_*
+ */
+#define VTD_CTRL 0x38
+#define VTD_RESET_CNTR BIT_ULL(0)
+#define VTD_FREEZE_CNTR BIT_ULL(8)
+#define VTD_CTRL_EVNT GENMASK_ULL(19, 16)
+#define VTD_EVNT_AFU_MEM_RD_TRANS 0x0
+#define VTD_EVNT_AFU_MEM_WR_TRANS 0x1
+#define VTD_EVNT_AFU_DEVTLB_RD_HIT 0x2
+#define VTD_EVNT_AFU_DEVTLB_WR_HIT 0x3
+#define VTD_EVNT_DEVTLB_4K_FILL 0x4
+#define VTD_EVNT_DEVTLB_2M_FILL 0x5
+#define VTD_EVNT_DEVTLB_1G_FILL 0x6
+#define VTD_EVNT_MAX VTD_EVNT_DEVTLB_1G_FILL
+#define VTD_CNTR 0x40
+#define VTD_CNTR_EVNT GENMASK_ULL(63, 60)
+#define VTD_CNTR_EVNT_CNTR GENMASK_ULL(47, 0)
+#define VTD_SIP_CTRL 0x48
+#define VTD_SIP_RESET_CNTR BIT_ULL(0)
+#define VTD_SIP_FREEZE_CNTR BIT_ULL(8)
+#define VTD_SIP_CTRL_EVNT GENMASK_ULL(19, 16)
+#define VTD_SIP_EVNT_IOTLB_4K_HIT 0x0
+#define VTD_SIP_EVNT_IOTLB_2M_HIT 0x1
+#define VTD_SIP_EVNT_IOTLB_1G_HIT 0x2
+#define VTD_SIP_EVNT_SLPWC_L3_HIT 0x3
+#define VTD_SIP_EVNT_SLPWC_L4_HIT 0x4
+#define VTD_SIP_EVNT_RCC_HIT 0x5
+#define VTD_SIP_EVNT_IOTLB_4K_MISS 0x6
+#define VTD_SIP_EVNT_IOTLB_2M_MISS 0x7
+#define VTD_SIP_EVNT_IOTLB_1G_MISS 0x8
+#define VTD_SIP_EVNT_SLPWC_L3_MISS 0x9
+#define VTD_SIP_EVNT_SLPWC_L4_MISS 0xa
+#define VTD_SIP_EVNT_RCC_MISS 0xb
+#define VTD_SIP_EVNT_MAX VTD_SIP_EVNT_RCC_MISS
+#define VTD_SIP_CNTR 0X50
+#define VTD_SIP_CNTR_EVNT GENMASK_ULL(63, 60)
+#define VTD_SIP_CNTR_EVNT_CNTR GENMASK_ULL(47, 0)
+
+#define PERF_OBJ_ROOT_ID (~0)
+
+#define PERF_TIMEOUT 30
+
+/**
+ * struct perf_object - object of performance counter
+ *
+ * @id: instance id. PERF_OBJ_ROOT_ID indicates it is a parent object which
+ * counts performance counters for all instances.
+ * @attr_groups: the sysfs files are associated with this object.
+ * @feature: pointer to related private feature.
+ * @node: used to link itself to parent's children list.
+ * @children: used to link its children objects together.
+ * @kobj: generic kobject interface.
+ *
+ * 'node' and 'children' are used to construct parent-children hierarchy.
+ */
+struct perf_object {
+ int id;
+ const struct attribute_group **attr_groups;
+ struct dfl_feature *feature;
+
+ struct list_head node;
+ struct list_head children;
+ struct kobject kobj;
+};
+
+/**
+ * struct perf_obj_attribute - attribute of perf object
+ *
+ * @attr: attribute of this perf object.
+ * @show: show callback for sysfs attribute.
+ * @store: store callback for sysfs attribute.
+ */
+struct perf_obj_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct perf_object *pobj, char *buf);
+ ssize_t (*store)(struct perf_object *pobj,
+ const char *buf, size_t n);
+};
+
+#define to_perf_obj_attr(_attr) \
+ container_of(_attr, struct perf_obj_attribute, attr)
+#define to_perf_obj(_kobj) \
+ container_of(_kobj, struct perf_object, kobj)
+
+#define PERF_OBJ_ATTR(_name, _filename, _mode, _show, _store) \
+struct perf_obj_attribute perf_obj_attr_##_name = \
+ __ATTR(_filename, _mode, _show, _store)
+
+#define PERF_OBJ_ATTR_RW(_name) \
+ struct perf_obj_attribute perf_obj_attr_##_name = __ATTR_RW(_name)
+#define PERF_OBJ_ATTR_RO(_name) \
+ struct perf_obj_attribute perf_obj_attr_##_name = __ATTR_RO(_name)
+#define PERF_OBJ_ATTR_WO(_name) \
+ struct perf_obj_attribute perf_obj_attr_##_name = __ATTR_WO(_name)
+
+static ssize_t perf_obj_attr_show(struct kobject *kobj,
+ struct attribute *__attr, char *buf)
+{
+ struct perf_obj_attribute *attr = to_perf_obj_attr(__attr);
+ struct perf_object *pobj = to_perf_obj(kobj);
+ ssize_t ret = -EIO;
+
+ if (attr->show)
+ ret = attr->show(pobj, buf);
+ return ret;
+}
+
+static ssize_t perf_obj_attr_store(struct kobject *kobj,
+ struct attribute *__attr,
+ const char *buf, size_t n)
+{
+ struct perf_obj_attribute *attr = to_perf_obj_attr(__attr);
+ struct perf_object *pobj = to_perf_obj(kobj);
+ ssize_t ret = -EIO;
+
+ if (attr->store)
+ ret = attr->store(pobj, buf, n);
+ return ret;
+}
+
+static const struct sysfs_ops perf_obj_sysfs_ops = {
+ .show = perf_obj_attr_show,
+ .store = perf_obj_attr_store,
+};
+
+static void perf_obj_release(struct kobject *kobj)
+{
+ kfree(to_perf_obj(kobj));
+}
+
+static struct kobj_type perf_obj_ktype = {
+ .sysfs_ops = &perf_obj_sysfs_ops,
+ .release = perf_obj_release,
+};
+
+static struct perf_object *
+create_perf_obj(struct dfl_feature *feature, struct kobject *parent, int id,
+ const struct attribute_group **groups, const char *name)
+{
+ struct perf_object *pobj;
+ int ret;
+
+ pobj = kzalloc(sizeof(*pobj), GFP_KERNEL);
+ if (!pobj)
+ return ERR_PTR(-ENOMEM);
+
+ pobj->id = id;
+ pobj->feature = feature;
+ pobj->attr_groups = groups;
+ INIT_LIST_HEAD(&pobj->node);
+ INIT_LIST_HEAD(&pobj->children);
+
+ if (id != PERF_OBJ_ROOT_ID)
+ ret = kobject_init_and_add(&pobj->kobj, &perf_obj_ktype,
+ parent, "%s%d", name, id);
+ else
+ ret = kobject_init_and_add(&pobj->kobj, &perf_obj_ktype,
+ parent, "%s", name);
+ if (ret)
+ goto put_exit;
+
+ if (pobj->attr_groups) {
+ ret = sysfs_create_groups(&pobj->kobj, pobj->attr_groups);
+ if (ret)
+ goto del_exit;
+ }
+
+ return pobj;
+
+del_exit:
+ kobject_del(&pobj->kobj);
+put_exit:
+ kobject_put(&pobj->kobj);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Counter Sysfs Interface for Clock.
+ */
+static ssize_t clock_show(struct perf_object *pobj, char *buf)
+{
+ void __iomem *base = pobj->feature->ioaddr;
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long)readq(base + CLK_CNTR));
+}
+static PERF_OBJ_ATTR_RO(clock);
+
+static struct attribute *clock_attrs[] = {
+ &perf_obj_attr_clock.attr,
+ NULL,
+};
+
+static struct attribute_group clock_attr_group = {
+ .attrs = clock_attrs,
+};
+
+static const struct attribute_group *perf_dev_attr_groups[] = {
+ &clock_attr_group,
+ NULL,
+};
+
+static void destroy_perf_obj(struct perf_object *pobj)
+{
+ struct perf_object *obj, *obj_tmp;
+
+ list_for_each_entry_safe(obj, obj_tmp, &pobj->children, node)
+ destroy_perf_obj(obj);
+
+ list_del(&pobj->node);
+ if (pobj->attr_groups)
+ sysfs_remove_groups(&pobj->kobj, pobj->attr_groups);
+ kobject_put(&pobj->kobj);
+}
+
+static struct perf_object *create_perf_dev(struct dfl_feature *feature)
+{
+ struct platform_device *pdev = feature->pdev;
+
+ return create_perf_obj(feature, &pdev->dev.kobj, PERF_OBJ_ROOT_ID,
+ perf_dev_attr_groups, "perf");
+}
+
+/*
+ * Counter Sysfs Interfaces for Cache.
+ */
+static ssize_t cache_freeze_show(struct perf_object *pobj, char *buf)
+{
+ void __iomem *base = pobj->feature->ioaddr;
+ u64 v;
+
+ v = readq(base + CACHE_CTRL);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(CACHE_FREEZE_CNTR, v));
+}
+
+static ssize_t cache_freeze_store(struct perf_object *pobj,
+ const char *buf, size_t n)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ bool state;
+ u64 v;
+
+ if (strtobool(buf, &state))
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + CACHE_CTRL);
+ v &= ~CACHE_FREEZE_CNTR;
+ v |= FIELD_PREP(CACHE_FREEZE_CNTR, state ? 1 : 0);
+ writeq(v, base + CACHE_CTRL);
+ mutex_unlock(&pdata->lock);
+
+ return n;
+}
+static PERF_OBJ_ATTR(cache_freeze, freeze, 0644,
+ cache_freeze_show, cache_freeze_store);
+
+static ssize_t read_cache_counter(struct perf_object *pobj, char *buf,
+ u8 channel, u8 event)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ u64 v, count;
+
+ if (event > CACHE_EVNT_MAX || channel > CACHE_CHANNEL_MAX)
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ /* set channel access type and cache event code. */
+ v = readq(base + CACHE_CTRL);
+ v &= ~(CACHE_CHANNEL_SEL | CACHE_CTRL_EVNT);
+ v |= FIELD_PREP(CACHE_CHANNEL_SEL, channel);
+ v |= FIELD_PREP(CACHE_CTRL_EVNT, event);
+ writeq(v, base + CACHE_CTRL);
+
+ if (readq_poll_timeout(base + CACHE_CNTR0, v,
+ FIELD_GET(CACHE_CNTR_EVNT, v) == event,
+ 1, PERF_TIMEOUT)) {
+ dev_err(&feature->pdev->dev, "timeout, unmatched cache event type in counter registers.\n");
+ mutex_unlock(&pdata->lock);
+ return -ETIMEDOUT;
+ }
+
+ v = readq(base + CACHE_CNTR0);
+ count = FIELD_GET(CACHE_CNTR_EVNT_CNTR, v);
+ v = readq(base + CACHE_CNTR1);
+ count += FIELD_GET(CACHE_CNTR_EVNT_CNTR, v);
+ mutex_unlock(&pdata->lock);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)count);
+}
+
+#define CACHE_SHOW(name, type, event) \
+static ssize_t name##_show(struct perf_object *pobj, char *buf) \
+{ \
+ return read_cache_counter(pobj, buf, type, event); \
+} \
+static PERF_OBJ_ATTR_RO(name)
+
+CACHE_SHOW(read_hit, CACHE_CHANNEL_RD, CACHE_EVNT_RD_HIT);
+CACHE_SHOW(read_miss, CACHE_CHANNEL_RD, CACHE_EVNT_RD_MISS);
+CACHE_SHOW(write_hit, CACHE_CHANNEL_WR, CACHE_EVNT_WR_HIT);
+CACHE_SHOW(write_miss, CACHE_CHANNEL_WR, CACHE_EVNT_WR_MISS);
+CACHE_SHOW(hold_request, CACHE_CHANNEL_RD, CACHE_EVNT_HOLD_REQ);
+CACHE_SHOW(tx_req_stall, CACHE_CHANNEL_RD, CACHE_EVNT_TX_REQ_STALL);
+CACHE_SHOW(rx_req_stall, CACHE_CHANNEL_RD, CACHE_EVNT_RX_REQ_STALL);
+CACHE_SHOW(rx_eviction, CACHE_CHANNEL_RD, CACHE_EVNT_EVICTIONS);
+CACHE_SHOW(data_write_port_contention, CACHE_CHANNEL_WR,
+ CACHE_EVNT_DATA_WR_PORT_CONTEN);
+CACHE_SHOW(tag_write_port_contention, CACHE_CHANNEL_WR,
+ CACHE_EVNT_TAG_WR_PORT_CONTEN);
+
+static struct attribute *cache_attrs[] = {
+ &perf_obj_attr_read_hit.attr,
+ &perf_obj_attr_read_miss.attr,
+ &perf_obj_attr_write_hit.attr,
+ &perf_obj_attr_write_miss.attr,
+ &perf_obj_attr_hold_request.attr,
+ &perf_obj_attr_data_write_port_contention.attr,
+ &perf_obj_attr_tag_write_port_contention.attr,
+ &perf_obj_attr_tx_req_stall.attr,
+ &perf_obj_attr_rx_req_stall.attr,
+ &perf_obj_attr_rx_eviction.attr,
+ &perf_obj_attr_cache_freeze.attr,
+ NULL,
+};
+
+static struct attribute_group cache_attr_group = {
+ .attrs = cache_attrs,
+};
+
+static const struct attribute_group *cache_attr_groups[] = {
+ &cache_attr_group,
+ NULL,
+};
+
+static int create_perf_cache_obj(struct perf_object *perf_dev)
+{
+ struct perf_object *pobj;
+
+ pobj = create_perf_obj(perf_dev->feature, &perf_dev->kobj,
+ PERF_OBJ_ROOT_ID, cache_attr_groups, "cache");
+ if (IS_ERR(pobj))
+ return PTR_ERR(pobj);
+
+ list_add(&pobj->node, &perf_dev->children);
+
+ return 0;
+}
+
+/*
+ * Counter Sysfs Interfaces for VT-D / IOMMU.
+ */
+static ssize_t vtd_freeze_show(struct perf_object *pobj, char *buf)
+{
+ void __iomem *base = pobj->feature->ioaddr;
+ u64 v;
+
+ v = readq(base + VTD_CTRL);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(VTD_FREEZE_CNTR, v));
+}
+
+static ssize_t vtd_freeze_store(struct perf_object *pobj,
+ const char *buf, size_t n)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ bool state;
+ u64 v;
+
+ if (strtobool(buf, &state))
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + VTD_CTRL);
+ v &= ~VTD_FREEZE_CNTR;
+ v |= FIELD_PREP(VTD_FREEZE_CNTR, state ? 1 : 0);
+ writeq(v, base + VTD_CTRL);
+ mutex_unlock(&pdata->lock);
+
+ return n;
+}
+static PERF_OBJ_ATTR(vtd_freeze, freeze, 0644,
+ vtd_freeze_show, vtd_freeze_store);
+
+static struct attribute *iommu_top_attrs[] = {
+ &perf_obj_attr_vtd_freeze.attr,
+ NULL,
+};
+
+static struct attribute_group iommu_top_attr_group = {
+ .attrs = iommu_top_attrs,
+};
+
+static ssize_t read_iommu_sip_counter(struct perf_object *pobj,
+ u8 event, char *buf)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ u64 v, count;
+
+ if (event > VTD_SIP_EVNT_MAX)
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + VTD_SIP_CTRL);
+ v &= ~VTD_SIP_CTRL_EVNT;
+ v |= FIELD_PREP(VTD_SIP_CTRL_EVNT, event);
+ writeq(v, base + VTD_SIP_CTRL);
+
+ if (readq_poll_timeout(base + VTD_SIP_CNTR, v,
+ FIELD_GET(VTD_SIP_CNTR_EVNT, v) == event,
+ 1, PERF_TIMEOUT)) {
+ dev_err(&feature->pdev->dev, "timeout, unmatched VTd SIP event type in counter registers\n");
+ mutex_unlock(&pdata->lock);
+ return -ETIMEDOUT;
+ }
+
+ v = readq(base + VTD_SIP_CNTR);
+ count = FIELD_GET(VTD_SIP_CNTR_EVNT_CNTR, v);
+ mutex_unlock(&pdata->lock);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)count);
+}
+
+#define VTD_SIP_SHOW(name, event) \
+static ssize_t name##_show(struct perf_object *pobj, char *buf) \
+{ \
+ return read_iommu_sip_counter(pobj, event, buf); \
+} \
+static PERF_OBJ_ATTR_RO(name)
+
+VTD_SIP_SHOW(iotlb_4k_hit, VTD_SIP_EVNT_IOTLB_4K_HIT);
+VTD_SIP_SHOW(iotlb_2m_hit, VTD_SIP_EVNT_IOTLB_2M_HIT);
+VTD_SIP_SHOW(iotlb_1g_hit, VTD_SIP_EVNT_IOTLB_1G_HIT);
+VTD_SIP_SHOW(slpwc_l3_hit, VTD_SIP_EVNT_SLPWC_L3_HIT);
+VTD_SIP_SHOW(slpwc_l4_hit, VTD_SIP_EVNT_SLPWC_L4_HIT);
+VTD_SIP_SHOW(rcc_hit, VTD_SIP_EVNT_RCC_HIT);
+VTD_SIP_SHOW(iotlb_4k_miss, VTD_SIP_EVNT_IOTLB_4K_MISS);
+VTD_SIP_SHOW(iotlb_2m_miss, VTD_SIP_EVNT_IOTLB_2M_MISS);
+VTD_SIP_SHOW(iotlb_1g_miss, VTD_SIP_EVNT_IOTLB_1G_MISS);
+VTD_SIP_SHOW(slpwc_l3_miss, VTD_SIP_EVNT_SLPWC_L3_MISS);
+VTD_SIP_SHOW(slpwc_l4_miss, VTD_SIP_EVNT_SLPWC_L4_MISS);
+VTD_SIP_SHOW(rcc_miss, VTD_SIP_EVNT_RCC_MISS);
+
+static struct attribute *iommu_sip_attrs[] = {
+ &perf_obj_attr_iotlb_4k_hit.attr,
+ &perf_obj_attr_iotlb_2m_hit.attr,
+ &perf_obj_attr_iotlb_1g_hit.attr,
+ &perf_obj_attr_slpwc_l3_hit.attr,
+ &perf_obj_attr_slpwc_l4_hit.attr,
+ &perf_obj_attr_rcc_hit.attr,
+ &perf_obj_attr_iotlb_4k_miss.attr,
+ &perf_obj_attr_iotlb_2m_miss.attr,
+ &perf_obj_attr_iotlb_1g_miss.attr,
+ &perf_obj_attr_slpwc_l3_miss.attr,
+ &perf_obj_attr_slpwc_l4_miss.attr,
+ &perf_obj_attr_rcc_miss.attr,
+ NULL,
+};
+
+static struct attribute_group iommu_sip_attr_group = {
+ .attrs = iommu_sip_attrs,
+};
+
+static const struct attribute_group *iommu_top_attr_groups[] = {
+ &iommu_top_attr_group,
+ &iommu_sip_attr_group,
+ NULL,
+};
+
+static ssize_t read_iommu_counter(struct perf_object *pobj, u8 event, char *buf)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ u64 v, count;
+
+ if (event > VTD_EVNT_MAX)
+ return -EINVAL;
+
+ event += pobj->id;
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + VTD_CTRL);
+ v &= ~VTD_CTRL_EVNT;
+ v |= FIELD_PREP(VTD_CTRL_EVNT, event);
+ writeq(v, base + VTD_CTRL);
+
+ if (readq_poll_timeout(base + VTD_CNTR, v,
+ FIELD_GET(VTD_CNTR_EVNT, v) == event, 1,
+ PERF_TIMEOUT)) {
+ dev_err(&feature->pdev->dev, "timeout, unmatched VTd event type in counter registers\n");
+ mutex_unlock(&pdata->lock);
+ return -ETIMEDOUT;
+ }
+
+ v = readq(base + VTD_CNTR);
+ count = FIELD_GET(VTD_CNTR_EVNT_CNTR, v);
+ mutex_unlock(&pdata->lock);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)count);
+}
+
+#define VTD_SHOW(name, base_event) \
+static ssize_t name##_show(struct perf_object *pobj, char *buf) \
+{ \
+ return read_iommu_counter(pobj, base_event, buf); \
+} \
+static PERF_OBJ_ATTR_RO(name)
+
+VTD_SHOW(read_transaction, VTD_EVNT_AFU_MEM_RD_TRANS);
+VTD_SHOW(write_transaction, VTD_EVNT_AFU_MEM_WR_TRANS);
+VTD_SHOW(devtlb_read_hit, VTD_EVNT_AFU_DEVTLB_RD_HIT);
+VTD_SHOW(devtlb_write_hit, VTD_EVNT_AFU_DEVTLB_WR_HIT);
+VTD_SHOW(devtlb_4k_fill, VTD_EVNT_DEVTLB_4K_FILL);
+VTD_SHOW(devtlb_2m_fill, VTD_EVNT_DEVTLB_2M_FILL);
+VTD_SHOW(devtlb_1g_fill, VTD_EVNT_DEVTLB_1G_FILL);
+
+static struct attribute *iommu_attrs[] = {
+ &perf_obj_attr_read_transaction.attr,
+ &perf_obj_attr_write_transaction.attr,
+ &perf_obj_attr_devtlb_read_hit.attr,
+ &perf_obj_attr_devtlb_write_hit.attr,
+ &perf_obj_attr_devtlb_4k_fill.attr,
+ &perf_obj_attr_devtlb_2m_fill.attr,
+ &perf_obj_attr_devtlb_1g_fill.attr,
+ NULL,
+};
+
+static struct attribute_group iommu_attr_group = {
+ .attrs = iommu_attrs,
+};
+
+static const struct attribute_group *iommu_attr_groups[] = {
+ &iommu_attr_group,
+ NULL,
+};
+
+#define PERF_MAX_PORT_NUM 1
+
+static int create_perf_iommu_obj(struct perf_object *perf_dev)
+{
+ struct dfl_feature *feature = perf_dev->feature;
+ struct device *dev = &feature->pdev->dev;
+ struct perf_object *pobj, *obj;
+ void __iomem *base;
+ u64 v;
+ int i;
+
+ /* check if iommu is not supported on this device. */
+ base = dfl_get_feature_ioaddr_by_id(dev, FME_FEATURE_ID_HEADER);
+ v = readq(base + FME_HDR_CAP);
+ if (!FIELD_GET(FME_CAP_IOMMU_AVL, v))
+ return 0;
+
+ pobj = create_perf_obj(feature, &perf_dev->kobj, PERF_OBJ_ROOT_ID,
+ iommu_top_attr_groups, "iommu");
+ if (IS_ERR(pobj))
+ return PTR_ERR(pobj);
+
+ list_add(&pobj->node, &perf_dev->children);
+
+ for (i = 0; i < PERF_MAX_PORT_NUM; i++) {
+ obj = create_perf_obj(feature, &pobj->kobj, i,
+ iommu_attr_groups, "afu");
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ list_add(&obj->node, &pobj->children);
+ }
+
+ return 0;
+}
+
+/*
+ * Counter Sysfs Interfaces for Fabric
+ */
+static bool fabric_pobj_is_enabled(struct perf_object *pobj)
+{
+ struct dfl_feature *feature = pobj->feature;
+ void __iomem *base = feature->ioaddr;
+ u64 v;
+
+ v = readq(base + FAB_CTRL);
+
+ if (FIELD_GET(FAB_PORT_FILTER, v) == FAB_PORT_FILTER_DISABLE)
+ return pobj->id == PERF_OBJ_ROOT_ID;
+
+ return pobj->id == FIELD_GET(FAB_PORT_ID, v);
+}
+
+static ssize_t read_fabric_counter(struct perf_object *pobj,
+ u8 event, char *buf)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ u64 v, count = 0;
+
+ if (event > FAB_EVNT_MAX)
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ /* if it is disabled, force the counter to return zero. */
+ if (!fabric_pobj_is_enabled(pobj))
+ goto exit;
+
+ v = readq(base + FAB_CTRL);
+ v &= ~FAB_CTRL_EVNT;
+ v |= FIELD_PREP(FAB_CTRL_EVNT, event);
+ writeq(v, base + FAB_CTRL);
+
+ if (readq_poll_timeout(base + FAB_CNTR, v,
+ FIELD_GET(FAB_CNTR_EVNT, v) == event,
+ 1, PERF_TIMEOUT)) {
+ dev_err(&feature->pdev->dev, "timeout, unmatched fab event type in counter registers.\n");
+ mutex_unlock(&pdata->lock);
+ return -ETIMEDOUT;
+ }
+
+ v = readq(base + FAB_CNTR);
+ count = FIELD_GET(FAB_CNTR_EVNT_CNTR, v);
+exit:
+ mutex_unlock(&pdata->lock);
+ return scnprintf(buf, PAGE_SIZE, "0x%llx\n", (unsigned long long)count);
+}
+
+#define FAB_SHOW(name, event) \
+static ssize_t name##_show(struct perf_object *pobj, char *buf) \
+{ \
+ return read_fabric_counter(pobj, event, buf); \
+} \
+static PERF_OBJ_ATTR_RO(name)
+
+FAB_SHOW(pcie0_read, FAB_EVNT_PCIE0_RD);
+FAB_SHOW(pcie0_write, FAB_EVNT_PCIE0_WR);
+FAB_SHOW(pcie1_read, FAB_EVNT_PCIE1_RD);
+FAB_SHOW(pcie1_write, FAB_EVNT_PCIE1_WR);
+FAB_SHOW(upi_read, FAB_EVNT_UPI_RD);
+FAB_SHOW(upi_write, FAB_EVNT_UPI_WR);
+FAB_SHOW(mmio_read, FAB_EVNT_MMIO_RD);
+FAB_SHOW(mmio_write, FAB_EVNT_MMIO_WR);
+
+static ssize_t fab_enable_show(struct perf_object *pobj, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)!!fabric_pobj_is_enabled(pobj));
+}
+
+/*
+ * If enable one port or all port event counter in fabric, other
+ * fabric event counter originally enabled will be disable automatically.
+ */
+static ssize_t fab_enable_store(struct perf_object *pobj,
+ const char *buf, size_t n)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ bool state;
+ u64 v;
+
+ if (strtobool(buf, &state) || !state)
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ /* if it is already enabled. */
+ if (fabric_pobj_is_enabled(pobj))
+ return n;
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + FAB_CTRL);
+ v &= ~(FAB_PORT_FILTER | FAB_PORT_ID);
+
+ if (pobj->id == PERF_OBJ_ROOT_ID) {
+ v |= FIELD_PREP(FAB_PORT_FILTER, FAB_PORT_FILTER_DISABLE);
+ } else {
+ v |= FIELD_PREP(FAB_PORT_FILTER, FAB_PORT_FILTER_ENABLE);
+ v |= FIELD_PREP(FAB_PORT_ID, pobj->id);
+ }
+ writeq(v, base + FAB_CTRL);
+ mutex_unlock(&pdata->lock);
+
+ return n;
+}
+static PERF_OBJ_ATTR(fab_enable, enable, 0644,
+ fab_enable_show, fab_enable_store);
+
+static struct attribute *fabric_attrs[] = {
+ &perf_obj_attr_pcie0_read.attr,
+ &perf_obj_attr_pcie0_write.attr,
+ &perf_obj_attr_pcie1_read.attr,
+ &perf_obj_attr_pcie1_write.attr,
+ &perf_obj_attr_upi_read.attr,
+ &perf_obj_attr_upi_write.attr,
+ &perf_obj_attr_mmio_read.attr,
+ &perf_obj_attr_mmio_write.attr,
+ &perf_obj_attr_fab_enable.attr,
+ NULL,
+};
+
+static struct attribute_group fabric_attr_group = {
+ .attrs = fabric_attrs,
+};
+
+static const struct attribute_group *fabric_attr_groups[] = {
+ &fabric_attr_group,
+ NULL,
+};
+
+static ssize_t fab_freeze_show(struct perf_object *pobj, char *buf)
+{
+ void __iomem *base = pobj->feature->ioaddr;
+ u64 v;
+
+ v = readq(base + FAB_CTRL);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (unsigned int)FIELD_GET(FAB_FREEZE_CNTR, v));
+}
+
+static ssize_t fab_freeze_store(struct perf_object *pobj,
+ const char *buf, size_t n)
+{
+ struct dfl_feature *feature = pobj->feature;
+ struct dfl_feature_platform_data *pdata;
+ void __iomem *base = feature->ioaddr;
+ bool state;
+ u64 v;
+
+ if (strtobool(buf, &state))
+ return -EINVAL;
+
+ pdata = dev_get_platdata(&feature->pdev->dev);
+
+ mutex_lock(&pdata->lock);
+ v = readq(base + FAB_CTRL);
+ v &= ~FAB_FREEZE_CNTR;
+ v |= FIELD_PREP(FAB_FREEZE_CNTR, state ? 1 : 0);
+ writeq(v, base + FAB_CTRL);
+ mutex_unlock(&pdata->lock);
+
+ return n;
+}
+static PERF_OBJ_ATTR(fab_freeze, freeze, 0644,
+ fab_freeze_show, fab_freeze_store);
+
+static struct attribute *fabric_top_attrs[] = {
+ &perf_obj_attr_fab_freeze.attr,
+ NULL,
+};
+
+static struct attribute_group fabric_top_attr_group = {
+ .attrs = fabric_top_attrs,
+};
+
+static const struct attribute_group *fabric_top_attr_groups[] = {
+ &fabric_attr_group,
+ &fabric_top_attr_group,
+ NULL,
+};
+
+static int create_perf_fabric_obj(struct perf_object *perf_dev)
+{
+ struct perf_object *pobj, *obj;
+ int i;
+
+ pobj = create_perf_obj(perf_dev->feature, &perf_dev->kobj,
+ PERF_OBJ_ROOT_ID, fabric_top_attr_groups,
+ "fabric");
+ if (IS_ERR(pobj))
+ return PTR_ERR(pobj);
+
+ list_add(&pobj->node, &perf_dev->children);
+
+ for (i = 0; i < PERF_MAX_PORT_NUM; i++) {
+ obj = create_perf_obj(perf_dev->feature, &pobj->kobj, i,
+ fabric_attr_groups, "port");
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ list_add(&obj->node, &pobj->children);
+ }
+
+ return 0;
+}
+
+static int fme_perf_init(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct perf_object *perf_dev;
+ int ret;
+
+ perf_dev = create_perf_dev(feature);
+ if (IS_ERR(perf_dev))
+ return PTR_ERR(perf_dev);
+
+ ret = create_perf_fabric_obj(perf_dev);
+ if (ret)
+ goto done;
+
+ if (feature->id == FME_FEATURE_ID_GLOBAL_IPERF) {
+ /*
+ * Cache and IOMMU(VT-D) performance counters are not supported
+ * on discreted solutions e.g. Intel Programmable Acceleration
+ * Card based on PCIe.
+ */
+ ret = create_perf_cache_obj(perf_dev);
+ if (ret)
+ goto done;
+
+ ret = create_perf_iommu_obj(perf_dev);
+ if (ret)
+ goto done;
+ }
+
+ feature->priv = perf_dev;
+ return 0;
+
+done:
+ destroy_perf_obj(perf_dev);
+ return ret;
+}
+
+static void fme_perf_uinit(struct platform_device *pdev,
+ struct dfl_feature *feature)
+{
+ struct perf_object *perf_dev = feature->priv;
+
+ destroy_perf_obj(perf_dev);
+}
+
+const struct dfl_feature_id fme_perf_id_table[] = {
+ {.id = FME_FEATURE_ID_GLOBAL_IPERF,},
+ {.id = FME_FEATURE_ID_GLOBAL_DPERF,},
+ {0,}
+};
+
+const struct dfl_feature_ops fme_perf_ops = {
+ .init = fme_perf_init,
+ .uinit = fme_perf_uinit,
+};
diff --git a/drivers/fpga/dfl-fme.h b/drivers/fpga/dfl-fme.h
index 5fbe3f5..dc71048 100644
--- a/drivers/fpga/dfl-fme.h
+++ b/drivers/fpga/dfl-fme.h
@@ -39,5 +39,7 @@ struct dfl_fme {
extern const struct dfl_feature_id fme_pr_mgmt_id_table[];
extern const struct dfl_feature_ops fme_global_err_ops;
extern const struct dfl_feature_id fme_global_err_id_table[];
+extern const struct dfl_feature_ops fme_perf_ops;
+extern const struct dfl_feature_id fme_perf_id_table[];
#endif /* __DFL_FME_H */
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index 65f91ef..637692a 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -507,6 +507,7 @@ static int build_info_commit_dev(struct build_feature_devs_info *binfo)
struct dfl_feature *feature = &pdata->features[index];
/* save resource information for each feature */
+ feature->pdev = fdev;
feature->id = finfo->fid;
feature->resource_index = index;
feature->ioaddr = finfo->ioaddr;
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index 6c32080..bf23436 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -191,6 +191,7 @@ struct dfl_feature_driver {
/**
* struct dfl_feature - sub feature of the feature devices
*
+ * @pdev: parent platform device.
* @id: sub feature id.
* @resource_index: each sub feature has one mmio resource for its registers.
* this index is used to find its mmio resource from the
@@ -200,6 +201,7 @@ struct dfl_feature_driver {
* @priv: priv data of this feature.
*/
struct dfl_feature {
+ struct platform_device *pdev;
u64 id;
int resource_index;
void __iomem *ioaddr;
--
1.8.3.1
^ permalink raw reply related
* [PATCH v3] moduleparam: Save information about built-in modules in separate file
From: Alexey Gladkov @ 2019-04-29 9:08 UTC (permalink / raw)
To: Masahiro Yamada
Cc: Linux Kernel Mailing List, Linux Kbuild mailing list, linux-api,
linux-modules, Kirill A . Shutemov, Gleb Fotengauer-Malinovskiy,
Dmitry V. Levin, Michal Marek, Dmitry Torokhov, Rusty Russell,
Jessica Yu, Lucas De Marchi
Problem:
When a kernel module is compiled as a separate module, some important
information about the kernel module is available via .modinfo section of
the module. In contrast, when the kernel module is compiled into the
kernel, that information is not available.
Information about built-in modules is necessary in the following cases:
1. When it is necessary to find out what additional parameters can be
passed to the kernel at boot time.
2. When you need to know which module names and their aliases are in
the kernel. This is very useful for creating an initrd image.
Proposal:
The proposed patch does not remove .modinfo section with module
information from the vmlinux at the build time and saves it into a
separate file after kernel linking. So, the kernel does not increase in
size and no additional information remains in it. Information is stored
in the same format as in the separate modules (null-terminated string
array). Because the .modinfo section is already exported with a separate
modules, we are not creating a new API.
It can be easily read in the userspace:
$ tr '\0' '\n' < kernel.builtin
ext4.softdep=pre: crc32c
ext4.license=GPL
ext4.description=Fourth Extended Filesystem
ext4.author=Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others
ext4.alias=fs-ext4
ext4.alias=ext3
ext4.alias=fs-ext3
ext4.alias=ext2
ext4.alias=fs-ext2
md_mod.alias=block-major-9-*
md_mod.alias=md
md_mod.description=MD RAID framework
md_mod.license=GPL
md_mod.parmtype=create_on_open:bool
md_mod.parmtype=start_dirty_degraded:int
...
v2:
* Extract modinfo from vmlinux.o as suggested by Masahiro Yamada;
* Rename output file to kernel.builtin;
* Add MODULE_VERSION to modinfo that is saved to the kernel.builtin;
* Fix build warnings on powerpc.
v3:
* Rename output file to modules.builtin.modinfo as suggested by Masahiro Yamada;
* Update Documentation/dontdiff, Documentation/kbuild/kbuild.txt.
Co-Developed-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
.gitignore | 1 +
Documentation/dontdiff | 1 +
Documentation/kbuild/kbuild.txt | 5 +++++
Makefile | 2 ++
include/asm-generic/vmlinux.lds.h | 1 +
include/linux/module.h | 1 +
include/linux/moduleparam.h | 12 +++++-------
scripts/link-vmlinux.sh | 4 ++++
8 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/.gitignore b/.gitignore
index a20ac26aa2f5..8ef2c87703b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@
*.xz
Module.symvers
modules.builtin
+modules.builtin.modinfo
#
# Top-level generic files
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 2228fcc8e29f..3d4d5a402b8b 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -179,6 +179,7 @@ mktables
mktree
modpost
modules.builtin
+modules.builtin.modinfo
modules.order
modversions.h*
nconf
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index c9e3d93e7a89..63e118c8c837 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -11,6 +11,11 @@ modules.builtin
This file lists all modules that are built into the kernel. This is used
by modprobe to not fail when trying to load something builtin.
+modules.builtin.modinfo
+--------------------------------------------------
+This file contains modinfo from all modules that are built into the kernel.
+Unlike modinfo of a separate module, all fields are prefixed with module name.
+
Environment variables
diff --git a/Makefile b/Makefile
index d5713e7b1e50..5e6657fcf08c 100644
--- a/Makefile
+++ b/Makefile
@@ -1288,6 +1288,7 @@ _modinst_:
fi
@cp -f $(objtree)/modules.order $(MODLIB)/
@cp -f $(objtree)/modules.builtin $(MODLIB)/
+ @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
# This depmod is only for convenience to give the initial
@@ -1328,6 +1329,7 @@ endif # CONFIG_MODULES
# Directories & files removed with 'make clean'
CLEAN_DIRS += $(MODVERDIR) include/ksym
+CLEAN_FILES += modules.builtin.modinfo
# Directories & files removed with 'make mrproper'
MRPROPER_DIRS += include/config usr/include include/generated \
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d7a6a9c2370..44c724bf7d3a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -844,6 +844,7 @@
EXIT_CALL \
*(.discard) \
*(.discard.*) \
+ *(.modinfo) \
}
/**
diff --git a/include/linux/module.h b/include/linux/module.h
index f5bc4c046461..1cae28b1172a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -237,6 +237,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \
#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
#else
#define MODULE_VERSION(_version) \
+ MODULE_INFO(version, _version); \
static struct module_version_attribute ___modver_attr = { \
.mattr = { \
.attr = { \
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index ba36506db4fb..5ba250d9172a 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -10,23 +10,21 @@
module name. */
#ifdef MODULE
#define MODULE_PARAM_PREFIX /* empty */
+#define __MODULE_INFO_PREFIX /* empty */
#else
#define MODULE_PARAM_PREFIX KBUILD_MODNAME "."
+/* We cannot use MODULE_PARAM_PREFIX because some modules override it. */
+#define __MODULE_INFO_PREFIX KBUILD_MODNAME "."
#endif
/* Chosen so that structs with an unsigned long line up. */
#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
-#ifdef MODULE
#define __MODULE_INFO(tag, name, info) \
static const char __UNIQUE_ID(name)[] \
__used __attribute__((section(".modinfo"), unused, aligned(1))) \
- = __stringify(tag) "=" info
-#else /* !MODULE */
-/* This struct is here for syntactic coherency, it is not used */
-#define __MODULE_INFO(tag, name, info) \
- struct __UNIQUE_ID(name) {}
-#endif
+ = __MODULE_INFO_PREFIX __stringify(tag) "=" info
+
#define __MODULE_PARM_TYPE(name, _type) \
__MODULE_INFO(parmtype, name##type, #name ":" _type)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index c8cf45362bd6..41ef7cb043c1 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -226,6 +226,10 @@ modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches
${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
+info MODINFO modules.builtin.modinfo
+"${OBJCOPY}" -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
+chmod 444 modules.builtin.modinfo
+
kallsymso=""
kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then
--
2.21.0
^ permalink raw reply related
* [PATCH 1/2] y2038: make CONFIG_64BIT_TIME unconditional
From: Arnd Bergmann @ 2019-04-29 13:19 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Joseph Myers, libc-alpha, linux-api, linux-kernel, Deepa Dinamani,
Arnd Bergmann, Lukasz Majewski, Stepan Golosunov
As Stepan Golosunov points out, we made a small mistake in the
get_timespec64() function in the kernel. It was originally added under
the assumption that CONFIG_64BIT_TIME would get enabled on all 32-bit
and 64-bit architectures, but when I did the conversion, I only turned
it on for 32-bit ones.
The effect is that the get_timespec64() function never clears the upper
half of the tv_nsec field for 32-bit tasks in compat mode. Clearing this
is required for POSIX compliant behavior of functions that pass a
'timespec' structure with a 64-bit tv_sec and a 32-bit tv_nsec, plus
uninitialized padding.
The easiest fix for linux-5.1 is to just make the Kconfig symbol
unconditional, as it was originally intended. As a follow-up,
we should remove any #ifdef CONFIG_64BIT_TIME completely.
Note: for native 32-bit mode, no change is needed, this works as
designed and user space should never need to clear the upper 32
bits of the tv_nsec field, in or out of the kernel.
Fixes: 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit")
Link: https://lore.kernel.org/lkml/20190422090710.bmxdhhankurhafxq@sghpc.golosunov.pp.ru/
Cc: Lukasz Majewski <lukma@denx.de>
Cc: Stepan Golosunov <stepan@golosunov.pp.ru>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
Please apply this one as a bugfix for 5.1
---
arch/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 33687dddd86a..9092e0ffe4d3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -764,7 +764,7 @@ config COMPAT_OLD_SIGACTION
bool
config 64BIT_TIME
- def_bool ARCH_HAS_64BIT_TIME
+ def_bool y
help
This should be selected by all architectures that need to support
new system calls with a 64-bit time_t. This is relevant on all 32-bit
--
2.20.0
^ permalink raw reply related
* [PATCH 2/2] y2038: remove CONFIG_64BIT_TIME
From: Arnd Bergmann @ 2019-04-29 13:19 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Joseph Myers, libc-alpha, linux-api, linux-kernel, Deepa Dinamani,
Arnd Bergmann, Lukasz Majewski, Stepan Golosunov
In-Reply-To: <20190429131951.471701-1-arnd@arndb.de>
The CONFIG_64BIT_TIME option is defined on all architectures, and can
be removed for simplicity now.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
This would make sense for 5.2, or could be part of a later
cleanup series when I have more patches for the remaining
y2038 bits
---
arch/Kconfig | 8 --------
fs/aio.c | 2 +-
ipc/syscall.c | 2 +-
kernel/time/hrtimer.c | 2 +-
kernel/time/time.c | 4 ++--
net/socket.c | 2 +-
6 files changed, 6 insertions(+), 14 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 9092e0ffe4d3..23ee740182aa 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -763,14 +763,6 @@ config OLD_SIGACTION
config COMPAT_OLD_SIGACTION
bool
-config 64BIT_TIME
- def_bool y
- help
- This should be selected by all architectures that need to support
- new system calls with a 64-bit time_t. This is relevant on all 32-bit
- architectures, and 64-bit architectures as part of compat syscall
- handling.
-
config COMPAT_32BIT_TIME
def_bool !64BIT || COMPAT
help
diff --git a/fs/aio.c b/fs/aio.c
index 3490d1fa0e16..b1b949ae1a93 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2057,7 +2057,7 @@ static long do_io_getevents(aio_context_t ctx_id,
* specifies an infinite timeout. Note that the timeout pointed to by
* timeout is relative. Will fail with -ENOSYS if not implemented.
*/
-#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+#ifdef CONFIG_64BIT
SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
long, min_nr,
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 581bdff4e7c5..dfb0e988d542 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -30,7 +30,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second,
return ksys_semtimedop(first, (struct sembuf __user *)ptr,
second, NULL);
case SEMTIMEDOP:
- if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
+ if (IS_ENABLED(CONFIG_64BIT))
return ksys_semtimedop(first, ptr, second,
(const struct __kernel_timespec __user *)fifth);
else if (IS_ENABLED(CONFIG_COMPAT_32BIT_TIME))
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 41dfff23c1f9..61f03faf783a 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1749,7 +1749,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
return ret;
}
-#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+#ifdef CONFIG_64BIT
SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
struct __kernel_timespec __user *, rmtp)
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 74105fa3ce80..a4c72577aa92 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -264,7 +264,7 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
}
#endif
-#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+#ifdef CONFIG_64BIT
SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p)
{
struct __kernel_timex txc; /* Local copy of parameter */
@@ -871,7 +871,7 @@ int get_timespec64(struct timespec64 *ts,
ts->tv_sec = kts.tv_sec;
/* Zero out the padding for 32 bit systems or in compat mode */
- if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall())
+ if (in_compat_syscall())
kts.tv_nsec &= 0xFFFFFFFFUL;
ts->tv_nsec = kts.tv_nsec;
diff --git a/net/socket.c b/net/socket.c
index a180e1a9ff23..2ff80e03d97b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2787,7 +2787,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
a[2], true);
break;
case SYS_RECVMMSG:
- if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
+ if (IS_ENABLED(CONFIG_64BIT))
err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
a[2], a[3],
(struct __kernel_timespec __user *)a[4],
--
2.20.0
^ permalink raw reply related
* Re: [PATCH 1/2] y2038: make CONFIG_64BIT_TIME unconditional
From: Arnd Bergmann @ 2019-04-29 13:22 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Joseph Myers, GNU C Library, Linux API, Linux Kernel Mailing List,
Deepa Dinamani, Lukasz Majewski, Stepan Golosunov
In-Reply-To: <alpine.DEB.2.21.1904290933130.1626@nanos.tec.linutronix.de>
On Mon, Apr 29, 2019 at 9:34 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Fri, 26 Apr 2019, Arnd Bergmann wrote:
>
> > As Stepan Golosunov points out, we made a small mistake in the
> > get_timespec64() function in the kernel. It was originally added under
> > the assumption that CONFIG_64BIT_TIME would get enabled on all 32-bit
> > and 64-bit architectures, but when I did the conversion, I only turned
> > it on for 32-bit ones.
> >
> > The effect is that the get_timespec64() function never clears the upper
> > half of the tv_nsec field for 32-bit tasks in compat mode. Clearing this
> > is required for POSIX compliant behavior of functions that pass a
> > 'timespec' structure with a 64-bit tv_sec and a 32-bit tv_nsec, plus
> > uninitialized padding.
> >
> > The easiest fix for linux-5.1 is to just make the Kconfig symbol
> > unconditional, as it was originally intended. As a follow-up,
> > we should remove any #ifdef CONFIG_64BIT_TIME completely.
> >
> > Link: https://lore.kernel.org/lkml/20190422090710.bmxdhhankurhafxq@sghpc.golosunov.pp.ru/
> > Cc: Lukasz Majewski <lukma@denx.de>
> > Cc: Stepan Golosunov <stepan@golosunov.pp.ru>
> > Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> > ---
> > Please apply this one as a bugfix for 5.1
>
> Can you provide a 'Fixes: ....' tag please?
Ok, resent both patches now. I also took the chance to add a clarification
for the point that Lukasz missed on the first submission.
Arnd
^ permalink raw reply
* Re: [PATCH v1 1/2] Add polling support to pidfd
From: Joel Fernandes @ 2019-04-29 14:02 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Christian Brauner, linux-kernel, luto, rostedt, dancol, sspatil,
jannh, surenb, timmurray, Jonathan Kowalski, torvalds,
kernel-team, Andrew Morton, Arnd Bergmann, Eric W. Biederman,
Greg Kroah-Hartman, Ingo Molnar, Jann Horn, linux-kselftest,
Michal Hocko, Peter Zijlstra (Intel), Serge Hallyn, Shuah Khan,
Stephen Rothwell, Thomas Gleixner <tgl>
In-Reply-To: <20190428162405.GA6757@redhat.com>
On Sun, Apr 28, 2019 at 06:24:06PM +0200, Oleg Nesterov wrote:
> Thanks for cc'ing me...
>
> On 04/26, Christian Brauner wrote:
> >
> > On Thu, Apr 25, 2019 at 03:00:09PM -0400, Joel Fernandes (Google) wrote:
> > > +static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
> > > +{
> > > + struct task_struct *task;
> > > + struct pid *pid;
> > > + int poll_flags = 0;
> > > +
> > > + /*
> > > + * tasklist_lock must be held because to avoid racing with
> > > + * changes in exit_state and wake up. Basically to avoid:
> > > + *
> > > + * P0: read exit_state = 0
> > > + * P1: write exit_state = EXIT_DEAD
> > > + * P1: Do a wake up - wq is empty, so do nothing
> > > + * P0: Queue for polling - wait forever.
> > > + */
> > > + read_lock(&tasklist_lock);
> > > + pid = file->private_data;
> > > + task = pid_task(pid, PIDTYPE_PID);
> > > + WARN_ON_ONCE(task && !thread_group_leader(task));
> > > +
> > > + if (!task || (task->exit_state && thread_group_empty(task)))
> > > + poll_flags = POLLIN | POLLRDNORM;
>
> Joel, I still can't understand why do we need tasklist... and I don't really
> understand the comment. The code looks as if you are trying to avoid poll_wait(),
> but this would be strange.
>
> OK, why can't pidfd_poll() do
>
> poll_wait(file, &pid->wait_pidfd, pts);
>
> rcu_read_lock();
> task = pid_task(pid, PIDTYPE_PID);
> if (!task || task->exit_state && thread_group_empty(task))
> poll_flags = POLLIN | ...;
> rcu_read_unlock();
>
> return poll_flags;
>
> ?
Oh that's much better Oleg, and would avoid the race I had in mind: Basically
I was acquiring the tasklist_lock to avoid a case where a polling task is not
woken up because it was added to the waitqueue too late. The reading of the
exit_state and the conditional adding to the wait queue, needed to be atomic.
Otherwise something like the following may be possible:
Task A (poller) Task B (exiting task being polled)
------------ ----------------
poll() called
exit_state is set to non-zero
read exit_state
wake_up_all()
add_wait_queue()
----------------------------------------------
However, in your code above, it is avoided because we get:
Task A (poller) Task B (exiting task being polled)
------------ ----------------
poll() called
add_wait_queue()
exit_state is set to non-zero
read exit_state
remove_wait_queue()
wake_up_all()
I don't see any other issues with your code above so I can try it out and
update the patches. Thanks.
> > > +static void do_notify_pidfd(struct task_struct *task)
> >
> > Maybe a short command that this helper can only be called when we know
> > that task is a thread-group leader wouldn't hurt so there's no confusion
> > later.
>
> Not really. If the task is traced, do_notify_parent() (and thus do_notify_pidfd())
> can be called to notify the debugger even if the task is not a leader and/or if
> it is not the last thread. The latter means a spurious wakeup for pidfd_poll().
Seems like you are replying to Christian's point. I agree with you.
> > > +{
> > > + struct pid *pid;
> > > +
> > > + lockdep_assert_held(&tasklist_lock);
> > > +
> > > + pid = get_task_pid(task, PIDTYPE_PID);
> > > + wake_up_all(&pid->wait_pidfd);
> > > + put_pid(pid);
>
> Why get/put?
Yes, pid_task() should do it. Will update it. Thanks!
- Joel
^ permalink raw reply
* Re: [PATCH v1 1/2] Add polling support to pidfd
From: Joel Fernandes @ 2019-04-29 14:07 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Christian Brauner, linux-kernel, luto, rostedt, dancol, sspatil,
jannh, surenb, timmurray, Jonathan Kowalski, torvalds,
kernel-team, Andrew Morton, Arnd Bergmann, Eric W. Biederman,
Greg Kroah-Hartman, Ingo Molnar, Jann Horn, linux-kselftest,
Michal Hocko, Peter Zijlstra (Intel), Serge Hallyn, Shuah Khan,
Stephen Rothwell, Thomas Gleixner <tgl>
In-Reply-To: <20190429140245.GB233442@google.com>
On Mon, Apr 29, 2019 at 10:02:45AM -0400, Joel Fernandes wrote:
> On Sun, Apr 28, 2019 at 06:24:06PM +0200, Oleg Nesterov wrote:
[snip]
> > > > +{
> > > > + struct pid *pid;
> > > > +
> > > > + lockdep_assert_held(&tasklist_lock);
> > > > +
> > > > + pid = get_task_pid(task, PIDTYPE_PID);
> > > > + wake_up_all(&pid->wait_pidfd);
> > > > + put_pid(pid);
> >
> > Why get/put?
>
> Yes, pid_task() should do it. Will update it. Thanks!
I spoke too soon. We need the task's pid of type PIDTYPE_PID. How else can we
get it? This does an atomic_inc on the pid->count, so we need to put_pid()
after we are done with it. Did I miss something?
thanks,
- Joel
^ permalink raw reply
* Re: [PATCH v1 1/2] Add polling support to pidfd
From: Oleg Nesterov @ 2019-04-29 14:20 UTC (permalink / raw)
To: Joel Fernandes
Cc: Christian Brauner, linux-kernel, luto, rostedt, dancol, sspatil,
jannh, surenb, timmurray, Jonathan Kowalski, torvalds,
kernel-team, Andrew Morton, Arnd Bergmann, Eric W. Biederman,
Greg Kroah-Hartman, Ingo Molnar, Jann Horn, linux-kselftest,
Michal Hocko, Peter Zijlstra (Intel), Serge Hallyn, Shuah Khan,
Stephen Rothwell, Thomas Gleixner <tgl>
In-Reply-To: <20190429140245.GB233442@google.com>
On 04/29, Joel Fernandes wrote:
>
> However, in your code above, it is avoided because we get:
>
> Task A (poller) Task B (exiting task being polled)
> ------------ ----------------
> poll() called
> add_wait_queue()
> exit_state is set to non-zero
> read exit_state
> remove_wait_queue()
> wake_up_all()
just to clarify... No, sys_poll() path doesn't do remove_wait_queue() until
it returns to user mode, and that is why we can't race with set-exit_code +
wake_up().
pidfd_poll() can race with the exiting task, miss exit_code != 0, and return
zero. However, do_poll() won't block after that and pidfd_poll() will be called
again.
Oleg.
^ permalink raw reply
* Re: [PATCH v3] moduleparam: Save information about built-in modules in separate file
From: Jessica Yu @ 2019-04-29 14:21 UTC (permalink / raw)
To: Alexey Gladkov
Cc: Masahiro Yamada, Linux Kernel Mailing List,
Linux Kbuild mailing list, linux-api, linux-modules,
Kirill A . Shutemov, Gleb Fotengauer-Malinovskiy, Dmitry V. Levin,
Michal Marek, Dmitry Torokhov, Rusty Russell, Lucas De Marchi
In-Reply-To: <20190429090854.GU9023@dhcp129-178.brq.redhat.com>
+++ Alexey Gladkov [29/04/19 11:08 +0200]:
>Problem:
>
>When a kernel module is compiled as a separate module, some important
>information about the kernel module is available via .modinfo section of
>the module. In contrast, when the kernel module is compiled into the
>kernel, that information is not available.
>
>Information about built-in modules is necessary in the following cases:
>
>1. When it is necessary to find out what additional parameters can be
>passed to the kernel at boot time.
>
>2. When you need to know which module names and their aliases are in
>the kernel. This is very useful for creating an initrd image.
>
>Proposal:
>
>The proposed patch does not remove .modinfo section with module
>information from the vmlinux at the build time and saves it into a
>separate file after kernel linking. So, the kernel does not increase in
>size and no additional information remains in it. Information is stored
>in the same format as in the separate modules (null-terminated string
>array). Because the .modinfo section is already exported with a separate
>modules, we are not creating a new API.
>
>It can be easily read in the userspace:
>
>$ tr '\0' '\n' < kernel.builtin
s/kernel.builtin/modules.builtin.modinfo/
Otherwise, for module.h and moduleparam.h:
Acked-by: Jessica Yu <jeyu@kernel.org>
And it would be great if Lucas can confirm if the file format and name
would be OK for kmod.
Thanks!
Jessica
^ permalink raw reply
* Re: [PATCH v1 1/2] Add polling support to pidfd
From: Oleg Nesterov @ 2019-04-29 14:25 UTC (permalink / raw)
To: Joel Fernandes
Cc: Christian Brauner, linux-kernel, luto, rostedt, dancol, sspatil,
jannh, surenb, timmurray, Jonathan Kowalski, torvalds,
kernel-team, Andrew Morton, Arnd Bergmann, Eric W. Biederman,
Greg Kroah-Hartman, Ingo Molnar, Jann Horn, linux-kselftest,
Michal Hocko, Peter Zijlstra (Intel), Serge Hallyn, Shuah Khan,
Stephen Rothwell, Thomas Gleixner <tgl>
In-Reply-To: <20190429140743.GB173743@google.com>
On 04/29, Joel Fernandes wrote:
>
> On Mon, Apr 29, 2019 at 10:02:45AM -0400, Joel Fernandes wrote:
> > On Sun, Apr 28, 2019 at 06:24:06PM +0200, Oleg Nesterov wrote:
> [snip]
> > > > > +{
> > > > > + struct pid *pid;
> > > > > +
> > > > > + lockdep_assert_held(&tasklist_lock);
> > > > > +
> > > > > + pid = get_task_pid(task, PIDTYPE_PID);
> > > > > + wake_up_all(&pid->wait_pidfd);
> > > > > + put_pid(pid);
> > >
> > > Why get/put?
> >
> > Yes, pid_task() should do it. Will update it. Thanks!
>
> I spoke too soon. We need the task's pid of type PIDTYPE_PID. How else can we
> get it? This does an atomic_inc on the pid->count, so we need to put_pid()
> after we are done with it. Did I miss something?
Just use task_pid(task);
Oleg.
^ permalink raw reply
* Re: [PATCH v3] moduleparam: Save information about built-in modules in separate file
From: Masahiro Yamada @ 2019-04-29 15:08 UTC (permalink / raw)
To: Alexey Gladkov
Cc: Linux Kernel Mailing List, Linux Kbuild mailing list, linux-api,
linux-modules, Kirill A . Shutemov, Gleb Fotengauer-Malinovskiy,
Dmitry V. Levin, Michal Marek, Dmitry Torokhov, Rusty Russell,
Jessica Yu, Lucas De Marchi
In-Reply-To: <20190429090854.GU9023@dhcp129-178.brq.redhat.com>
On Mon, Apr 29, 2019 at 6:09 PM Alexey Gladkov <gladkov.alexey@gmail.com> wrote:
>
> Problem:
>
> When a kernel module is compiled as a separate module, some important
> information about the kernel module is available via .modinfo section of
> the module. In contrast, when the kernel module is compiled into the
> kernel, that information is not available.
>
> Information about built-in modules is necessary in the following cases:
>
> 1. When it is necessary to find out what additional parameters can be
> passed to the kernel at boot time.
>
> 2. When you need to know which module names and their aliases are in
> the kernel. This is very useful for creating an initrd image.
>
> Proposal:
>
> The proposed patch does not remove .modinfo section with module
> information from the vmlinux at the build time and saves it into a
> separate file after kernel linking. So, the kernel does not increase in
> size and no additional information remains in it. Information is stored
> in the same format as in the separate modules (null-terminated string
> array). Because the .modinfo section is already exported with a separate
> modules, we are not creating a new API.
>
> It can be easily read in the userspace:
>
> $ tr '\0' '\n' < kernel.builtin
> ext4.softdep=pre: crc32c
> ext4.license=GPL
> ext4.description=Fourth Extended Filesystem
> ext4.author=Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others
> ext4.alias=fs-ext4
> ext4.alias=ext3
> ext4.alias=fs-ext3
> ext4.alias=ext2
> ext4.alias=fs-ext2
> md_mod.alias=block-major-9-*
> md_mod.alias=md
> md_mod.description=MD RAID framework
> md_mod.license=GPL
> md_mod.parmtype=create_on_open:bool
> md_mod.parmtype=start_dirty_degraded:int
> ...
>
> v2:
> * Extract modinfo from vmlinux.o as suggested by Masahiro Yamada;
> * Rename output file to kernel.builtin;
> * Add MODULE_VERSION to modinfo that is saved to the kernel.builtin;
> * Fix build warnings on powerpc.
>
> v3:
> * Rename output file to modules.builtin.modinfo as suggested by Masahiro Yamada;
> * Update Documentation/dontdiff, Documentation/kbuild/kbuild.txt.
>
> Co-Developed-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
> Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
> Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
> ---
> .gitignore | 1 +
> Documentation/dontdiff | 1 +
> Documentation/kbuild/kbuild.txt | 5 +++++
> Makefile | 2 ++
> include/asm-generic/vmlinux.lds.h | 1 +
> include/linux/module.h | 1 +
> include/linux/moduleparam.h | 12 +++++-------
> scripts/link-vmlinux.sh | 4 ++++
> 8 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/.gitignore b/.gitignore
> index a20ac26aa2f5..8ef2c87703b3 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -45,6 +45,7 @@
> *.xz
> Module.symvers
> modules.builtin
> +modules.builtin.modinfo
>
> #
> # Top-level generic files
Let me repeat the same comments as in v2
(https://patchwork.kernel.org/patch/10888207/#22595563)
as you ignored them.
This file is generated only in the top of the tree.
Please add '/' prefix and move it to
the "# Top-level generic files" section.
#
# Top-level generic files
#
/tags
/TAGS
/linux
/vmlinux
/vmlinux.32
/vmlinux-gdb.py
/vmlinuz
/System.map
/Module.markers
/modules.builtin.modinfo
> diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
> index c8cf45362bd6..41ef7cb043c1 100755
> --- a/scripts/link-vmlinux.sh
> +++ b/scripts/link-vmlinux.sh
> @@ -226,6 +226,10 @@ modpost_link vmlinux.o
> # modpost vmlinux.o to check for section mismatches
> ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
>
> +info MODINFO modules.builtin.modinfo
> +"${OBJCOPY}" -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
> +chmod 444 modules.builtin.modinfo
Why is this 'chmod 444' necessary?
> +
> kallsymso=""
> kallsyms_vmlinux=""
> if [ -n "${CONFIG_KALLSYMS}" ]; then
--
Best Regards
Masahiro Yamada
^ permalink raw reply
* [PATCH for 5.2 00/12] Restartable Sequences selftests updates
From: Mathieu Desnoyers @ 2019-04-29 15:27 UTC (permalink / raw)
To: Shuah Khan
Cc: linux-kernel, linux-api, Thomas Gleixner, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H . Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer,
Steven Rostedt, Josh Triplett, Linus Torvalds, Catalin Marinas,
Will Deacon
Those rseq selftests updates are hereby submitted to Shuah Khan,
maintainer of kernel selftests, for the next merge window (5.2).
They change the per-architecture pre-abort signatures to ensure those
are valid trap instructions.
The way exit points are presented to debuggers is enhanced, ensuring
all exit points are present, so debuggers don't have to disassemble
rseq critical section to properly skip over them.
Discussions with the glibc community is reaching a concensus of exposing
a __rseq_handled symbol from glibc to coexist with rseq early adopters.
Update the rseq selftest code to expose and use this symbol.
Support for compiling asm goto with clang is added with the
"-no-integrated-as" compiler switch, similarly to the toplevel kernel
Makefile.
Thanks,
Mathieu
Martin Schwidefsky (1):
rseq/selftests: s390: use trap4 for RSEQ_SIG
Mathieu Desnoyers (11):
rseq/selftests: x86: Work-around bogus gcc-8 optimisation
rseq/selftests: Add __rseq_exit_point_array section for debuggers
rseq/selftests: Introduce __rseq_cs_ptr_array, rename __rseq_table to
__rseq_cs
rseq/selftests: Use __rseq_handled symbol to coexist with glibc
rseq/selftests: s390: use jg instruction for jumps outside of the asm
rseq/selftests: x86: use ud1 instruction as RSEQ_SIG opcode
rseq/selftests: arm: use udf instruction for RSEQ_SIG
rseq/selftests: aarch64 code signature: handle big-endian environment
rseq/selftests: powerpc code signature: generate valid instructions
rseq/selftests: mips: use break instruction for RSEQ_SIG
rseq/selftests: add -no-integrated-as for clang
tools/testing/selftests/rseq/Makefile | 8 +-
tools/testing/selftests/rseq/rseq-arm.h | 132 +++++++++++++--
tools/testing/selftests/rseq/rseq-arm64.h | 74 ++++++++-
tools/testing/selftests/rseq/rseq-mips.h | 115 +++++++++++--
tools/testing/selftests/rseq/rseq-ppc.h | 90 +++++++++-
tools/testing/selftests/rseq/rseq-s390.h | 78 ++++++++-
tools/testing/selftests/rseq/rseq-x86.h | 264 +++++++++++++++++++++---------
tools/testing/selftests/rseq/rseq.c | 55 ++++++-
tools/testing/selftests/rseq/rseq.h | 1 +
9 files changed, 688 insertions(+), 129 deletions(-)
--
2.11.0
^ permalink raw reply
* [PATCH for 5.2 01/12] rseq/selftests: x86: Work-around bogus gcc-8 optimisation
From: Mathieu Desnoyers @ 2019-04-29 15:27 UTC (permalink / raw)
To: Shuah Khan
Cc: linux-kernel, linux-api, Thomas Gleixner, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H . Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer,
Steven Rostedt, Josh Triplett, Linus Torvalds, Catalin Marinas,
Will Deacon
In-Reply-To: <20190429152803.7719-1-mathieu.desnoyers@efficios.com>
gcc-8 version 8.1.0, 8.2.0, and 8.3.0 generate broken assembler with asm
goto that have a thread-local storage "m" input operand on both x86-32
and x86-64. For instance:
__thread int var;
static int fct(void)
{
asm goto ( "jmp %l[testlabel]\n\t"
: : [var] "m" (var) : : testlabel);
return 0;
testlabel:
return 1;
}
int main()
{
return fct();
}
% gcc-8 -O2 -o test-asm-goto test-asm-goto.c
/tmp/ccAdHJbe.o: In function `main':
test-asm-goto.c:(.text.startup+0x1): undefined reference to `.L2'
collect2: error: ld returned 1 exit status
% gcc-8 -m32 -O2 -o test-asm-goto test-asm-goto.c
/tmp/ccREsVXA.o: In function `main':
test-asm-goto.c:(.text.startup+0x1): undefined reference to `.L2'
collect2: error: ld returned 1 exit status
Work-around this compiler bug in the rseq-x86.h header by passing the
address of the __rseq_abi TLS as a register operand rather than using
the "m" input operand.
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90193
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Ingo Molnar <mingo@redhat.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Joel Fernandes <joelaf@google.com>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Dave Watson <davejwatson@fb.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Andi Kleen <andi@firstfloor.org>
CC: linux-kselftest@vger.kernel.org
CC: "H . Peter Anvin" <hpa@zytor.com>
CC: Chris Lameter <cl@linux.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
CC: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
CC: Paul Turner <pjt@google.com>
CC: Boqun Feng <boqun.feng@gmail.com>
CC: Josh Triplett <josh@joshtriplett.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ben Maurer <bmaurer@fb.com>
CC: linux-api@vger.kernel.org
CC: Andy Lutomirski <luto@amacapital.net>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linus Torvalds <torvalds@linux-foundation.org>
---
tools/testing/selftests/rseq/rseq-x86.h | 144 ++++++++++++++++----------------
1 file changed, 70 insertions(+), 74 deletions(-)
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 089410a314e9..a5341044a2f5 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -9,6 +9,16 @@
#define RSEQ_SIG 0x53053053
+/*
+ * Due to a compiler optimization bug in gcc-8 with asm goto and TLS asm input
+ * operands, we cannot use "m" input operands, and rather pass the __rseq_abi
+ * address through a "r" input operand.
+ */
+
+/* Offset of cpu_id and rseq_cs fields in struct rseq. */
+#define RSEQ_CPU_ID_OFFSET 4
+#define RSEQ_CS_OFFSET 8
+
#ifdef __x86_64__
#define rseq_smp_mb() \
@@ -51,12 +61,12 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \
- "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t" \
+ "movq %%rax, " __rseq_str(rseq_cs) "\n\t" \
__rseq_str(label) ":\n\t"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \
"jnz " __rseq_str(label) "\n\t"
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
@@ -84,14 +94,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -102,8 +112,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -141,15 +150,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[error2]\n\t"
@@ -164,8 +173,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -200,11 +208,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
#endif
/* final store */
"addq %[count], %[v]\n\t"
@@ -213,8 +221,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[count] "er" (count)
@@ -245,14 +252,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -266,8 +273,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -315,8 +321,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
@@ -325,7 +331,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpq %[v2], %[expect2]\n\t"
@@ -338,8 +344,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -385,14 +390,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"movq %[dst], %[rseq_scratch1]\n\t"
"movq %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"cmpq %[v], %[expect]\n\t"
"jnz 7f\n\t"
#endif
@@ -440,8 +445,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -533,12 +537,12 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
- "movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t" \
+ "movl $" __rseq_str(cs_label) ", " __rseq_str(rseq_cs) "\n\t" \
__rseq_str(label) ":\n\t"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \
"jnz " __rseq_str(label) "\n\t"
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
@@ -566,14 +570,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -584,8 +588,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -623,15 +626,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[error2]\n\t"
@@ -646,8 +649,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -682,11 +684,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
#endif
/* final store */
"addl %[count], %[v]\n\t"
@@ -695,8 +697,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[count] "ir" (count)
@@ -727,14 +728,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -749,8 +750,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "m" (newv2),
@@ -789,15 +789,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[error2]\n\t"
@@ -813,8 +813,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -854,8 +853,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
@@ -864,7 +863,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpl %[expect2], %[v2]\n\t"
@@ -878,8 +877,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -926,15 +924,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
@@ -984,8 +982,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
@@ -1034,15 +1031,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
@@ -1093,8 +1090,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
--
2.11.0
^ permalink raw reply related
* [PATCH for 5.2 02/12] rseq/selftests: Add __rseq_exit_point_array section for debuggers
From: Mathieu Desnoyers @ 2019-04-29 15:27 UTC (permalink / raw)
To: Shuah Khan
Cc: linux-kernel, linux-api, Thomas Gleixner, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H . Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer,
Steven Rostedt, Josh Triplett, Linus Torvalds, Catalin Marinas,
Will Deacon
In-Reply-To: <20190429152803.7719-1-mathieu.desnoyers@efficios.com>
Knowing all exit points is useful to assist debuggers stepping over the
rseq critical sections without requiring them to disassemble the content
of the critical section to figure out the exit points.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Joel Fernandes <joelaf@google.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Dave Watson <davejwatson@fb.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Andi Kleen <andi@firstfloor.org>
CC: linux-kselftest@vger.kernel.org
CC: "H . Peter Anvin" <hpa@zytor.com>
CC: Chris Lameter <cl@linux.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
CC: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
CC: Paul Turner <pjt@google.com>
CC: Boqun Feng <boqun.feng@gmail.com>
CC: Josh Triplett <josh@joshtriplett.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ben Maurer <bmaurer@fb.com>
CC: linux-api@vger.kernel.org
CC: Andy Lutomirski <luto@amacapital.net>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linus Torvalds <torvalds@linux-foundation.org>
---
tools/testing/selftests/rseq/rseq-arm.h | 52 +++++++++++++++++
tools/testing/selftests/rseq/rseq-arm64.h | 52 +++++++++++++++++
tools/testing/selftests/rseq/rseq-mips.h | 53 +++++++++++++++++
tools/testing/selftests/rseq/rseq-ppc.h | 66 ++++++++++++++++++++++
tools/testing/selftests/rseq/rseq-s390.h | 55 ++++++++++++++++++
tools/testing/selftests/rseq/rseq-x86.h | 94 +++++++++++++++++++++++++++++++
6 files changed, 372 insertions(+)
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 3cea19877227..17e8d231943a 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -42,6 +42,19 @@ do { \
__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"adr r0, " __rseq_str(cs_label) "\n\t" \
@@ -87,6 +100,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -149,6 +167,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -215,6 +238,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -267,6 +293,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -337,6 +368,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -408,6 +444,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -486,6 +528,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"str %[src], %[rseq_scratch0]\n\t"
"str %[dst], %[rseq_scratch1]\n\t"
"str %[len], %[rseq_scratch2]\n\t"
@@ -605,6 +652,11 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"str %[src], %[rseq_scratch0]\n\t"
"str %[dst], %[rseq_scratch1]\n\t"
"str %[len], %[rseq_scratch2]\n\t"
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 954f34671ca6..2079f71e0ca2 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -95,6 +95,19 @@ do { \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ " .pushsection __rseq_exit_point_array, \"aw\"\n" \
+ " .quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \
+ " .popsection\n"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
" adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \
@@ -182,6 +195,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -231,6 +249,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -282,6 +305,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -325,6 +351,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -379,6 +410,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -433,6 +469,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -490,6 +532,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
@@ -545,6 +592,11 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index 7f48ecf46994..25d10ff54769 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -68,6 +68,20 @@ do { \
__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(exit_ip)) "\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
@@ -114,6 +128,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -174,6 +193,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -238,6 +262,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -290,6 +317,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -358,6 +390,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -427,6 +464,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -501,6 +544,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
LONG_S " %[src], %[rseq_scratch0]\n\t"
LONG_S " %[dst], %[rseq_scratch1]\n\t"
LONG_S " %[len], %[rseq_scratch2]\n\t"
@@ -617,6 +665,11 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
LONG_S " %[src], %[rseq_scratch0]\n\t"
LONG_S " %[dst], %[rseq_scratch1]\n\t"
LONG_S " %[len], %[rseq_scratch2]\n\t"
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 52630c9f42be..24f95649d71e 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -63,6 +63,19 @@ do { \
"std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
__rseq_str(label) ":\n\t"
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#else /* #ifdef __PPC64__ */
#define STORE_WORD "stw "
@@ -80,6 +93,20 @@ do { \
".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
".popsection\n\t"
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ /* 32-bit only supported on BE */ \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
@@ -169,6 +196,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
@@ -224,6 +256,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
@@ -286,6 +323,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
@@ -337,6 +377,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
@@ -400,6 +445,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
@@ -465,6 +515,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
@@ -532,6 +588,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* setup for mempcy */
"mr %%r19, %[len]\n\t"
"mr %%r20, %[src]\n\t"
@@ -601,6 +662,11 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* setup for mempcy */
"mr %%r19, %[len]\n\t"
"mr %%r20, %[src]\n\t"
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 1069e85258ce..b8b5b6f900af 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -44,6 +44,19 @@ do { \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
".popsection\n\t"
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#elif __s390__
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
@@ -55,6 +68,19 @@ do { \
".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
".popsection\n\t"
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#define LONG_L "l"
#define LONG_S "st"
#define LONG_LT_R "ltr"
@@ -102,6 +128,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -160,6 +191,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -220,6 +256,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -268,6 +307,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -339,6 +383,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
@@ -407,6 +457,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
LONG_S " %[src], %[rseq_scratch0]\n\t"
LONG_S " %[dst], %[rseq_scratch1]\n\t"
LONG_S " %[len], %[rseq_scratch2]\n\t"
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index a5341044a2f5..0668608d3674 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -58,6 +58,19 @@ do { \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \
@@ -93,6 +106,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -149,6 +167,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -207,6 +230,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -251,6 +277,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -320,6 +351,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -386,6 +423,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"movq %[src], %[rseq_scratch0]\n\t"
"movq %[dst], %[rseq_scratch1]\n\t"
"movq %[len], %[rseq_scratch2]\n\t"
@@ -535,6 +577,19 @@ do { \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_table section and should not
+ * be explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"movl $" __rseq_str(cs_label) ", " __rseq_str(rseq_cs) "\n\t" \
@@ -569,6 +624,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -625,6 +685,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -683,6 +748,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -727,6 +795,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -788,6 +861,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -852,6 +930,12 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
@@ -920,6 +1004,11 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"movl %[src], %[rseq_scratch0]\n\t"
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
@@ -1027,6 +1116,11 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"movl %[src], %[rseq_scratch0]\n\t"
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
--
2.11.0
^ permalink raw reply related
* [PATCH for 5.2 03/12] rseq/selftests: Introduce __rseq_cs_ptr_array, rename __rseq_table to __rseq_cs
From: Mathieu Desnoyers @ 2019-04-29 15:27 UTC (permalink / raw)
To: Shuah Khan
Cc: linux-kernel, linux-api, Thomas Gleixner, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H . Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer,
Steven Rostedt, Josh Triplett, Linus Torvalds, Catalin Marinas,
Will Deacon
In-Reply-To: <20190429152803.7719-1-mathieu.desnoyers@efficios.com>
The entries within __rseq_table are aligned on 32 bytes due to
linux/rseq.h struct rseq_cs uapi requirements, but the start of the
__rseq_table section is not guaranteed to be 32-byte aligned. It can
cause padding to be added at the start of the section, which makes it
hard to use as an array of items by debuggers.
Considering that __rseq_table does not really consist of a table due to
the presence of padding, rename this section to __rseq_cs.
Create a new __rseq_cs_ptr_array section which contains 64-bit packed
pointers to entries within the __rseq_cs section.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Joel Fernandes <joelaf@google.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Dave Watson <davejwatson@fb.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Andi Kleen <andi@firstfloor.org>
CC: linux-kselftest@vger.kernel.org
CC: "H . Peter Anvin" <hpa@zytor.com>
CC: Chris Lameter <cl@linux.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
CC: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
CC: Paul Turner <pjt@google.com>
CC: Boqun Feng <boqun.feng@gmail.com>
CC: Josh Triplett <josh@joshtriplett.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ben Maurer <bmaurer@fb.com>
CC: linux-api@vger.kernel.org
CC: Andy Lutomirski <luto@amacapital.net>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linus Torvalds <torvalds@linux-foundation.org>
---
tools/testing/selftests/rseq/rseq-arm.h | 32 +++++++++++++++++--------------
tools/testing/selftests/rseq/rseq-arm64.h | 9 ++++++---
tools/testing/selftests/rseq/rseq-mips.h | 32 +++++++++++++++++--------------
tools/testing/selftests/rseq/rseq-ppc.h | 22 +++++++++++++--------
tools/testing/selftests/rseq/rseq-s390.h | 18 +++++++++++------
tools/testing/selftests/rseq/rseq-x86.h | 19 ++++++++++++------
6 files changed, 81 insertions(+), 51 deletions(-)
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 17e8d231943a..5f262c54364f 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -30,24 +30,28 @@ do { \
#include "rseq-skip.h"
#else /* !RSEQ_SKIP_FASTPATH */
-#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".word " __rseq_str(label) "b, 0x0\n\t" \
".popsection\n\t"
-#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
- __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
/*
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
@@ -99,7 +103,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -166,7 +170,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -237,7 +241,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
@@ -292,7 +296,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -367,7 +371,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -443,7 +447,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -527,7 +531,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -651,7 +655,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 2079f71e0ca2..b41a2a48e965 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -82,13 +82,16 @@ do { \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
- " .pushsection __rseq_table, \"aw\"\n" \
+ " .pushsection __rseq_cs, \"aw\"\n" \
" .balign 32\n" \
__rseq_str(label) ":\n" \
" .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
" .quad " __rseq_str(start_ip) ", " \
__rseq_str(post_commit_offset) ", " \
__rseq_str(abort_ip) "\n" \
+ " .popsection\n\t" \
+ " .pushsection __rseq_cs_ptr_array, \"aw\"\n" \
+ " .quad " __rseq_str(label) "b\n" \
" .popsection\n"
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
@@ -99,8 +102,8 @@ do { \
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index 25d10ff54769..fe3eabcdcbe5 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -54,26 +54,30 @@ do { \
# error unsupported _MIPS_SZLONG
#endif
-#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(label) "b") "\n\t" \
".popsection\n\t"
-#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
- __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
/*
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
@@ -127,7 +131,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -192,7 +196,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -261,7 +265,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
@@ -316,7 +320,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -389,7 +393,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -463,7 +467,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -543,7 +547,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
@@ -664,7 +668,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 24f95649d71e..9df18487fa9f 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -33,8 +33,8 @@ do { \
#else /* !RSEQ_SKIP_FASTPATH */
/*
- * The __rseq_table section can be used by debuggers to better handle
- * single-stepping through the restartable critical sections.
+ * The __rseq_cs_ptr_array and __rseq_cs sections can be used by debuggers to
+ * better handle single-stepping through the restartable critical sections.
*/
#ifdef __PPC64__
@@ -46,11 +46,14 @@ do { \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(label) "b\n\t" \
".popsection\n\t"
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
@@ -67,8 +70,8 @@ do { \
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
@@ -85,20 +88,23 @@ do { \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
/* 32-bit only supported on BE */ \
".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".long 0x0, " __rseq_str(label) "b\n\t" \
".popsection\n\t"
/*
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index b8b5b6f900af..fbb97815d71c 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -37,19 +37,22 @@ do { \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(label) "b\n\t" \
".popsection\n\t"
/*
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
@@ -61,19 +64,22 @@ do { \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".long 0x0, " __rseq_str(label) "b\n\t" \
".popsection\n\t"
/*
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 0668608d3674..03095236f6fa 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -47,13 +47,17 @@ do { \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(label) "b\n\t" \
".popsection\n\t"
+
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
@@ -62,8 +66,8 @@ do { \
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
@@ -566,11 +570,14 @@ do { \
*/
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".long " __rseq_str(label) "b, 0x0\n\t" \
".popsection\n\t"
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
@@ -581,8 +588,8 @@ do { \
* Exit points of a rseq critical section consist of all instructions outside
* of the critical section where a critical section can either branch to or
* reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_table section and should not
- * be explicitly defined as additional exit points. Knowing all exit points is
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
* useful to assist debuggers stepping over the critical section.
*/
#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
--
2.11.0
^ permalink raw reply related
* [PATCH for 5.2 04/12] rseq/selftests: Use __rseq_handled symbol to coexist with glibc
From: Mathieu Desnoyers @ 2019-04-29 15:27 UTC (permalink / raw)
To: Shuah Khan
Cc: linux-kernel, linux-api, Thomas Gleixner, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H . Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer,
Steven Rostedt, Josh Triplett, Linus Torvalds, Catalin Marinas,
Will Deacon
In-Reply-To: <20190429152803.7719-1-mathieu.desnoyers@efficios.com>
In order to integrate rseq into user-space applications, expose a
__rseq_handled symbol so many rseq users can be linked into the same
application (e.g. librseq and glibc).
The __rseq_refcount TLS variable is static to the librseq library. It
ensures that rseq syscall registration/unregistration happens only for
the most early/late caller to rseq_{,un}register_current_thread for each
thread, thus ensuring that rseq is registered across the lifetime of all
rseq users for a given thread.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Carlos O'Donell <carlos@redhat.com>
CC: Florian Weimer <fweimer@redhat.com>
CC: Joseph Myers <joseph@codesourcery.com>
CC: Szabolcs Nagy <szabolcs.nagy@arm.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ben Maurer <bmaurer@fb.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: Boqun Feng <boqun.feng@gmail.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Dave Watson <davejwatson@fb.com>
CC: Paul Turner <pjt@google.com>
CC: linux-api@vger.kernel.org
---
tools/testing/selftests/rseq/rseq.c | 55 +++++++++++++++++++++++++++++++------
tools/testing/selftests/rseq/rseq.h | 1 +
2 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 4847e97ed049..7159eb777fd3 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -25,18 +25,27 @@
#include <syscall.h>
#include <assert.h>
#include <signal.h>
+#include <limits.h>
#include "rseq.h"
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-__attribute__((tls_model("initial-exec"))) __thread
-volatile struct rseq __rseq_abi = {
+__thread volatile struct rseq __rseq_abi = {
.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};
-static __attribute__((tls_model("initial-exec"))) __thread
-volatile int refcount;
+/*
+ * Shared with other libraries. This library may take rseq ownership if it is
+ * still 0 when executing the library constructor. Set to 1 by library
+ * constructor when handling rseq. Set to 0 in destructor if handling rseq.
+ */
+int __rseq_handled;
+
+/* Whether this library have ownership of rseq registration. */
+static int rseq_ownership;
+
+static __thread volatile uint32_t __rseq_refcount;
static void signal_off_save(sigset_t *oldset)
{
@@ -69,8 +78,14 @@ int rseq_register_current_thread(void)
int rc, ret = 0;
sigset_t oldset;
+ if (!rseq_ownership)
+ return 0;
signal_off_save(&oldset);
- if (refcount++)
+ if (__rseq_refcount == UINT_MAX) {
+ ret = -1;
+ goto end;
+ }
+ if (__rseq_refcount++)
goto end;
rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
if (!rc) {
@@ -78,9 +93,9 @@ int rseq_register_current_thread(void)
goto end;
}
if (errno != EBUSY)
- __rseq_abi.cpu_id = -2;
+ __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
ret = -1;
- refcount--;
+ __rseq_refcount--;
end:
signal_restore(oldset);
return ret;
@@ -91,13 +106,20 @@ int rseq_unregister_current_thread(void)
int rc, ret = 0;
sigset_t oldset;
+ if (!rseq_ownership)
+ return 0;
signal_off_save(&oldset);
- if (--refcount)
+ if (!__rseq_refcount) {
+ ret = -1;
+ goto end;
+ }
+ if (--__rseq_refcount)
goto end;
rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
if (!rc)
goto end;
+ __rseq_refcount = 1;
ret = -1;
end:
signal_restore(oldset);
@@ -115,3 +137,20 @@ int32_t rseq_fallback_current_cpu(void)
}
return cpu;
}
+
+void __attribute__((constructor)) rseq_init(void)
+{
+ /* Check whether rseq is handled by another library. */
+ if (__rseq_handled)
+ return;
+ __rseq_handled = 1;
+ rseq_ownership = 1;
+}
+
+void __attribute__((destructor)) rseq_fini(void)
+{
+ if (!rseq_ownership)
+ return;
+ __rseq_handled = 0;
+ rseq_ownership = 0;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index c72eb70f9b52..26348e2c44f3 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -45,6 +45,7 @@
#endif
extern __thread volatile struct rseq __rseq_abi;
+extern int __rseq_handled;
#define rseq_likely(x) __builtin_expect(!!(x), 1)
#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
--
2.11.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox