Linux userland API discussions
 help / color / mirror / Atom feed
* [PATCH v8 8/8] tpm: TPM 2.0 FIFO Interface
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Will Arthur,
	Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

From: Will Arthur <will.c.arthur@intel.com>

Detect TPM 2.0 by using the extended STS (STS3) register. For TPM 2.0,
instead of calling tpm_get_timeouts(), assign duration and timeout
values defined in the TPM 2.0 PTP specification.

Signed-off-by: Will Arthur <will.c.arthur@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm_tis.c | 71 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7a2c59b..ecf8e68 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2005, 2006 IBM Corporation
+ * Copyright (C) 2014 Intel Corporation
  *
  * Authors:
  * Leendert van Doorn <leendert@watson.ibm.com>
@@ -44,6 +45,10 @@ enum tis_status {
 	TPM_STS_DATA_EXPECT = 0x08,
 };
 
+enum tis_status3 {
+	TPM_STS3_TPM2_FAM = 0x04,
+};
+
 enum tis_int_flags {
 	TPM_GLOBAL_INT_ENABLE = 0x80000000,
 	TPM_INTF_BURST_COUNT_STATIC = 0x100,
@@ -70,6 +75,7 @@ enum tis_defaults {
 #define	TPM_INT_STATUS(l)		(0x0010 | ((l) << 12))
 #define	TPM_INTF_CAPS(l)		(0x0014 | ((l) << 12))
 #define	TPM_STS(l)			(0x0018 | ((l) << 12))
+#define	TPM_STS3(l)			(0x001b | ((l) << 12))
 #define	TPM_DATA_FIFO(l)		(0x0024 | ((l) << 12))
 
 #define	TPM_DID_VID(l)			(0x0F00 | ((l) << 12))
@@ -344,6 +350,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 {
 	int rc;
 	u32 ordinal;
+	unsigned long dur;
 
 	rc = tpm_tis_send_data(chip, buf, len);
 	if (rc < 0)
@@ -355,9 +362,14 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 
 	if (chip->vendor.irq) {
 		ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+
+		if (chip->flags & TPM_CHIP_FLAG_TPM2)
+			dur = tpm2_calc_ordinal_duration(chip, ordinal);
+		else
+			dur = tpm_calc_ordinal_duration(chip, ordinal);
+
 		if (wait_for_tpm_stat
-		    (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID,
-		     tpm_calc_ordinal_duration(chip, ordinal),
+		    (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, dur,
 		     &chip->vendor.read_queue, false) < 0) {
 			rc = -ETIME;
 			goto out_err;
@@ -543,6 +555,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 	u32 vendor, intfcaps, intmask;
 	int rc, i, irq_s, irq_e, probe;
 	struct tpm_chip *chip;
+	u8 sts3;
 
 	chip = tpmm_chip_alloc(dev, &tpm_tis);
 	if (IS_ERR(chip))
@@ -554,11 +567,28 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 	if (!chip->vendor.iobase)
 		return -EIO;
 
+	sts3 = ioread8(chip->vendor.iobase + TPM_STS3(1));
+	if ((sts3 & TPM_STS3_TPM2_FAM) == TPM_STS3_TPM2_FAM)
+		chip->flags = TPM_CHIP_FLAG_TPM2;
+
 	/* Default timeouts */
-	chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-	chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
-	chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-	chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+		chip->vendor.timeout_a = usecs_to_jiffies(TPM2_TIMEOUT_A);
+		chip->vendor.timeout_b = usecs_to_jiffies(TPM2_TIMEOUT_B);
+		chip->vendor.timeout_c = usecs_to_jiffies(TPM2_TIMEOUT_C);
+		chip->vendor.timeout_d = usecs_to_jiffies(TPM2_TIMEOUT_D);
+		chip->vendor.duration[TPM_SHORT] =
+			usecs_to_jiffies(TPM2_DURATION_SHORT);
+		chip->vendor.duration[TPM_MEDIUM] =
+			usecs_to_jiffies(TPM2_DURATION_MEDIUM);
+		chip->vendor.duration[TPM_LONG] =
+			usecs_to_jiffies(TPM2_DURATION_LONG);
+	} else {
+		chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+		chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
+		chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+		chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+	}
 
 	if (wait_startup(chip, 0) != 0) {
 		rc = -ENODEV;
@@ -573,8 +603,8 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 	vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
 	chip->vendor.manufacturer_id = vendor;
 
-	dev_info(dev,
-		 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
+	dev_info(dev, "%s TPM (device-id 0x%X, rev-id %d)\n",
+		 (chip->flags & TPM_CHIP_FLAG_TPM2) ? "2.0" : "1.2",
 		 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
 
 	if (!itpm) {
@@ -616,13 +646,17 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 		dev_dbg(dev, "\tData Avail Int Support\n");
 
 	/* get the timeouts before testing for irqs */
-	if (tpm_get_timeouts(chip)) {
+	if (!(chip->flags & TPM_CHIP_FLAG_TPM2) && tpm_get_timeouts(chip)) {
 		dev_err(dev, "Could not get TPM timeouts and durations\n");
 		rc = -ENODEV;
 		goto out_err;
 	}
 
-	if (tpm_do_selftest(chip)) {
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		rc = tpm2_do_selftest(chip);
+	else
+		rc = tpm_do_selftest(chip);
+	if (rc) {
 		dev_err(dev, "TPM self test failed\n");
 		rc = -ENODEV;
 		goto out_err;
@@ -683,7 +717,10 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 			chip->vendor.probed_irq = 0;
 
 			/* Generate Interrupts */
-			tpm_gen_interrupt(chip);
+			if (chip->flags & TPM_CHIP_FLAG_TPM2)
+				tpm2_gen_interrupt(chip);
+			else
+				tpm_gen_interrupt(chip);
 
 			chip->vendor.irq = chip->vendor.probed_irq;
 
@@ -759,14 +796,18 @@ static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
 static int tpm_tis_resume(struct device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(dev);
-	int ret;
+	int ret = 0;
 
 	if (chip->vendor.irq)
 		tpm_tis_reenable_interrupts(chip);
 
-	ret = tpm_pm_resume(dev);
-	if (!ret)
-		tpm_do_selftest(chip);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		tpm2_do_selftest(chip);
+	else {
+		ret = tpm_pm_resume(dev);
+		if (!ret)
+			tpm_do_selftest(chip);
+	}
 
 	return ret;
 }
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 7/8] tpm: TPM 2.0 CRB Interface
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

tpm_crb is a driver for TPM 2.0 Command Response Buffer (CRB) Interface
as defined in PC Client Platform TPM Profile (PTP) Specification.

Only polling and single locality is supported as these are the limitations
of the available hardware, Platform Trust Technology (PTT) in Haswell
CPUs.

The driver always applies CRB start together with ACPI start for PTT:
PTT reports ACPI start as its only start method, but my testing shows
that it also requires CRB start.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/Kconfig   |   9 ++
 drivers/char/tpm/Makefile  |   1 +
 drivers/char/tpm/tpm_crb.c | 356 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 366 insertions(+)
 create mode 100644 drivers/char/tpm/tpm_crb.c

diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index c54cac3..10c9419 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -122,4 +122,13 @@ config TCG_XEN
 	  To compile this driver as a module, choose M here; the module
 	  will be called xen-tpmfront.
 
+config TCG_CRB
+	tristate "TPM 2.0 CRB Interface"
+	depends on X86 && ACPI
+	---help---
+	  If you have a TPM security chip that is compliant with the
+	  TCG CRB 2.0 TPM specification say Yes and it will be accessible
+	  from within Linux.  To compile this driver as a module, choose
+	  M here; the module will be called tpm_crb.
+
 endif # TCG_TPM
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index ae56af9..e6d26dd 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
 obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o
 obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o
 obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o
+obj-$(CONFIG_TCG_CRB) += tpm_crb.o
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
new file mode 100644
index 0000000..cff80d3
--- /dev/null
+++ b/drivers/char/tpm/tpm_crb.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ * Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+ *
+ * Maintained by: <tpmdd-devel@lists.sourceforge.net>
+ *
+ * This device driver implements the TPM interface as defined in
+ * the TCG CRB 2.0 TPM specification.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/acpi.h>
+#include <linux/highmem.h>
+#include <linux/rculist.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include "tpm.h"
+
+#define ACPI_SIG_TPM2 "TPM2"
+
+static const u8 CRB_ACPI_START_UUID[] = {
+	/* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47,
+	/* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4
+};
+
+enum crb_defaults {
+	CRB_ACPI_START_REVISION_ID = 1,
+	CRB_ACPI_START_INDEX = 1,
+};
+
+enum crb_start_method {
+	CRB_SM_ACPI_START = 2,
+	CRB_SM_CRB = 7,
+	CRB_SM_CRB_WITH_ACPI_START = 8,
+};
+
+struct acpi_tpm2 {
+	struct acpi_table_header hdr;
+	u16 platform_class;
+	u16 reserved;
+	u64 control_area_pa;
+	u32 start_method;
+} __packed;
+
+enum crb_ca_request {
+	CRB_CA_REQ_GO_IDLE	= BIT(0),
+	CRB_CA_REQ_CMD_READY	= BIT(1),
+};
+
+enum crb_ca_status {
+	CRB_CA_STS_ERROR	= BIT(0),
+	CRB_CA_STS_TPM_IDLE	= BIT(1),
+};
+
+enum crb_start {
+	CRB_START_INVOKE	= BIT(0),
+};
+
+enum crb_cancel {
+	CRB_CANCEL_INVOKE	= BIT(0),
+};
+
+struct crb_control_area {
+	u32 req;
+	u32 sts;
+	u32 cancel;
+	u32 start;
+	u32 int_enable;
+	u32 int_sts;
+	u32 cmd_size;
+	u64 cmd_pa;
+	u32 rsp_size;
+	u64 rsp_pa;
+} __packed;
+
+enum crb_status {
+	CRB_STS_COMPLETE	= BIT(0),
+};
+
+enum crb_flags {
+	CRB_FL_ACPI_START	= BIT(0),
+	CRB_FL_CRB_START	= BIT(1),
+};
+
+struct crb_priv {
+	unsigned int flags;
+	struct crb_control_area *cca;
+	u8 *cmd;
+	u8 *rsp;
+};
+
+#ifdef CONFIG_PM_SLEEP
+static int crb_suspend(struct device *dev)
+{
+	return 0;	/* suspend is a no-op */
+}
+
+static int crb_resume(struct device *dev)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	(void) tpm2_do_selftest(chip);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(crb_pm, crb_suspend, crb_resume);
+
+static u8 crb_status(struct tpm_chip *chip)
+{
+	struct crb_priv *priv = chip->vendor.priv;
+	u8 sts = 0;
+
+	if ((le32_to_cpu(ioread32(&priv->cca->start)) & CRB_START_INVOKE) !=
+	    CRB_START_INVOKE)
+		sts |= CRB_STS_COMPLETE;
+
+	return sts;
+}
+
+static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct crb_priv *priv = chip->vendor.priv;
+	unsigned int expected;
+
+	/* need room for the first 6 bytes; bytes 2..5 hold the length */
+	if (count < 6)
+		return -EIO;
+
+	if (le32_to_cpu(ioread32(&priv->cca->sts)) & CRB_CA_STS_ERROR)
+		return -EIO;
+
+	memcpy(buf, priv->rsp, 6);
+	expected = be32_to_cpup((__be32 *) &buf[2]);
+
+	if (expected < 6 || expected > count)	/* "expected - 6" can't wrap */
+		return -EIO;
+
+	memcpy(&buf[6], &priv->rsp[6], expected - 6);
+
+	return expected;
+}
+
+static int crb_do_acpi_start(struct tpm_chip *chip)
+{
+	union acpi_object *obj;
+	int rc;
+
+	obj = acpi_evaluate_dsm(chip->acpi_dev_handle,
+				CRB_ACPI_START_UUID,
+				CRB_ACPI_START_REVISION_ID,
+				CRB_ACPI_START_INDEX,
+				NULL);
+	if (!obj)
+		return -ENXIO;
+	rc = obj->integer.value == 0 ? 0 : -ENXIO;
+	ACPI_FREE(obj);
+	return rc;
+}
+
+static int crb_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+	struct crb_priv *priv = chip->vendor.priv;
+	int rc = 0;
+
+	if (len > le32_to_cpu(ioread32(&priv->cca->cmd_size))) {
+		dev_err(&chip->dev,
+			"invalid command count value %x %zx\n",
+			(unsigned int) len,
+			(size_t) le32_to_cpu(ioread32(&priv->cca->cmd_size)));
+		return -E2BIG;
+	}
+
+	memcpy(priv->cmd, buf, len);
+
+	/* Make sure that cmd is populated before issuing start. */
+	wmb();
+
+	if (priv->flags & CRB_FL_CRB_START)
+		iowrite32(cpu_to_le32(CRB_START_INVOKE), &priv->cca->start);
+
+	if (priv->flags & CRB_FL_ACPI_START)
+		rc = crb_do_acpi_start(chip);
+
+	return rc;
+}
+
+static void crb_cancel(struct tpm_chip *chip)
+{
+	struct crb_priv *priv = chip->vendor.priv;
+
+	iowrite32(cpu_to_le32(CRB_CANCEL_INVOKE), &priv->cca->cancel);
+
+	/* Make sure the cancel request is written before the ACPI start. */
+	wmb();
+
+	if ((priv->flags & CRB_FL_ACPI_START) && crb_do_acpi_start(chip))
+		dev_err(&chip->dev, "ACPI Start failed\n");
+
+	iowrite32(0, &priv->cca->cancel);
+}
+
+static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
+{
+	struct crb_priv *priv = chip->vendor.priv;
+
+	return (le32_to_cpu(priv->cca->cancel) & CRB_CANCEL_INVOKE) ==
+		CRB_CANCEL_INVOKE;
+}
+
+static const struct tpm_class_ops tpm_crb = {
+	.status = crb_status,
+	.recv = crb_recv,
+	.send = crb_send,
+	.cancel = crb_cancel,
+	.req_canceled = crb_req_canceled,
+	.req_complete_mask = CRB_STS_COMPLETE,
+	.req_complete_val = CRB_STS_COMPLETE,
+};
+
+static int crb_acpi_add(struct acpi_device *device)
+{
+	struct tpm_chip *chip;
+	struct acpi_tpm2 *buf;
+	struct crb_priv *priv;
+	struct device *dev = &device->dev;
+	acpi_status status;
+	u32 sm;
+	u64 pa;
+	int rc;
+
+	chip = tpmm_chip_alloc(dev, &tpm_crb);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
+
+	chip->flags = TPM_CHIP_FLAG_TPM2;
+
+	status = acpi_get_table(ACPI_SIG_TPM2, 1,
+				(struct acpi_table_header **) &buf);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to get TPM2 ACPI table\n");
+		return -ENODEV;
+	}
+
+	if (buf->hdr.length != sizeof(struct acpi_tpm2)) {
+		dev_err(dev, "TPM2 ACPI table has wrong size\n");
+		return -EINVAL;
+	}
+
+	priv = (struct crb_priv *) devm_kzalloc(dev, sizeof(struct crb_priv),
+						GFP_KERNEL);
+	if (!priv) {
+		dev_err(dev, "failed to devm_kzalloc for private data\n");
+		return -ENOMEM;
+	}
+
+	sm = le32_to_cpu(buf->start_method);
+
+	/* Quirk: the PTT in 4th Gen Core CPUs reports only ACPI start but in
+	 * practice seems to require both ACPI start and CRB start, so CRB
+	 * start is also enabled whenever the device HID is "MSFT0101".
+	 */
+	if (sm == CRB_SM_CRB || sm == CRB_SM_CRB_WITH_ACPI_START ||
+	    !strcmp(acpi_device_hid(device), "MSFT0101"))
+		priv->flags |= CRB_FL_CRB_START;
+
+	if (sm == CRB_SM_ACPI_START || sm == CRB_SM_CRB_WITH_ACPI_START)
+		priv->flags |= CRB_FL_ACPI_START;
+
+	priv->cca = (struct crb_control_area *)
+		devm_ioremap_nocache(dev, buf->control_area_pa, 0x1000);
+	if (!priv->cca) {
+		dev_err(dev, "ioremap of the control area failed\n");
+		return -ENOMEM;
+	}
+
+	memcpy_fromio(&pa, &priv->cca->cmd_pa, 8);
+	pa = le64_to_cpu(pa);	/* convert exactly once */
+	priv->cmd = devm_ioremap_nocache(dev, pa,
+					 ioread32(&priv->cca->cmd_size));
+	if (!priv->cmd) {
+		dev_err(dev, "ioremap of the command buffer failed\n");
+		return -ENOMEM;
+	}
+
+	memcpy_fromio(&pa, &priv->cca->rsp_pa, 8);
+	pa = le64_to_cpu(pa);	/* convert exactly once */
+	priv->rsp = devm_ioremap_nocache(dev, pa,
+					 ioread32(&priv->cca->rsp_size));
+	if (!priv->rsp) {
+		dev_err(dev, "ioremap of the response buffer failed\n");
+		return -ENOMEM;
+	}
+
+	chip->vendor.priv = priv;
+
+	/* Default timeouts and durations */
+	chip->vendor.timeout_a = usecs_to_jiffies(TPM2_TIMEOUT_A);
+	chip->vendor.timeout_b = usecs_to_jiffies(TPM2_TIMEOUT_B);
+	chip->vendor.timeout_c = usecs_to_jiffies(TPM2_TIMEOUT_C);
+	chip->vendor.timeout_d = usecs_to_jiffies(TPM2_TIMEOUT_D);
+	chip->vendor.duration[TPM_SHORT] =
+		usecs_to_jiffies(TPM2_DURATION_SHORT);
+	chip->vendor.duration[TPM_MEDIUM] =
+		usecs_to_jiffies(TPM2_DURATION_MEDIUM);
+	chip->vendor.duration[TPM_LONG] =
+		usecs_to_jiffies(TPM2_DURATION_LONG);
+
+	chip->acpi_dev_handle = device->handle;
+
+	rc = tpm2_do_selftest(chip);
+	if (rc)
+		return rc;
+
+	return tpm_chip_register(chip);
+}
+
+static int crb_acpi_remove(struct acpi_device *device)
+{
+	struct device *dev = &device->dev;
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	tpm_chip_unregister(chip);	/* undoes crb_acpi_add() */
+	return 0;
+}
+
+static struct acpi_device_id crb_device_ids[] = {
+	{"MSFT0101", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, crb_device_ids);
+
+static struct acpi_driver crb_acpi_driver = {
+	.name = "tpm_crb",
+	.ids = crb_device_ids,
+	.ops = {
+		.add = crb_acpi_add,
+		.remove = crb_acpi_remove,
+	},
+	.drv = {
+		.pm = &crb_pm,
+	},
+};
+
+module_acpi_driver(crb_acpi_driver);
+MODULE_AUTHOR("Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>");
+MODULE_DESCRIPTION("TPM2 Driver");
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL");
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 6/8] tpm: TPM 2.0 baseline support
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Jarkko Sakkinen,
	Will Arthur
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

TPM 2.0 devices are separated by adding a field 'flags' to struct
tpm_chip and defining a flag TPM_CHIP_FLAG_TPM2 for tagging them.

This patch adds the following internal functions:

- tpm2_get_random()
- tpm2_get_tpm_pt()
- tpm2_pcr_extend()
- tpm2_pcr_read()
- tpm2_startup()

Additionally, the following exported functions are implemented for
implementing TPM 2.0 device drivers:

- tpm2_do_selftest()
- tpm2_calc_ordinal_duration()
- tpm2_gen_interrupt()

The existing functions that are exported for use by existing
subsystems have been changed to check the flags field in struct
tpm_chip and use the appropriate TPM 2.0 counterpart if
TPM_CHIP_FLAG_TPM2 is set.

The code for tpm2_calc_ordinal_duration() and tpm2_startup() was
originally written by Will Arthur.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Will Arthur <will.c.arthur@intel.com>
---
 drivers/char/tpm/Makefile        |   2 +-
 drivers/char/tpm/tpm-chip.c      |  27 +-
 drivers/char/tpm/tpm-interface.c |  24 +-
 drivers/char/tpm/tpm.h           |  67 +++++
 drivers/char/tpm/tpm2-cmd.c      | 571 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 676 insertions(+), 15 deletions(-)
 create mode 100644 drivers/char/tpm/tpm2-cmd.c

diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 837da04..ae56af9 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the kernel tpm device drivers.
 #
 obj-$(CONFIG_TCG_TPM) += tpm.o
-tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o
+tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o tpm2-cmd.o
 tpm-$(CONFIG_ACPI) += tpm_ppi.o
 
 ifdef CONFIG_ACPI
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index e034bd7..3f3f2de 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -195,15 +195,18 @@ int tpm_chip_register(struct tpm_chip *chip)
 	if (rc)
 		return rc;
 
-	rc = tpm_sysfs_add_device(chip);
-	if (rc)
-		goto del_misc;
+	/* Populate sysfs for TPM1 devices. */
+	if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+		rc = tpm_sysfs_add_device(chip);
+		if (rc)
+			goto del_misc;
 
-	rc = tpm_add_ppi(chip);
-	if (rc)
-		goto del_sysfs;
+		rc = tpm_add_ppi(chip);
+		if (rc)
+			goto del_sysfs;
 
-	chip->bios_dir = tpm_bios_log_setup(chip->devname);
+		chip->bios_dir = tpm_bios_log_setup(chip->devname);
+	}
 
 	/* Make the chip available. */
 	spin_lock(&driver_lock);
@@ -236,10 +239,12 @@ void tpm_chip_unregister(struct tpm_chip *chip)
 	spin_unlock(&driver_lock);
 	synchronize_rcu();
 
-	if (chip->bios_dir)
-		tpm_bios_log_teardown(chip->bios_dir);
-	tpm_remove_ppi(&chip->dev->kobj);
-	tpm_sysfs_del_device(chip);
+	if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+		if (chip->bios_dir)
+			tpm_bios_log_teardown(chip->bios_dir);
+		tpm_remove_ppi(chip);
+		tpm_sysfs_del_device(chip);
+	}
 
 	tpm_dev_del_device(chip);
 }
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index b6f6b17..8a14887 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -360,7 +360,10 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 	if (chip->vendor.irq)
 		goto out_recv;
 
-	stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		stop = jiffies + tpm2_calc_ordinal_duration(chip, ordinal);
+	else
+		stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
 	do {
 		u8 status = chip->ops->status(chip);
 		if ((status & chip->ops->req_complete_mask) ==
@@ -483,7 +486,8 @@ static const struct tpm_input_header tpm_startup_header = {
 static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
 {
 	struct tpm_cmd_t start_cmd;
 	start_cmd.header.in = tpm_startup_header;
+
 	start_cmd.params.startup_in.startup_type = startup_type;
 	return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
 				"attempting to start the TPM");
@@ -680,7 +683,10 @@ int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf)
 	chip = tpm_chip_find_get(chip_num);
 	if (chip == NULL)
 		return -ENODEV;
-	rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		rc = tpm2_pcr_read(chip, pcr_idx, res_buf);
+	else
+		rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf);
 	tpm_chip_put(chip);
 	return rc;
 }
@@ -714,6 +720,12 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
 	if (chip == NULL)
 		return -ENODEV;
 
+	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+		rc = tpm2_pcr_extend(chip, pcr_idx, hash);
+		tpm_chip_put(chip);
+		return rc;
+	}
+
 	cmd.header.in = pcrextend_header;
 	cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
 	memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
@@ -974,6 +986,12 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
 	if (chip == NULL)
 		return -ENODEV;
 
+	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+		err = tpm2_get_random(chip, out, max);
+		tpm_chip_put(chip);
+		return err;
+	}
+
 	do {
 		tpm_cmd.header.in = tpm_getrandom_header;
 		tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 83103e0..c49868d 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -62,6 +62,57 @@ enum tpm_duration {
 #define TPM_ERR_INVALID_POSTINIT 38
 
 #define TPM_HEADER_SIZE		10
+
+enum tpm2_const {
+	TPM2_PLATFORM_PCR	= 24,
+	TPM2_PCR_SELECT_MIN	= ((TPM2_PLATFORM_PCR + 7) / 8),
+	TPM2_TIMEOUT_A		= 750 * 1000,
+	TPM2_TIMEOUT_B		= 2000 * 1000,
+	TPM2_TIMEOUT_C		= 200 * 1000,
+	TPM2_TIMEOUT_D		= 30 * 1000,
+	TPM2_DURATION_SHORT	= 20 * 1000,
+	TPM2_DURATION_MEDIUM	= 750 * 1000,
+	TPM2_DURATION_LONG	= 2000 * 1000,
+};
+
+enum tpm2_structures {
+	TPM2_ST_NO_SESSIONS	= 0x8001,
+	TPM2_ST_SESSIONS	= 0x8002,
+};
+
+enum tpm2_return_codes {
+	TPM2_RC_TESTING		= 0x090A,
+	TPM2_RC_DISABLED	= 0x0120,
+};
+
+enum tpm2_algorithms {
+	TPM2_ALG_SHA1		= 0x0004,
+};
+
+enum tpm2_command_codes {
+	TPM2_CC_FIRST		= 0x011F,
+	TPM2_CC_SELF_TEST	= 0x0143,
+	TPM2_CC_STARTUP		= 0x0144,
+	TPM2_CC_GET_CAPABILITY	= 0x017A,
+	TPM2_CC_GET_RANDOM	= 0x017B,
+	TPM2_CC_PCR_READ	= 0x017E,
+	TPM2_CC_PCR_EXTEND	= 0x0182,
+	TPM2_CC_LAST		= 0x018F,
+};
+
+enum tpm2_permanent_handles {
+	TPM2_RS_PW		= 0x40000009,
+};
+
+enum tpm2_capabilities {
+	TPM2_CAP_TPM_PROPERTIES = 6,
+};
+
+enum tpm2_startup_types {
+	TPM2_SU_CLEAR	= 0x0000,
+	TPM2_SU_STATE	= 0x0001,
+};
+
 struct tpm_chip;
 
 struct tpm_vendor_specific {
@@ -96,12 +147,17 @@ struct tpm_vendor_specific {
 
 #define TPM_PPI_VERSION_LEN		3
 
+enum tpm_chip_flags {
+	TPM_CHIP_FLAG_TPM2	= BIT(0),
+};
+
 struct tpm_chip {
 	struct device *pdev;	/* Device stuff */
 	struct device dev;
 	struct cdev cdev;
 
 	const struct tpm_class_ops *ops;
+	unsigned int flags;
 
 	int dev_num;		/* /dev/tpm# */
 	char devname[7];
@@ -364,3 +420,14 @@ static inline void tpm_remove_ppi(struct tpm_chip *chip)
 {
 }
 #endif
+
+int tpm2_startup(struct tpm_chip *chip, __be16 startup_type);
+int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf);
+int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash);
+int tpm2_get_random(struct tpm_chip *chip, u8 *out, size_t max);
+
+extern ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,
+			       u32 *value, const char *desc);
+extern unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *, u32);
+extern int tpm2_do_selftest(struct tpm_chip *chip);
+extern int tpm2_gen_interrupt(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
new file mode 100644
index 0000000..b2b1d79
--- /dev/null
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ * Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+ *
+ * Maintained by: <tpmdd-devel@lists.sourceforge.net>
+ *
+ * This file contains TPM2 protocol implementations of the commands
+ * used by the kernel internally.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "tpm.h"
+
+struct tpm2_startup_in {
+	__be16	startup_type;
+} __packed;
+
+struct tpm2_self_test_in {
+	u8	full_test;
+} __packed;
+
+struct tpm2_pcr_read_in {
+	__be32	pcr_selects_cnt;
+	__be16	hash_alg;
+	u8	pcr_select_size;
+	u8	pcr_select[TPM2_PCR_SELECT_MIN];
+} __packed;
+
+struct tpm2_pcr_read_out {
+	__be32	update_cnt;
+	__be32	pcr_selects_cnt;
+	__be16	hash_alg;
+	u8	pcr_select_size;
+	u8	pcr_select[TPM2_PCR_SELECT_MIN];
+	__be32	digests_cnt;
+	__be16	digest_size;
+	u8	digest[TPM_DIGEST_SIZE];
+} __packed;
+
+struct tpm2_null_auth_area {
+	__be32			handle;
+	__be16			nonce_size;
+	u8			attributes;
+	__be16			auth_size;
+} __packed;
+
+struct tpm2_pcr_extend_in {
+	__be32				pcr_idx;
+	__be32				auth_area_size;
+	struct tpm2_null_auth_area	auth_area;
+	__be32				digest_cnt;
+	__be16				hash_alg;
+	u8				digest[TPM_DIGEST_SIZE];
+} __packed;
+
+struct tpm2_get_tpm_pt_in {
+	__be32	cap_id;
+	__be32	property_id;
+	__be32	property_cnt;
+} __packed;
+
+struct tpm2_get_tpm_pt_out {
+	u8	more_data;
+	__be32	subcap_id;
+	__be32	property_cnt;
+	__be32	property_id;
+	__be32	value;
+} __packed;
+
+struct tpm2_get_random_in {
+	__be16	size;
+} __packed;
+
+struct tpm2_get_random_out {
+	__be16	size;
+	u8	buffer[TPM_MAX_RNG_DATA];
+} __packed;
+
+union tpm2_cmd_params {
+	struct	tpm2_startup_in		startup_in;
+	struct	tpm2_self_test_in	selftest_in;
+	struct	tpm2_pcr_read_in	pcrread_in;
+	struct	tpm2_pcr_read_out	pcrread_out;
+	struct	tpm2_pcr_extend_in	pcrextend_in;
+	struct	tpm2_get_tpm_pt_in	get_tpm_pt_in;
+	struct	tpm2_get_tpm_pt_out	get_tpm_pt_out;
+	struct	tpm2_get_random_in	getrandom_in;
+	struct	tpm2_get_random_out	getrandom_out;
+};
+
+struct tpm2_cmd {
+	tpm_cmd_header		header;
+	union tpm2_cmd_params	params;
+} __packed;
+
+/*
+ * Array with one entry per ordinal defining the maximum amount
+ * of time the chip could take to return the result. The values
+ * of the SHORT, MEDIUM, and LONG durations are taken from the
+ * PC Client Profile (PTP) specification.
+ */
+static const u8 tpm2_ordinal_duration[TPM2_CC_LAST - TPM2_CC_FIRST + 1] = {
+	TPM_UNDEFINED,		/* 11F */
+	TPM_UNDEFINED,		/* 120 */
+	TPM_LONG,		/* 121 */
+	TPM_UNDEFINED,		/* 122 */
+	TPM_UNDEFINED,		/* 123 */
+	TPM_UNDEFINED,		/* 124 */
+	TPM_UNDEFINED,		/* 125 */
+	TPM_UNDEFINED,		/* 126 */
+	TPM_UNDEFINED,		/* 127 */
+	TPM_UNDEFINED,		/* 128 */
+	TPM_LONG,		/* 129 */
+	TPM_UNDEFINED,		/* 12a */
+	TPM_UNDEFINED,		/* 12b */
+	TPM_UNDEFINED,		/* 12c */
+	TPM_UNDEFINED,		/* 12d */
+	TPM_UNDEFINED,		/* 12e */
+	TPM_UNDEFINED,		/* 12f */
+	TPM_UNDEFINED,		/* 130 */
+	TPM_UNDEFINED,		/* 131 */
+	TPM_UNDEFINED,		/* 132 */
+	TPM_UNDEFINED,		/* 133 */
+	TPM_UNDEFINED,		/* 134 */
+	TPM_UNDEFINED,		/* 135 */
+	TPM_UNDEFINED,		/* 136 */
+	TPM_UNDEFINED,		/* 137 */
+	TPM_UNDEFINED,		/* 138 */
+	TPM_UNDEFINED,		/* 139 */
+	TPM_UNDEFINED,		/* 13a */
+	TPM_UNDEFINED,		/* 13b */
+	TPM_UNDEFINED,		/* 13c */
+	TPM_UNDEFINED,		/* 13d */
+	TPM_MEDIUM,		/* 13e */
+	TPM_UNDEFINED,		/* 13f */
+	TPM_UNDEFINED,		/* 140 */
+	TPM_UNDEFINED,		/* 141 */
+	TPM_UNDEFINED,		/* 142 */
+	TPM_LONG,		/* 143 */
+	TPM_MEDIUM,		/* 144 */
+	TPM_UNDEFINED,		/* 145 */
+	TPM_UNDEFINED,		/* 146 */
+	TPM_UNDEFINED,		/* 147 */
+	TPM_UNDEFINED,		/* 148 */
+	TPM_UNDEFINED,		/* 149 */
+	TPM_UNDEFINED,		/* 14a */
+	TPM_UNDEFINED,		/* 14b */
+	TPM_UNDEFINED,		/* 14c */
+	TPM_UNDEFINED,		/* 14d */
+	TPM_LONG,		/* 14e */
+	TPM_UNDEFINED,		/* 14f */
+	TPM_UNDEFINED,		/* 150 */
+	TPM_UNDEFINED,		/* 151 */
+	TPM_UNDEFINED,		/* 152 */
+	TPM_UNDEFINED,		/* 153 */
+	TPM_UNDEFINED,		/* 154 */
+	TPM_UNDEFINED,		/* 155 */
+	TPM_UNDEFINED,		/* 156 */
+	TPM_UNDEFINED,		/* 157 */
+	TPM_UNDEFINED,		/* 158 */
+	TPM_UNDEFINED,		/* 159 */
+	TPM_UNDEFINED,		/* 15a */
+	TPM_UNDEFINED,		/* 15b */
+	TPM_MEDIUM,		/* 15c */
+	TPM_UNDEFINED,		/* 15d */
+	TPM_UNDEFINED,		/* 15e */
+	TPM_UNDEFINED,		/* 15f */
+	TPM_UNDEFINED,		/* 160 */
+	TPM_UNDEFINED,		/* 161 */
+	TPM_UNDEFINED,		/* 162 */
+	TPM_UNDEFINED,		/* 163 */
+	TPM_UNDEFINED,		/* 164 */
+	TPM_UNDEFINED,		/* 165 */
+	TPM_UNDEFINED,		/* 166 */
+	TPM_UNDEFINED,		/* 167 */
+	TPM_UNDEFINED,		/* 168 */
+	TPM_UNDEFINED,		/* 169 */
+	TPM_UNDEFINED,		/* 16a */
+	TPM_UNDEFINED,		/* 16b */
+	TPM_UNDEFINED,		/* 16c */
+	TPM_UNDEFINED,		/* 16d */
+	TPM_UNDEFINED,		/* 16e */
+	TPM_UNDEFINED,		/* 16f */
+	TPM_UNDEFINED,		/* 170 */
+	TPM_UNDEFINED,		/* 171 */
+	TPM_UNDEFINED,		/* 172 */
+	TPM_UNDEFINED,		/* 173 */
+	TPM_UNDEFINED,		/* 174 */
+	TPM_UNDEFINED,		/* 175 */
+	TPM_UNDEFINED,		/* 176 */
+	TPM_LONG,		/* 177 */
+	TPM_UNDEFINED,		/* 178 */
+	TPM_UNDEFINED,		/* 179 */
+	TPM_MEDIUM,		/* 17a */
+	TPM_LONG,		/* 17b */
+	TPM_UNDEFINED,		/* 17c */
+	TPM_UNDEFINED,		/* 17d */
+	TPM_UNDEFINED,		/* 17e */
+	TPM_UNDEFINED,		/* 17f */
+	TPM_UNDEFINED,		/* 180 */
+	TPM_UNDEFINED,		/* 181 */
+	TPM_MEDIUM,		/* 182 */
+	TPM_UNDEFINED,		/* 183 */
+	TPM_UNDEFINED,		/* 184 */
+	TPM_MEDIUM,		/* 185 */
+	TPM_MEDIUM,		/* 186 */
+	TPM_UNDEFINED,		/* 187 */
+	TPM_UNDEFINED,		/* 188 */
+	TPM_UNDEFINED,		/* 189 */
+	TPM_UNDEFINED,		/* 18a */
+	TPM_UNDEFINED,		/* 18b */
+	TPM_UNDEFINED,		/* 18c */
+	TPM_UNDEFINED,		/* 18d */
+	TPM_UNDEFINED,		/* 18e */
+	TPM_UNDEFINED		/* 18f */
+};
+
+#define TPM2_STARTUP_IN_SIZE \
+	(sizeof(struct tpm_input_header) + \
+	 sizeof(struct tpm2_startup_in))
+
+static const struct tpm_input_header tpm2_startup_header = {
+	.tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
+	.length = cpu_to_be32(TPM2_STARTUP_IN_SIZE),
+	.ordinal = cpu_to_be32(TPM2_CC_STARTUP)
+};
+
+/**
+ * tpm2_startup() - send startup command to the TPM chip
+ * @chip:		TPM chip to use.
+ * @startup_type:	startup type. The value is either
+ *			TPM2_SU_CLEAR or TPM2_SU_STATE.
+ *
+ * 0 is returned when the operation is successful. If a negative number is
+ * returned it denotes a POSIX error code. If a positive number is returned
+ * it denotes a TPM error.
+ */
+int tpm2_startup(struct tpm_chip *chip, __be16 startup_type)
+{
+	struct tpm2_cmd cmd;
+
+	cmd.header.in = tpm2_startup_header;
+
+	cmd.params.startup_in.startup_type = startup_type;
+	return tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+				"attempting to start the TPM");
+}
+
+#define TPM2_PCR_READ_IN_SIZE \
+	(sizeof(struct tpm_input_header) + \
+	 sizeof(struct tpm2_pcr_read_in))	/* header + PCR read parameters */
+
+static const struct tpm_input_header tpm2_pcrread_header = {
+	.tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
+	.length = cpu_to_be32(TPM2_PCR_READ_IN_SIZE),
+	.ordinal = cpu_to_be32(TPM2_CC_PCR_READ)
+};	/* copied into cmd.header.in before each PCR read request */
+
+/**
+ * tpm2_pcr_read() - read a PCR value
+ * @chip:	TPM chip to use.
+ * @pcr_idx:	index of the PCR to read.
+ * @res_buf:	buffer to store the resulting hash (TPM_DIGEST_SIZE bytes).
+ *
+ * 0 is returned when the operation is successful. If a negative number is
+ * returned it denotes a POSIX error code (-EINVAL when @pcr_idx is out of
+ * range). If a positive number is returned it denotes a TPM error.
+ */
+int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
+{
+	int rc;
+	struct tpm2_cmd cmd;
+	u8 *buf;
+
+	if (pcr_idx < 0 || pcr_idx >= TPM2_PLATFORM_PCR)
+		return -EINVAL;	/* a negative index would write outside pcr_select[] */
+
+	cmd.header.in = tpm2_pcrread_header;
+	cmd.params.pcrread_in.pcr_selects_cnt = cpu_to_be32(1);
+	cmd.params.pcrread_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1);
+	cmd.params.pcrread_in.pcr_select_size = TPM2_PCR_SELECT_MIN;
+
+	memset(cmd.params.pcrread_in.pcr_select, 0,
+	       sizeof(cmd.params.pcrread_in.pcr_select));
+	cmd.params.pcrread_in.pcr_select[pcr_idx >> 3] = 1 << (pcr_idx & 0x7);	/* 8 PCRs per byte */
+
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+			      "attempting to read a pcr value");
+	if (rc == 0) {
+		buf = cmd.params.pcrread_out.digest;
+		memcpy(res_buf, buf, TPM_DIGEST_SIZE);
+	}
+
+	return rc;
+}
+
+static const struct tpm_input_header tpm2_pcrextend_header = {
+	.tag = cpu_to_be16(TPM2_ST_SESSIONS),
+	.length = cpu_to_be32(sizeof(struct tpm_input_header) +
+			      sizeof(struct tpm2_pcr_extend_in)),
+	.ordinal = cpu_to_be32(TPM2_CC_PCR_EXTEND)
+};
+
+/**
+ * tpm2_pcr_extend() - extend a PCR value
+ * @chip:	TPM chip to use.
+ * @pcr_idx:	index of the PCR.
+ * @hash:	hash value (TPM_DIGEST_SIZE bytes) to use for the extend operation.
+ *
+ * 0 is returned when the operation is successful. If a negative number is
+ * returned it denotes a POSIX error code. If a positive number is returned
+ * it denotes a TPM error.
+ */
+int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash)
+{
+	struct tpm2_cmd cmd;
+	int rc;
+
+	cmd.header.in = tpm2_pcrextend_header;
+	cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
+	cmd.params.pcrextend_in.auth_area_size =
+		cpu_to_be32(sizeof(struct tpm2_null_auth_area));
+	cmd.params.pcrextend_in.auth_area.handle =
+		cpu_to_be32(TPM2_RS_PW);
+	cmd.params.pcrextend_in.auth_area.nonce_size = 0;
+	cmd.params.pcrextend_in.auth_area.attributes = 0;
+	cmd.params.pcrextend_in.auth_area.auth_size = 0;
+	cmd.params.pcrextend_in.digest_cnt = cpu_to_be32(1);
+	cmd.params.pcrextend_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1);
+	memcpy(cmd.params.pcrextend_in.digest, hash, TPM_DIGEST_SIZE);
+
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+			      "attempting extend a PCR value");
+
+	return rc;
+}
+
+static const struct tpm_input_header tpm2_getrandom_header = {
+	.tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
+	.length = cpu_to_be32(sizeof(struct tpm_input_header) +
+			      sizeof(struct tpm2_get_random_in)),
+	.ordinal = cpu_to_be32(TPM2_CC_GET_RANDOM)
+};	/* copied into cmd.header.in by tpm2_get_random() */
+
+/**
+ * tpm2_get_random() - get random bytes from the TPM RNG
+ * @chip: TPM chip to use
+ * @out: destination buffer for the random bytes
+ * @max: the max number of bytes to write to @out
+ *
+ * Returns the number of bytes written to @out when at least one byte was
+ * received. Returns -EINVAL if @out is NULL, @max is zero or @max exceeds the
+ * response buffer size, and -EIO if no bytes could be read.
+ */
+int tpm2_get_random(struct tpm_chip *chip, u8 *out, size_t max)
+{
+	struct tpm2_cmd cmd;
+	u32 recd;
+	u32 num_bytes;
+	int err;
+	int total = 0;
+	int retries = 5;
+	u8 *dest = out;
+
+	num_bytes = min_t(u32, max, sizeof(cmd.params.getrandom_out.buffer));
+
+	if (!out || !num_bytes ||
+	    max > sizeof(cmd.params.getrandom_out.buffer))
+		return -EINVAL;
+
+	do {
+		cmd.header.in = tpm2_getrandom_header;	/* cmd is reused for the reply, so rebuild it */
+		cmd.params.getrandom_in.size = cpu_to_be16(num_bytes);
+
+		err = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+				       "attempting get random");
+		if (err)
+			break;	/* err itself is not returned; see the final return */
+
+		recd = min_t(u32, be16_to_cpu(cmd.params.getrandom_out.size),
+			     num_bytes);	/* never copy more than was requested */
+		memcpy(dest, cmd.params.getrandom_out.buffer, recd);
+
+		dest += recd;
+		total += recd;
+		num_bytes -= recd;
+	} while (retries-- && total < max);	/* ask again for the remainder, bounded by retries */
+
+	return total ? total : -EIO;
+}
+
+#define TPM2_GET_TPM_PT_IN_SIZE \
+	(sizeof(struct tpm_input_header) + \
+	 sizeof(struct tpm2_get_tpm_pt_in))	/* header + request parameters */
+
+static const struct tpm_input_header tpm2_get_tpm_pt_header = {
+	.tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
+	.length = cpu_to_be32(TPM2_GET_TPM_PT_IN_SIZE),
+	.ordinal = cpu_to_be32(TPM2_CC_GET_CAPABILITY)
+};	/* copied into cmd.header.in by tpm2_get_tpm_pt() */
+
+/**
+ * tpm2_get_tpm_pt() - get value of a TPM_CAP_TPM_PROPERTIES type property
+ * @chip:		TPM chip to use.
+ * @property_id:	property ID.
+ * @value:		output variable; written only when 0 is returned.
+ * @desc:		passed to tpm_transmit_cmd()
+ *
+ * Returns 0 on success, a negative POSIX error code or a positive TPM error
+ * code. NOTE(review): @value is stored exactly as found in the response,
+ * without be32_to_cpu() - confirm that callers expect this.
+ */
+ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,  u32 *value,
+			const char *desc)
+{
+	struct tpm2_cmd cmd;
+	int rc;
+
+	cmd.header.in = tpm2_get_tpm_pt_header;
+	cmd.params.get_tpm_pt_in.cap_id = cpu_to_be32(TPM2_CAP_TPM_PROPERTIES);
+	cmd.params.get_tpm_pt_in.property_id = cpu_to_be32(property_id);
+	cmd.params.get_tpm_pt_in.property_cnt = cpu_to_be32(1);	/* ask for a single property */
+
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), desc);
+	if (!rc)
+		*value = cmd.params.get_tpm_pt_out.value;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(tpm2_get_tpm_pt);
+
+/**
+ * tpm2_calc_ordinal_duration() - maximum duration for a command
+ * @chip:	TPM chip to use.
+ * @ordinal:	command code number.
+ *
+ * Returns the duration from chip->vendor.duration[] for the command's class,
+ * or 2 * 60 * HZ when the ordinal is outside TPM2_CC_FIRST..TPM2_CC_LAST, its
+ * class is TPM_UNDEFINED or the stored duration is not positive.
+ */
+unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal)
+{
+	int index = TPM_UNDEFINED;
+	int duration = 0;
+
+	if (ordinal >= TPM2_CC_FIRST && ordinal <= TPM2_CC_LAST)
+		index = tpm2_ordinal_duration[ordinal - TPM2_CC_FIRST];	/* table is indexed from TPM2_CC_FIRST */
+
+	if (index != TPM_UNDEFINED)
+		duration = chip->vendor.duration[index];
+	if (duration <= 0)
+		return 2 * 60 * HZ;	/* fallback when no usable duration is known */
+	else
+		return duration;
+}
+EXPORT_SYMBOL_GPL(tpm2_calc_ordinal_duration);
+
+#define TPM2_SELF_TEST_IN_SIZE \
+	(sizeof(struct tpm_input_header) + sizeof(struct tpm2_self_test_in))
+
+static const struct tpm_input_header tpm2_selftest_header = {
+	.tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
+	.length = cpu_to_be32(TPM2_SELF_TEST_IN_SIZE),
+	.ordinal = cpu_to_be32(TPM2_CC_SELF_TEST)
+};	/* copied into cmd.header.in by tpm2_start_selftest() */
+
+/**
+ * tpm2_start_selftest() - start a self test
+ * @chip: TPM chip to use
+ * @full: test all commands instead of testing only those that were not
+ *        previously tested.
+ *
+ * 0 is returned when the operation is successful. If a negative number is
+ * returned it denotes a POSIX error code. If a positive number is returned
+ * it denotes a TPM error.
+ */
+static int tpm2_start_selftest(struct tpm_chip *chip, bool full)
+{
+	int rc;
+	struct tpm2_cmd cmd;
+
+	cmd.header.in = tpm2_selftest_header;
+	cmd.params.selftest_in.full_test = full;
+
+	rc = tpm_transmit_cmd(chip, &cmd, TPM2_SELF_TEST_IN_SIZE,	/* buffer size is the request size here, not sizeof(cmd) */
+			      "continue selftest");
+
+	return rc;
+}
+
+/**
+ * tpm2_do_selftest() - run a full self test
+ * @chip: TPM chip to use
+ *
+ * During the self test TPM2 commands return with the error code RC_TESTING.
+ * Waiting is done by issuing PCR read until it executes successfully.
+ *
+ * Returns 0 on success, a negative POSIX error code, or a positive TPM error
+ * code. The latter is TPM2_RC_TESTING when the TPM still reports testing
+ * after the last polling attempt.
+ */
+int tpm2_do_selftest(struct tpm_chip *chip)
+{
+	int rc;
+	unsigned int loops;
+	unsigned int delay_msec = 100;
+	unsigned long duration;
+	struct tpm2_cmd cmd;
+	int i;
+
+	duration = tpm2_calc_ordinal_duration(chip, TPM2_CC_SELF_TEST);
+
+	loops = jiffies_to_msecs(duration) / delay_msec;	/* polling attempts that fit into the duration */
+
+	rc = tpm2_start_selftest(chip, true);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < loops; i++) {
+		/* Attempt to read a PCR value */
+		cmd.header.in = tpm2_pcrread_header;
+		cmd.params.pcrread_in.pcr_selects_cnt = cpu_to_be32(1);
+		cmd.params.pcrread_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1);
+		cmd.params.pcrread_in.pcr_select_size = TPM2_PCR_SELECT_MIN;
+		cmd.params.pcrread_in.pcr_select[0] = 0x01;
+		cmd.params.pcrread_in.pcr_select[1] = 0x00;
+		cmd.params.pcrread_in.pcr_select[2] = 0x00;
+
+		rc = tpm_transmit_cmd(chip, (u8 *) &cmd, sizeof(cmd), NULL);	/* NULL desc: no error message is logged */
+		if (rc < 0)
+			break;
+
+		rc = be32_to_cpu(cmd.header.out.return_code);
+		if (rc != TPM2_RC_TESTING)
+			break;	/* finished: either success or a different TPM error */
+
+		msleep(delay_msec);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(tpm2_do_selftest);
+
+/**
+ * tpm2_gen_interrupt() - generate an interrupt
+ * @chip: TPM chip to use
+ *
+ * Issues a tpm2_get_tpm_pt() query and discards the value it reads. Returns 0
+ * on success, a negative POSIX error code or a positive TPM error code, as
+ * returned by tpm2_get_tpm_pt().
+ */
+
+int tpm2_gen_interrupt(struct tpm_chip *chip)
+{
+	u32 dummy;
+	int rc;
+
+	rc = tpm2_get_tpm_pt(chip,
+			     TPM2_CAP_TPM_PROPERTIES,	/* NOTE(review): passed as property ID, used as cap_id elsewhere - confirm */
+			     &dummy,
+			     "attempting to generate an interrupt");
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(tpm2_gen_interrupt);
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 5/8] tpm: device class for tpm
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

Added a dedicated device class for TPM. Uses MISC_MAJOR:TPM_MINOR for the
first character device in order to retain backwards compatibility.
Added tpm_dev_release() back, attached to the character device.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-chip.c        | 72 ++++++++++++++++++++++++++++++++------
 drivers/char/tpm/tpm-dev.c         | 36 ++-----------------
 drivers/char/tpm/tpm-interface.c   | 29 +++++++++++++++
 drivers/char/tpm/tpm.h             | 12 ++++---
 drivers/char/tpm/tpm_i2c_nuvoton.c |  2 +-
 drivers/char/tpm/tpm_tis.c         |  4 +--
 6 files changed, 105 insertions(+), 50 deletions(-)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index da97354..e034bd7 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -25,6 +25,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
+#include <linux/major.h>
 #include "tpm.h"
 #include "tpm_eventlog.h"
 
@@ -32,6 +33,9 @@ static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
 static LIST_HEAD(tpm_chip_list);
 static DEFINE_SPINLOCK(driver_lock);
 
+struct class *tpm_class;
+dev_t tpm_devt;
+
 /*
  * tpm_chip_find_get - return tpm_chip for a given chip number
  * @chip_num the device number for the chip
@@ -55,16 +59,14 @@ struct tpm_chip *tpm_chip_find_get(int chip_num)
 }
 
 /**
- * tpmm_chip_remove() - free chip memory and device number
- * @data: points to struct tpm_chip instance
+ * tpm_dev_release() - free chip memory and the device number
+ * @dev: the character device for the TPM chip
  *
- * This is used internally by tpmm_chip_alloc() and called by devres
- * when the device is released. This function does the opposite of
- * tpmm_chip_alloc() freeing memory and the device number.
+ * This is used as the release function for the character device.
  */
-static void tpmm_chip_remove(void *data)
+static void tpm_dev_release(struct device *dev)
 {
-	struct tpm_chip *chip = (struct tpm_chip *) data;
+	struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev);
 
 	spin_lock(&driver_lock);
 	clear_bit(chip->dev_num, dev_mask);
@@ -111,18 +113,68 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev,
 	scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num);
 
 	chip->pdev = dev;
-	devm_add_action(dev, tpmm_chip_remove, chip);
+
 	dev_set_drvdata(dev, chip);
 
+	chip->dev.class = tpm_class;
+	chip->dev.release = tpm_dev_release;
+	chip->dev.parent = chip->pdev;
+
+	if (chip->dev_num == 0)
+		chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR);
+	else
+		chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num);
+
+	dev_set_name(&chip->dev, chip->devname);
+
+	device_initialize(&chip->dev);
+
+	chip->cdev.owner = chip->pdev->driver->owner;
+	cdev_init(&chip->cdev, &tpm_fops);
+
 	return chip;
 }
 EXPORT_SYMBOL_GPL(tpmm_chip_alloc);
 
+static int tpm_dev_add_device(struct tpm_chip *chip)
+{
+	int rc;
+
+	rc = device_add(&chip->dev);	/* add the class device first */
+	if (rc) {
+		dev_err(&chip->dev,
+			"unable to device_register() %s, major %d, minor %d, err=%d\n",
+			chip->devname, MAJOR(chip->dev.devt),
+			MINOR(chip->dev.devt), rc);
+
+		return rc;
+	}
+
+	rc = cdev_add(&chip->cdev, chip->dev.devt, 1);	/* one minor at the same dev_t */
+	if (rc) {
+		dev_err(&chip->dev,
+			"unable to cdev_add() %s, major %d, minor %d, err=%d\n",
+			chip->devname, MAJOR(chip->dev.devt),
+			MINOR(chip->dev.devt), rc);
+
+		device_unregister(&chip->dev);	/* back out the device_add() above */
+		return rc;
+	}
+
+	return rc;	/* 0 here: both steps succeeded */
+}
+
+static void tpm_dev_del_device(struct tpm_chip *chip)
+{
+	cdev_del(&chip->cdev);	/* reverse order of tpm_dev_add_device() */
+	device_unregister(&chip->dev);
+}
+
 /*
- * tpm_chip_register() - create a misc driver for the TPM chip
+ * tpm_chip_register() - create a character device for the TPM chip
  * @chip: TPM chip to use.
  *
- * Creates a misc driver for the TPM chip and adds sysfs interfaces for
+ * Creates a character device for the TPM chip and adds sysfs interfaces for
  * the device, PPI and TCPA. As the last step this function adds the
  * chip to the list of TPM chips available for use.
  *
diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index 3568321..de0337e 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -17,7 +17,6 @@
  * License.
  *
  */
-#include <linux/miscdevice.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include "tpm.h"
@@ -54,9 +53,8 @@ static void timeout_work(struct work_struct *work)
 
 static int tpm_open(struct inode *inode, struct file *file)
 {
-	struct miscdevice *misc = file->private_data;
-	struct tpm_chip *chip = container_of(misc, struct tpm_chip,
-					     vendor.miscdev);
+	struct tpm_chip *chip =
+		container_of(inode->i_cdev, struct tpm_chip, cdev);
 	struct file_priv *priv;
 
 	/* It's assured that the chip will be opened just once,
@@ -173,7 +171,7 @@ static int tpm_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations tpm_fops = {
+const struct file_operations tpm_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
@@ -182,32 +180,4 @@ static const struct file_operations tpm_fops = {
 	.release = tpm_release,
 };
 
-int tpm_dev_add_device(struct tpm_chip *chip)
-{
-	int rc;
 
-	chip->vendor.miscdev.fops = &tpm_fops;
-	if (chip->dev_num == 0)
-		chip->vendor.miscdev.minor = TPM_MINOR;
-	else
-		chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR;
-
-	chip->vendor.miscdev.name = chip->devname;
-	chip->vendor.miscdev.parent = chip->pdev;
-
-	rc = misc_register(&chip->vendor.miscdev);
-	if (rc) {
-		chip->vendor.miscdev.name = NULL;
-		dev_err(chip->pdev,
-			"unable to misc_register %s, minor %d err=%d\n",
-			chip->vendor.miscdev.name,
-			chip->vendor.miscdev.minor, rc);
-	}
-	return rc;
-}
-
-void tpm_dev_del_device(struct tpm_chip *chip)
-{
-	if (chip->vendor.miscdev.name)
-		misc_deregister(&chip->vendor.miscdev);
-}
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index e2af28f..b6f6b17 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -997,6 +997,35 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
 }
 EXPORT_SYMBOL_GPL(tpm_get_random);
 
+static int __init tpm_init(void)
+{
+	int rc;
+
+	tpm_class = class_create(THIS_MODULE, "tpm");
+	if (IS_ERR(tpm_class)) {
+		pr_err("couldn't create tpm class\n");
+		return PTR_ERR(tpm_class);
+	}
+
+	rc = alloc_chrdev_region(&tpm_devt, 0, TPM_NUM_DEVICES, "tpm");	/* major used for chips other than dev_num 0 */
+	if (rc < 0) {
+		pr_err("tpm: failed to allocate char dev region\n");
+		class_destroy(tpm_class);	/* undo class_create() before bailing out */
+		return rc;
+	}
+
+	return 0;
+}
+
+static void __exit tpm_exit(void)
+{
+	class_destroy(tpm_class);	/* class created in tpm_init() */
+	unregister_chrdev_region(tpm_devt, TPM_NUM_DEVICES);	/* region allocated in tpm_init() */
+}
+
+subsys_initcall(tpm_init);
+module_exit(tpm_exit);
+
 MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index b3a7c76..83103e0 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -23,11 +23,11 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
-#include <linux/miscdevice.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/tpm.h>
 #include <linux/acpi.h>
+#include <linux/cdev.h>
 
 enum tpm_const {
 	TPM_MINOR = 224,	/* officially assigned */
@@ -74,7 +74,6 @@ struct tpm_vendor_specific {
 	int region_size;
 	int have_region;
 
-	struct miscdevice miscdev;
 	struct list_head list;
 	int locality;
 	unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */
@@ -99,6 +98,9 @@ struct tpm_vendor_specific {
 
 struct tpm_chip {
 	struct device *pdev;	/* Device stuff */
+	struct device dev;
+	struct cdev cdev;
+
 	const struct tpm_class_ops *ops;
 
 	int dev_num;		/* /dev/tpm# */
@@ -320,6 +322,10 @@ struct tpm_cmd_t {
 	tpm_cmd_params	params;
 } __packed;
 
+extern struct class *tpm_class;
+extern dev_t tpm_devt;
+extern const struct file_operations tpm_fops;
+
 ssize_t	tpm_getcap(struct device *, __be32, cap_t *, const char *);
 ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 		     size_t bufsiz);
@@ -340,8 +346,6 @@ extern struct tpm_chip *tpmm_chip_alloc(struct device *dev,
 extern int tpm_chip_register(struct tpm_chip *chip);
 extern void tpm_chip_unregister(struct tpm_chip *chip);
 
-int tpm_dev_add_device(struct tpm_chip *chip);
-void tpm_dev_del_device(struct tpm_chip *chip);
 int tpm_sysfs_add_device(struct tpm_chip *chip);
 void tpm_sysfs_del_device(struct tpm_chip *chip);
 
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index 92ee9fa..14246e2 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -557,7 +557,7 @@ static int i2c_nuvoton_probe(struct i2c_client *client,
 		rc = devm_request_irq(dev, chip->vendor.irq,
 				      i2c_nuvoton_int_handler,
 				      IRQF_TRIGGER_LOW,
-				      chip->vendor.miscdev.name,
+				      chip->devname,
 				      chip);
 		if (rc) {
 			dev_err(dev, "%s() Unable to request irq: %d for use\n",
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 660d9af..7a2c59b 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -661,7 +661,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 				 TPM_INT_VECTOR(chip->vendor.locality));
 			if (devm_request_irq
 			    (dev, i, tis_int_probe, IRQF_SHARED,
-			     chip->vendor.miscdev.name, chip) != 0) {
+			     chip->devname, chip) != 0) {
 				dev_info(chip->pdev,
 					 "Unable to request irq: %d for probe\n",
 					 i);
@@ -708,7 +708,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 			 TPM_INT_VECTOR(chip->vendor.locality));
 		if (devm_request_irq
 		    (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED,
-		     chip->vendor.miscdev.name, chip) != 0) {
+		     chip->devname, chip) != 0) {
 			dev_info(chip->pdev,
 				 "Unable to request irq: %d for use\n",
 				 chip->vendor.irq);
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 4/8] tpm: rename chip->dev to chip->pdev
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

Rename chip->dev to chip->pdev to make it explicit that this is not the
character device but actually represents the platform device.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-chip.c         |  4 ++--
 drivers/char/tpm/tpm-dev.c          | 10 +++++-----
 drivers/char/tpm/tpm-interface.c    | 29 +++++++++++++++--------------
 drivers/char/tpm/tpm-sysfs.c        |  6 +++---
 drivers/char/tpm/tpm.h              |  4 ++--
 drivers/char/tpm/tpm_atmel.c        | 14 +++++++-------
 drivers/char/tpm/tpm_i2c_atmel.c    | 16 ++++++++--------
 drivers/char/tpm/tpm_i2c_infineon.c |  6 +++---
 drivers/char/tpm/tpm_i2c_nuvoton.c  | 22 +++++++++++-----------
 drivers/char/tpm/tpm_i2c_stm_st33.c |  6 +++---
 drivers/char/tpm/tpm_infineon.c     | 22 +++++++++++-----------
 drivers/char/tpm/tpm_nsc.c          | 20 ++++++++++----------
 drivers/char/tpm/tpm_ppi.c          |  4 ++--
 drivers/char/tpm/tpm_tis.c          | 12 ++++++------
 14 files changed, 88 insertions(+), 87 deletions(-)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 870f8f0..da97354 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -45,7 +45,7 @@ struct tpm_chip *tpm_chip_find_get(int chip_num)
 		if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num)
 			continue;
 
-		if (try_module_get(pos->dev->driver->owner)) {
+		if (try_module_get(pos->pdev->driver->owner)) {
 			chip = pos;
 			break;
 		}
@@ -110,7 +110,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev,
 
 	scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num);
 
-	chip->dev = dev;
+	chip->pdev = dev;
 	devm_add_action(dev, tpmm_chip_remove, chip);
 	dev_set_drvdata(dev, chip);
 
diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index d9b774e..3568321 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -63,7 +63,7 @@ static int tpm_open(struct inode *inode, struct file *file)
 	 * by the check of is_open variable, which is protected
 	 * by driver_lock. */
 	if (test_and_set_bit(0, &chip->is_open)) {
-		dev_dbg(chip->dev, "Another process owns this TPM\n");
+		dev_dbg(chip->pdev, "Another process owns this TPM\n");
 		return -EBUSY;
 	}
 
@@ -81,7 +81,7 @@ static int tpm_open(struct inode *inode, struct file *file)
 	INIT_WORK(&priv->work, timeout_work);
 
 	file->private_data = priv;
-	get_device(chip->dev);
+	get_device(chip->pdev);
 	return 0;
 }
 
@@ -168,7 +168,7 @@ static int tpm_release(struct inode *inode, struct file *file)
 	file->private_data = NULL;
 	atomic_set(&priv->data_pending, 0);
 	clear_bit(0, &priv->chip->is_open);
-	put_device(priv->chip->dev);
+	put_device(priv->chip->pdev);
 	kfree(priv);
 	return 0;
 }
@@ -193,12 +193,12 @@ int tpm_dev_add_device(struct tpm_chip *chip)
 		chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR;
 
 	chip->vendor.miscdev.name = chip->devname;
-	chip->vendor.miscdev.parent = chip->dev;
+	chip->vendor.miscdev.parent = chip->pdev;
 
 	rc = misc_register(&chip->vendor.miscdev);
 	if (rc) {
 		chip->vendor.miscdev.name = NULL;
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"unable to misc_register %s, minor %d err=%d\n",
 			chip->vendor.miscdev.name,
 			chip->vendor.miscdev.minor, rc);
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 4dbed1e..e2af28f 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -343,7 +343,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 	if (count == 0)
 		return -ENODATA;
 	if (count > bufsiz) {
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"invalid count value %x %zx\n", count, bufsiz);
 		return -E2BIG;
 	}
@@ -352,7 +352,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 
 	rc = chip->ops->send(chip, (u8 *) buf, count);
 	if (rc < 0) {
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"tpm_transmit: tpm_send: error %zd\n", rc);
 		goto out;
 	}
@@ -368,7 +368,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 			goto out_recv;
 
 		if (chip->ops->req_canceled(chip, status)) {
-			dev_err(chip->dev, "Operation Canceled\n");
+			dev_err(chip->pdev, "Operation Canceled\n");
 			rc = -ECANCELED;
 			goto out;
 		}
@@ -378,14 +378,14 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 	} while (time_before(jiffies, stop));
 
 	chip->ops->cancel(chip);
-	dev_err(chip->dev, "Operation Timed out\n");
+	dev_err(chip->pdev, "Operation Timed out\n");
 	rc = -ETIME;
 	goto out;
 
 out_recv:
 	rc = chip->ops->recv(chip, (u8 *) buf, bufsiz);
 	if (rc < 0)
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"tpm_transmit: tpm_recv: error %zd\n", rc);
 out:
 	mutex_unlock(&chip->tpm_mutex);
@@ -411,7 +411,8 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd,
 
 	err = be32_to_cpu(header->return_code);
 	if (err != 0 && desc)
-		dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc);
+		dev_err(chip->pdev, "A TPM error (%d) occurred %s\n", err,
+			desc);
 
 	return err;
 }
@@ -505,7 +506,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 	if (rc == TPM_ERR_INVALID_POSTINIT) {
 		/* The TPM is not started, we are the first to talk to it.
 		   Execute a startup command. */
-		dev_info(chip->dev, "Issuing TPM_STARTUP");
+		dev_info(chip->pdev, "Issuing TPM_STARTUP");
 		if (tpm_startup(chip, TPM_ST_CLEAR))
 			return rc;
 
@@ -517,7 +518,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 				  NULL);
 	}
 	if (rc) {
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"A TPM error (%zd) occurred attempting to determine the timeouts\n",
 			rc);
 		goto duration;
@@ -556,7 +557,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 
 	/* Report adjusted timeouts */
 	if (chip->vendor.timeout_adjusted) {
-		dev_info(chip->dev,
+		dev_info(chip->pdev,
 			 HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n",
 			 old_timeout[0], new_timeout[0],
 			 old_timeout[1], new_timeout[1],
@@ -603,7 +604,7 @@ duration:
 		chip->vendor.duration[TPM_MEDIUM] *= 1000;
 		chip->vendor.duration[TPM_LONG] *= 1000;
 		chip->vendor.duration_adjusted = true;
-		dev_info(chip->dev, "Adjusting TPM timeout parameters.");
+		dev_info(chip->pdev, "Adjusting TPM timeout parameters.");
 	}
 	return 0;
 }
@@ -760,7 +761,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
 		 * around 300ms while the self test is ongoing, keep trying
 		 * until the self test duration expires. */
 		if (rc == -ETIME) {
-			dev_info(chip->dev, HW_ERR "TPM command timed out during continue self test");
+			dev_info(chip->pdev, HW_ERR "TPM command timed out during continue self test");
 			msleep(delay_msec);
 			continue;
 		}
@@ -770,7 +771,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
 
 		rc = be32_to_cpu(cmd.header.out.return_code);
 		if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
-			dev_info(chip->dev,
+			dev_info(chip->pdev,
 				 "TPM is disabled/deactivated (0x%X)\n", rc);
 			/* TPM is disabled and/or deactivated; driver can
 			 * proceed and TPM does handle commands for
@@ -918,10 +919,10 @@ int tpm_pm_suspend(struct device *dev)
 	}
 
 	if (rc)
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"Error (%d) sending savestate before suspend\n", rc);
 	else if (try > 0)
-		dev_warn(chip->dev, "TPM savestate took %dms\n",
+		dev_warn(chip->pdev, "TPM savestate took %dms\n",
 			 try * TPM_TIMEOUT_RETRY);
 
 	return rc;
diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index 8ecb052..ee66fd4 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -284,16 +284,16 @@ static const struct attribute_group tpm_dev_group = {
 int tpm_sysfs_add_device(struct tpm_chip *chip)
 {
 	int err;
-	err = sysfs_create_group(&chip->dev->kobj,
+	err = sysfs_create_group(&chip->pdev->kobj,
 				 &tpm_dev_group);
 
 	if (err)
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"failed to create sysfs attributes, %d\n", err);
 	return err;
 }
 
 void tpm_sysfs_del_device(struct tpm_chip *chip)
 {
-	sysfs_remove_group(&chip->dev->kobj, &tpm_dev_group);
+	sysfs_remove_group(&chip->pdev->kobj, &tpm_dev_group);
 }
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 69f4003..b3a7c76 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -98,7 +98,7 @@ struct tpm_vendor_specific {
 #define TPM_PPI_VERSION_LEN		3
 
 struct tpm_chip {
-	struct device *dev;	/* Device stuff */
+	struct device *pdev;	/* Device stuff */
 	const struct tpm_class_ops *ops;
 
 	int dev_num;		/* /dev/tpm# */
@@ -124,7 +124,7 @@ struct tpm_chip {
 
 static inline void tpm_chip_put(struct tpm_chip *chip)
 {
-	module_put(chip->dev->driver->owner);
+	module_put(chip->pdev->driver->owner);
 }
 
 static inline int tpm_read_index(int base, int index)
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 8e2576a..8a52ebe 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -49,7 +49,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	for (i = 0; i < 6; i++) {
 		status = ioread8(chip->vendor.iobase + 1);
 		if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-			dev_err(chip->dev, "error reading header\n");
+			dev_err(chip->pdev, "error reading header\n");
 			return -EIO;
 		}
 		*buf++ = ioread8(chip->vendor.iobase);
@@ -60,12 +60,12 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	size = be32_to_cpu(*native_size);
 
 	if (count < size) {
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"Recv size(%d) less than available space\n", size);
 		for (; i < size; i++) {	/* clear the waiting data anyway */
 			status = ioread8(chip->vendor.iobase + 1);
 			if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-				dev_err(chip->dev, "error reading data\n");
+				dev_err(chip->pdev, "error reading data\n");
 				return -EIO;
 			}
 		}
@@ -76,7 +76,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	for (; i < size; i++) {
 		status = ioread8(chip->vendor.iobase + 1);
 		if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-			dev_err(chip->dev, "error reading data\n");
+			dev_err(chip->pdev, "error reading data\n");
 			return -EIO;
 		}
 		*buf++ = ioread8(chip->vendor.iobase);
@@ -86,7 +86,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	status = ioread8(chip->vendor.iobase + 1);
 
 	if (status & ATML_STATUS_DATA_AVAIL) {
-		dev_err(chip->dev, "data available is stuck\n");
+		dev_err(chip->pdev, "data available is stuck\n");
 		return -EIO;
 	}
 
@@ -97,9 +97,9 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	int i;
 
-	dev_dbg(chip->dev, "tpm_atml_send:\n");
+	dev_dbg(chip->pdev, "tpm_atml_send:\n");
 	for (i = 0; i < count; i++) {
-		dev_dbg(chip->dev, "%d 0x%x(%d)\n",  i, buf[i], buf[i]);
+		dev_dbg(chip->pdev, "%d 0x%x(%d)\n",  i, buf[i], buf[i]);
  		iowrite8(buf[i], chip->vendor.iobase);
 	}
 
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
index 8af3b4a..dfef1ae 100644
--- a/drivers/char/tpm/tpm_i2c_atmel.c
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -52,7 +52,7 @@ struct priv_data {
 static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
 {
 	struct priv_data *priv = chip->vendor.priv;
-	struct i2c_client *client = to_i2c_client(chip->dev);
+	struct i2c_client *client = to_i2c_client(chip->pdev);
 	s32 status;
 
 	priv->len = 0;
@@ -62,7 +62,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
 
 	status = i2c_master_send(client, buf, len);
 
-	dev_dbg(chip->dev,
+	dev_dbg(chip->pdev,
 		"%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__,
 		(int)min_t(size_t, 64, len), buf, len, status);
 	return status;
@@ -71,7 +71,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
 static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct priv_data *priv = chip->vendor.priv;
-	struct i2c_client *client = to_i2c_client(chip->dev);
+	struct i2c_client *client = to_i2c_client(chip->pdev);
 	struct tpm_output_header *hdr =
 		(struct tpm_output_header *)priv->buffer;
 	u32 expected_len;
@@ -88,7 +88,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 		return -ENOMEM;
 
 	if (priv->len >= expected_len) {
-		dev_dbg(chip->dev,
+		dev_dbg(chip->pdev,
 			"%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
 			(int)min_t(size_t, 64, expected_len), buf, count,
 			expected_len);
@@ -97,7 +97,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	}
 
 	rc = i2c_master_recv(client, buf, expected_len);
-	dev_dbg(chip->dev,
+	dev_dbg(chip->pdev,
 		"%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
 		(int)min_t(size_t, 64, expected_len), buf, count,
 		expected_len);
@@ -106,13 +106,13 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 
 static void i2c_atmel_cancel(struct tpm_chip *chip)
 {
-	dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported");
+	dev_err(chip->pdev, "TPM operation cancellation was requested, but is not supported");
 }
 
 static u8 i2c_atmel_read_status(struct tpm_chip *chip)
 {
 	struct priv_data *priv = chip->vendor.priv;
-	struct i2c_client *client = to_i2c_client(chip->dev);
+	struct i2c_client *client = to_i2c_client(chip->pdev);
 	int rc;
 
 	/* The TPM fails the I2C read until it is ready, so we do the entire
@@ -125,7 +125,7 @@ static u8 i2c_atmel_read_status(struct tpm_chip *chip)
 	/* Once the TPM has completed the command the command remains readable
 	 * until another command is issued. */
 	rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer));
-	dev_dbg(chip->dev,
+	dev_dbg(chip->pdev,
 		"%s: sts=%d", __func__, rc);
 	if (rc <= 0)
 		return 0;
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index 03708e6..33c5f36 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -446,7 +446,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	/* read first 10 bytes, including tag, paramsize, and result */
 	size = recv_data(chip, buf, TPM_HEADER_SIZE);
 	if (size < TPM_HEADER_SIZE) {
-		dev_err(chip->dev, "Unable to read header\n");
+		dev_err(chip->pdev, "Unable to read header\n");
 		goto out;
 	}
 
@@ -459,14 +459,14 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	size += recv_data(chip, &buf[TPM_HEADER_SIZE],
 			  expected - TPM_HEADER_SIZE);
 	if (size < expected) {
-		dev_err(chip->dev, "Unable to read remainder of result\n");
+		dev_err(chip->pdev, "Unable to read remainder of result\n");
 		size = -ETIME;
 		goto out;
 	}
 
 	wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &status);
 	if (status & TPM_STS_DATA_AVAIL) {	/* retry? */
-		dev_err(chip->dev, "Error left over data\n");
+		dev_err(chip->pdev, "Error left over data\n");
 		size = -EIO;
 		goto out;
 	}
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index 09f0c46..92ee9fa 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -96,13 +96,13 @@ static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size,
 /* read TPM_STS register */
 static u8 i2c_nuvoton_read_status(struct tpm_chip *chip)
 {
-	struct i2c_client *client = to_i2c_client(chip->dev);
+	struct i2c_client *client = to_i2c_client(chip->pdev);
 	s32 status;
 	u8 data;
 
 	status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data);
 	if (status <= 0) {
-		dev_err(chip->dev, "%s() error return %d\n", __func__,
+		dev_err(chip->pdev, "%s() error return %d\n", __func__,
 			status);
 		data = TPM_STS_ERR_VAL;
 	}
@@ -127,13 +127,13 @@ static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data)
 /* write commandReady to TPM_STS register */
 static void i2c_nuvoton_ready(struct tpm_chip *chip)
 {
-	struct i2c_client *client = to_i2c_client(chip->dev);
+	struct i2c_client *client = to_i2c_client(chip->pdev);
 	s32 status;
 
 	/* this causes the current command to be aborted */
 	status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY);
 	if (status < 0)
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"%s() fail to write TPM_STS.commandReady\n", __func__);
 }
 
@@ -212,7 +212,7 @@ static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value,
 				return 0;
 		} while (time_before(jiffies, stop));
 	}
-	dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask,
+	dev_err(chip->pdev, "%s(%02x, %02x) -> timeout\n", __func__, mask,
 		value);
 	return -ETIMEDOUT;
 }
@@ -240,7 +240,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client,
 					       &chip->vendor.read_queue) == 0) {
 		burst_count = i2c_nuvoton_get_burstcount(client, chip);
 		if (burst_count < 0) {
-			dev_err(chip->dev,
+			dev_err(chip->pdev,
 				"%s() fail to read burstCount=%d\n", __func__,
 				burst_count);
 			return -EIO;
@@ -249,12 +249,12 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client,
 		rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R,
 					  bytes2read, &buf[size]);
 		if (rc < 0) {
-			dev_err(chip->dev,
+			dev_err(chip->pdev,
 				"%s() fail on i2c_nuvoton_read_buf()=%d\n",
 				__func__, rc);
 			return -EIO;
 		}
-		dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read);
+		dev_dbg(chip->pdev, "%s(%d):", __func__, bytes2read);
 		size += bytes2read;
 	}
 
@@ -264,7 +264,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client,
 /* Read TPM command results */
 static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
-	struct device *dev = chip->dev;
+	struct device *dev = chip->pdev;
 	struct i2c_client *client = to_i2c_client(dev);
 	s32 rc;
 	int expected, status, burst_count, retries, size = 0;
@@ -334,7 +334,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 		break;
 	}
 	i2c_nuvoton_ready(chip);
-	dev_dbg(chip->dev, "%s() -> %d\n", __func__, size);
+	dev_dbg(chip->pdev, "%s() -> %d\n", __func__, size);
 	return size;
 }
 
@@ -347,7 +347,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
  */
 static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len)
 {
-	struct device *dev = chip->dev;
+	struct device *dev = chip->pdev;
 	struct i2c_client *client = to_i2c_client(dev);
 	u32 ordinal;
 	size_t count = 0;
diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c
index b9d1a38..64ef510 100644
--- a/drivers/char/tpm/tpm_i2c_stm_st33.c
+++ b/drivers/char/tpm/tpm_i2c_stm_st33.c
@@ -544,7 +544,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf,
 
 	size = recv_data(chip, buf, TPM_HEADER_SIZE);
 	if (size < TPM_HEADER_SIZE) {
-		dev_err(chip->dev, "Unable to read header\n");
+		dev_err(chip->pdev, "Unable to read header\n");
 		goto out;
 	}
 
@@ -557,7 +557,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf,
 	size += recv_data(chip, &buf[TPM_HEADER_SIZE],
 					expected - TPM_HEADER_SIZE);
 	if (size < expected) {
-		dev_err(chip->dev, "Unable to read remainder of result\n");
+		dev_err(chip->pdev, "Unable to read remainder of result\n");
 		size = -ETIME;
 		goto out;
 	}
@@ -671,7 +671,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
 				IRQF_TRIGGER_HIGH,
 				"TPM SERIRQ management", chip);
 		if (err < 0) {
-			dev_err(chip->dev , "TPM SERIRQ signals %d not available\n",
+			dev_err(chip->pdev , "TPM SERIRQ signals %d not available\n",
 				gpio_to_irq(platform_data->io_serirq));
 			goto _irq_set;
 		}
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index dcdb671..6d49213 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -195,9 +195,9 @@ static int wait(struct tpm_chip *chip, int wait_for_bit)
 	}
 	if (i == TPM_MAX_TRIES) {	/* timeout occurs */
 		if (wait_for_bit == STAT_XFE)
-			dev_err(chip->dev, "Timeout in wait(STAT_XFE)\n");
+			dev_err(chip->pdev, "Timeout in wait(STAT_XFE)\n");
 		if (wait_for_bit == STAT_RDA)
-			dev_err(chip->dev, "Timeout in wait(STAT_RDA)\n");
+			dev_err(chip->pdev, "Timeout in wait(STAT_RDA)\n");
 		return -EIO;
 	}
 	return 0;
@@ -220,7 +220,7 @@ static void wait_and_send(struct tpm_chip *chip, u8 sendbyte)
 static void tpm_wtx(struct tpm_chip *chip)
 {
 	number_of_wtx++;
-	dev_info(chip->dev, "Granting WTX (%02d / %02d)\n",
+	dev_info(chip->pdev, "Granting WTX (%02d / %02d)\n",
 		 number_of_wtx, TPM_MAX_WTX_PACKAGES);
 	wait_and_send(chip, TPM_VL_VER);
 	wait_and_send(chip, TPM_CTRL_WTX);
@@ -231,7 +231,7 @@ static void tpm_wtx(struct tpm_chip *chip)
 
 static void tpm_wtx_abort(struct tpm_chip *chip)
 {
-	dev_info(chip->dev, "Aborting WTX\n");
+	dev_info(chip->pdev, "Aborting WTX\n");
 	wait_and_send(chip, TPM_VL_VER);
 	wait_and_send(chip, TPM_CTRL_WTX_ABORT);
 	wait_and_send(chip, 0x00);
@@ -257,7 +257,7 @@ recv_begin:
 	}
 
 	if (buf[0] != TPM_VL_VER) {
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"Wrong transport protocol implementation!\n");
 		return -EIO;
 	}
@@ -272,7 +272,7 @@ recv_begin:
 		}
 
 		if ((size == 0x6D00) && (buf[1] == 0x80)) {
-			dev_err(chip->dev, "Error handling on vendor layer!\n");
+			dev_err(chip->pdev, "Error handling on vendor layer!\n");
 			return -EIO;
 		}
 
@@ -284,7 +284,7 @@ recv_begin:
 	}
 
 	if (buf[1] == TPM_CTRL_WTX) {
-		dev_info(chip->dev, "WTX-package received\n");
+		dev_info(chip->pdev, "WTX-package received\n");
 		if (number_of_wtx < TPM_MAX_WTX_PACKAGES) {
 			tpm_wtx(chip);
 			goto recv_begin;
@@ -295,14 +295,14 @@ recv_begin:
 	}
 
 	if (buf[1] == TPM_CTRL_WTX_ABORT_ACK) {
-		dev_info(chip->dev, "WTX-abort acknowledged\n");
+		dev_info(chip->pdev, "WTX-abort acknowledged\n");
 		return size;
 	}
 
 	if (buf[1] == TPM_CTRL_ERROR) {
-		dev_err(chip->dev, "ERROR-package received:\n");
+		dev_err(chip->pdev, "ERROR-package received:\n");
 		if (buf[4] == TPM_INF_NAK)
-			dev_err(chip->dev,
+			dev_err(chip->pdev,
 				"-> Negative acknowledgement"
 				" - retransmit command!\n");
 		return -EIO;
@@ -321,7 +321,7 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count)
 
 	ret = empty_fifo(chip, 1);
 	if (ret) {
-		dev_err(chip->dev, "Timeout while clearing FIFO\n");
+		dev_err(chip->pdev, "Timeout while clearing FIFO\n");
 		return -EIO;
 	}
 
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 00c5470..072c298 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -113,7 +113,7 @@ static int nsc_wait_for_ready(struct tpm_chip *chip)
 	}
 	while (time_before(jiffies, stop));
 
-	dev_info(chip->dev, "wait for ready failed\n");
+	dev_info(chip->pdev, "wait for ready failed\n");
 	return -EBUSY;
 }
 
@@ -129,12 +129,12 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count)
 		return -EIO;
 
 	if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) {
-		dev_err(chip->dev, "F0 timeout\n");
+		dev_err(chip->pdev, "F0 timeout\n");
 		return -EIO;
 	}
 	if ((data =
 	     inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) {
-		dev_err(chip->dev, "not in normal mode (0x%x)\n",
+		dev_err(chip->pdev, "not in normal mode (0x%x)\n",
 			data);
 		return -EIO;
 	}
@@ -143,7 +143,7 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count)
 	for (p = buffer; p < &buffer[count]; p++) {
 		if (wait_for_stat
 		    (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) {
-			dev_err(chip->dev,
+			dev_err(chip->pdev,
 				"OBF timeout (while reading data)\n");
 			return -EIO;
 		}
@@ -154,11 +154,11 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count)
 
 	if ((data & NSC_STATUS_F0) == 0 &&
 	(wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) {
-		dev_err(chip->dev, "F0 not set\n");
+		dev_err(chip->pdev, "F0 not set\n");
 		return -EIO;
 	}
 	if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) {
-		dev_err(chip->dev,
+		dev_err(chip->pdev,
 			"expected end of command(0x%x)\n", data);
 		return -EIO;
 	}
@@ -189,19 +189,19 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count)
 		return -EIO;
 
 	if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
-		dev_err(chip->dev, "IBF timeout\n");
+		dev_err(chip->pdev, "IBF timeout\n");
 		return -EIO;
 	}
 
 	outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND);
 	if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) {
-		dev_err(chip->dev, "IBR timeout\n");
+		dev_err(chip->pdev, "IBR timeout\n");
 		return -EIO;
 	}
 
 	for (i = 0; i < count; i++) {
 		if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
-			dev_err(chip->dev,
+			dev_err(chip->pdev,
 				"IBF timeout (while writing data)\n");
 			return -EIO;
 		}
@@ -209,7 +209,7 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count)
 	}
 
 	if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
-		dev_err(chip->dev, "IBF timeout\n");
+		dev_err(chip->pdev, "IBF timeout\n");
 		return -EIO;
 	}
 	outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND);
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c
index 6652025..d8d51ff 100644
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -357,11 +357,11 @@ int tpm_add_ppi(struct tpm_chip *chip)
 
 	ACPI_FREE(obj);
 
-	return sysfs_create_group(&chip->dev->kobj, &ppi_attr_grp);
+	return sysfs_create_group(&chip->pdev->kobj, &ppi_attr_grp);
 }
 
 void tpm_remove_ppi(struct tpm_chip *chip)
 {
 	if (chip->ppi_version[0] != '\0')
-		sysfs_remove_group(&chip->dev->kobj, &ppi_attr_grp);
+		sysfs_remove_group(&chip->pdev->kobj, &ppi_attr_grp);
 }
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 00ed222..660d9af 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -238,7 +238,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	/* read first 10 bytes, including tag, paramsize, and result */
 	if ((size =
 	     recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) {
-		dev_err(chip->dev, "Unable to read header\n");
+		dev_err(chip->pdev, "Unable to read header\n");
 		goto out;
 	}
 
@@ -251,7 +251,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	if ((size +=
 	     recv_data(chip, &buf[TPM_HEADER_SIZE],
 		       expected - TPM_HEADER_SIZE)) < expected) {
-		dev_err(chip->dev, "Unable to read remainder of result\n");
+		dev_err(chip->pdev, "Unable to read remainder of result\n");
 		size = -ETIME;
 		goto out;
 	}
@@ -260,7 +260,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 			  &chip->vendor.int_queue, false);
 	status = tpm_tis_status(chip);
 	if (status & TPM_STS_DATA_AVAIL) {	/* retry? */
-		dev_err(chip->dev, "Error left over data\n");
+		dev_err(chip->pdev, "Error left over data\n");
 		size = -EIO;
 		goto out;
 	}
@@ -433,7 +433,7 @@ static int probe_itpm(struct tpm_chip *chip)
 
 	rc = tpm_tis_send_data(chip, cmd_getticks, len);
 	if (rc == 0) {
-		dev_info(chip->dev, "Detected an iTPM.\n");
+		dev_info(chip->pdev, "Detected an iTPM.\n");
 		rc = 1;
 	} else
 		rc = -EFAULT;
@@ -662,7 +662,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 			if (devm_request_irq
 			    (dev, i, tis_int_probe, IRQF_SHARED,
 			     chip->vendor.miscdev.name, chip) != 0) {
-				dev_info(chip->dev,
+				dev_info(chip->pdev,
 					 "Unable to request irq: %d for probe\n",
 					 i);
 				continue;
@@ -709,7 +709,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
 		if (devm_request_irq
 		    (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED,
 		     chip->vendor.miscdev.name, chip) != 0) {
-			dev_info(chip->dev,
+			dev_info(chip->pdev,
 				 "Unable to request irq: %d for use\n",
 				 chip->vendor.irq);
 			chip->vendor.irq = 0;
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 3/8] tpm: fix raciness of PPI interface lookup
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

Traversal of the ACPI device tree was not done right. The PPI interface
should be looked up only from the ACPI device that is the platform
device for the TPM. This could cause problems on systems with
two TPM chips, such as 4th gen Intel systems.

In addition, added the missing license and copyright boilerplate to
tpm_ppi.c.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-chip.c |   2 +-
 drivers/char/tpm/tpm.h      |  16 ++++--
 drivers/char/tpm/tpm_ppi.c  | 137 +++++++++++++++++++++++++++-----------------
 drivers/char/tpm/tpm_tis.c  |  15 +++--
 4 files changed, 107 insertions(+), 63 deletions(-)

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 6a21ef7..870f8f0 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -147,7 +147,7 @@ int tpm_chip_register(struct tpm_chip *chip)
 	if (rc)
 		goto del_misc;
 
-	rc = tpm_add_ppi(&chip->dev->kobj);
+	rc = tpm_add_ppi(chip);
 	if (rc)
 		goto del_sysfs;
 
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 9880681..69f4003 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -27,6 +27,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/tpm.h>
+#include <linux/acpi.h>
 
 enum tpm_const {
 	TPM_MINOR = 224,	/* officially assigned */
@@ -94,6 +95,8 @@ struct tpm_vendor_specific {
 #define TPM_VID_WINBOND  0x1050
 #define TPM_VID_STM      0x104A
 
+#define TPM_PPI_VERSION_LEN		3
+
 struct tpm_chip {
 	struct device *dev;	/* Device stuff */
 	const struct tpm_class_ops *ops;
@@ -109,6 +112,11 @@ struct tpm_chip {
 
 	struct dentry **bios_dir;
 
+#ifdef CONFIG_ACPI
+	acpi_handle acpi_dev_handle;
+	char ppi_version[TPM_PPI_VERSION_LEN + 1];
+#endif /* CONFIG_ACPI */
+
 	struct list_head list;
 };
 
@@ -340,15 +348,15 @@ void tpm_sysfs_del_device(struct tpm_chip *chip);
 int tpm_pcr_read_dev(struct tpm_chip *chip, int pcr_idx, u8 *res_buf);
 
 #ifdef CONFIG_ACPI
-extern int tpm_add_ppi(struct kobject *);
-extern void tpm_remove_ppi(struct kobject *);
+extern int tpm_add_ppi(struct tpm_chip *chip);
+extern void tpm_remove_ppi(struct tpm_chip *chip);
 #else
-static inline int tpm_add_ppi(struct kobject *parent)
+static inline int tpm_add_ppi(struct tpm_chip *chip)
 {
 	return 0;
 }
 
-static inline void tpm_remove_ppi(struct kobject *parent)
+static inline void tpm_remove_ppi(struct tpm_chip *chip)
 {
 }
 #endif
diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c
index 61dcc80..6652025 100644
--- a/drivers/char/tpm/tpm_ppi.c
+++ b/drivers/char/tpm/tpm_ppi.c
@@ -1,3 +1,22 @@
+/*
+ * Copyright (C) 2012-2014 Intel Corporation
+ *
+ * Authors:
+ * Xiaoyan Zhang <xiaoyan.zhang@intel.com>
+ * Jiang Liu <jiang.liu@linux.intel.com>
+ * Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+ *
+ * Maintained by: <tpmdd-devel@lists.sourceforge.net>
+ *
+ * This file contains implementation of the sysfs interface for PPI.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
 #include <linux/acpi.h>
 #include "tpm.h"
 
@@ -12,7 +31,6 @@
 #define PPI_TPM_REQ_MAX		22
 #define PPI_VS_REQ_START	128
 #define PPI_VS_REQ_END		255
-#define PPI_VERSION_LEN		3
 
 static const u8 tpm_ppi_uuid[] = {
 	0xA6, 0xFA, 0xDD, 0x3D,
@@ -22,45 +40,22 @@ static const u8 tpm_ppi_uuid[] = {
 	0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53
 };
 
-static char tpm_ppi_version[PPI_VERSION_LEN + 1];
-static acpi_handle tpm_ppi_handle;
-
-static acpi_status ppi_callback(acpi_handle handle, u32 level, void *context,
-				void **return_value)
-{
-	union acpi_object *obj;
-
-	if (!acpi_check_dsm(handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
-			    1 << TPM_PPI_FN_VERSION))
-		return AE_OK;
-
-	/* Cache version string */
-	obj = acpi_evaluate_dsm_typed(handle, tpm_ppi_uuid,
-				      TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
-				      NULL, ACPI_TYPE_STRING);
-	if (obj) {
-		strlcpy(tpm_ppi_version, obj->string.pointer,
-			PPI_VERSION_LEN + 1);
-		ACPI_FREE(obj);
-	}
-
-	*return_value = handle;
-
-	return AE_CTRL_TERMINATE;
-}
-
 static inline union acpi_object *
-tpm_eval_dsm(int func, acpi_object_type type, union acpi_object *argv4)
+tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type,
+	     union acpi_object *argv4)
 {
-	BUG_ON(!tpm_ppi_handle);
-	return acpi_evaluate_dsm_typed(tpm_ppi_handle, tpm_ppi_uuid,
-				       TPM_PPI_REVISION_ID, func, argv4, type);
+	BUG_ON(!ppi_handle);
+	return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid,
+				       TPM_PPI_REVISION_ID,
+				       func, argv4, type);
 }
 
 static ssize_t tpm_show_ppi_version(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
-	return scnprintf(buf, PAGE_SIZE, "%s\n", tpm_ppi_version);
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", chip->ppi_version);
 }
 
 static ssize_t tpm_show_ppi_request(struct device *dev,
@@ -68,8 +63,10 @@ static ssize_t tpm_show_ppi_request(struct device *dev,
 {
 	ssize_t size = -EINVAL;
 	union acpi_object *obj;
+	struct tpm_chip *chip = dev_get_drvdata(dev);
 
-	obj = tpm_eval_dsm(TPM_PPI_FN_GETREQ, ACPI_TYPE_PACKAGE, NULL);
+	obj = tpm_eval_dsm(chip->acpi_dev_handle, TPM_PPI_FN_GETREQ,
+			   ACPI_TYPE_PACKAGE, NULL);
 	if (!obj)
 		return -ENXIO;
 
@@ -103,14 +100,15 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
 	int func = TPM_PPI_FN_SUBREQ;
 	union acpi_object *obj, tmp;
 	union acpi_object argv4 = ACPI_INIT_DSM_ARGV4(1, &tmp);
+	struct tpm_chip *chip = dev_get_drvdata(dev);
 
 	/*
 	 * the function to submit TPM operation request to pre-os environment
 	 * is updated with function index from SUBREQ to SUBREQ2 since PPI
 	 * version 1.1
 	 */
-	if (acpi_check_dsm(tpm_ppi_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
-			   1 << TPM_PPI_FN_SUBREQ2))
+	if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+			   TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2))
 		func = TPM_PPI_FN_SUBREQ2;
 
 	/*
@@ -119,7 +117,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
 	 * string/package type. For PPI version 1.0 and 1.1, use buffer type
 	 * for compatibility, and use package type since 1.2 according to spec.
 	 */
-	if (strcmp(tpm_ppi_version, "1.2") < 0) {
+	if (strcmp(chip->ppi_version, "1.2") < 0) {
 		if (sscanf(buf, "%d", &req) != 1)
 			return -EINVAL;
 		argv4.type = ACPI_TYPE_BUFFER;
@@ -131,7 +129,8 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
 			return -EINVAL;
 	}
 
-	obj = tpm_eval_dsm(func, ACPI_TYPE_INTEGER, &argv4);
+	obj = tpm_eval_dsm(chip->acpi_dev_handle, func, ACPI_TYPE_INTEGER,
+			   &argv4);
 	if (!obj) {
 		return -ENXIO;
 	} else {
@@ -157,6 +156,7 @@ static ssize_t tpm_show_ppi_transition_action(struct device *dev,
 		.buffer.length = 0,
 		.buffer.pointer = NULL
 	};
+	struct tpm_chip *chip = dev_get_drvdata(dev);
 
 	static char *info[] = {
 		"None",
@@ -171,9 +171,10 @@ static ssize_t tpm_show_ppi_transition_action(struct device *dev,
 	 * (e.g. Capella with PPI 1.0) need integer/string/buffer type, so for
 	 * compatibility, define params[3].type as buffer, if PPI version < 1.2
 	 */
-	if (strcmp(tpm_ppi_version, "1.2") < 0)
+	if (strcmp(chip->ppi_version, "1.2") < 0)
 		obj = &tmp;
-	obj = tpm_eval_dsm(TPM_PPI_FN_GETACT, ACPI_TYPE_INTEGER, obj);
+	obj = tpm_eval_dsm(chip->acpi_dev_handle, TPM_PPI_FN_GETACT,
+			   ACPI_TYPE_INTEGER, obj);
 	if (!obj) {
 		return -ENXIO;
 	} else {
@@ -196,8 +197,10 @@ static ssize_t tpm_show_ppi_response(struct device *dev,
 	acpi_status status = -EINVAL;
 	union acpi_object *obj, *ret_obj;
 	u64 req, res;
+	struct tpm_chip *chip = dev_get_drvdata(dev);
 
-	obj = tpm_eval_dsm(TPM_PPI_FN_GETRSP, ACPI_TYPE_PACKAGE, NULL);
+	obj = tpm_eval_dsm(chip->acpi_dev_handle, TPM_PPI_FN_GETRSP,
+			   ACPI_TYPE_PACKAGE, NULL);
 	if (!obj)
 		return -ENXIO;
 
@@ -248,7 +251,8 @@ cleanup:
 	return status;
 }
 
-static ssize_t show_ppi_operations(char *buf, u32 start, u32 end)
+static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start,
+				   u32 end)
 {
 	int i;
 	u32 ret;
@@ -264,14 +268,15 @@ static ssize_t show_ppi_operations(char *buf, u32 start, u32 end)
 		"User not required",
 	};
 
-	if (!acpi_check_dsm(tpm_ppi_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
+	if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
 			    1 << TPM_PPI_FN_GETOPR))
 		return -EPERM;
 
 	tmp.integer.type = ACPI_TYPE_INTEGER;
 	for (i = start; i <= end; i++) {
 		tmp.integer.value = i;
-		obj = tpm_eval_dsm(TPM_PPI_FN_GETOPR, ACPI_TYPE_INTEGER, &argv);
+		obj = tpm_eval_dsm(dev_handle, TPM_PPI_FN_GETOPR,
+				   ACPI_TYPE_INTEGER, &argv);
 		if (!obj) {
 			return -ENOMEM;
 		} else {
@@ -291,14 +296,20 @@ static ssize_t tpm_show_ppi_tcg_operations(struct device *dev,
 					   struct device_attribute *attr,
 					   char *buf)
 {
-	return show_ppi_operations(buf, 0, PPI_TPM_REQ_MAX);
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	return show_ppi_operations(chip->acpi_dev_handle, buf, 0,
+				   PPI_TPM_REQ_MAX);
 }
 
 static ssize_t tpm_show_ppi_vs_operations(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf)
 {
-	return show_ppi_operations(buf, PPI_VS_REQ_START, PPI_VS_REQ_END);
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+
+	return show_ppi_operations(chip->acpi_dev_handle, buf, PPI_VS_REQ_START,
+				   PPI_VS_REQ_END);
 }
 
 static DEVICE_ATTR(version, S_IRUGO, tpm_show_ppi_version, NULL);
@@ -323,16 +334,34 @@ static struct attribute_group ppi_attr_grp = {
 	.attrs = ppi_attrs
 };
 
-int tpm_add_ppi(struct kobject *parent)
+int tpm_add_ppi(struct tpm_chip *chip)
 {
-	/* Cache TPM ACPI handle and version string */
-	acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
-			    ppi_callback, NULL, NULL, &tpm_ppi_handle);
-	return tpm_ppi_handle ? sysfs_create_group(parent, &ppi_attr_grp) : 0;
+	union acpi_object *obj;
+
+	if (!chip->acpi_dev_handle)
+		return 0;
+
+	if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+			    TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION))
+		return 0;
+
+	/* Cache PPI version string. */
+	obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid,
+				      TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
+				      NULL, ACPI_TYPE_STRING);
+	if (!obj)
+		return -ENOMEM;
+
+	strlcpy(chip->ppi_version, obj->string.pointer,
+		sizeof(chip->ppi_version));
+
+	ACPI_FREE(obj);
+
+	return sysfs_create_group(&chip->dev->kobj, &ppi_attr_grp);
 }
 
-void tpm_remove_ppi(struct kobject *parent)
+void tpm_remove_ppi(struct tpm_chip *chip)
 {
-	if (tpm_ppi_handle)
-		sysfs_remove_group(parent, &ppi_attr_grp);
+	if (chip->ppi_version[0] != '\0')
+		sysfs_remove_group(&chip->dev->kobj, &ppi_attr_grp);
 }
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 0066b68..00ed222 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -536,8 +536,9 @@ static void tpm_tis_remove(struct tpm_chip *chip)
 	release_locality(chip, chip->vendor.locality, 1);
 }
 
-static int tpm_tis_init(struct device *dev, resource_size_t start,
-			resource_size_t len, unsigned int irq)
+static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
+			resource_size_t start, resource_size_t len,
+			unsigned int irq)
 {
 	u32 vendor, intfcaps, intmask;
 	int rc, i, irq_s, irq_e, probe;
@@ -547,6 +548,8 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 	if (IS_ERR(chip))
 		return PTR_ERR(chip);
 
+	chip->acpi_dev_handle = acpi_dev_handle;
+
 	chip->vendor.iobase = devm_ioremap(dev, start, len);
 	if (!chip->vendor.iobase)
 		return -EIO;
@@ -777,6 +780,7 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
 {
 	resource_size_t start, len;
 	unsigned int irq = 0;
+	acpi_handle acpi_dev_handle = NULL;
 
 	start = pnp_mem_start(pnp_dev, 0);
 	len = pnp_mem_len(pnp_dev, 0);
@@ -789,7 +793,10 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
 	if (is_itpm(pnp_dev))
 		itpm = true;
 
-	return tpm_tis_init(&pnp_dev->dev, start, len, irq);
+	if (pnp_acpi_device(pnp_dev))
+		acpi_dev_handle = pnp_acpi_device(pnp_dev)->handle;
+
+	return tpm_tis_init(&pnp_dev->dev, acpi_dev_handle, start, len, irq);
 }
 
 static struct pnp_device_id tpm_pnp_tbl[] = {
@@ -858,7 +865,7 @@ static int __init init_tis(void)
 		rc = PTR_ERR(pdev);
 		goto err_dev;
 	}
-	rc = tpm_tis_init(&pdev->dev, TIS_MEM_BASE, TIS_MEM_LEN, 0);
+	rc = tpm_tis_init(&pdev->dev, NULL, TIS_MEM_BASE, TIS_MEM_LEN, 0);
 	if (rc)
 		goto err_init;
 	return 0;
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 2/8] tpm: two-phase chip management functions
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel, linux-kernel, josh.triplett, christophe.ricard,
	jason.gunthorpe, linux-api, trousers-tech, Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen@linux.intel.com>

tpm_register_hardware() and tpm_remove_hardware() are often called
before the device has been initialized. This is the wrong order, since
the main TPM driver may need a fully initialized chip to be able to do
its job. For example, it is currently impossible to move common startup
functions such as tpm_do_selftest() to tpm_register_hardware().

Added tpmm_chip_alloc() and tpm_chip_register(), where tpmm_chip_alloc()
reserves memory resources and tpm_chip_register() initializes the
device driver. This way it is possible to alter struct tpm_chip
attributes and initialize the device driver before passing it to
tpm_chip_register().

The framework takes care of freeing struct tpm_chip by using devres
API. The broken release callback has been wiped. For example, ACPI
drivers do not ever get this callback.

This is an interim step to get a proper life-cycle for TPM device
drivers. The next steps are adding proper ref counting and locking to
tpm_chip, which is used in every place in the TPM driver.

Big thank you to Jason Gunthorpe for carefully reviewing this part
of the code. Without his contribution reaching the best quality would
not have been possible.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/Makefile           |   2 +-
 drivers/char/tpm/tpm-chip.c         | 194 ++++++++++++++++++++++++++++++++++++
 drivers/char/tpm/tpm-interface.c    | 148 +--------------------------
 drivers/char/tpm/tpm.h              |  11 +-
 drivers/char/tpm/tpm_atmel.c        |  11 +-
 drivers/char/tpm/tpm_i2c_atmel.c    |  33 ++----
 drivers/char/tpm/tpm_i2c_infineon.c |  37 ++-----
 drivers/char/tpm/tpm_i2c_nuvoton.c  |  44 +++-----
 drivers/char/tpm/tpm_i2c_stm_st33.c |  38 +++----
 drivers/char/tpm/tpm_ibmvtpm.c      |  17 ++--
 drivers/char/tpm/tpm_infineon.c     |  29 +++---
 drivers/char/tpm/tpm_nsc.c          |  14 ++-
 drivers/char/tpm/tpm_tis.c          |  78 ++++++---------
 drivers/char/tpm/xen-tpmfront.c     |  14 +--
 14 files changed, 327 insertions(+), 343 deletions(-)
 create mode 100644 drivers/char/tpm/tpm-chip.c

diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 4d85dd6..837da04 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the kernel tpm device drivers.
 #
 obj-$(CONFIG_TCG_TPM) += tpm.o
-tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o
+tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o
 tpm-$(CONFIG_ACPI) += tpm_ppi.o
 
 ifdef CONFIG_ACPI
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
new file mode 100644
index 0000000..6a21ef7
--- /dev/null
+++ b/drivers/char/tpm/tpm-chip.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ * Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+ * Leendert van Doorn <leendert@watson.ibm.com>
+ * Dave Safford <safford@watson.ibm.com>
+ * Reiner Sailer <sailer@watson.ibm.com>
+ * Kylene Hall <kjhall@us.ibm.com>
+ *
+ * Maintained by: <tpmdd-devel@lists.sourceforge.net>
+ *
+ * TPM chip management routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ */
+
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/freezer.h>
+#include "tpm.h"
+#include "tpm_eventlog.h"
+
+static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
+static LIST_HEAD(tpm_chip_list);
+static DEFINE_SPINLOCK(driver_lock);
+
+/*
+ * tpm_chip_find_get - return tpm_chip for a given chip number
+ * @chip_num the device number for the chip
+ */
+struct tpm_chip *tpm_chip_find_get(int chip_num)
+{
+	struct tpm_chip *pos, *chip = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
+		if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num)
+			continue;
+
+		if (try_module_get(pos->dev->driver->owner)) {
+			chip = pos;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return chip;
+}
+
+/**
+ * tpmm_chip_remove() - free chip memory and device number
+ * @data: points to struct tpm_chip instance
+ *
+ * This is used internally by tpmm_chip_alloc() and called by devres
+ * when the device is released. This function does the opposite of
+ * tpmm_chip_alloc() freeing memory and the device number.
+ */
+static void tpmm_chip_remove(void *data)
+{
+	struct tpm_chip *chip = (struct tpm_chip *) data;
+
+	spin_lock(&driver_lock);
+	clear_bit(chip->dev_num, dev_mask);
+	spin_unlock(&driver_lock);
+	kfree(chip);
+}
+
+/**
+ * tpmm_chip_alloc() - allocate a new struct tpm_chip instance
+ * @dev: device to which the chip is associated
+ * @ops: struct tpm_class_ops instance
+ *
+ * Allocates a new struct tpm_chip instance and assigns a free
+ * device number for it. Caller does not have to worry about
+ * freeing the allocated resources. When the device is removed
+ * devres calls tpmm_chip_remove() to do the job.
+ */
+struct tpm_chip *tpmm_chip_alloc(struct device *dev,
+				 const struct tpm_class_ops *ops)
+{
+	struct tpm_chip *chip;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (chip == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&chip->tpm_mutex);
+	INIT_LIST_HEAD(&chip->list);
+
+	chip->ops = ops;
+
+	spin_lock(&driver_lock);
+	chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES);
+	spin_unlock(&driver_lock);
+
+	if (chip->dev_num >= TPM_NUM_DEVICES) {
+		dev_err(dev, "No available tpm device numbers\n");
+		kfree(chip);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	set_bit(chip->dev_num, dev_mask);
+
+	scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num);
+
+	chip->dev = dev;
+	devm_add_action(dev, tpmm_chip_remove, chip);
+	dev_set_drvdata(dev, chip);
+
+	return chip;
+}
+EXPORT_SYMBOL_GPL(tpmm_chip_alloc);
+
+/*
+ * tpm_chip_register() - create a misc driver for the TPM chip
+ * @chip: TPM chip to use.
+ *
+ * Creates a misc driver for the TPM chip and adds sysfs interfaces for
+ * the device, PPI and TCPA. As the last step this function adds the
+ * chip to the list of TPM chips available for use.
+ *
+ * NOTE: This function should be only called after the chip initialization
+ * is complete.
+ *
+ * Called from tpm_<specific>.c probe function only for devices
+ * the driver has determined it should claim.  Prior to calling
+ * this function the specific probe function has called pci_enable_device;
+ * upon errant exit from this function the specific probe function should
+ * call pci_disable_device.
+ */
+int tpm_chip_register(struct tpm_chip *chip)
+{
+	int rc;
+
+	rc = tpm_dev_add_device(chip);
+	if (rc)
+		return rc;
+
+	rc = tpm_sysfs_add_device(chip);
+	if (rc)
+		goto del_misc;
+
+	rc = tpm_add_ppi(&chip->dev->kobj);
+	if (rc)
+		goto del_sysfs;
+
+	chip->bios_dir = tpm_bios_log_setup(chip->devname);
+
+	/* Make the chip available. */
+	spin_lock(&driver_lock);
+	list_add_rcu(&chip->list, &tpm_chip_list);
+	spin_unlock(&driver_lock);
+
+	return 0;
+del_sysfs:
+	tpm_sysfs_del_device(chip);
+del_misc:
+	tpm_dev_del_device(chip);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_chip_register);
+
+/*
+ * tpm_chip_unregister() - release the TPM driver
+ * @chip: TPM chip to use.
+ *
+ * Takes the chip first away from the list of available TPM chips and then
+ * cleans up all the resources reserved by tpm_chip_register().
+ *
+ * NOTE: This function should be only called before deinitializing chip
+ * resources.
+ */
+void tpm_chip_unregister(struct tpm_chip *chip)
+{
+	spin_lock(&driver_lock);
+	list_del_rcu(&chip->list);
+	spin_unlock(&driver_lock);
+	synchronize_rcu();
+
+	if (chip->bios_dir)
+		tpm_bios_log_teardown(chip->bios_dir);
+	tpm_remove_ppi(&chip->dev->kobj);
+	tpm_sysfs_del_device(chip);
+
+	tpm_dev_del_device(chip);
+}
+EXPORT_SYMBOL_GPL(tpm_chip_unregister);
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 71ac6b7..4dbed1e 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2004 IBM Corporation
+ * Copyright (C) 2014 Intel Corporation
  *
  * Authors:
  * Leendert van Doorn <leendert@watson.ibm.com>
@@ -47,10 +48,6 @@ module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
 MODULE_PARM_DESC(suspend_pcr,
 		 "PCR to use for dummy writes to faciltate flush on suspend.");
 
-static LIST_HEAD(tpm_chip_list);
-static DEFINE_SPINLOCK(driver_lock);
-static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
-
 /*
  * Array with one entry per ordinal defining the maximum amount
  * of time the chip could take to return the result.  The ordinal
@@ -639,27 +636,6 @@ static int tpm_continue_selftest(struct tpm_chip *chip)
 	return rc;
 }
 
-/*
- * tpm_chip_find_get - return tpm_chip for given chip number
- */
-static struct tpm_chip *tpm_chip_find_get(int chip_num)
-{
-	struct tpm_chip *pos, *chip = NULL;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
-		if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num)
-			continue;
-
-		if (try_module_get(pos->dev->driver->owner)) {
-			chip = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
-	return chip;
-}
-
 #define TPM_ORDINAL_PCRREAD cpu_to_be32(21)
 #define READ_PCR_RESULT_SIZE 30
 static struct tpm_input_header pcrread_header = {
@@ -887,30 +863,6 @@ again:
 }
 EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
 
-void tpm_remove_hardware(struct device *dev)
-{
-	struct tpm_chip *chip = dev_get_drvdata(dev);
-
-	if (chip == NULL) {
-		dev_err(dev, "No device data found\n");
-		return;
-	}
-
-	spin_lock(&driver_lock);
-	list_del_rcu(&chip->list);
-	spin_unlock(&driver_lock);
-	synchronize_rcu();
-
-	tpm_dev_del_device(chip);
-	tpm_sysfs_del_device(chip);
-	tpm_remove_ppi(&dev->kobj);
-	tpm_bios_log_teardown(chip->bios_dir);
-
-	/* write it this way to be explicit (chip->dev == dev) */
-	put_device(chip->dev);
-}
-EXPORT_SYMBOL_GPL(tpm_remove_hardware);
-
 #define TPM_ORD_SAVESTATE cpu_to_be32(152)
 #define SAVESTATE_RESULT_SIZE 10
 
@@ -1044,104 +996,6 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
 }
 EXPORT_SYMBOL_GPL(tpm_get_random);
 
-/* In case vendor provided release function, call it too.*/
-
-void tpm_dev_vendor_release(struct tpm_chip *chip)
-{
-	if (!chip)
-		return;
-
-	clear_bit(chip->dev_num, dev_mask);
-}
-EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
-
-
-/*
- * Once all references to platform device are down to 0,
- * release all allocated structures.
- */
-static void tpm_dev_release(struct device *dev)
-{
-	struct tpm_chip *chip = dev_get_drvdata(dev);
-
-	if (!chip)
-		return;
-
-	tpm_dev_vendor_release(chip);
-
-	chip->release(dev);
-	kfree(chip);
-}
-
-/*
- * Called from tpm_<specific>.c probe function only for devices
- * the driver has determined it should claim.  Prior to calling
- * this function the specific probe function has called pci_enable_device
- * upon errant exit from this function specific probe function should call
- * pci_disable_device
- */
-struct tpm_chip *tpm_register_hardware(struct device *dev,
-				       const struct tpm_class_ops *ops)
-{
-	struct tpm_chip *chip;
-
-	/* Driver specific per-device data */
-	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-
-	if (chip == NULL)
-		return NULL;
-
-	mutex_init(&chip->tpm_mutex);
-	INIT_LIST_HEAD(&chip->list);
-
-	chip->ops = ops;
-	chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES);
-
-	if (chip->dev_num >= TPM_NUM_DEVICES) {
-		dev_err(dev, "No available tpm device numbers\n");
-		goto out_free;
-	}
-
-	set_bit(chip->dev_num, dev_mask);
-
-	scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm",
-		  chip->dev_num);
-
-	chip->dev = get_device(dev);
-	chip->release = dev->release;
-	dev->release = tpm_dev_release;
-	dev_set_drvdata(dev, chip);
-
-	if (tpm_dev_add_device(chip))
-		goto put_device;
-
-	if (tpm_sysfs_add_device(chip))
-		goto del_misc;
-
-	if (tpm_add_ppi(&dev->kobj))
-		goto del_sysfs;
-
-	chip->bios_dir = tpm_bios_log_setup(chip->devname);
-
-	/* Make chip available */
-	spin_lock(&driver_lock);
-	list_add_rcu(&chip->list, &tpm_chip_list);
-	spin_unlock(&driver_lock);
-
-	return chip;
-
-del_sysfs:
-	tpm_sysfs_del_device(chip);
-del_misc:
-	tpm_dev_del_device(chip);
-put_device:
-	put_device(chip->dev);
-out_free:
-	kfree(chip);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(tpm_register_hardware);
-
 MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index e638eb0..9880681 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -110,7 +110,6 @@ struct tpm_chip {
 	struct dentry **bios_dir;
 
 	struct list_head list;
-	void (*release) (struct device *);
 };
 
 #define to_tpm_chip(n) container_of(n, struct tpm_chip, vendor)
@@ -322,15 +321,17 @@ extern int tpm_get_timeouts(struct tpm_chip *);
 extern void tpm_gen_interrupt(struct tpm_chip *);
 extern int tpm_do_selftest(struct tpm_chip *);
 extern unsigned long tpm_calc_ordinal_duration(struct tpm_chip *, u32);
-extern struct tpm_chip* tpm_register_hardware(struct device *,
-					      const struct tpm_class_ops *ops);
-extern void tpm_dev_vendor_release(struct tpm_chip *);
-extern void tpm_remove_hardware(struct device *);
 extern int tpm_pm_suspend(struct device *);
 extern int tpm_pm_resume(struct device *);
 extern int wait_for_tpm_stat(struct tpm_chip *, u8, unsigned long,
 			     wait_queue_head_t *, bool);
 
+struct tpm_chip *tpm_chip_find_get(int chip_num);
+extern struct tpm_chip *tpmm_chip_alloc(struct device *dev,
+				       const struct tpm_class_ops *ops);
+extern int tpm_chip_register(struct tpm_chip *chip);
+extern void tpm_chip_unregister(struct tpm_chip *chip);
+
 int tpm_dev_add_device(struct tpm_chip *chip);
 void tpm_dev_del_device(struct tpm_chip *chip);
 int tpm_sysfs_add_device(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 6069d13..8e2576a 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -138,11 +138,11 @@ static void atml_plat_remove(void)
 	struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
 
 	if (chip) {
+		tpm_chip_unregister(chip);
 		if (chip->vendor.have_region)
 			atmel_release_region(chip->vendor.base,
 					     chip->vendor.region_size);
 		atmel_put_base_addr(chip->vendor.iobase);
-		tpm_remove_hardware(chip->dev);
 		platform_device_unregister(pdev);
 	}
 }
@@ -184,8 +184,9 @@ static int __init init_atmel(void)
 		goto err_rel_reg;
 	}
 
-	if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_atmel))) {
-		rc = -ENODEV;
+	chip = tpmm_chip_alloc(&pdev->dev, &tpm_atmel);
+	if (IS_ERR(chip)) {
+		rc = PTR_ERR(chip);
 		goto err_unreg_dev;
 	}
 
@@ -194,6 +195,10 @@ static int __init init_atmel(void)
 	chip->vendor.have_region = have_region;
 	chip->vendor.region_size = region_size;
 
+	rc = tpm_chip_register(chip);
+	if (rc)
+		goto err_unreg_dev;
+
 	return 0;
 
 err_unreg_dev:
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
index 7727292..8af3b4a 100644
--- a/drivers/char/tpm/tpm_i2c_atmel.c
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -160,11 +160,9 @@ static int i2c_atmel_probe(struct i2c_client *client,
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
 		return -ENODEV;
 
-	chip = tpm_register_hardware(dev, &i2c_atmel);
-	if (!chip) {
-		dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
-		return -ENODEV;
-	}
+	chip = tpmm_chip_alloc(dev, &i2c_atmel);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
 	chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
 					 GFP_KERNEL);
@@ -179,21 +177,16 @@ static int i2c_atmel_probe(struct i2c_client *client,
 	/* There is no known way to probe for this device, and all version
 	 * information seems to be read via TPM commands. Thus we rely on the
 	 * TPM startup process in the common code to detect the device. */
-	if (tpm_get_timeouts(chip)) {
-		rc = -ENODEV;
-		goto out_err;
-	}
+	if (tpm_get_timeouts(chip))
+		return -ENODEV;
 
-	if (tpm_do_selftest(chip)) {
-		rc = -ENODEV;
-		goto out_err;
-	}
+	if (tpm_do_selftest(chip))
+		return -ENODEV;
 
-	return 0;
+	rc = tpm_chip_register(chip);
+	if (rc)
+		return rc;
 
-out_err:
-	tpm_dev_vendor_release(chip);
-	tpm_remove_hardware(chip->dev);
 	return rc;
 }
 
@@ -201,11 +194,7 @@ static int i2c_atmel_remove(struct i2c_client *client)
 {
 	struct device *dev = &(client->dev);
 	struct tpm_chip *chip = dev_get_drvdata(dev);
-
-	if (chip)
-		tpm_dev_vendor_release(chip);
-	tpm_remove_hardware(dev);
-	kfree(chip);
+	tpm_chip_unregister(chip);
 	return 0;
 }
 
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index 472af4b..03708e6 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -581,12 +581,9 @@ static int tpm_tis_i2c_init(struct device *dev)
 	int rc = 0;
 	struct tpm_chip *chip;
 
-	chip = tpm_register_hardware(dev, &tpm_tis_i2c);
-	if (!chip) {
-		dev_err(dev, "could not register hardware\n");
-		rc = -ENODEV;
-		goto out_err;
-	}
+	chip = tpmm_chip_alloc(dev, &tpm_tis_i2c);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
 	/* Disable interrupts */
 	chip->vendor.irq = 0;
@@ -600,7 +597,7 @@ static int tpm_tis_i2c_init(struct device *dev)
 	if (request_locality(chip, 0) != 0) {
 		dev_err(dev, "could not request locality\n");
 		rc = -ENODEV;
-		goto out_vendor;
+		goto out_err;
 	}
 
 	/* read four bytes from DID_VID register */
@@ -628,21 +625,9 @@ static int tpm_tis_i2c_init(struct device *dev)
 	tpm_get_timeouts(chip);
 	tpm_do_selftest(chip);
 
-	return 0;
-
+	return tpm_chip_register(chip);
 out_release:
 	release_locality(chip, chip->vendor.locality, 1);
-
-out_vendor:
-	/* close file handles */
-	tpm_dev_vendor_release(chip);
-
-	/* remove hardware */
-	tpm_remove_hardware(chip->dev);
-
-	/* reset these pointers, otherwise we oops */
-	chip->dev->release = NULL;
-	chip->release = NULL;
 	tpm_dev.client = NULL;
 out_err:
 	return rc;
@@ -712,17 +697,9 @@ static int tpm_tis_i2c_probe(struct i2c_client *client,
 static int tpm_tis_i2c_remove(struct i2c_client *client)
 {
 	struct tpm_chip *chip = tpm_dev.chip;
-	release_locality(chip, chip->vendor.locality, 1);
 
-	/* close file handles */
-	tpm_dev_vendor_release(chip);
-
-	/* remove hardware */
-	tpm_remove_hardware(chip->dev);
-
-	/* reset these pointers, otherwise we oops */
-	chip->dev->release = NULL;
-	chip->release = NULL;
+	tpm_chip_unregister(chip);
+	release_locality(chip, chip->vendor.locality, 1);
 	tpm_dev.client = NULL;
 
 	return 0;
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index 7b158ef..09f0c46 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -530,11 +530,9 @@ static int i2c_nuvoton_probe(struct i2c_client *client,
 	dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid,
 		 (u8) (vid >> 16), (u8) (vid >> 24));
 
-	chip = tpm_register_hardware(dev, &tpm_i2c);
-	if (!chip) {
-		dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
-		return -ENODEV;
-	}
+	chip = tpmm_chip_alloc(dev, &tpm_i2c);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
 	chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
 					 GFP_KERNEL);
@@ -584,7 +582,7 @@ static int i2c_nuvoton_probe(struct i2c_client *client,
 							   TPM_DATA_FIFO_W,
 							   1, (u8 *) (&rc));
 				if (rc < 0)
-					goto out_err;
+					return rc;
 				/* TPM_STS <- 0x40 (commandReady) */
 				i2c_nuvoton_ready(chip);
 			} else {
@@ -594,45 +592,33 @@ static int i2c_nuvoton_probe(struct i2c_client *client,
 				 * only TPM_STS_VALID should be set
 				 */
 				if (i2c_nuvoton_read_status(chip) !=
-				    TPM_STS_VALID) {
-					rc = -EIO;
-					goto out_err;
-				}
+				    TPM_STS_VALID)
+					return -EIO;
 			}
 		}
 	}
 
-	if (tpm_get_timeouts(chip)) {
-		rc = -ENODEV;
-		goto out_err;
-	}
+	if (tpm_get_timeouts(chip))
+		return -ENODEV;
 
-	if (tpm_do_selftest(chip)) {
-		rc = -ENODEV;
-		goto out_err;
-	}
+	if (tpm_do_selftest(chip))
+		return -ENODEV;
 
-	return 0;
+	rc = tpm_chip_register(chip);
+	if (rc)
+		return rc;
 
-out_err:
-	tpm_dev_vendor_release(chip);
-	tpm_remove_hardware(chip->dev);
-	return rc;
+	return 0;
 }
 
 static int i2c_nuvoton_remove(struct i2c_client *client)
 {
 	struct device *dev = &(client->dev);
 	struct tpm_chip *chip = dev_get_drvdata(dev);
-
-	if (chip)
-		tpm_dev_vendor_release(chip);
-	tpm_remove_hardware(dev);
-	kfree(chip);
+	tpm_chip_unregister(chip);
 	return 0;
 }
 
-
 static const struct i2c_device_id i2c_nuvoton_id[] = {
 	{I2C_DRIVER_NAME, 0},
 	{}
diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c
index 4669e37..b9d1a38 100644
--- a/drivers/char/tpm/tpm_i2c_stm_st33.c
+++ b/drivers/char/tpm/tpm_i2c_stm_st33.c
@@ -609,37 +609,29 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (client == NULL) {
 		pr_info("%s: i2c client is NULL. Device not accessible.\n",
 			__func__);
-		err = -ENODEV;
-		goto end;
+		return -ENODEV;
 	}
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		dev_info(&client->dev, "client not i2c capable\n");
-		err = -ENODEV;
-		goto end;
+		return -ENODEV;
 	}
 
-	chip = tpm_register_hardware(&client->dev, &st_i2c_tpm);
-	if (!chip) {
-		dev_info(&client->dev, "fail chip\n");
-		err = -ENODEV;
-		goto end;
-	}
+	chip = tpmm_chip_alloc(&client->dev, &st_i2c_tpm);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
 	platform_data = client->dev.platform_data;
 
 	if (!platform_data) {
 		dev_info(&client->dev, "chip not available\n");
-		err = -ENODEV;
-		goto _tpm_clean_answer;
+		return -ENODEV;
 	}
 
 	platform_data->tpm_i2c_buffer[0] =
 	    kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
-	if (platform_data->tpm_i2c_buffer[0] == NULL) {
-		err = -ENOMEM;
-		goto _tpm_clean_answer;
-	}
+	if (platform_data->tpm_i2c_buffer[0] == NULL)
+		return -ENOMEM;
 	platform_data->tpm_i2c_buffer[1] =
 	    kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
 	if (platform_data->tpm_i2c_buffer[1] == NULL) {
@@ -716,8 +708,10 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	tpm_get_timeouts(chip);
 	tpm_do_selftest(chip);
 
-	dev_info(chip->dev, "TPM I2C Initialized\n");
-	return 0;
+	err = tpm_chip_register(chip);
+	if (!err)
+		return 0;
+
 _irq_set:
 	free_irq(gpio_to_irq(platform_data->io_serirq), (void *)chip);
 _gpio_init2:
@@ -732,10 +726,6 @@ _tpm_clean_response2:
 _tpm_clean_response1:
 	kzfree(platform_data->tpm_i2c_buffer[0]);
 	platform_data->tpm_i2c_buffer[0] = NULL;
-_tpm_clean_answer:
-	tpm_remove_hardware(chip->dev);
-end:
-	pr_info("TPM I2C initialisation fail\n");
 	return err;
 }
 
@@ -752,13 +742,13 @@ static int tpm_st33_i2c_remove(struct i2c_client *client)
 		((struct i2c_client *)TPM_VPRIV(chip))->dev.platform_data;
 
 	if (pin_infos != NULL) {
+		tpm_chip_unregister(chip);
+
 		free_irq(pin_infos->io_serirq, chip);
 
 		gpio_free(pin_infos->io_serirq);
 		gpio_free(pin_infos->io_lpcpd);
 
-		tpm_remove_hardware(chip->dev);
-
 		if (pin_infos->tpm_i2c_buffer[1] != NULL) {
 			kzfree(pin_infos->tpm_i2c_buffer[1]);
 			pin_infos->tpm_i2c_buffer[1] = NULL;
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index af74c57..eb95796 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -270,8 +270,11 @@ static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm)
 static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
 {
 	struct ibmvtpm_dev *ibmvtpm = ibmvtpm_get_data(&vdev->dev);
+	struct tpm_chip *chip = dev_get_drvdata(ibmvtpm->dev);
 	int rc = 0;
 
+	tpm_chip_unregister(chip);
+
 	free_irq(vdev->irq, ibmvtpm);
 
 	do {
@@ -290,8 +293,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
 		kfree(ibmvtpm->rtce_buf);
 	}
 
-	tpm_remove_hardware(ibmvtpm->dev);
-
 	kfree(ibmvtpm);
 
 	return 0;
@@ -555,11 +556,9 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
 	struct tpm_chip *chip;
 	int rc = -ENOMEM, rc1;
 
-	chip = tpm_register_hardware(dev, &tpm_ibmvtpm);
-	if (!chip) {
-		dev_err(dev, "tpm_register_hardware failed\n");
-		return -ENODEV;
-	}
+	chip = tpmm_chip_alloc(dev, &tpm_ibmvtpm);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
 	ibmvtpm = kzalloc(sizeof(struct ibmvtpm_dev), GFP_KERNEL);
 	if (!ibmvtpm) {
@@ -629,7 +628,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
 	if (rc)
 		goto init_irq_cleanup;
 
-	return rc;
+	return tpm_chip_register(chip);
 init_irq_cleanup:
 	do {
 		rc1 = plpar_hcall_norets(H_FREE_CRQ, vio_dev->unit_address);
@@ -644,8 +643,6 @@ cleanup:
 		kfree(ibmvtpm);
 	}
 
-	tpm_remove_hardware(dev);
-
 	return rc;
 }
 
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index dc0a255..dcdb671 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -546,7 +546,14 @@ static int tpm_inf_pnp_probe(struct pnp_dev *dev,
 			 vendorid[0], vendorid[1],
 			 productid[0], productid[1], chipname);
 
-		if (!(chip = tpm_register_hardware(&dev->dev, &tpm_inf)))
+		chip = tpmm_chip_alloc(&dev->dev, &tpm_inf);
+		if (IS_ERR(chip)) {
+			rc = PTR_ERR(chip);
+			goto err_release_region;
+		}
+
+		rc = tpm_chip_register(chip);
+		if (rc)
 			goto err_release_region;
 
 		return 0;
@@ -572,17 +579,15 @@ static void tpm_inf_pnp_remove(struct pnp_dev *dev)
 {
 	struct tpm_chip *chip = pnp_get_drvdata(dev);
 
-	if (chip) {
-		if (tpm_dev.iotype == TPM_INF_IO_PORT) {
-			release_region(tpm_dev.data_regs, tpm_dev.data_size);
-			release_region(tpm_dev.config_port,
-				       tpm_dev.config_size);
-		} else {
-			iounmap(tpm_dev.mem_base);
-			release_mem_region(tpm_dev.map_base, tpm_dev.map_size);
-		}
-		tpm_dev_vendor_release(chip);
-		tpm_remove_hardware(chip->dev);
+	tpm_chip_unregister(chip);
+
+	if (tpm_dev.iotype == TPM_INF_IO_PORT) {
+		release_region(tpm_dev.data_regs, tpm_dev.data_size);
+		release_region(tpm_dev.config_port,
+			       tpm_dev.config_size);
+	} else {
+		iounmap(tpm_dev.mem_base);
+		release_mem_region(tpm_dev.map_base, tpm_dev.map_size);
 	}
 }
 
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 3179ec9..00c5470 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -247,10 +247,9 @@ static struct platform_device *pdev = NULL;
 static void tpm_nsc_remove(struct device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(dev);
-	if ( chip ) {
-		release_region(chip->vendor.base, 2);
-		tpm_remove_hardware(chip->dev);
-	}
+
+	tpm_chip_unregister(chip);
+	release_region(chip->vendor.base, 2);
 }
 
 static SIMPLE_DEV_PM_OPS(tpm_nsc_pm, tpm_pm_suspend, tpm_pm_resume);
@@ -308,11 +307,16 @@ static int __init init_nsc(void)
 		goto err_del_dev;
 	}
 
-	if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) {
+	chip = tpmm_chip_alloc(&pdev->dev, &tpm_nsc);
+	if (IS_ERR(chip)) {
 		rc = -ENODEV;
 		goto err_rel_reg;
 	}
 
+	rc = tpm_chip_register(chip);
+	if (rc)
+		goto err_rel_reg;
+
 	dev_dbg(&pdev->dev, "NSC TPM detected\n");
 	dev_dbg(&pdev->dev,
 		"NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n",
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 2c46734..0066b68 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -75,9 +75,6 @@ enum tis_defaults {
 #define	TPM_DID_VID(l)			(0x0F00 | ((l) << 12))
 #define	TPM_RID(l)			(0x0F04 | ((l) << 12))
 
-static LIST_HEAD(tis_chips);
-static DEFINE_MUTEX(tis_lock);
-
 #if defined(CONFIG_PNP) && defined(CONFIG_ACPI)
 static int is_itpm(struct pnp_dev *dev)
 {
@@ -528,6 +525,17 @@ static bool interrupts = true;
 module_param(interrupts, bool, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
 
+static void tpm_tis_remove(struct tpm_chip *chip)
+{
+	iowrite32(~TPM_GLOBAL_INT_ENABLE &
+		  ioread32(chip->vendor.iobase +
+			   TPM_INT_ENABLE(chip->vendor.
+					  locality)),
+		  chip->vendor.iobase +
+		  TPM_INT_ENABLE(chip->vendor.locality));
+	release_locality(chip, chip->vendor.locality, 1);
+}
+
 static int tpm_tis_init(struct device *dev, resource_size_t start,
 			resource_size_t len, unsigned int irq)
 {
@@ -535,14 +543,13 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 	int rc, i, irq_s, irq_e, probe;
 	struct tpm_chip *chip;
 
-	if (!(chip = tpm_register_hardware(dev, &tpm_tis)))
-		return -ENODEV;
+	chip = tpmm_chip_alloc(dev, &tpm_tis);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
-	chip->vendor.iobase = ioremap(start, len);
-	if (!chip->vendor.iobase) {
-		rc = -EIO;
-		goto out_err;
-	}
+	chip->vendor.iobase = devm_ioremap(dev, start, len);
+	if (!chip->vendor.iobase)
+		return -EIO;
 
 	/* Default timeouts */
 	chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
@@ -649,8 +656,8 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 		for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) {
 			iowrite8(i, chip->vendor.iobase +
 				 TPM_INT_VECTOR(chip->vendor.locality));
-			if (request_irq
-			    (i, tis_int_probe, IRQF_SHARED,
+			if (devm_request_irq
+			    (dev, i, tis_int_probe, IRQF_SHARED,
 			     chip->vendor.miscdev.name, chip) != 0) {
 				dev_info(chip->dev,
 					 "Unable to request irq: %d for probe\n",
@@ -690,15 +697,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 			iowrite32(intmask,
 				  chip->vendor.iobase +
 				  TPM_INT_ENABLE(chip->vendor.locality));
-			free_irq(i, chip);
 		}
 	}
 	if (chip->vendor.irq) {
 		iowrite8(chip->vendor.irq,
 			 chip->vendor.iobase +
 			 TPM_INT_VECTOR(chip->vendor.locality));
-		if (request_irq
-		    (chip->vendor.irq, tis_int_handler, IRQF_SHARED,
+		if (devm_request_irq
+		    (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED,
 		     chip->vendor.miscdev.name, chip) != 0) {
 			dev_info(chip->dev,
 				 "Unable to request irq: %d for use\n",
@@ -719,17 +725,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 		}
 	}
 
-	INIT_LIST_HEAD(&chip->vendor.list);
-	mutex_lock(&tis_lock);
-	list_add(&chip->vendor.list, &tis_chips);
-	mutex_unlock(&tis_lock);
-
-
-	return 0;
+	return tpm_chip_register(chip);
 out_err:
-	if (chip->vendor.iobase)
-		iounmap(chip->vendor.iobase);
-	tpm_remove_hardware(chip->dev);
+	tpm_tis_remove(chip);
 	return rc;
 }
 
@@ -811,13 +809,10 @@ MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl);
 static void tpm_tis_pnp_remove(struct pnp_dev *dev)
 {
 	struct tpm_chip *chip = pnp_get_drvdata(dev);
-
-	tpm_dev_vendor_release(chip);
-
-	kfree(chip);
+	tpm_chip_unregister(chip);
+	tpm_tis_remove(chip);
 }
 
-
 static struct pnp_driver tis_pnp_driver = {
 	.name = "tpm_tis",
 	.id_table = tpm_pnp_tbl,
@@ -836,7 +831,7 @@ MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe");
 
 static struct platform_driver tis_drv = {
 	.driver = {
-		.name = "tpm_tis",
+		.name		= "tpm_tis",
 		.owner		= THIS_MODULE,
 		.pm		= &tpm_tis_pm,
 	},
@@ -876,31 +871,16 @@ err_dev:
 
 static void __exit cleanup_tis(void)
 {
-	struct tpm_vendor_specific *i, *j;
 	struct tpm_chip *chip;
-	mutex_lock(&tis_lock);
-	list_for_each_entry_safe(i, j, &tis_chips, list) {
-		chip = to_tpm_chip(i);
-		tpm_remove_hardware(chip->dev);
-		iowrite32(~TPM_GLOBAL_INT_ENABLE &
-			  ioread32(chip->vendor.iobase +
-				   TPM_INT_ENABLE(chip->vendor.
-						  locality)),
-			  chip->vendor.iobase +
-			  TPM_INT_ENABLE(chip->vendor.locality));
-		release_locality(chip, chip->vendor.locality, 1);
-		if (chip->vendor.irq)
-			free_irq(chip->vendor.irq, chip);
-		iounmap(i->iobase);
-		list_del(&i->list);
-	}
-	mutex_unlock(&tis_lock);
 #ifdef CONFIG_PNP
 	if (!force) {
 		pnp_unregister_driver(&tis_pnp_driver);
 		return;
 	}
 #endif
+	chip = dev_get_drvdata(&pdev->dev);
+	tpm_chip_unregister(chip);
+	tpm_tis_remove(chip);
 	platform_device_unregister(pdev);
 	platform_driver_unregister(&tis_drv);
 }
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 441b44e..c3b4f5a 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -175,9 +175,9 @@ static int setup_chip(struct device *dev, struct tpm_private *priv)
 {
 	struct tpm_chip *chip;
 
-	chip = tpm_register_hardware(dev, &tpm_vtpm);
-	if (!chip)
-		return -ENODEV;
+	chip = tpmm_chip_alloc(dev, &tpm_vtpm);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
 
 	init_waitqueue_head(&chip->vendor.read_queue);
 
@@ -286,6 +286,7 @@ static int tpmfront_probe(struct xenbus_device *dev,
 		const struct xenbus_device_id *id)
 {
 	struct tpm_private *priv;
+	struct tpm_chip *chip;
 	int rv;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -302,21 +303,22 @@ static int tpmfront_probe(struct xenbus_device *dev,
 
 	rv = setup_ring(dev, priv);
 	if (rv) {
-		tpm_remove_hardware(&dev->dev);
+		chip = dev_get_drvdata(&dev->dev);
+		tpm_chip_unregister(chip);
 		ring_free(priv);
 		return rv;
 	}
 
 	tpm_get_timeouts(priv->chip);
 
-	return rv;
+	return tpm_chip_register(priv->chip);
 }
 
 static int tpmfront_remove(struct xenbus_device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
 	struct tpm_private *priv = TPM_VPRIV(chip);
-	tpm_remove_hardware(&dev->dev);
+	tpm_chip_unregister(chip);
 	ring_free(priv);
 	TPM_VPRIV(chip) = NULL;
 	return 0;
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 1/8] tpm: merge duplicate transmit_cmd() functions
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Jarkko Sakkinen
In-Reply-To: <1417559480-13757-1-git-send-email-jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

Merged the transmit_cmd() functions in tpm-interface.c and tpm-sysfs.c.
Added the "tpm_" prefix for consistency's sake. Changed the cmd parameter
to be opaque. This makes it possible to use separate command structures
for TPM1 and TPM2 commands in the future. Loose coupling works fine here.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 drivers/char/tpm/tpm-interface.c | 49 +++++++++++++++++++++-------------------
 drivers/char/tpm/tpm-sysfs.c     | 23 ++-----------------
 drivers/char/tpm/tpm.h           |  3 ++-
 3 files changed, 30 insertions(+), 45 deletions(-)

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 6af1700..71ac6b7 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -398,9 +398,10 @@ out:
 #define TPM_DIGEST_SIZE 20
 #define TPM_RET_CODE_IDX 6
 
-static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
-			    int len, const char *desc)
+ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd,
+			 int len, const char *desc)
 {
+	struct tpm_output_header *header;
 	int err;
 
 	len = tpm_transmit(chip, (u8 *) cmd, len);
@@ -409,7 +410,9 @@ static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
 	else if (len < TPM_HEADER_SIZE)
 		return -EFAULT;
 
-	err = be32_to_cpu(cmd->header.out.return_code);
+	header = cmd;
+
+	err = be32_to_cpu(header->return_code);
 	if (err != 0 && desc)
 		dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc);
 
@@ -448,7 +451,7 @@ ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap,
 		tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 		tpm_cmd.params.getcap_in.subcap = subcap_id;
 	}
-	rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
 	if (!rc)
 		*cap = tpm_cmd.params.getcap_out.cap;
 	return rc;
@@ -464,8 +467,8 @@ void tpm_gen_interrupt(struct tpm_chip *chip)
 	tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 	tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
 
-	rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-			"attempting to determine the timeouts");
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+			      "attempting to determine the timeouts");
 }
 EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
 
@@ -484,8 +487,8 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
 	struct tpm_cmd_t start_cmd;
 	start_cmd.header.in = tpm_startup_header;
 	start_cmd.params.startup_in.startup_type = startup_type;
-	return transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
-			    "attempting to start the TPM");
+	return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
+				"attempting to start the TPM");
 }
 
 int tpm_get_timeouts(struct tpm_chip *chip)
@@ -500,7 +503,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 	tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
 	tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 	tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-	rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
 
 	if (rc == TPM_ERR_INVALID_POSTINIT) {
 		/* The TPM is not started, we are the first to talk to it.
@@ -513,7 +516,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 		tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
 		tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 		tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-		rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+		rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
 				  NULL);
 	}
 	if (rc) {
@@ -575,8 +578,8 @@ duration:
 	tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 	tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
 
-	rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-			"attempting to determine the durations");
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+			      "attempting to determine the durations");
 	if (rc)
 		return rc;
 
@@ -631,8 +634,8 @@ static int tpm_continue_selftest(struct tpm_chip *chip)
 	struct tpm_cmd_t cmd;
 
 	cmd.header.in = continue_selftest_header;
-	rc = transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE,
-			  "continue selftest");
+	rc = tpm_transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE,
+			      "continue selftest");
 	return rc;
 }
 
@@ -672,8 +675,8 @@ int tpm_pcr_read_dev(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
 
 	cmd.header.in = pcrread_header;
 	cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
-	rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
-			  "attempting to read a pcr value");
+	rc = tpm_transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
+			      "attempting to read a pcr value");
 
 	if (rc == 0)
 		memcpy(res_buf, cmd.params.pcrread_out.pcr_result,
@@ -737,8 +740,8 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
 	cmd.header.in = pcrextend_header;
 	cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
 	memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
-	rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
-			  "attempting extend a PCR value");
+	rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+			      "attempting extend a PCR value");
 
 	tpm_chip_put(chip);
 	return rc;
@@ -817,7 +820,7 @@ int tpm_send(u32 chip_num, void *cmd, size_t buflen)
 	if (chip == NULL)
 		return -ENODEV;
 
-	rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
+	rc = tpm_transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
 
 	tpm_chip_put(chip);
 	return rc;
@@ -938,14 +941,14 @@ int tpm_pm_suspend(struct device *dev)
 		cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
 		memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
 		       TPM_DIGEST_SIZE);
-		rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
-				  "extending dummy pcr before suspend");
+		rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+				      "extending dummy pcr before suspend");
 	}
 
 	/* now do the actual savestate */
 	for (try = 0; try < TPM_RETRY; try++) {
 		cmd.header.in = savestate_header;
-		rc = transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
+		rc = tpm_transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
 
 		/*
 		 * If the TPM indicates that it is too busy to respond to
@@ -1022,7 +1025,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max)
 		tpm_cmd.header.in = tpm_getrandom_header;
 		tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
 
-		err = transmit_cmd(chip, &tpm_cmd,
+		err = tpm_transmit_cmd(chip, &tpm_cmd,
 				   TPM_GETRANDOM_RESULT_SIZE + num_bytes,
 				   "attempting get random");
 		if (err)
diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index 01730a2..8ecb052 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -20,25 +20,6 @@
 #include <linux/device.h>
 #include "tpm.h"
 
-/* XXX for now this helper is duplicated in tpm-interface.c */
-static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
-			    int len, const char *desc)
-{
-	int err;
-
-	len = tpm_transmit(chip, (u8 *) cmd, len);
-	if (len <  0)
-		return len;
-	else if (len < TPM_HEADER_SIZE)
-		return -EFAULT;
-
-	err = be32_to_cpu(cmd->header.out.return_code);
-	if (err != 0 && desc)
-		dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc);
-
-	return err;
-}
-
 #define READ_PUBEK_RESULT_SIZE 314
 #define TPM_ORD_READPUBEK cpu_to_be32(124)
 static struct tpm_input_header tpm_readpubek_header = {
@@ -58,8 +39,8 @@ static ssize_t pubek_show(struct device *dev, struct device_attribute *attr,
 	struct tpm_chip *chip = dev_get_drvdata(dev);
 
 	tpm_cmd.header.in = tpm_readpubek_header;
-	err = transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
-			   "attempting to read the PUBEK");
+	err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
+			       "attempting to read the PUBEK");
 	if (err)
 		goto out;
 
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index e4d0888..e638eb0 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -314,9 +314,10 @@ struct tpm_cmd_t {
 } __packed;
 
 ssize_t	tpm_getcap(struct device *, __be32, cap_t *, const char *);
-
 ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
 		     size_t bufsiz);
+ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, int len,
+			 const char *desc);
 extern int tpm_get_timeouts(struct tpm_chip *);
 extern void tpm_gen_interrupt(struct tpm_chip *);
 extern int tpm_do_selftest(struct tpm_chip *);
-- 
2.1.0

^ permalink raw reply related

* [PATCH v8 0/8] TPM 2.0 support
From: Jarkko Sakkinen @ 2014-12-02 22:31 UTC (permalink / raw)
  To: Peter Huewe, Ashley Lai, Marcel Selhorst
  Cc: tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Jarkko Sakkinen

This patch set enables TPM2 protocol and provides drivers for FIFO and
CRB interfaces. This patch set does not export any sysfs attributes for
TPM 2.0 because existing sysfs attributes have three non-trivial issues:

- They are associated with the platform device instead of character
  device.
- They are not trivial key-value pairs but contain text that is
  not easily parsed by a computer.
- Raciness as described in
  http://kroah.com/log/blog/2013/06/26/how-to-create-a-sysfs-file-correctly/

This is too big effort to be included into this patch set and requires
more discussion.

v2:
- Improved struct tpm_chip life-cycle by taking advantage of devres
  API.
- Refined sysfs attributes as simple key-values thereby not repeating
  mistakes in TPM1 sysfs attributes.
- Documented functions in tpm-chip.c and tpm2-cmd.c.
- Documented sysfs attributes.

v3:
- Lots of fixes in calling order in device drivers (thanks to Jason
  Gunthorpe for pointing these out!).
- Attach sysfs attributes to the misc device because it represents
  TPM device to the user space.

v4:
- Disable sysfs attributes for TPM 2.0 until we can sort out the
  best approach for them.
- Fixed all the style issues found with checkpatch.pl.

v5:
- missing EXPORT_SYMBOL_GPL()
- own class for TPM devices used for TPM 2.0 devices and onwards.

v6:
- Non-racy initialization for sysfs attributes using struct device's
  groups field.
- The class 'tpm' is used now for all TPM devices. For the first device
  node major MISC_MAJOR and minor TPM_MINOR is used in order to retain
  backwards compatibility.

v7:
- Release device number and free struct tpm_chip memory inside
  tpm_dev_release callback.
- Moved code from tpm-interface.c and tpm_dev.c to tpm-chip.c.

v8:
- Cleaned up unneeded cast from tpm_transmit_cmd().
- Cleaned up redundant PPI_VERSION_LEN constant from tpm_ppi.c.
- Fixed tpm_tis to use tpm2_calc_ordinal_duration() for TPM2 devices.
- tpm_crb: in crb_recv, check that count can hold the TPM header at
  minimum.
- tpm_crb: add enumerations for bit flags in start and cancel fields
  of the control area.
- tpm_crb: use ioremap() for command and response buffer because
  they might be anywhere.
- tpm_crb: use IO access functions for reading ioremapped buffers
  because using direct pointers is not portable.
- tpm_crb: only apply ACPI start if start method reported by the
  TPM2 ACPI table allows it.
- In tpm2_pcr_read() just calculate index and bit and get rid of
  hacky loop.
- Do not add sysfs attributes for TPM 2.0 devices.

Jarkko Sakkinen (7):
  tpm: merge duplicate transmit_cmd() functions
  tpm: two-phase chip management functions
  tpm: fix raciness of PPI interface lookup
  tpm: rename chip->dev to chip->pdev
  tpm: device class for tpm
  tpm: TPM 2.0 baseline support
  tpm: TPM 2.0 CRB Interface

Will Arthur (1):
  tpm: TPM 2.0 FIFO Interface

 drivers/char/tpm/Kconfig            |   9 +
 drivers/char/tpm/Makefile           |   3 +-
 drivers/char/tpm/tpm-chip.c         | 251 ++++++++++++++++
 drivers/char/tpm/tpm-dev.c          |  42 +--
 drivers/char/tpm/tpm-interface.c    | 261 ++++++----------
 drivers/char/tpm/tpm-sysfs.c        |  29 +-
 drivers/char/tpm/tpm.h              | 113 ++++++-
 drivers/char/tpm/tpm2-cmd.c         | 571 ++++++++++++++++++++++++++++++++++++
 drivers/char/tpm/tpm_atmel.c        |  25 +-
 drivers/char/tpm/tpm_crb.c          | 356 ++++++++++++++++++++++
 drivers/char/tpm/tpm_i2c_atmel.c    |  49 ++--
 drivers/char/tpm/tpm_i2c_infineon.c |  43 +--
 drivers/char/tpm/tpm_i2c_nuvoton.c  |  68 ++---
 drivers/char/tpm/tpm_i2c_stm_st33.c |  44 ++-
 drivers/char/tpm/tpm_ibmvtpm.c      |  17 +-
 drivers/char/tpm/tpm_infineon.c     |  51 ++--
 drivers/char/tpm/tpm_nsc.c          |  34 ++-
 drivers/char/tpm/tpm_ppi.c          | 137 +++++----
 drivers/char/tpm/tpm_tis.c          | 180 +++++++-----
 drivers/char/tpm/xen-tpmfront.c     |  14 +-
 20 files changed, 1717 insertions(+), 580 deletions(-)
 create mode 100644 drivers/char/tpm/tpm-chip.c
 create mode 100644 drivers/char/tpm/tpm2-cmd.c
 create mode 100644 drivers/char/tpm/tpm_crb.c

-- 
2.1.0

^ permalink raw reply

* Re: [PATCH v6 0/7] vfs: Non-blockling buffered fs read (page cache only)
From: Milosz Tanski @ 2014-12-02 22:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: LKML, Christoph Hellwig, linux-fsdevel@vger.kernel.org,
	linux-aio@kvack.org, Mel Gorman, Volker Lendecke, Tejun Heo,
	Jeff Moyer, Theodore Ts'o, Al Viro, Linux API,
	Michael Kerrisk, linux-arch
In-Reply-To: <20141125150101.9596a09e.akpm@linux-foundation.org>

On Tue, Nov 25, 2014 at 6:01 PM, Andrew Morton
<akpm@linux-foundation.org> wrote:
>
> On Mon, 10 Nov 2014 11:40:23 -0500 Milosz Tanski <milosz@adfin.com> wrote:
>
> > This patcheset introduces an ability to perform a non-blocking read from
> > regular files in buffered IO mode. This works by only for those filesystems
> > that have data in the page cache.
> >
> > It does this by introducing new syscalls new syscalls preadv2/pwritev2. These
> > new syscalls behave like the network sendmsg, recvmsg syscalls that accept an
> > extra flag argument (RWF_NONBLOCK).
> >
> > It's a very common patern today (samba, libuv, etc..) use a large threadpool to
> > perform buffered IO operations. They submit the work form another thread
> > that performs network IO and epoll or other threads that perform CPU work. This
> > leads to increased latency for processing, esp. in the case of data that's
> > already cached in the page cache.
>
> It would be extremely useful if we could get input from the developers
> of "samba, libuv, etc.." about this.  Do they think it will be useful,
> will they actually use it, can they identify any shortcomings, etc.
>
> Because it would be terrible if we were to merge this then discover
> that major applications either don't use it, or require
> userspace-visible changes.
>
> Ideally, someone would whip up pread2() support into those apps and
> report on the result.

The Samba folks did express an interest in the functionality when I
originally brought up the idea of having a non-blocking, page cache
only read while I was getting my mind around the concept. This was
unsolicited on my part (https://lkml.org/lkml/2014/9/7/103). It should
be good enough at this point to enable a "fast path" read without
deferring to their AIO pool.

>
> > With the new interface the applications will now be able to fetch the data in
> > their network / cpu bound thread(s) and only defer to a threadpool if it's not
> > there. In our own application (VLDB) we've observed a decrease in latency for
> > "fast" request by avoiding unnecessary queuing and having to swap out current
> > tasks in IO bound work threads.
>
> I haven't read the patches yet, but I'm scratching my head over
> pwritev2().  There's much talk and testing results here about
> preadv2(), but nothing about how pwritev() works, what its semantics
> are, testing results, etc.

Essentially preadv2 and pwritev2 are the same syscalls as preadv/pwritev
but support an extra flags argument. With preadv2 the only flag
implemented right now is RWF_NONBLOCK, which allows performing a page
cache only read on a per call basis. Christoph implemented the
RWF_DSYNC flag for pwritev2, which has the same effect as O_DSYNC but
on a per write call basis.

Christoph has included an example of the usage of pwritev2 with
RWF_DSYNC in the commit msg in patch #7. I am currently working on
wiring up test cases as part of xfstests for both the pwritev2 and
preadv2 functionality.

>
> > Version 6 highlight:
> >  - Compat syscall flag checks, per. Jeff.
> >  - Minor stylistic suggestions.
> >
> > Version 5 highlight:
> >  - XFS support for RWF_NONBLOCK. from Christoph.
> >  - RWF_DSYNC flag and support for pwritev2, from Christoph.
> >  - Implemented compat syscalls, per. Jeff.
> >  - Missing nfs, ceph changes from older patchset.
> >
> > Version 4 highlight:
> >  - Updated for 3.18-rc1.
> >  - Performance data from our application.
> >  - First stab at man page with Jeff's help. Patch is in-reply to.
>
> I can't find that manpage.  It is important.  Please include it in the
> patch series.
>
> I'm particularly interested in details regarding
>
> - behaviour and userspace return values when data is not found in pagecache
>
> - how it handles partially uptodate pages (blocksize < pagesize).
>   For both reads and writes.  This sort of thing gets intricate so
>   let's spell the design out with great specificity.
>
> - behaviour at EOF.
>
> - details regarding handling of file holes.

I have replied with the man page update patches with the last two
submissions. Here's an archive link:
https://lkml.org/lkml/2014/11/6/447. I'll re-reply it to the parent
thread of the latest as well. The man page updates cover
preadv2/pwritev2 and their new flags RWF_NONBLOCK/RWF_DSYNC
respectively.

- Behavior on data not in page cache is documented in man page (EAGAIN).
- Since we defer to normal preadv (and thus read) behavior, things like
end of file (0 length return value), partially up to date pages, and
hole behavior are the same as in those calls.

Further, the logic for RWF_NONBLOCK is mostly contained in
do_generic_file_read in filemap.c. What it does is bail early if we
have to make a call to aops->readpage(), returning a full or partial
read if there's data in the page cache, or EAGAIN if there's nothing in
the page cache starting at offset. That is why it behaves like regular
preadv at the end of file / holes / etc...

>
> > RFC Version 3 highlights:
> >  - Down to 2 syscalls from 4; can user fp or argument position.
> >  - RWF_NONBLOCK value flag is not the same O_NONBLOCK, per Jeff.
> >
> > RFC Version 2 highlights:
> >  - Put the flags argument into kiocb (less noise), per. Al Viro
> >  - O_DIRECT checking early in the process, per. Jeff Moyer
> >  - Resolved duplicate (c&p) code in syscall code, per. Jeff
> >  - Included perf data in thread cover letter, per. Jeff
> >  - Created a new flag (not O_NONBLOCK) for readv2, perf Jeff
> >
> >
> > Some perf data generated using fio comparing the posix aio engine to a version
> > of the posix AIO engine that attempts to performs "fast" reads before
> > submitting the operations to the queue. This workflow is on ext4 partition on
> > raid0 (test / build-rig.) Simulating our database access patern workload using
> > 16kb read accesses. Our database uses a home-spun posix aio like queue (samba
> > does the same thing.)
> >
> > f1: ~73% rand read over mostly cached data (zipf med-size dataset)
> > f2: ~18% rand read over mostly un-cached data (uniform large-dataset)
> > f3: ~9% seq-read over large dataset
> >
> > before:
> >
> > f1:
> >     bw (KB  /s): min=   11, max= 9088, per=0.56%, avg=969.54, stdev=827.99
> >     lat (msec) : 50=0.01%, 100=1.06%, 250=5.88%, 500=4.08%, 750=12.48%
> >     lat (msec) : 1000=17.27%, 2000=49.86%, >=2000=9.42%
> > f2:
> >     bw (KB  /s): min=    2, max= 1882, per=0.16%, avg=273.28, stdev=220.26
> >     lat (msec) : 250=5.65%, 500=3.31%, 750=15.64%, 1000=24.59%, 2000=46.56%
> >     lat (msec) : >=2000=4.33%
> > f3:
> >     bw (KB  /s): min=    0, max=265568, per=99.95%, avg=174575.10,
> >                  stdev=34526.89
> >     lat (usec) : 2=0.01%, 4=0.01%, 10=0.02%, 20=0.27%, 50=10.82%
> >     lat (usec) : 100=50.34%, 250=5.05%, 500=7.12%, 750=6.60%, 1000=4.55%
> >     lat (msec) : 2=8.73%, 4=3.49%, 10=1.83%, 20=0.89%, 50=0.22%
> >     lat (msec) : 100=0.05%, 250=0.02%, 500=0.01%
> > total:
> >    READ: io=102365MB, aggrb=174669KB/s, minb=240KB/s, maxb=173599KB/s,
> >          mint=600001msec, maxt=600113msec
> >
> > after (with fast read using preadv2 before submit):
> >
> > f1:
> >     bw (KB  /s): min=    3, max=14897, per=1.28%, avg=2276.69, stdev=2930.39
> >     lat (usec) : 2=70.63%, 4=0.01%
> >     lat (msec) : 250=0.20%, 500=2.26%, 750=1.18%, 2000=0.22%, >=2000=25.53%
> > f2:
> >     bw (KB  /s): min=    2, max= 2362, per=0.14%, avg=249.83, stdev=222.00
> >     lat (msec) : 250=6.35%, 500=1.78%, 750=9.29%, 1000=20.49%, 2000=52.18%
> >     lat (msec) : >=2000=9.99%
> > f3:
> >     bw (KB  /s): min=    1, max=245448, per=100.00%, avg=177366.50,
> >                  stdev=35995.60
> >     lat (usec) : 2=64.04%, 4=0.01%, 10=0.01%, 20=0.06%, 50=0.43%
> >     lat (usec) : 100=0.20%, 250=1.27%, 500=2.93%, 750=3.93%, 1000=7.35%
> >     lat (msec) : 2=14.27%, 4=2.88%, 10=1.54%, 20=0.81%, 50=0.22%
> >     lat (msec) : 100=0.05%, 250=0.02%
> > total:
> >    READ: io=103941MB, aggrb=177339KB/s, minb=213KB/s, maxb=176375KB/s,
> >          mint=600020msec, maxt=600178msec
> >
> > Interpreting the results you can see total bandwidth stays the same but overall
> > request latency is decreased in f1 (random, mostly cached) and f3 (sequential)
> > workloads. There is a slight bump in latency for since it's random data that's
>
> s/for/for f2/
>
> > unlikely to be cached but we're always trying "fast read".
> >
> > In our application we have starting keeping track of "fast read" hits/misses
> > and for files / requests that have a lot hit ratio we don't do "fast reads"
> > mostly getting rid of extra latency in the uncached cases. In our real world
> > work load we were able to reduce average response time by 20 to 30% (depends
> > on amount of IO done by request).
> >
> > I've performed other benchmarks and I have no observed any perf regressions in
> > any of the normal (old) code paths.
> >
> > I have co-developed these changes with Christoph Hellwig.
> >
>
> There have been several incomplete attempts to implement fincore().  If
> we were to complete those attempts, preadv2() could be implemented
> using fincore()+pread().  Plus we get fincore(), which is useful for
> other (but probably similar) reasons.  Probably fincore()+pwrite() could
> be used to implement pwritev2(), but I don't know what pwritev2() does
> yet.
>
> Implementing fincore() is more flexible, requires less code and is less
> likely to have bugs.  So why not go that way?  Yes, it's more CPU
> intensive, but how much?  Is the difference sufficient to justify the
> preadv2()/pwritev2() approach?

I would like to see fincore() functionality (for other reasons), but I
don't think it does the job here. fincore() + preadv() is inherently
racy as there's no guarantee that the data stays cached between
the two calls. This may not matter in some cases, but in others (ones
that I'm trying to solve) it will introduce unexpected latency.

There's no overlap between pwritev2 and fincore() functionality.

Sorry that this took longer than expected to reply. Got busy with
holidays / unrelated things. Let me know if I missed anything.

-- 
Milosz Tanski
CTO
16 East 34th Street, 15th floor
New York, NY 10016

p: 646-253-9055
e: milosz@adfin.com

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply

* Re: [CFT][PATCH 2/3] userns: Add a knob to disable setgroups on a per user namespace basis
From: Andy Lutomirski @ 2014-12-02 22:17 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Josh Triplett, Andrew Morton, Kees Cook,
	Michael Kerrisk-manpages, Linux API, linux-man,
	linux-kernel@vger.kernel.org, LSM, Casey Schaufler,
	Serge E. Hallyn, Richard Weinberger, Kenton Varda, stable
In-Reply-To: <87a935u3nj.fsf@x220.int.ebiederm.org>

On Tue, Dec 2, 2014 at 1:45 PM, Eric W. Biederman <ebiederm@xmission.com> wrote:
> Andy Lutomirski <luto@amacapital.net> writes:
>
>> On Tue, Dec 2, 2014 at 12:28 PM, Eric W. Biederman
>> <ebiederm@xmission.com> wrote:
>>>
>>> - Expose the knob to user space through a proc file /proc/<pid>/setgroups
>>
>> Can you rename this to something clearer, e.g. userns_setgroups_mode?
>
> I am not certain that is any clearer.  That just reads as wordier.
>
> The userns bit is definitely confusing and wrong.  Why should we need to
> spell out the scope?

Because it's a property of the process' userns, not a property of the process.

userns_setgroups would be okay.  (Arguably it should've been
userns_uid_map, too, but at least uid_map sounds obviously
namespace-related.)

>
>>>   A value of 0 means the setgroups system call is disabled in the
>>>   current processes user namespace and can not be enabled in the
>>>   future in this user namespace.
>>>
>>>   A value of 1 means the segtoups system call is enabled.
>>
>> Would it make more sense to put strings like "allow" and "deny" in
>> here?  That way, future extensions could add additional values.
>
> If the implementation of the write side isn't too bad.  I would love
> to see precedent elsewhere in the kernel.    What I don't expect to do
> is have any values except setgroups are enabled and setgroups are
> disabled.

current_clocksource?  I think there are lots of things like that.

>
> I am fine with allowing for the possibility (that is just good
> engineering) but I don't intend to seriously considering or
> implementing other possibilities.

I can imagine a mode where there's a fixed set of groups that are
forced set but other groups can be added and dropped.  It would be
complicated to set up right, but someone might want it some day.

>
>>> diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
>>> index 21c91feeca2d..6d0ee1b089fb 100644
>>> --- a/arch/s390/kernel/compat_linux.c
>>> +++ b/arch/s390/kernel/compat_linux.c
>>> @@ -252,6 +252,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
>>>         int retval;
>>>
>>>         if (!gid_mapping_possible(user_ns) ||
>>> +           !atomic_read(&user_ns->setgroups_allowed) ||
>>>             !capable(CAP_SETGID))
>>>                 return -EPERM;
>>
>> This is now incomprehensible because of the gid_mapping_possible
>> thing.  If you renamed gid_mapping_possible to
>> userns_setgroup_allowed, then this could be added to the
>> implementation, and this would all make sense (not to mention avoiding
>> duplicating this thing).
>>
>>> @@ -826,6 +827,11 @@ static bool new_idmap_permitted(const struct file *file,
>>>                         kuid_t uid = make_kuid(ns->parent, id);
>>>                         if (uid_eq(uid, cred->euid))
>>>                                 return true;
>>> +               } else if (cap_setid == CAP_SETGID) {
>>> +                       kgid_t gid = make_kgid(ns->parent, id);
>>> +                       if (!atomic_read(&ns->setgroups_allowed) &&
>>> +                           gid_eq(gid, cred->egid))
>>> +                               return true;
>>
>> I still don't see why egid is any better than fsgid here.
>
> Answered in my earlier response fsgid was a goof.
> I can set any gid to my egid with my existing permissions.
> Show me how I can do that with fsgid or fsuid and I will be happy to use
> those.

You can use your fsgid to make a setgid file, I think.  But yes, point
taken, although as mentioned in the other thread, I think it would be
a lot clearer if it were a separate patch.


>>> +
>>> +ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
>>> +                            size_t count, loff_t *ppos)
>>> +{
>>> +       struct seq_file *seq = file->private_data;
>>> +       struct user_namespace *ns = seq->private;
>>> +       char kbuf[3];
>>> +       int setgroups_allowed;
>>> +       ssize_t ret;
>>> +
>>> +       ret = -EPERM;
>>> +       if (!file_ns_capable(file, ns, CAP_SETGID))
>>> +               goto out;
>>
>> CAP_SYS_ADMIN?  This isn't setting a gid in the namespace; it's
>> reconfiguring the namespace.
>
> Hmm.  Maybe.  It is an activity that is normally controlled by
> CAP_SETGID.
>
> Frankly I think the entire split up of the capability model is almost
> totally broken.  But I think CAP_SETGID is a close approximation of the
> right thing here.

I agree that the cap model is screwy.  But we use CAP_SYS_ADMIN for
everything else that changes the overall behavior of a namespace.

In any event, the only way it matters is for a non-ns owner in the
parent ns with CAP_SETGID set but not CAP_SYS_ADMIN.  I'd argue that
CAP_SETGID should not be usable to make unrelated process' syscalls
fail.

>
>>> +       /* Only allow a very narrow range of strings to be written */
>>> +       ret = -EINVAL;
>>> +       if ((*ppos != 0) || (count >= sizeof(kbuf)) || (count < 1))
>>> +               goto out;
>>> +
>>> +       /* What was written? */
>>> +       ret = -EFAULT;
>>> +       if (copy_from_user(kbuf, buf, count))
>>> +               goto out;
>>> +       kbuf[count] = '\0';
>>> +
>>> +       /* What is being requested? */
>>> +       ret = -EINVAL;
>>> +       if (kbuf[0] == '0')
>>> +               setgroups_allowed = 0;
>>> +       else if (kbuf[0] == '1')
>>> +               setgroups_allowed = 1;
>>> +       else
>>> +               goto out;
>>> +
>>> +       /* Allow a trailing newline */
>>> +       ret = -EINVAL;
>>> +       if ((count == 2) && (kbuf[1] != '\n'))
>>> +               goto out;
>>> +
>>> +
>>> +       if (setgroups_allowed) {
>>> +               ret = -EINVAL;
>>> +               if (atomic_read(&ns->setgroups_allowed) == 0)
>>> +                       goto out;
>>> +       } else {
>>
>> I would disallow this if gid_map has been written in the interest of
>> sanity.
>
> Not a chance.  That is part of making this an independent knob.  If
> there is another reason for disabling setgroups you can flip this
> knob even after mappings are established.

Then you really want CAP_SYS_ADMIN, I think.

>
>>> +               atomic_set(&ns->setgroups_allowed, 0);
>>> +               /* sigh memory barriers! */
>>
>> I don't think that any barriers are needed.  If you ever observe
>> setgroups_allowed == 0, it will stay that way forever.
>
> Likely.   In practice the code works today.
>
> But I need to review things closely to understand if there are barriers
> needed.  But especially since it is a write once knob we can get away
> with a lot.
>

Yeah.

For long-term use, I kind of like the flags approach I added in the
other patch.  It makes it easy to add more flags in the future.

In any event, I think the only barrier that's needed is when writing gid_map:

atomic_read / test_bit to determine whether unpriv mappings are okay;
smp_mb() or whatever the current _after_atomic thing is these days;
write mapping;

Although I'm not sure whether Linux supports any architectures that
can violate causality in the way that barrier is there to prevent.

--Andy

^ permalink raw reply

* Re: [CFT][PATCH 1/3] userns: Avoid problems with negative groups
From: Andy Lutomirski @ 2014-12-02 22:09 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-man, Kees Cook, Linux API, Linux Containers, Josh Triplett,
	stable, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Kenton Varda, LSM, Michael Kerrisk-manpages, Richard Weinberger,
	Casey Schaufler, Andrew Morton
In-Reply-To: <87bnnlvj43.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>

On Tue, Dec 2, 2014 at 1:26 PM, Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:
>
>> On Tue, Dec 2, 2014 at 12:25 PM, Eric W. Biederman
>> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>>
>>> Classic unix permission checks have an interesting feature, the group
>>> permissions for a file can be set to less than the other permissions
>>> on a file.  Occassionally this is used deliberately to give a certain
>>> group of users fewer permissions than the default.
>>>
>>> Overlooking negative groups has resulted in the permission checks for
>>> setting up a group mapping in a user namespace to be too lax.  Tighten
>>> the permission checks in new_idmap_permitted to ensure that mapping
>>> uids and gids into user namespaces without privilege will not result
>>> in new combinations of credentials being available to the users.
>>>
>>> When setting mappings without privilege only the creator of the user
>>> namespace is interesting as all other users that have CAP_SETUID over
>>> the user namespace will also have CAP_SETUID over the user namespaces
>>> parent.  So the scope of the unprivileged check is reduced to just
>>> the case where cred->euid is the namespace creator.
>>>
>>> For setting a uid mapping without privilege only euid is considered as
>>> setresuid can set uid, suid and fsuid from euid without privilege
>>> making any combination of uids possible with user namespaces already
>>> possible without them.
>>>
>>> For now setting a gid mapping without privilege is removed.  The only
>>> possible set of credentials that would be safe without a gid mapping
>>> (egid without any supplementary groups) just doesn't happen in practice
>>> so would simply lead to unused untested code.
>>>
>>> setgroups is modified to fail not only when the group ids do not
>>> map but also when there are no gid mappings at all, preventing
>>> setgroups(0, NULL) from succeeding when gid mappings have not been
>>> established.
>>>
>>> For a small class of applications this change breaks userspace
>>> and removes useful functionality.  This small class of applications
>>> includes tools/testing/selftests/mount/unprivileged-remount-test.c
>>>
>>> Most of the removed functionality will be added back with the
>>> addition of a one way knob to disable setgroups.  Once setgroups
>>> is disabled setting the gid_map becomes as safe as setting the uid_map.
>>>
>>> For more common applications that set the uid_map and the gid_map with
>>> privilege this change will have no effect on them.
>>>
>>> This should fix CVE-2014-8989.
>>>
>>> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>>> ---
>>
>>>
>>> +static inline bool gid_mapping_possible(const struct user_namespace *ns)
>>> +{
>>> +       return ns->gid_map.nr_extents != 0;
>>> +}
>>> +
>>
>> Can you rename this to userns_may_setgroups or something like that?
>> To me, gid_mapping_possible sounds like you're allowed to map gids,
>> which sounds like the opposite condition, and it doesn't explain what
>> the point is.
>
> gid_mapping_established?
>
> What I mean to be testing is whether from_kgid and make_kgid will work
> because the gid mappings have been set.

But why do you care whether from_kgid and make_kgid will work?  If
they fail, then they fail.  I think that the point is that you're
checking whether allowing setgroups to drop groups is safe, and that's
only barely the same condition.

>
> The userns knob for setgroups is a different test and is added
> in the next patch.  And yes we really need both or the knob can
> start out as on, and we need to prevent setgroups(0, NULL)
> before the user namespace is unshared.

Do you mean before it's mapped?

>
> Although come to think about it probably makes sense to roll those two
> test into one function and call that inline function from the setgroups
> implementation.

That's what I think, too.

>
> Anyway I will think about it and see what I can do to make it easily
> comprehensible.
>
>>> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
>>> index aa312b0dc3ec..51d65b444951 100644
>>> --- a/kernel/user_namespace.c
>>> +++ b/kernel/user_namespace.c
>>> @@ -812,16 +812,19 @@ static bool new_idmap_permitted(const struct file *file,
>>>                                 struct user_namespace *ns, int cap_setid,
>>>                                 struct uid_gid_map *new_map)
>>>  {
>>> -       /* Allow mapping to your own filesystem ids */
>>> -       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
>>> +       const struct cred *cred = file->f_cred;
>>> +
>>> +       /* Allow a mapping without capabilities when allowing the root
>>> +        * of the user namespace capabilities restricted to that id
>>> +        * will not change the set of credentials available to that
>>> +        * user.
>>> +        */
>>> +       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
>>> +           uid_eq(ns->owner, cred->euid)) {
>>
>> What's uid_eq(ns->owner, cred->euid)) for?  This should already be covered by:
>
> This means that the only user we attempt to set up unprivileged mappings
> for is the owner of the user namespace.  Anyone else should already
> have capabilities in the parent user namespace or shouldn't be able to
> set the mapping at all.
>
> In practice it is a clarification to make it easier to think about the code.

But why?  I don't see why this check is necessary or why it's relevant
to the current issue.

>
>>     if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
>>         goto out;
>>
>> (except that I don't know why cap_valid(cap_setid) is checked -- this
>> ought to be enforced for projid_map, too, right?)
>
> What to do with projid_map is entirely different discussion.  In
> practice it is dead, and either XFS needs to be fixed to use it
> or that code needs to be removed.  At the time I wrote it XFS
> did not require any privileges to set project ids.
>
>>>                 u32 id = new_map->extent[0].lower_first;
>>>                 if (cap_setid == CAP_SETUID) {
>>>                         kuid_t uid = make_kuid(ns->parent, id);
>>> -                       if (uid_eq(uid, file->f_cred->fsuid))
>>> -                               return true;
>>> -               } else if (cap_setid == CAP_SETGID) {
>>> -                       kgid_t gid = make_kgid(ns->parent, id);
>>> -                       if (gid_eq(gid, file->f_cred->fsgid))
>>> +                       if (uid_eq(uid, cred->euid))
>>
>> Why'd you change this from fsuid to euid?
>
> Because strangely enough I can set euid to any other uid with
> setresuid, but the same does not hold with fsuid.
>
> So strictly speaking fsuid was actually wrong before.  In practice
> fsuid == euid so I don't think anyone will care.  But I want very much
> to enforce the rule that user namespaces can't give you any credentials
> you couldn't get otherwise.

Fair enough.  Want to split that into a separate patch, then?

--Andy

>
> Eric



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* Re: [PATCH] powerpc: add little endian flag to syscall_get_arch()
From: Andy Lutomirski @ 2014-12-02 22:08 UTC (permalink / raw)
  To: Richard Guy Briggs
  Cc: linux-audit-H+wXaHxf7aLQT0dZR+AlfA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, Linux API, Steve Grubb,
	Eric Paris, Paul Moore, Tony Jones
In-Reply-To: <fbe03529d720e75aa8663f8b521af5b11b33d52f.1417553967.git.rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

On Tue, Dec 2, 2014 at 1:27 PM, Richard Guy Briggs <rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> Since both ppc and ppc64 have LE variants which are now reported by uname, add
> that flag (__AUDIT_ARCH_LE) to syscall_get_arch() and add AUDIT_ARCH_PPC*LE
> variants.
>
> Without this,  perf trace and auditctl fail.
>
> Mainline kernel reports ppc64le (per a058801) but there is no matching
> AUDIT_ARCH_PPC64LE.
>

There's no seccomp filter support for powerpc, so there's no risk that
this breaks it.

--Andy

> See:
>         https://www.redhat.com/archives/linux-audit/2014-August/msg00082.html
>         https://www.redhat.com/archives/linux-audit/2014-December/msg00004.html
>
> Signed-off-by: Richard Guy Briggs <rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> ---
>  arch/powerpc/include/asm/syscall.h |    6 +++++-
>  include/uapi/linux/audit.h         |    2 ++
>  2 files changed, 7 insertions(+), 1 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
> index 6fa2708..a58acab 100644
> --- a/arch/powerpc/include/asm/syscall.h
> +++ b/arch/powerpc/include/asm/syscall.h
> @@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
>
>  static inline int syscall_get_arch(void)
>  {
> -       return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
> +       int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
> +#ifdef __LITTLE_ENDIAN__
> +       arch |= __AUDIT_ARCH_LE
> +#endif
> +       return arch;
>  }
>  #endif /* _ASM_SYSCALL_H */
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index 4d100c8..fe29a99 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -364,7 +364,9 @@ enum {
>  #define AUDIT_ARCH_PARISC      (EM_PARISC)
>  #define AUDIT_ARCH_PARISC64    (EM_PARISC|__AUDIT_ARCH_64BIT)
>  #define AUDIT_ARCH_PPC         (EM_PPC)
> +#define AUDIT_ARCH_PPCLE       (EM_PPC|__AUDIT_ARCH_LE)
>  #define AUDIT_ARCH_PPC64       (EM_PPC64|__AUDIT_ARCH_64BIT)
> +#define AUDIT_ARCH_PPC64LE     (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
>  #define AUDIT_ARCH_S390                (EM_S390)
>  #define AUDIT_ARCH_S390X       (EM_S390|__AUDIT_ARCH_64BIT)
>  #define AUDIT_ARCH_SH          (EM_SH)
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-api" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* Re: [PATCH] powerpc: add little endian flag to syscall_get_arch()
From: Tony Jones @ 2014-12-02 21:54 UTC (permalink / raw)
  To: Richard Guy Briggs, linux-audit-H+wXaHxf7aLQT0dZR+AlfA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ,
	linux-api-u79uwXL29TY76Z2rM5mHXA
  Cc: sgrubb-H+wXaHxf7aLQT0dZR+AlfA, eparis-FjpueFixGhCM4zKIHC2jIg,
	pmoore-H+wXaHxf7aLQT0dZR+AlfA
In-Reply-To: <fbe03529d720e75aa8663f8b521af5b11b33d52f.1417553967.git.rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

On 12/02/2014 01:27 PM, Richard Guy Briggs wrote:
> Since both ppc and ppc64 have LE variants which are now reported by uname, add
> that flag (__AUDIT_ARCH_LE) to syscall_get_arch() and add AUDIT_ARCH_PPC*LE
> variants.
> 
> Without this,  perf trace and auditctl fail.
> 
> Mainline kernel reports ppc64le (per a058801) but there is no matching
> AUDIT_ARCH_PPC64LE.
> 
> See:
> 	https://www.redhat.com/archives/linux-audit/2014-August/msg00082.html
> 	https://www.redhat.com/archives/linux-audit/2014-December/msg00004.html
> 
> Signed-off-by: Richard Guy Briggs <rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
> ---
>  arch/powerpc/include/asm/syscall.h |    6 +++++-
>  include/uapi/linux/audit.h         |    2 ++
>  2 files changed, 7 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
> index 6fa2708..a58acab 100644
> --- a/arch/powerpc/include/asm/syscall.h
> +++ b/arch/powerpc/include/asm/syscall.h
> @@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
>  
>  static inline int syscall_get_arch(void)
>  {
> -	return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
> +	int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
> +#ifdef __LITTLE_ENDIAN__
> +	arch |= __AUDIT_ARCH_LE
> +#endif
> +	return arch;
>  }
>  #endif	/* _ASM_SYSCALL_H */
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index 4d100c8..fe29a99 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -364,7 +364,9 @@ enum {
>  #define AUDIT_ARCH_PARISC	(EM_PARISC)
>  #define AUDIT_ARCH_PARISC64	(EM_PARISC|__AUDIT_ARCH_64BIT)
>  #define AUDIT_ARCH_PPC		(EM_PPC)
> +#define AUDIT_ARCH_PPCLE	(EM_PPC|__AUDIT_ARCH_LE)
>  #define AUDIT_ARCH_PPC64	(EM_PPC64|__AUDIT_ARCH_64BIT)
> +#define AUDIT_ARCH_PPC64LE	(EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
>  #define AUDIT_ARCH_S390		(EM_S390)
>  #define AUDIT_ARCH_S390X	(EM_S390|__AUDIT_ARCH_64BIT)
>  #define AUDIT_ARCH_SH		(EM_SH)

IBM would know for certain but I wasn't aware there was a PPCLE (32bit compat).

^ permalink raw reply

* Re: [CFT][PATCH 2/3] userns: Add a knob to disable setgroups on a per user namespace basis
From: Eric W. Biederman @ 2014-12-02 21:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-man, Kees Cook, Linux API, Linux Containers, Josh Triplett,
	stable, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Kenton Varda, LSM, Michael Kerrisk-manpages, Richard Weinberger,
	Casey Schaufler, Andrew Morton
In-Reply-To: <CALCETrXyC7XPaqj6oe-TmyypOVc_CkZbF6UAAx8YfkyD=gEMOQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:

> On Tue, Dec 2, 2014 at 12:28 PM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>
>> - Expose the knob to user space through a proc file /proc/<pid>/setgroups
>
> Can you rename this to something clearer, e.g. userns_setgroups_mode?

I am not certain that is any clearer.  That just reads as wordier.

The userns bit is definitely confusing and wrong.  Why should we need to
spell out the scope?

>>   A value of 0 means the setgroups system call is disabled in the
>>   current processes user namespace and can not be enabled in the
>>   future in this user namespace.
>>
>>   A value of 1 means the setgroups system call is enabled.
>
> Would it make more sense to put strings like "allow" and "deny" in
> here?  That way, future extensions could add additional values.

If the implementation of the write side isn't too bad.  I would love
to see precedent elsewhere in the kernel.    What I don't expect to do
is have any values except setgroups are enabled and setgroups are
disabled.

I am fine with allowing for the possibility (that is just good
engineering) but I don't intend to seriously consider or
implement other possibilities.

>> diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
>> index 21c91feeca2d..6d0ee1b089fb 100644
>> --- a/arch/s390/kernel/compat_linux.c
>> +++ b/arch/s390/kernel/compat_linux.c
>> @@ -252,6 +252,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
>>         int retval;
>>
>>         if (!gid_mapping_possible(user_ns) ||
>> +           !atomic_read(&user_ns->setgroups_allowed) ||
>>             !capable(CAP_SETGID))
>>                 return -EPERM;
>
> This is now incomprehensible because of the gid_mapping_possible
> thing.  If you renamed gid_mapping_possible to
> userns_setgroup_allowed, then this could be added to the
> implementation, and this would all make sense (not to mention avoiding
> duplicating this thing).
>
>> @@ -826,6 +827,11 @@ static bool new_idmap_permitted(const struct file *file,
>>                         kuid_t uid = make_kuid(ns->parent, id);
>>                         if (uid_eq(uid, cred->euid))
>>                                 return true;
>> +               } else if (cap_setid == CAP_SETGID) {
>> +                       kgid_t gid = make_kgid(ns->parent, id);
>> +                       if (!atomic_read(&ns->setgroups_allowed) &&
>> +                           gid_eq(gid, cred->egid))
>> +                               return true;
>
> I still don't see why egid is any better than fsgid here.

Answered in my earlier response: fsgid was a goof.
I can set any gid to my egid with my existing permissions.
Show me how I can do that with fsgid or fsuid and I will be happy to use
those.


>>                 }
>>         }
>>
>> @@ -844,6 +850,93 @@ static bool new_idmap_permitted(const struct file *file,
>>         return false;
>>  }
>>
>> +static void *setgroups_m_start(struct seq_file *seq, loff_t *ppos)
>> +{
>> +       struct user_namespace *ns = seq->private;
>> +
>> +       return (*ppos == 0) ?  ns : NULL;
>> +}
>> +
>> +static void *setgroups_m_next(struct seq_file *seq, void *v, loff_t *ppos)
>> +{
>> +       ++*ppos;
>> +       return NULL;
>> +}
>> +
>> +static void setgroups_m_stop(struct seq_file *seq, void *v)
>> +{
>> +}
>> +
>> +static int setgroups_m_show(struct seq_file *seq, void *v)
>> +{
>> +       struct user_namespace *ns = seq->private;
>> +
>> +       seq_printf(seq, "%u\n", atomic_read(&ns->setgroups_allowed));
>> +       return 0;
>> +}
>> +
>> +const struct seq_operations proc_setgroups_seq_operations = {
>> +       .start  = setgroups_m_start,
>> +       .stop = setgroups_m_stop,
>> +       .next = setgroups_m_next,
>> +       .show = setgroups_m_show,
>> +};
>> +
>> +ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
>> +                            size_t count, loff_t *ppos)
>> +{
>> +       struct seq_file *seq = file->private_data;
>> +       struct user_namespace *ns = seq->private;
>> +       char kbuf[3];
>> +       int setgroups_allowed;
>> +       ssize_t ret;
>> +
>> +       ret = -EPERM;
>> +       if (!file_ns_capable(file, ns, CAP_SETGID))
>> +               goto out;
>
> CAP_SYS_ADMIN?  This isn't setting a gid in the namespace; it's
> reconfiguring the namespace.

Hmm.  Maybe.  It is an activity that is normally controlled by
CAP_SETGID.

Frankly I think the entire split up of the capability model is almost
totally broken.  But I think CAP_SETGID is a close approximation of the
right thing here.

>> +       /* Only allow a very narrow range of strings to be written */
>> +       ret = -EINVAL;
>> +       if ((*ppos != 0) || (count >= sizeof(kbuf)) || (count < 1))
>> +               goto out;
>> +
>> +       /* What was written? */
>> +       ret = -EFAULT;
>> +       if (copy_from_user(kbuf, buf, count))
>> +               goto out;
>> +       kbuf[count] = '\0';
>> +
>> +       /* What is being requested? */
>> +       ret = -EINVAL;
>> +       if (kbuf[0] == '0')
>> +               setgroups_allowed = 0;
>> +       else if (kbuf[0] == '1')
>> +               setgroups_allowed = 1;
>> +       else
>> +               goto out;
>> +
>> +       /* Allow a trailing newline */
>> +       ret = -EINVAL;
>> +       if ((count == 2) && (kbuf[1] != '\n'))
>> +               goto out;
>> +
>> +
>> +       if (setgroups_allowed) {
>> +               ret = -EINVAL;
>> +               if (atomic_read(&ns->setgroups_allowed) == 0)
>> +                       goto out;
>> +       } else {
>
> I would disallow this if gid_map has been written in the interest of
> sanity.

Not a chance.  That is part of making this an independent knob.  If
there is another reason for disabling setgroups you can flip this
knob even after mappings are established.

>> +               atomic_set(&ns->setgroups_allowed, 0);
>> +               /* sigh memory barriers! */
>
> I don't think that any barriers are needed.  If you ever observe
> setgroups_allowed == 0, it will stay that way forever.

Likely.   In practice the code works today.

But I need to review things closely to understand if there are barriers
needed.  But especially since it is a write once knob we can get away
with a lot.

>> +       }
>> +
>> +       /* Report a successful write */
>> +       *ppos = count;
>> +       ret = count;
>> +out:
>> +       return ret;
>> +}
>> +
>>  static void *userns_get(struct task_struct *task)
>>  {
>>         struct user_namespace *user_ns;
>
> --Andy

^ permalink raw reply

* [PATCH] powerpc: add little endian flag to syscall_get_arch()
From: Richard Guy Briggs @ 2014-12-02 21:27 UTC (permalink / raw)
  To: linux-audit-H+wXaHxf7aLQT0dZR+AlfA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ,
	linux-api-u79uwXL29TY76Z2rM5mHXA
  Cc: Richard Guy Briggs, sgrubb-H+wXaHxf7aLQT0dZR+AlfA,
	eparis-FjpueFixGhCM4zKIHC2jIg, pmoore-H+wXaHxf7aLQT0dZR+AlfA,
	tonyj-l3A5Bk7waGM

Since both ppc and ppc64 have LE variants which are now reported by uname, add
that flag (__AUDIT_ARCH_LE) to syscall_get_arch() and add AUDIT_ARCH_PPC*LE
variants.

Without this,  perf trace and auditctl fail.

Mainline kernel reports ppc64le (per a058801) but there is no matching
AUDIT_ARCH_PPC64LE.

See:
	https://www.redhat.com/archives/linux-audit/2014-August/msg00082.html
	https://www.redhat.com/archives/linux-audit/2014-December/msg00004.html

Signed-off-by: Richard Guy Briggs <rgb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 arch/powerpc/include/asm/syscall.h |    6 +++++-
 include/uapi/linux/audit.h         |    2 ++
 2 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6fa2708..a58acab 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 static inline int syscall_get_arch(void)
 {
-	return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+	int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+	arch |= __AUDIT_ARCH_LE
+#endif
+	return arch;
 }
 #endif	/* _ASM_SYSCALL_H */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4d100c8..fe29a99 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -364,7 +364,9 @@ enum {
 #define AUDIT_ARCH_PARISC	(EM_PARISC)
 #define AUDIT_ARCH_PARISC64	(EM_PARISC|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_PPC		(EM_PPC)
+#define AUDIT_ARCH_PPCLE	(EM_PPC|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_PPC64	(EM_PPC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_PPC64LE	(EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_S390		(EM_S390)
 #define AUDIT_ARCH_S390X	(EM_S390|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_SH		(EM_SH)
-- 
1.7.1

^ permalink raw reply related

* Re: [CFT][PATCH 1/3] userns: Avoid problems with negative groups
From: Eric W. Biederman @ 2014-12-02 21:26 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-man, Kees Cook, Linux API, Linux Containers, Josh Triplett,
	stable, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Kenton Varda, LSM, Michael Kerrisk-manpages, Richard Weinberger,
	Casey Schaufler, Andrew Morton
In-Reply-To: <CALCETrXsQbCeQBUo_FrXNVS42mBEFXz1jku9TicVbFhxTmNGmA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:

> On Tue, Dec 2, 2014 at 12:25 PM, Eric W. Biederman
> <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>
>> Classic unix permission checks have an interesting feature, the group
>> permissions for a file can be set to less than the other permissions
>> on a file.  Occasionally this is used deliberately to give a certain
>> group of users fewer permissions than the default.
>>
>> Overlooking negative groups has resulted in the permission checks for
>> setting up a group mapping in a user namespace to be too lax.  Tighten
>> the permission checks in new_idmap_permitted to ensure that mapping
>> uids and gids into user namespaces without privilege will not result
>> in new combinations of credentials being available to the users.
>>
>> When setting mappings without privilege only the creator of the user
>> namespace is interesting as all other users that have CAP_SETUID over
>> the user namespace will also have CAP_SETUID over the user namespaces
>> parent.  So the scope of the unprivileged check is reduced to just
>> the case where cred->euid is the namespace creator.
>>
>> For setting a uid mapping without privilege only euid is considered as
>> setresuid can set uid, suid and fsuid from euid without privilege
>> making any combination of uids possible with user namespaces already
>> possible without them.
>>
>> For now setting a gid mapping without privilege is removed.  The only
>> possible set of credentials that would be safe without a gid mapping
>> (egid without any supplementary groups) just doesn't happen in practice
>> so would simply lead to unused untested code.
>>
>> setgroups is modified to fail not only when the group ids do not
>> map but also when there are no gid mappings at all, preventing
>> setgroups(0, NULL) from succeeding when gid mappings have not been
>> established.
>>
>> For a small class of applications this change breaks userspace
>> and removes useful functionality.  This small class of applications
>> includes tools/testing/selftests/mount/unprivileged-remount-test.c
>>
>> Most of the removed functionality will be added back with the
>> addition of a one way knob to disable setgroups.  Once setgroups
>> is disabled setting the gid_map becomes as safe as setting the uid_map.
>>
>> For more common applications that set the uid_map and the gid_map with
>> privilege this change will have no effect on them.
>>
>> This should fix CVE-2014-8989.
>>
>> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
>> ---
>
>>
>> +static inline bool gid_mapping_possible(const struct user_namespace *ns)
>> +{
>> +       return ns->gid_map.nr_extents != 0;
>> +}
>> +
>
> Can you rename this to userns_may_setgroups or something like that?
> To me, gid_mapping_possible sounds like you're allowed to map gids,
> which sounds like the opposite condition, and it doesn't explain what
> the point is.

gid_mapping_established?

What I mean to be testing is whether from_kgid and make_kgid will work
because the gid mappings have been set.

The userns knob for setgroups is a different test and is added
in the next patch.  And yes we really need both or the knob can
start out as on, and we need to prevent setgroups(0, NULL)
before the user namespace is unshared.

Although come to think about it, it probably makes sense to roll those two
tests into one function and call that inline function from the setgroups
implementation.

Anyway I will think about it and see what I can do to make it easily
comprehensible.

>> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
>> index aa312b0dc3ec..51d65b444951 100644
>> --- a/kernel/user_namespace.c
>> +++ b/kernel/user_namespace.c
>> @@ -812,16 +812,19 @@ static bool new_idmap_permitted(const struct file *file,
>>                                 struct user_namespace *ns, int cap_setid,
>>                                 struct uid_gid_map *new_map)
>>  {
>> -       /* Allow mapping to your own filesystem ids */
>> -       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
>> +       const struct cred *cred = file->f_cred;
>> +
>> +       /* Allow a mapping without capabilities when allowing the root
>> +        * of the user namespace capabilities restricted to that id
>> +        * will not change the set of credentials available to that
>> +        * user.
>> +        */
>> +       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
>> +           uid_eq(ns->owner, cred->euid)) {
>
> What's uid_eq(ns->owner, cred->euid)) for?  This should already be covered by:

This means that the only user we attempt to set up unprivileged mappings
for is the owner of the user namespace.  Anyone else should already
have capabilities in the parent user namespace or shouldn't be able to
set the mapping at all.

In practice it is a clarification to make it easier to think about the code.

>     if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
>         goto out;
>
> (except that I don't know why cap_valid(cap_setid) is checked -- this
> ought to be enforced for projid_map, too, right?)

What to do with projid_map is an entirely different discussion.  In
practice it is dead, and either XFS needs to be fixed to use it
or that code needs to be removed.  At the time I wrote it XFS
did not require any privileges to set project ids.

>>                 u32 id = new_map->extent[0].lower_first;
>>                 if (cap_setid == CAP_SETUID) {
>>                         kuid_t uid = make_kuid(ns->parent, id);
>> -                       if (uid_eq(uid, file->f_cred->fsuid))
>> -                               return true;
>> -               } else if (cap_setid == CAP_SETGID) {
>> -                       kgid_t gid = make_kgid(ns->parent, id);
>> -                       if (gid_eq(gid, file->f_cred->fsgid))
>> +                       if (uid_eq(uid, cred->euid))
>
> Why'd you change this from fsuid to euid?

Because strangely enough I can set euid to any other uid with
setresuid, but the same does not hold with fsuid.

So strictly speaking fsuid was actually wrong before.  In practice
fsuid == euid so I don't think anyone will care.  But I want very much
to enforce the rule that user namespaces can't give you any credentials
you couldn't get otherwise.

Eric

^ permalink raw reply

* Re: [CFT][PATCH 2/3] userns: Add a knob to disable setgroups on a per user namespace basis
From: Andy Lutomirski @ 2014-12-02 21:05 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Josh Triplett, Andrew Morton, Kees Cook,
	Michael Kerrisk-manpages, Linux API, linux-man,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, LSM,
	Casey Schaufler, Serge E. Hallyn, Richard Weinberger,
	Kenton Varda, stable
In-Reply-To: <874mtdyexp.fsf_-_-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>

On Tue, Dec 2, 2014 at 12:28 PM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>
> - Expose the knob to user space through a proc file /proc/<pid>/setgroups

Can you rename this to something clearer, e.g. userns_setgroups_mode?

>
>   A value of 0 means the setgroups system call is disabled in the
>   current processes user namespace and can not be enabled in the
>   future in this user namespace.
>
>   A value of 1 means the setgroups system call is enabled.

Would it make more sense to put strings like "allow" and "deny" in
here?  That way, future extensions could add additional values.

> diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
> index 21c91feeca2d..6d0ee1b089fb 100644
> --- a/arch/s390/kernel/compat_linux.c
> +++ b/arch/s390/kernel/compat_linux.c
> @@ -252,6 +252,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
>         int retval;
>
>         if (!gid_mapping_possible(user_ns) ||
> +           !atomic_read(&user_ns->setgroups_allowed) ||
>             !capable(CAP_SETGID))
>                 return -EPERM;

This is now incomprehensible because of the gid_mapping_possible
thing.  If you renamed gid_mapping_possible to
userns_setgroup_allowed, then this could be added to the
implementation, and this would all make sense (not to mention avoiding
duplicating this thing).

> @@ -826,6 +827,11 @@ static bool new_idmap_permitted(const struct file *file,
>                         kuid_t uid = make_kuid(ns->parent, id);
>                         if (uid_eq(uid, cred->euid))
>                                 return true;
> +               } else if (cap_setid == CAP_SETGID) {
> +                       kgid_t gid = make_kgid(ns->parent, id);
> +                       if (!atomic_read(&ns->setgroups_allowed) &&
> +                           gid_eq(gid, cred->egid))
> +                               return true;

I still don't see why egid is any better than fsgid here.

>                 }
>         }
>
> @@ -844,6 +850,93 @@ static bool new_idmap_permitted(const struct file *file,
>         return false;
>  }
>
> +static void *setgroups_m_start(struct seq_file *seq, loff_t *ppos)
> +{
> +       struct user_namespace *ns = seq->private;
> +
> +       return (*ppos == 0) ?  ns : NULL;
> +}
> +
> +static void *setgroups_m_next(struct seq_file *seq, void *v, loff_t *ppos)
> +{
> +       ++*ppos;
> +       return NULL;
> +}
> +
> +static void setgroups_m_stop(struct seq_file *seq, void *v)
> +{
> +}
> +
> +static int setgroups_m_show(struct seq_file *seq, void *v)
> +{
> +       struct user_namespace *ns = seq->private;
> +
> +       seq_printf(seq, "%u\n", atomic_read(&ns->setgroups_allowed));
> +       return 0;
> +}
> +
> +const struct seq_operations proc_setgroups_seq_operations = {
> +       .start  = setgroups_m_start,
> +       .stop = setgroups_m_stop,
> +       .next = setgroups_m_next,
> +       .show = setgroups_m_show,
> +};
> +
> +ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
> +                            size_t count, loff_t *ppos)
> +{
> +       struct seq_file *seq = file->private_data;
> +       struct user_namespace *ns = seq->private;
> +       char kbuf[3];
> +       int setgroups_allowed;
> +       ssize_t ret;
> +
> +       ret = -EPERM;
> +       if (!file_ns_capable(file, ns, CAP_SETGID))
> +               goto out;

CAP_SYS_ADMIN?  This isn't setting a gid in the namespace; it's
reconfiguring the namespace.

> +
> +       /* Only allow a very narrow range of strings to be written */
> +       ret = -EINVAL;
> +       if ((*ppos != 0) || (count >= sizeof(kbuf)) || (count < 1))
> +               goto out;
> +
> +       /* What was written? */
> +       ret = -EFAULT;
> +       if (copy_from_user(kbuf, buf, count))
> +               goto out;
> +       kbuf[count] = '\0';
> +
> +       /* What is being requested? */
> +       ret = -EINVAL;
> +       if (kbuf[0] == '0')
> +               setgroups_allowed = 0;
> +       else if (kbuf[0] == '1')
> +               setgroups_allowed = 1;
> +       else
> +               goto out;
> +
> +       /* Allow a trailing newline */
> +       ret = -EINVAL;
> +       if ((count == 2) && (kbuf[1] != '\n'))
> +               goto out;
> +
> +
> +       if (setgroups_allowed) {
> +               ret = -EINVAL;
> +               if (atomic_read(&ns->setgroups_allowed) == 0)
> +                       goto out;
> +       } else {

I would disallow this if gid_map has been written in the interest of sanity.

> +               atomic_set(&ns->setgroups_allowed, 0);
> +               /* sigh memory barriers! */

I don't think that any barriers are needed.  If you ever observe
setgroups_allowed == 0, it will stay that way forever.

> +       }
> +
> +       /* Report a successful write */
> +       *ppos = count;
> +       ret = count;
> +out:
> +       return ret;
> +}
> +
>  static void *userns_get(struct task_struct *task)
>  {
>         struct user_namespace *user_ns;

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [CFT][PATCH 1/3] userns: Avoid problems with negative groups
From: Andy Lutomirski @ 2014-12-02 20:58 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Josh Triplett, Andrew Morton, Kees Cook,
	Michael Kerrisk-manpages, Linux API, linux-man,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, LSM,
	Casey Schaufler, Serge E. Hallyn, Richard Weinberger,
	Kenton Varda, stable
In-Reply-To: <87fvcxyf28.fsf_-_-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>

On Tue, Dec 2, 2014 at 12:25 PM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>
> Classic unix permission checks have an interesting feature, the group
> permissions for a file can be set to less than the other permissions
> on a file.  Occasionally this is used deliberately to give a certain
> group of users fewer permissions than the default.
>
> Overlooking negative groups has resulted in the permission checks for
> setting up a group mapping in a user namespace to be too lax.  Tighten
> the permission checks in new_idmap_permitted to ensure that mapping
> uids and gids into user namespaces without privilege will not result
> in new combinations of credentials being available to the users.
>
> When setting mappings without privilege only the creator of the user
> namespace is interesting as all other users that have CAP_SETUID over
> the user namespace will also have CAP_SETUID over the user namespace's
> parent.  So the scope of the unprivileged check is reduced to just
> the case where cred->euid is the namespace creator.
>
> For setting a uid mapping without privilege only euid is considered as
> setresuid can set uid, suid and fsuid from euid without privilege
> making any combination of uids possible with user namespaces already
> possible without them.
>
> For now setting a gid mapping without privilege is removed.  The only
> possible set of credentials that would be safe without a gid mapping
> (egid without any supplementary groups) just doesn't happen in practice
> so would simply lead to unused untested code.
>
> setgroups is modified to fail not only when the group ids do not
> map but also when there are no gid mappings at all, preventing
> setgroups(0, NULL) from succeeding when gid mappings have not been
> established.
>
> For a small class of applications this change breaks userspace
> and removes useful functionality.  This small class of applications
> includes tools/testing/selftests/mount/unprivileged-remount-test.c
>
> Most of the removed functionality will be added back with the
> addition of a one way knob to disable setgroups.  Once setgroups
> is disabled setting the gid_map becomes as safe as setting the uid_map.
>
> For more common applications that set the uid_map and the gid_map with
> privilege this change will have no effect on them.
>
> This should fix CVE-2014-8989.
>
> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
> ---

>
> +static inline bool gid_mapping_possible(const struct user_namespace *ns)
> +{
> +       return ns->gid_map.nr_extents != 0;
> +}
> +

Can you rename this to userns_may_setgroups or something like that?
To me, gid_mapping_possible sounds like you're allowed to map gids,
which sounds like the opposite condition, and it doesn't explain what
the point is.


> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index aa312b0dc3ec..51d65b444951 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -812,16 +812,19 @@ static bool new_idmap_permitted(const struct file *file,
>                                 struct user_namespace *ns, int cap_setid,
>                                 struct uid_gid_map *new_map)
>  {
> -       /* Allow mapping to your own filesystem ids */
> -       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
> +       const struct cred *cred = file->f_cred;
> +
> +       /* Allow a mapping without capabilities when allowing the root
> +        * of the user namespace capabilities restricted to that id
> +        * will not change the set of credentials available to that
> +        * user.
> +        */
> +       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
> +           uid_eq(ns->owner, cred->euid)) {

What's uid_eq(ns->owner, cred->euid)) for?  This should already be covered by:

    if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
        goto out;

(except that I don't know why cap_valid(cap_setid) is checked -- this
ought to be enforced for projid_map, too, right?)

>                 u32 id = new_map->extent[0].lower_first;
>                 if (cap_setid == CAP_SETUID) {
>                         kuid_t uid = make_kuid(ns->parent, id);
> -                       if (uid_eq(uid, file->f_cred->fsuid))
> -                               return true;
> -               } else if (cap_setid == CAP_SETGID) {
> -                       kgid_t gid = make_kgid(ns->parent, id);
> -                       if (gid_eq(gid, file->f_cred->fsgid))
> +                       if (uid_eq(uid, cred->euid))

Why'd you change this from fsuid to euid?

--Andy

^ permalink raw reply

* [CFT][PATCH 3/3] userns: Unbreak the unprivileged remount tests
From: Eric W. Biederman @ 2014-12-02 20:30 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linux Containers, Josh Triplett, Andrew Morton, Kees Cook,
	Michael Kerrisk-manpages, Linux API, linux-man,
	linux-kernel@vger.kernel.org, LSM, Casey Schaufler,
	Serge E. Hallyn, Richard Weinberger, Kenton Varda, stable
In-Reply-To: <874mtdyexp.fsf_-_@x220.int.ebiederm.org>


A security fix caused the way the unprivileged remount tests were
using user namespaces to break.  Tweak the way user namespaces are
being used so the test works again.

Cc: stable@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---

This is what it takes to fix a broken application, in its full glory.
This fix works even if new functionality does not exist.

 tools/testing/selftests/mount/unprivileged-remount-test.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 9669d375625a..d47227494137 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -144,13 +144,12 @@ static void create_and_enter_userns(void)
 			strerror(errno));
 	}
 
+	if (access("/proc/self/setgroups", F_OK) == 0) {
+		write_file("/proc/self/setgroups", "0");
+	}
 	write_file("/proc/self/uid_map", "0 %d 1", uid);
 	write_file("/proc/self/gid_map", "0 %d 1", gid);
 
-	if (setgroups(0, NULL) != 0) {
-		die("setgroups failed: %s\n",
-			strerror(errno));
-	}
 	if (setgid(0) != 0) {
 		die ("setgid(0) failed %s\n",
 			strerror(errno));
-- 
1.9.1

^ permalink raw reply related

* [CFT][PATCH 2/3] userns: Add a knob to disable setgroups on a per user namespace basis
From: Eric W. Biederman @ 2014-12-02 20:28 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-man, Kees Cook, Linux API, Linux Containers, Josh Triplett,
	stable, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Kenton Varda, LSM, Michael Kerrisk-manpages, Richard Weinberger,
	Casey Schaufler, Andrew Morton
In-Reply-To: <87fvcxyf28.fsf_-_-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>


- Expose the knob to user space through a proc file /proc/<pid>/setgroups

  A value of 0 means the setgroups system call is disabled in the
  current process's user namespace and can not be enabled in the
  future in this user namespace.

  A value of 1 means the setgroups system call is enabled.

- Descendant user namespaces inherit the value of setgroups from
  their parents.

- A proc file is used (instead of a sysctl) as sysctls
  currently do not pass in a struct file so file_ns_capable
  is unusable.

- Update new_idmap_permitted to allow unprivileged users to
  establish a mapping of their own gid, as such mappings
  can no longer allow dropping of supplemental groups.

This set of changes restores as much as possible the functionality
that was lost when new_idmap_permitted was modified to not allow
mappings to be established without privilege.

As this fixes a regression from: "userns: Avoid problems with negative groups"
it is probably a candidate for a backport.

Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---

This patch still needs a little bit of love.
I need to take a hard look at the interaction of barriers and atomic ops,
and it seems I have at least one comment fix that needs to move elsewhere.

But this should be enough to move the conversation forward.

 arch/s390/kernel/compat_linux.c |  1 +
 fs/proc/base.c                  | 31 ++++++++++----
 include/linux/user_namespace.h  |  3 ++
 kernel/groups.c                 |  1 +
 kernel/uid16.c                  |  1 +
 kernel/user.c                   |  1 +
 kernel/user_namespace.c         | 95 ++++++++++++++++++++++++++++++++++++++++-
 7 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 21c91feeca2d..6d0ee1b089fb 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -252,6 +252,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 	int retval;
 
 	if (!gid_mapping_possible(user_ns) ||
+	    !atomic_read(&user_ns->setgroups_allowed) ||
 	    !capable(CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 772efa45a452..4ebed9f01d97 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2386,7 +2386,7 @@ static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
 #endif /* CONFIG_TASK_IO_ACCOUNTING */
 
 #ifdef CONFIG_USER_NS
-static int proc_id_map_open(struct inode *inode, struct file *file,
+static int proc_userns_open(struct inode *inode, struct file *file,
 	const struct seq_operations *seq_ops)
 {
 	struct user_namespace *ns = NULL;
@@ -2418,7 +2418,7 @@ err:
 	return ret;
 }
 
-static int proc_id_map_release(struct inode *inode, struct file *file)
+static int proc_userns_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq = file->private_data;
 	struct user_namespace *ns = seq->private;
@@ -2428,17 +2428,17 @@ static int proc_id_map_release(struct inode *inode, struct file *file)
 
 static int proc_uid_map_open(struct inode *inode, struct file *file)
 {
-	return proc_id_map_open(inode, file, &proc_uid_seq_operations);
+	return proc_userns_open(inode, file, &proc_uid_seq_operations);
 }
 
 static int proc_gid_map_open(struct inode *inode, struct file *file)
 {
-	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
+	return proc_userns_open(inode, file, &proc_gid_seq_operations);
 }
 
 static int proc_projid_map_open(struct inode *inode, struct file *file)
 {
-	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
+	return proc_userns_open(inode, file, &proc_projid_seq_operations);
 }
 
 static const struct file_operations proc_uid_map_operations = {
@@ -2446,7 +2446,7 @@ static const struct file_operations proc_uid_map_operations = {
 	.write		= proc_uid_map_write,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= proc_id_map_release,
+	.release	= proc_userns_release,
 };
 
 static const struct file_operations proc_gid_map_operations = {
@@ -2454,7 +2454,7 @@ static const struct file_operations proc_gid_map_operations = {
 	.write		= proc_gid_map_write,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= proc_id_map_release,
+	.release	= proc_userns_release,
 };
 
 static const struct file_operations proc_projid_map_operations = {
@@ -2462,7 +2462,20 @@ static const struct file_operations proc_projid_map_operations = {
 	.write		= proc_projid_map_write,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= proc_id_map_release,
+	.release	= proc_userns_release,
+};
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+	return proc_userns_open(inode, file, &proc_setgroups_seq_operations);
+}
+
+static const struct file_operations proc_setgroups_operations = {
+	.open		= proc_setgroups_open,
+	.write		= proc_setgroups_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= proc_userns_release,
 };
 #endif /* CONFIG_USER_NS */
 
@@ -2572,6 +2585,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	REG("timers",	  S_IRUGO, proc_timers_operations),
@@ -2913,6 +2927,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
 };
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 26d5e8f5db97..1e8cb168b1d0 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -27,6 +27,7 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
+	atomic_t		setgroups_allowed;
 
 	/* Register of per-UID persistent keyrings for this namespace */
 #ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -65,9 +66,11 @@ struct seq_operations;
 extern const struct seq_operations proc_uid_seq_operations;
 extern const struct seq_operations proc_gid_seq_operations;
 extern const struct seq_operations proc_projid_seq_operations;
+extern const struct seq_operations proc_setgroups_seq_operations;
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
+extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/groups.c b/kernel/groups.c
index b9a6a5c7e100..467ae954e859 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -226,6 +226,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 	int retval;
 
 	if (!gid_mapping_possible(user_ns) ||
+	    !atomic_read(&user_ns->setgroups_allowed) ||
 	    !ns_capable(user_ns, CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 602c7de2aa11..096962fa1975 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -179,6 +179,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
 	int retval;
 
 	if (!gid_mapping_possible(user_ns) ||
+	    !atomic_read(&user_ns->setgroups_allowed) ||
 	    !ns_capable(user_ns, CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
diff --git a/kernel/user.c b/kernel/user.c
index 4efa39350e44..0d78759f7dbe 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,7 @@ struct user_namespace init_user_ns = {
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
 	.proc_inum = PROC_USER_INIT_INO,
+	.setgroups_allowed	= ATOMIC_INIT(1),
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	.persistent_keyring_register_sem =
 	__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 51d65b444951..521c8d53ee17 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -98,6 +98,7 @@ int create_user_ns(struct cred *new)
 	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
+	atomic_set(&ns->setgroups_allowed, atomic_read(&parent_ns->setgroups_allowed));
 
 	set_cred_user_ns(new, ns);
 
@@ -640,7 +641,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (!page)
 		goto out;
 
-	/* Only allow <= page size writes at the beginning of the file */
+	/* Only allow < page size writes at the beginning of the file */
 	ret = -EINVAL;
 	if ((*ppos != 0) || (count >= PAGE_SIZE))
 		goto out;
@@ -826,6 +827,11 @@ static bool new_idmap_permitted(const struct file *file,
 			kuid_t uid = make_kuid(ns->parent, id);
 			if (uid_eq(uid, cred->euid))
 				return true;
+		} else if (cap_setid == CAP_SETGID) {
+			kgid_t gid = make_kgid(ns->parent, id);
+			if (!atomic_read(&ns->setgroups_allowed) &&
+			    gid_eq(gid, cred->egid))
+				return true;
 		}
 	}
 
@@ -844,6 +850,93 @@ static bool new_idmap_permitted(const struct file *file,
 	return false;
 }
 
+static void *setgroups_m_start(struct seq_file *seq, loff_t *ppos)
+{
+	struct user_namespace *ns = seq->private;
+
+	return (*ppos == 0) ?  ns : NULL;
+}
+
+static void *setgroups_m_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+	++*ppos;
+	return NULL;
+}
+
+static void setgroups_m_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int setgroups_m_show(struct seq_file *seq, void *v)
+{
+	struct user_namespace *ns = seq->private;
+
+	seq_printf(seq, "%u\n", atomic_read(&ns->setgroups_allowed));
+	return 0;
+}
+
+const struct seq_operations proc_setgroups_seq_operations = {
+	.start	= setgroups_m_start,
+	.stop = setgroups_m_stop,
+	.next = setgroups_m_next,
+	.show = setgroups_m_show,
+};
+
+ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	char kbuf[3];
+	int setgroups_allowed;
+	ssize_t ret;
+
+	ret = -EPERM;
+	if (!file_ns_capable(file, ns, CAP_SETGID))
+		goto out;
+
+	/* Only allow a very narrow range of strings to be written */
+	ret = -EINVAL;
+	if ((*ppos != 0) || (count >= sizeof(kbuf)) || (count < 1))
+		goto out;
+
+	/* What was written? */
+	ret = -EFAULT;
+	if (copy_from_user(kbuf, buf, count))
+		goto out;
+	kbuf[count] = '\0';
+
+	/* What is being requested? */
+	ret = -EINVAL;
+	if (kbuf[0] == '0')
+		setgroups_allowed = 0;
+	else if (kbuf[0] == '1')
+		setgroups_allowed = 1;
+	else
+		goto out;
+
+	/* Allow a trailing newline */
+	ret = -EINVAL;
+	if ((count == 2) && (kbuf[1] != '\n'))
+		goto out;
+
+
+	if (setgroups_allowed) {
+		ret = -EINVAL;
+		if (atomic_read(&ns->setgroups_allowed) == 0)
+			goto out;
+	} else {
+		atomic_set(&ns->setgroups_allowed, 0);
+		/* sigh memory barriers! */
+	}
+
+	/* Report a successful write */
+	*ppos = count;
+	ret = count;
+out:
+	return ret;
+}
+
 static void *userns_get(struct task_struct *task)
 {
 	struct user_namespace *user_ns;
-- 
1.9.1

^ permalink raw reply related

* [CFT][PATCH 1/3] userns: Avoid problems with negative groups
From: Eric W. Biederman @ 2014-12-02 20:25 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linux Containers, Josh Triplett, Andrew Morton, Kees Cook,
	Michael Kerrisk-manpages, Linux API, linux-man,
	linux-kernel@vger.kernel.org, LSM, Casey Schaufler,
	Serge E. Hallyn, Richard Weinberger, Kenton Varda, stable
In-Reply-To: <CALCETrVfO4sBdZcQiZXsofPZMj7pqKeVbX+4g3dAj6WjUca+1w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>


Classic unix permission checks have an interesting feature, the group
permissions for a file can be set to less than the other permissions
on a file.  Occasionally this is used deliberately to give a certain
group of users fewer permissions than the default.

Overlooking negative groups has resulted in the permission checks for
setting up a group mapping in a user namespace to be too lax.  Tighten
the permission checks in new_idmap_permitted to ensure that mapping
uids and gids into user namespaces without privilege will not result
in new combinations of credentials being available to the users.

When setting mappings without privilege only the creator of the user
namespace is interesting as all other users that have CAP_SETUID over
the user namespace will also have CAP_SETUID over the user namespace's
parent.  So the scope of the unprivileged check is reduced to just
the case where cred->euid is the namespace creator.

For setting a uid mapping without privilege only euid is considered as
setresuid can set uid, suid and fsuid from euid without privilege
making any combination of uids possible with user namespaces already
possible without them.

For now setting a gid mapping without privilege is removed.  The only
possible set of credentials that would be safe without a gid mapping
(egid without any supplementary groups) just doesn't happen in practice
so would simply lead to unused untested code.

setgroups is modified to fail not only when the group ids do not
map but also when there are no gid mappings at all, preventing
setgroups(0, NULL) from succeeding when gid mappings have not been
established.

For a small class of applications this change breaks userspace
and removes useful functionality.  This small class of applications
includes tools/testing/selftests/mount/unprivileged-remount-test.c

Most of the removed functionality will be added back with the
addition of a one way knob to disable setgroups.  Once setgroups
is disabled setting the gid_map becomes as safe as setting the uid_map.

For more common applications that set the uid_map and the gid_map with
privilege this change will have no effect on them.

This should fix CVE-2014-8989.

Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: "Eric W. Biederman" <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
---
 arch/s390/kernel/compat_linux.c |  5 ++++-
 include/linux/user_namespace.h  | 10 ++++++++++
 kernel/groups.c                 |  5 ++++-
 kernel/uid16.c                  |  5 ++++-
 kernel/user_namespace.c         | 17 ++++++++++-------
 5 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index ca38139423ae..21c91feeca2d 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -49,6 +49,7 @@
 #include <linux/fadvise.h>
 #include <linux/ipc.h>
 #include <linux/slab.h>
+#include <linux/user_namespace.h>
 
 #include <asm/types.h>
 #include <asm/uaccess.h>
@@ -246,10 +247,12 @@ out:
 
 COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
 {
+	struct user_namespace *user_ns = current_user_ns();
 	struct group_info *group_info;
 	int retval;
 
-	if (!capable(CAP_SETGID))
+	if (!gid_mapping_possible(user_ns) ||
+	    !capable(CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index e95372654f09..26d5e8f5db97 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -46,6 +46,11 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 	return ns;
 }
 
+static inline bool gid_mapping_possible(const struct user_namespace *ns)
+{
+	return ns->gid_map.nr_extents != 0;
+}
+
 extern int create_user_ns(struct cred *new);
 extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
 extern void free_user_ns(struct user_namespace *ns);
@@ -70,6 +75,11 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 	return &init_user_ns;
 }
 
+static inline bool gid_mapping_possible(const struct user_namespace *ns)
+{
+	return true;
+}
+
 static inline int create_user_ns(struct cred *new)
 {
 	return -EINVAL;
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f86cfa..b9a6a5c7e100 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -6,6 +6,7 @@
 #include <linux/slab.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 
 /* init to 2 - one for init_task, one to ensure it is never freed */
@@ -220,10 +221,12 @@ out:
 
 SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 {
+	struct user_namespace *user_ns = current_user_ns();
 	struct group_info *group_info;
 	int retval;
 
-	if (!ns_capable(current_user_ns(), CAP_SETGID))
+	if (!gid_mapping_possible(user_ns) ||
+	    !ns_capable(user_ns, CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 602e5bbbceff..602c7de2aa11 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -13,6 +13,7 @@
 #include <linux/highuid.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/user_namespace.h>
 
 #include <asm/uaccess.h>
 
@@ -173,10 +174,12 @@ out:
 
 SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
 {
+	struct user_namespace *user_ns = current_user_ns();
 	struct group_info *group_info;
 	int retval;
 
-	if (!ns_capable(current_user_ns(), CAP_SETGID))
+	if (!gid_mapping_possible(user_ns) ||
+	    !ns_capable(user_ns, CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index aa312b0dc3ec..51d65b444951 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -812,16 +812,19 @@ static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
 {
-	/* Allow mapping to your own filesystem ids */
-	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+	const struct cred *cred = file->f_cred;
+
+	/* Allow a mapping without capabilities when allowing the root
+	 * of the user namespace capabilities restricted to that id
+	 * will not change the set of credentials available to that
+	 * user.
+	 */
+	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
+	    uid_eq(ns->owner, cred->euid)) {
 		u32 id = new_map->extent[0].lower_first;
 		if (cap_setid == CAP_SETUID) {
 			kuid_t uid = make_kuid(ns->parent, id);
-			if (uid_eq(uid, file->f_cred->fsuid))
-				return true;
-		} else if (cap_setid == CAP_SETGID) {
-			kgid_t gid = make_kgid(ns->parent, id);
-			if (gid_eq(gid, file->f_cred->fsgid))
+			if (uid_eq(uid, cred->euid))
 				return true;
 		}
 	}
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH v2] userns: Disallow setgroups unless the gid_map writer is privileged
From: Andy Lutomirski @ 2014-12-02 20:13 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: linux-man, Kees Cook, Linux API, Linux Containers, Josh Triplett,
	stable, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Kenton Varda, LSM, Michael Kerrisk-manpages, Richard Weinberger,
	Casey Schaufler, Andrew Morton
In-Reply-To: <87mw75ygwp.fsf-JOvCrm2gF+uungPnsOpG7nhyD016LWXt@public.gmane.org>

On Tue, Dec 2, 2014 at 11:45 AM, Eric W. Biederman
<ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
> Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:
>
>> On Tue, Dec 2, 2014 at 4:09 AM, Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>>> Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:
>>>
>>>> Classic unix permission checks have an interesting feature.  The
>>>> group permissions for a file can be set to less than the other
>>>> permissions on a file.  Occasionally this is used deliberately to
>>>> give a certain group of users fewer permissions than the default.
>>>>
>>>> User namespaces break this usage.  Groups set in rgid or egid are
>>>> unaffected because an unprivileged user namespace creator can only
>>>> map a single group, so setresgid inside and outside the namespace
>>>> have the same effect.  However, an unprivileged user namespace
>>>> creator can currently use setgroups(2) to drop all supplementary
>>>> groups, so, if a supplementary group denies access to some resource,
>>>> user namespaces can be used to bypass that restriction.
>>>>
>>>> To fix this issue, this introduces a new user namespace flag
>>>> USERNS_SETGROUPS_ALLOWED.  If that flag is not set, then
>>>> setgroups(2) will fail regardless of the caller's capabilities.
>>>>
>>>> USERNS_SETGROUPS_ALLOWED is cleared in a new user namespace.  By
>>>> default, if the writer of gid_map has CAP_SETGID in the parent
>>>> userns and the parent userns has USERNS_SETGROUPS_ALLOWED, then the
>>>> USERNS_SETGROUPS_ALLOWED will be set in the child.  If the writer is
>>>> not so privileged, then writing to gid_map will fail unless the
>>>> writer adds "setgroups deny" to gid_map, in which case the check is
>>>> skipped but USERNS_SETGROUPS_ALLOWED will remain cleared.
>>>>
>>>> The full semantics are:
>>>>
>>>> If "setgroups allow" is present or no explicit "setgroups" setting
>>>> is written to gid_map, then writing to gid_map will fail with -EPERM
>>>> unless the opener and writer have CAP_SETGID in the parent namespace
>>>> and the parent namespace has USERNS_SETGROUPS_ALLOWED.
>>>>
>>>> If "setgroups deny" is present, then writing gid_map will work as
>>>> before, but USERNS_SETGROUPS_ALLOWED will remain cleared.  This will
>>>> result in processes in the userns that have CAP_SETGID to be
>>>> nonetheless unable to use setgroups(2).  If this breaks something
>>>> inside the userns, then this is okay -- the userns creator
>>>> specifically requested this behavior.
>>>
>>> I think we need to do this but I also think setgroups allow/deny
>>> should be a separate knob than the uid/gid mapping.
>>
>> Yeah.  It should be readable, too.
>>
>>>
>>> If for no other reason than you missed at least two implementations of
>>> setgroups, in your implementation.
>>
>> I clearly didn't grep hard enough.  Grr.
>>
>>>
>>>> While it could be safe to set USERNS_SETGROUPS_ALLOWED if the user
>>>> namespace creator has no supplementary groups, doing so could be
>>>> surprising and could have unpleasant interactions with setns(2).
>>>>
>>>> Any application that uses newgidmap(1) should be unaffected by this
>>>> fix, but unprivileged users that create user namespaces to
>>>> manipulate mounts or sandbox themselves will break until they start
>>>> using "setgroups deny".
>>>>
>>>> This should fix CVE-2014-8989.
>>>>
>>>> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>>>> Signed-off-by: Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org>
>>>> ---
>>>>
>>>> Unlike v1, this *will* break things like Sandstorm.  Fixing them will be
>>>> easy.  I agree that this will result in better long-term semantics, but
>>>> I'm not so happy about breaking working software.
>>>
>>> I know what you mean.   One of the pieces of software broken by all of
>>> this is my test to verify the remount semantics.   Which makes all of
>>> this very unfortunate.
>>>
>>>> If this is unpalatable, here's a different option: get rid of all these
>>>> permission checks and just change setgroups.  Specifically, make it so
>>>> that setgroups(2) in a userns will succeed but will silently refuse to
>>>> remove unmapped groups.
>>>
>>> Nope silently refusing to remove unmapped groups is not enough.  I can
>>> make any gid in my supplemental groups my egid, it takes a sgid helper
>>> application but I don't need any special privileges to create that.
>>> Once that group is my egid I can map it.  Which means I could drop
>>> any one group of my choosing without privileges.  Which out and out
>>> breaks negative groups :(
>>
>> Whoops, right.  And you can, indeed, have egid match one of your
>> supplementary groups.
>>
>>>
>>> I got to looking and I have a significant piece of code that all of this
>>> breaks.
>>>
>>> tools/testing/selftests/mount/unprivileged-remount-test.c
>>>
>>> So I am extra motivated to figure out and find a way to preserve most of
>>> the existing functionality.  My regression tests won't pass until I can
>>> find something palatable.
>>>
>>> It is very annoying that every option I have considered so far breaks
>>> something useful.
>>>
>>> Having a write once setgroups disable, and then allowing unprivileged
>>> mappings after that seems the most palatable option I have seen,
>>> semantically.  Which means existing software that doesn't care about
>>> setgroups can just add the disable code and then work otherwise
>>> unmodified.
>>>
>>> The other option that I have played with is forcing a set of groups
>>> in setgroups if your user namespace was created without privilege,
>>> that winds up requiring that you verify you don't have any other
>>> supplementary groups, and is generally messy whichever way I look at it.
>>
>> How bad would the automatic selection of setgroups behavior really be?
>>
>> Suppose we have /proc/self/userns_setgroups_mode that can be "allow",
>> "deny", or "auto".  It starts out as "auto" (or "deny" if it's set to
>> "deny" in the parent).  Once any of the maps have been set,
>> userns_options becomes readonly.  If you try to write to gid_map when
>> setgroups == auto, then it switches to "allow" or "deny" depending on
>> whether the writer has privilege.
>>
>> This is nasty magical behavior, but it should DTRT for existing users,
>> and everyone can be updated to set the value explicitly.
>
> Rarely is everything updated unless there is a requirement for an
> update.
>
> For my code that cares an update is necessary anyway as it contains
> a gratuitous setgroups(0, NULL).
>
> Since we have to break applications breaking them loud and clear and
> letting them set the flag to recover (if possible) seems the best we can
> do.  That at least allows someone to ask if they depend on setgroups or
> init_groups.

Fair enough.

Any thoughts on what the API should be for v3?

>
>> FWIW, it might also make sense to move all of this stuff into
>> /proc/PID/userns.  There may be races in which a setuid or otherwise
>> privileged helper pokes at more than one userns file but actually
>> modifies different namespaces each time.  I don't know whether these
>> races matter.  uid_map, gid_map, and projid_map could be symlinks.
>
> I don't see how moving these files removes any races.

It helps if you use openat to open the userns directory and if the
/proc infrastructure is smart enough to make that work.

Admittedly, I don't actually see a dangerous race right now.

--Andy

>
> Eric
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply

* Re: [PATCH v2] userns: Disallow setgroups unless the gid_map writer is privileged
From: Eric W. Biederman @ 2014-12-02 19:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-man, Kees Cook, Linux API, Linux Containers, Josh Triplett,
	stable, linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Kenton Varda, LSM, Michael Kerrisk-manpages, Richard Weinberger,
	Casey Schaufler, Andrew Morton
In-Reply-To: <CALCETrXOz4C7Tu8mggBtR=k47ZmkuAhinVUxWJSFyS1Ep0HvRw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:

> On Tue, Dec 2, 2014 at 4:09 AM, Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org> wrote:
>> Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org> writes:
>>
>>> Classic unix permission checks have an interesting feature.  The
>>> group permissions for a file can be set to less than the other
>>> permissions on a file.  Occasionally this is used deliberately to
>>> give a certain group of users fewer permissions than the default.
>>>
>>> User namespaces break this usage.  Groups set in rgid or egid are
>>> unaffected because an unprivileged user namespace creator can only
>>> map a single group, so setresgid inside and outside the namespace
>>> have the same effect.  However, an unprivileged user namespace
>>> creator can currently use setgroups(2) to drop all supplementary
>>> groups, so, if a supplementary group denies access to some resource,
>>> user namespaces can be used to bypass that restriction.
>>>
>>> To fix this issue, this introduces a new user namespace flag
>>> USERNS_SETGROUPS_ALLOWED.  If that flag is not set, then
>>> setgroups(2) will fail regardless of the caller's capabilities.
>>>
>>> USERNS_SETGROUPS_ALLOWED is cleared in a new user namespace.  By
>>> default, if the writer of gid_map has CAP_SETGID in the parent
>>> userns and the parent userns has USERNS_SETGROUPS_ALLOWED, then the
>>> USERNS_SETGROUPS_ALLOWED will be set in the child.  If the writer is
>>> not so privileged, then writing to gid_map will fail unless the
>>> writer adds "setgroups deny" to gid_map, in which case the check is
>>> skipped but USERNS_SETGROUPS_ALLOWED will remain cleared.
>>>
>>> The full semantics are:
>>>
>>> If "setgroups allow" is present or no explicit "setgroups" setting
>>> is written to gid_map, then writing to gid_map will fail with -EPERM
>>> unless the opener and writer have CAP_SETGID in the parent namespace
>>> and the parent namespace has USERNS_SETGROUPS_ALLOWED.
>>>
>>> If "setgroups deny" is present, then writing gid_map will work as
>>> before, but USERNS_SETGROUPS_ALLOWED will remain cleared.  This will
>>> result in processes in the userns that have CAP_SETGID to be
>>> nonetheless unable to use setgroups(2).  If this breaks something
>>> inside the userns, then this is okay -- the userns creator
>>> specifically requested this behavior.
>>
>> I think we need to do this but I also think setgroups allow/deny
>> should be a separate knob than the uid/gid mapping.
>
> Yeah.  It should be readable, too.
>
>>
>> If for no other reason than you missed at least two implementations of
>> setgroups, in your implementation.
>
> I clearly didn't grep hard enough.  Grr.
>
>>
>>> While it could be safe to set USERNS_SETGROUPS_ALLOWED if the user
>>> namespace creator has no supplementary groups, doing so could be
>>> surprising and could have unpleasant interactions with setns(2).
>>>
>>> Any application that uses newgidmap(1) should be unaffected by this
>>> fix, but unprivileged users that create user namespaces to
>>> manipulate mounts or sandbox themselves will break until they start
>>> using "setgroups deny".
>>>
>>> This should fix CVE-2014-8989.
>>>
>>> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>>> Signed-off-by: Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org>
>>> ---
>>>
>>> Unlike v1, this *will* break things like Sandstorm.  Fixing them will be
>>> easy.  I agree that this will result in better long-term semantics, but
>>> I'm not so happy about breaking working software.
>>
>> I know what you mean.   One of the pieces of software broken by all of
>> this is my test to verify the remount semantics.   Which makes all of
>> this very unfortunate.
>>
>>> If this is unpalatable, here's a different option: get rid of all these
>>> permission checks and just change setgroups.  Specifically, make it so
>>> that setgroups(2) in a userns will succeed but will silently refuse to
>>> remove unmapped groups.
>>
>> Nope silently refusing to remove unmapped groups is not enough.  I can
>> make any gid in my supplemental groups my egid, it takes a sgid helper
>> application but I don't need any special privileges to create that.
>> Once that group is my egid I can map it.  Which means I could drop
>> any one group of my choosing without privileges.  Which out and out
>> breaks negative groups :(
>
> Whoops, right.  And you can, indeed, have egid match one of your
> supplementary groups.
>
>>
>> I got to looking and I have a significant piece of code that all of this
>> breaks.
>>
>> tools/testing/selftests/mount/unprivileged-remount-test.c
>>
>> So I am extra motivated to figure out and find a way to preserve most of
>> the existing functionality.  My regression tests won't pass until I can
>> find something palatable.
>>
>> It is very annoying that every option I have considered so far breaks
>> something useful.
>>
>> Having a write once setgroups disable, and then allowing unprivileged
>> mappings after that seems the most palatable option I have seen,
>> semantically.  Which means existing software that doesn't care about
>> setgroups can just add the disable code and then work otherwise
>> unmodified.
>>
>> The other option that I have played with is forcing a set of groups
>> in setgroups if your user namespace was created without privilege,
>> that winds up requiring that you verify you don't have any other
>> supplementary groups, and is generally messy whichever way I look at it.
>
> How bad would the automatic selection of setgroups behavior really be?
>
> Suppose we have /proc/self/userns_setgroups_mode that can be "allow",
> "deny", or "auto".  It starts out as "auto" (or "deny" if it's set to
> "deny" in the parent).  Once any of the maps have been set,
> userns_options becomes readonly.  If you try to write to gid_map when
> setgroups == auto, then it switches to "allow" or "deny" depending on
> whether the writer has privilege.
>
> This is nasty magical behavior, but it should DTRT for existing users,
> and everyone can be updated to set the value explicitly.

Rarely is everything updated unless there is a requirement for an
update.

For my code that cares an update is necessary anyway as it contains
a gratuitous setgroups(0, NULL). 

Since we have to break applications, breaking them loud and clear and
letting them set the flag to recover (if possible) seems the best we can
do.  That at least allows someone to ask if they depend on setgroups or
init_groups.

> FWIW, it might also make sense to move all of this stuff into
> /proc/PID/userns.  There may be races in which a setuid or otherwise
> privileged helper pokes at more than one userns file but actually
> modifies different namespaces each time.  I don't know whether these
> races matter.  uid_map, gid_map, and projid_map could be symlinks.

I don't see how moving these files removes any races.

Eric

^ permalink raw reply

* Re: [PATCHv2 0/7] CGroup Namespaces
From: Aditya Kali @ 2014-12-02 19:14 UTC (permalink / raw)
  To: Richard Weinberger
  Cc: Linux API, Linux Containers, Serge Hallyn,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Andy Lutomirski, Eric W. Biederman, Tejun Heo,
	cgroups mailinglist, Ingo Molnar
In-Reply-To: <CAFLxGvybiem34J3zrtVhW=4itSdczassNt9RcuxnpJQeAz-JVA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

On Wed, Nov 26, 2014 at 2:58 PM, Richard Weinberger
<richard.weinberger-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>
> On Thu, Nov 6, 2014 at 6:33 PM, Aditya Kali <adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> > On Tue, Nov 4, 2014 at 5:10 AM, Vivek Goyal <vgoyal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
> >> On Fri, Oct 31, 2014 at 12:18:54PM -0700, Aditya Kali wrote:
> >> [..]
> >>>  fs/kernfs/dir.c                  | 194 ++++++++++++++++++++++++++++++++++-----
> >>>  fs/kernfs/mount.c                |  48 ++++++++++
> >>>  fs/proc/namespaces.c             |   1 +
> >>>  include/linux/cgroup.h           |  41 ++++++++-
> >>>  include/linux/cgroup_namespace.h |  36 ++++++++
> >>>  include/linux/kernfs.h           |   5 +
> >>>  include/linux/nsproxy.h          |   2 +
> >>>  include/linux/proc_ns.h          |   4 +
> >>>  include/uapi/linux/sched.h       |   3 +-
> >>>  kernel/Makefile                  |   2 +-
> >>>  kernel/cgroup.c                  | 108 +++++++++++++++++-----
> >>>  kernel/cgroup_namespace.c        | 148 +++++++++++++++++++++++++++++
> >>>  kernel/fork.c                    |   2 +-
> >>>  kernel/nsproxy.c                 |  19 +++-
> >>
> >> Hi Aditya,
> >>
> >> Can we provide a documentation file for cgroup namespace behavior. Say,
> >> Documentation/namespaces/cgroup-namespace.txt.
> >>
> > Yes, definitely. I will add it as soon as we have a consensus on the
> > overall series.
>
> Do you have a public git repository which contains your patches?
>

Hi, sorry for the late reply. I don't have these in a public git repo yet.
But I will try to post it on github or somewhere.
Also, I found a bug in this patchset that crashes the kernel in some
cases (when both unified and split hierarchies are mounted). I have a
fix and will send out the patches (with documentation) soon.

>
> --
> Thanks,
> //richard

Thanks,
-- 
Aditya

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox