* [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
From: Emmanuel Gil Peyrot @ 2021-09-21 21:39 UTC (permalink / raw)
To: linux-crypto
Cc: devicetree, Herbert Xu, Emmanuel Gil Peyrot, linux-kernel,
Rob Herring, Paul Mackerras, Ash Logan, linuxppc-dev,
David S. Miller, Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-1-linkmauve@linkmauve.fr>
This engine implements AES in CBC mode, using 128-bit keys only. It is
present on both the Wii and the Wii U, and is apparently identical in
both consoles.
The hardware is capable of firing an interrupt when the operation is
done, but this driver currently uses a busy loop, I’m not too sure
whether it would be preferable to switch, nor how to achieve that.
It also supports a mode where no operation is done, and thus could be
used as a DMA copy engine, but I don’t know how to expose that to the
kernel or whether it would even be useful.
In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
speedup.
This driver was written based on reversed documentation, see:
https://wiibrew.org/wiki/Hardware/AES
Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
Tested-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> # on Wii U
---
drivers/crypto/Kconfig | 11 ++
drivers/crypto/Makefile | 1 +
drivers/crypto/nintendo-aes.c | 273 ++++++++++++++++++++++++++++++++++
3 files changed, 285 insertions(+)
create mode 100644 drivers/crypto/nintendo-aes.c
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9a4c275a1335..adc94ad7462d 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -871,4 +871,15 @@ config CRYPTO_DEV_SA2UL
source "drivers/crypto/keembay/Kconfig"
+config CRYPTO_DEV_NINTENDO
+ tristate "Support for the Nintendo Wii U AES engine"
+ depends on WII || WIIU || COMPILE_TEST
+ select CRYPTO_AES
+ help
+ Say 'Y' here to use the Nintendo Wii or Wii U on-board AES
+ engine for the CryptoAPI AES algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called nintendo-aes.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index fa22cb19e242..004dae7bbf39 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/
obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
n2_crypto-y := n2_core.o n2_asm.o
+obj-$(CONFIG_CRYPTO_DEV_NINTENDO) += nintendo-aes.o
obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
obj-$(CONFIG_CRYPTO_DEV_OMAP) += omap-crypto.o
obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes-driver.o
diff --git a/drivers/crypto/nintendo-aes.c b/drivers/crypto/nintendo-aes.c
new file mode 100644
index 000000000000..79ae77500999
--- /dev/null
+++ b/drivers/crypto/nintendo-aes.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2021 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/crypto.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+#include <linux/io.h>
+#include <linux/delay.h>
+
+/* Addresses of the registers */
+#define AES_CTRL 0
+#define AES_SRC 4
+#define AES_DEST 8
+#define AES_KEY 12
+#define AES_IV 16
+
+#define AES_CTRL_EXEC 0x80000000
+#define AES_CTRL_EXEC_RESET 0x00000000
+#define AES_CTRL_EXEC_INIT 0x80000000
+#define AES_CTRL_IRQ 0x40000000
+#define AES_CTRL_ERR 0x20000000
+#define AES_CTRL_ENA 0x10000000
+#define AES_CTRL_DEC 0x08000000
+#define AES_CTRL_IV 0x00001000
+#define AES_CTRL_BLOCK 0x00000fff
+
+#define OP_TIMEOUT 0x1000
+
+#define AES_DIR_DECRYPT 0
+#define AES_DIR_ENCRYPT 1
+
+static void __iomem *base;
+static spinlock_t lock;
+
+/* Write a 128 bit field (either a writable key or IV) */
+static inline void
+writefield(u32 reg, const void *_value)
+{
+ const u32 *value = _value;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ iowrite32be(value[i], base + reg);
+}
+
+static int
+do_crypt(const void *src, void *dst, u32 len, u32 flags)
+{
+ u32 blocks = ((len >> 4) - 1) & AES_CTRL_BLOCK;
+ u32 status;
+ u32 counter = OP_TIMEOUT;
+ u32 i;
+
+ /* Flush out all of src, we can’t know whether any of it is in cache */
+ for (i = 0; i < len; i += 32)
+ __asm__("dcbf 0, %0" : : "r" (src + i));
+ __asm__("sync" : : : "memory");
+
+ /* Set the addresses for DMA */
+ iowrite32be(virt_to_phys((void *)src), base + AES_SRC);
+ iowrite32be(virt_to_phys(dst), base + AES_DEST);
+
+ /* Start the operation */
+ iowrite32be(flags | blocks, base + AES_CTRL);
+
+ /* TODO: figure out how to use interrupts here, this will probably
+ * lower throughput but let the CPU do other things while the AES
+ * engine is doing its work. */
+ do {
+ status = ioread32be(base + AES_CTRL);
+ cpu_relax();
+ } while ((status & AES_CTRL_EXEC) && --counter);
+
+ /* Do we ever get called with dst ≠ src? If so we have to invalidate
+ * dst in addition to the earlier flush of src. */
+ if (unlikely(dst != src)) {
+ for (i = 0; i < len; i += 32)
+ __asm__("dcbi 0, %0" : : "r" (dst + i));
+ __asm__("sync" : : : "memory");
+ }
+
+ return counter ? 0 : 1;
+}
+
+static void
+nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
+ bool firstchunk)
+{
+ u32 flags = 0;
+ unsigned long iflags;
+ int ret;
+
+ flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
+
+ if (dir == AES_DIR_DECRYPT)
+ flags |= AES_CTRL_DEC;
+
+ if (!firstchunk)
+ flags |= AES_CTRL_IV;
+
+ /* Start the critical section */
+ spin_lock_irqsave(&lock, iflags);
+
+ if (firstchunk)
+ writefield(AES_IV, iv);
+
+ ret = do_crypt(src, dst, len, flags);
+ BUG_ON(ret);
+
+ spin_unlock_irqrestore(&lock, iflags);
+}
+
+static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
+{
+ /* The hardware only supports AES-128 */
+ if (len != AES_KEYSIZE_128)
+ return -EINVAL;
+
+ writefield(AES_KEY, key);
+ return 0;
+}
+
+static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
+{
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+ char ivbuf[AES_BLOCK_SIZE];
+ unsigned int ivsize;
+
+ bool firstchunk = true;
+
+ /* Reset the engine */
+ iowrite32be(0, base + AES_CTRL);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ ivsize = min(sizeof(ivbuf), walk.ivsize);
+
+ while ((nbytes = walk.nbytes) != 0) {
+ unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
+ unsigned int ret = nbytes % AES_BLOCK_SIZE;
+
+ if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
+ /* If this is the last chunk and we're decrypting, take
+ * note of the IV (which is the last ciphertext block)
+ */
+ memcpy(ivbuf, walk.src.virt.addr + walk.total - ivsize,
+ ivsize);
+ }
+
+ nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
+ chunkbytes, walk.iv, dir, firstchunk);
+
+ if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
+ /* If this is the last chunk and we're encrypting, take
+ * note of the IV (which is the last ciphertext block)
+ */
+ memcpy(walk.iv,
+ walk.dst.virt.addr + walk.total - ivsize,
+ ivsize);
+ } else if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
+ memcpy(walk.iv, ivbuf, ivsize);
+ }
+
+ err = skcipher_walk_done(&walk, ret);
+ firstchunk = false;
+ }
+
+ return err;
+}
+
+static int nintendo_cbc_encrypt(struct skcipher_request *req)
+{
+ return nintendo_skcipher_crypt(req, AES_DIR_ENCRYPT);
+}
+
+static int nintendo_cbc_decrypt(struct skcipher_request *req)
+{
+ return nintendo_skcipher_crypt(req, AES_DIR_DECRYPT);
+}
+
+static struct skcipher_alg nintendo_alg = {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-nintendo",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_alignmask = 15,
+ .base.cra_module = THIS_MODULE,
+ .setkey = nintendo_setkey_skcipher,
+ .encrypt = nintendo_cbc_encrypt,
+ .decrypt = nintendo_cbc_decrypt,
+ .min_keysize = AES_KEYSIZE_128,
+ .max_keysize = AES_KEYSIZE_128,
+ .ivsize = AES_BLOCK_SIZE,
+};
+
+static int nintendo_aes_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+
+ crypto_unregister_skcipher(&nintendo_alg);
+ devm_iounmap(dev, base);
+ base = NULL;
+
+ return 0;
+}
+
+static int nintendo_aes_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ int ret;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ spin_lock_init(&lock);
+
+ ret = crypto_register_skcipher(&nintendo_alg);
+ if (ret)
+ goto eiomap;
+
+ dev_notice(dev, "Nintendo Wii and Wii U AES engine enabled\n");
+ return 0;
+
+ eiomap:
+ devm_iounmap(dev, base);
+
+ dev_err(dev, "Nintendo Wii and Wii U AES initialization failed\n");
+ return ret;
+}
+
+static const struct of_device_id nintendo_aes_of_match[] = {
+ { .compatible = "nintendo,hollywood-aes", },
+ { .compatible = "nintendo,latte-aes", },
+ {/* sentinel */},
+};
+MODULE_DEVICE_TABLE(of, nintendo_aes_of_match);
+
+static struct platform_driver nintendo_aes_driver = {
+ .driver = {
+ .name = "nintendo-aes",
+ .of_match_table = nintendo_aes_of_match,
+ },
+ .probe = nintendo_aes_probe,
+ .remove = nintendo_aes_remove,
+};
+
+module_platform_driver(nintendo_aes_driver);
+
+MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
+MODULE_DESCRIPTION("Nintendo Wii and Wii U Hardware AES driver");
+MODULE_LICENSE("GPL");
--
2.33.0
^ permalink raw reply related
* [PATCH 3/4] powerpc: wii.dts: Expose the AES engine on this platform
From: Emmanuel Gil Peyrot @ 2021-09-21 21:39 UTC (permalink / raw)
To: linuxppc-dev
Cc: devicetree, Herbert Xu, Emmanuel Gil Peyrot, linux-kernel,
linux-crypto, Rob Herring, Paul Mackerras, Ash Logan,
David S. Miller, Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-1-linkmauve@linkmauve.fr>
This can be used by the newly-added nintendo-aes crypto module.
Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
---
arch/powerpc/boot/dts/wii.dts | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index aaa381da1906..c5720fdd0686 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -113,6 +113,13 @@ exi@d006800 {
interrupts = <4>;
};
+ aes@d020000 {
+ compatible = "nintendo,hollywood-aes";
+ reg = <0x0d020000 0x14>;
+ interrupts = <2>;
+ interrupt-parent = <&PIC1>;
+ };
+
usb@d040000 {
compatible = "nintendo,hollywood-usb-ehci",
"usb-ehci";
--
2.33.0
^ permalink raw reply related
* [PATCH 2/4] dt-bindings: nintendo-aes: Document the Wii and Wii U AES support
From: Emmanuel Gil Peyrot @ 2021-09-21 21:39 UTC (permalink / raw)
To: Rob Herring, devicetree
Cc: Herbert Xu, Emmanuel Gil Peyrot, linux-kernel, linux-crypto,
Paul Mackerras, Ash Logan, linuxppc-dev, David S. Miller,
Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-1-linkmauve@linkmauve.fr>
Both of these consoles use the exact same AES engine, which only
supports CBC mode with 128-bit keys.
Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
---
.../bindings/crypto/nintendo-aes.yaml | 34 +++++++++++++++++++
1 file changed, 34 insertions(+)
create mode 100644 Documentation/devicetree/bindings/crypto/nintendo-aes.yaml
diff --git a/Documentation/devicetree/bindings/crypto/nintendo-aes.yaml b/Documentation/devicetree/bindings/crypto/nintendo-aes.yaml
new file mode 100644
index 000000000000..e62a2bfc571c
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/nintendo-aes.yaml
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/nintendo-aes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nintendo Wii and Wii U AES engine
+
+maintainers:
+ - Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
+
+description: |+
+ The AES engine in the Nintendo Wii and Wii U supports the following:
+ -- Advanced Encryption Standard (AES) in CBC mode, with 128-bit keys
+
+properties:
+ compatible:
+ items:
+ - const: nintendo,hollywood-aes
+ - const: nintendo,latte-aes
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ description: Not supported yet.
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
--
2.33.0
^ permalink raw reply related
* [PATCH 4/4] powerpc: wii_defconfig: Enable AES by default
From: Emmanuel Gil Peyrot @ 2021-09-21 21:39 UTC (permalink / raw)
To: linuxppc-dev
Cc: devicetree, Herbert Xu, Emmanuel Gil Peyrot, linux-kernel,
linux-crypto, Rob Herring, Paul Mackerras, Ash Logan,
David S. Miller, Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-1-linkmauve@linkmauve.fr>
This selects the nintendo-aes module when building for this platform.
Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
---
arch/powerpc/configs/wii_defconfig | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 379c171f3ddd..752e081d28d0 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -123,4 +123,6 @@ CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_DMA_API_DEBUG=y
CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_HW is not set
+CONFIG_CRYPTO_HW=y
+CONFIG_CRYPTO_DEV_NINTENDO=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
--
2.33.0
^ permalink raw reply related
* Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()
From: Tom Lendacky @ 2021-09-21 21:43 UTC (permalink / raw)
To: Kirill A. Shutemov, Borislav Petkov
Cc: Sathyanarayanan Kuppuswamy, linux-efi, Brijesh Singh, kvm,
Peter Zijlstra, Dave Hansen, dri-devel, platform-driver-x86,
Will Deacon, linux-s390, Andi Kleen, Joerg Roedel, x86, amd-gfx,
Christoph Hellwig, Ingo Molnar, linux-graphics-maintainer,
Tianyu Lan, Andy Lutomirski, Thomas Gleixner, kexec, linux-kernel,
iommu, linux-fsdevel, linuxppc-dev
In-Reply-To: <20210921213401.i2pzaotgjvn4efgg@box.shutemov.name>
On 9/21/21 4:34 PM, Kirill A. Shutemov wrote:
> On Tue, Sep 21, 2021 at 11:27:17PM +0200, Borislav Petkov wrote:
>> On Wed, Sep 22, 2021 at 12:20:59AM +0300, Kirill A. Shutemov wrote:
>>> I still believe calling cc_platform_has() from __startup_64() is totally
>>> broken as it lacks proper wrapping while accessing global variables.
>>
>> Well, one of the issues on the AMD side was using boot_cpu_data too
>> early and the Intel side uses it too. Can you replace those checks with
>> is_tdx_guest() or whatever was the helper's name which would check
>> whether the the kernel is running as a TDX guest, and see if that helps?
>
> There's no need in Intel check this early. Only AMD need it. Maybe just
> opencode them?
Any way you can put a gzipped/bzipped copy of your vmlinux file somewhere
I can grab it from and take a look at it?
Thanks,
Tom
>
^ permalink raw reply
* Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()
From: Kirill A. Shutemov @ 2021-09-21 21:58 UTC (permalink / raw)
To: Tom Lendacky
Cc: Sathyanarayanan Kuppuswamy, linux-efi, Brijesh Singh, kvm,
Peter Zijlstra, Dave Hansen, dri-devel, platform-driver-x86,
Will Deacon, linux-s390, Andi Kleen, Joerg Roedel, x86, amd-gfx,
Christoph Hellwig, Ingo Molnar, linux-graphics-maintainer,
Tianyu Lan, Borislav Petkov, Andy Lutomirski, Thomas Gleixner,
kexec, linux-kernel, iommu, linux-fsdevel, linuxppc-dev
In-Reply-To: <00f52bf8-cbc6-3721-f40e-2f51744751b0@amd.com>
On Tue, Sep 21, 2021 at 04:43:59PM -0500, Tom Lendacky wrote:
> On 9/21/21 4:34 PM, Kirill A. Shutemov wrote:
> > On Tue, Sep 21, 2021 at 11:27:17PM +0200, Borislav Petkov wrote:
> > > On Wed, Sep 22, 2021 at 12:20:59AM +0300, Kirill A. Shutemov wrote:
> > > > I still believe calling cc_platform_has() from __startup_64() is totally
> > > > broken as it lacks proper wrapping while accessing global variables.
> > >
> > > Well, one of the issues on the AMD side was using boot_cpu_data too
> > > early and the Intel side uses it too. Can you replace those checks with
> > > is_tdx_guest() or whatever was the helper's name which would check
> > > whether the the kernel is running as a TDX guest, and see if that helps?
> >
> > There's no need in Intel check this early. Only AMD need it. Maybe just
> > opencode them?
>
> Any way you can put a gzipped/bzipped copy of your vmlinux file somewhere I
> can grab it from and take a look at it?
You can find broken vmlinux and bzImage here:
https://drive.google.com/drive/folders/1n74vUQHOGebnF70Im32qLFY8iS3wvjIs?usp=sharing
Let me know when I can remove it.
--
Kirill A. Shutemov
^ permalink raw reply
* Re: [PATCH 0/4] crypto: nintendo-aes - add a new AES driver
From: Eric Biggers @ 2021-09-21 21:59 UTC (permalink / raw)
To: Emmanuel Gil Peyrot
Cc: devicetree, Herbert Xu, Ash Logan, linux-kernel, Rob Herring,
Paul Mackerras, linux-crypto, linuxppc-dev, David S. Miller,
Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-1-linkmauve@linkmauve.fr>
On Tue, Sep 21, 2021 at 11:39:26PM +0200, Emmanuel Gil Peyrot wrote:
> This engine implements AES in CBC mode, using 128-bit keys only. It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Emmanuel Gil Peyrot (4):
> crypto: nintendo-aes - add a new AES driver
> dt-bindings: nintendo-aes: Document the Wii and Wii U AES support
> powerpc: wii.dts: Expose the AES engine on this platform
> powerpc: wii_defconfig: Enable AES by default
Does this pass the self-tests, including the fuzz tests which are enabled by
CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y?
- Eric
^ permalink raw reply
* [PATCH 5/7] PCI: Add pci_find_dvsec_capability to find designated VSEC
From: Ben Widawsky @ 2021-09-21 22:04 UTC (permalink / raw)
To: linux-cxl, linux-pci
Cc: Vishal Verma, Alison Schofield, Ben Widawsky, Andrew Donnellan,
Ira Weiny, Frederic Barrat, David E . Box, Jonathan Cameron,
Bjorn Helgaas, Dan Williams, linuxppc-dev
In-Reply-To: <20210921220459.2437386-1-ben.widawsky@intel.com>
Add pci_find_dvsec_capability to locate a Designated Vendor-Specific
Extended Capability with the specified DVSEC ID.
The Designated Vendor-Specific Extended Capability (DVSEC) allows one or
more vendor specific capabilities that aren't tied to the vendor ID of
the PCI component.
DVSEC is critical for both the Compute Express Link (CXL) driver as well
as the driver for OpenCAPI coherent accelerator (OCXL).
Cc: David E. Box <david.e.box@linux.intel.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-pci@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Frederic Barrat <fbarrat@linux.ibm.com>
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
drivers/pci/pci.c | 32 ++++++++++++++++++++++++++++++++
include/linux/pci.h | 1 +
2 files changed, 33 insertions(+)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce2ab62b64cf..94ac86ff28b0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -732,6 +732,38 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap)
}
EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
+/**
+ * pci_find_dvsec_capability - Find DVSEC for vendor
+ * @dev: PCI device to query
+ * @vendor: Vendor ID to match for the DVSEC
+ * @dvsec: Designated Vendor-specific capability ID
+ *
+ * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
+ * offset in config space; otherwise return 0.
+ */
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
+{
+ int pos;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
+ if (!pos)
+ return 0;
+
+ while (pos) {
+ u16 v, id;
+
+ pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
+ pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
+ if (vendor == v && dvsec == id)
+ return pos;
+
+ pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
+
/**
* pci_find_parent_resource - return resource region of parent bus of given
* region
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..c93ccfa4571b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1130,6 +1130,7 @@ u16 pci_find_ext_capability(struct pci_dev *dev, int cap);
u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap);
struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap);
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec);
u64 pci_get_dsn(struct pci_dev *dev);
--
2.33.0
^ permalink raw reply related
* [PATCH 7/7] ocxl: Use pci core's DVSEC functionality
From: Ben Widawsky @ 2021-09-21 22:04 UTC (permalink / raw)
To: linux-cxl, linux-pci
Cc: Alison Schofield, Ben Widawsky, Andrew Donnellan, Ira Weiny,
Vishal Verma, Jonathan Cameron, Frederic Barrat, Dan Williams,
linuxppc-dev
In-Reply-To: <20210921220459.2437386-1-ben.widawsky@intel.com>
Reduce maintenance burden of DVSEC query implementation by using the
centralized PCI core implementation.
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Frederic Barrat <fbarrat@linux.ibm.com>
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
drivers/misc/ocxl/config.c | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index a68738f38252..e401a51596b9 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -33,18 +33,7 @@
static int find_dvsec(struct pci_dev *dev, int dvsec_id)
{
- int vsec = 0;
- u16 vendor, id;
-
- while ((vsec = pci_find_next_ext_capability(dev, vsec,
- OCXL_EXT_CAP_ID_DVSEC))) {
- pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
- &vendor);
- pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
- if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
- return vsec;
- }
- return 0;
+ return pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, dvsec_id);
}
static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
--
2.33.0
^ permalink raw reply related
* Re: [PATCH 0/4] crypto: nintendo-aes - add a new AES driver
From: Emmanuel Gil Peyrot @ 2021-09-21 22:37 UTC (permalink / raw)
To: Eric Biggers
Cc: devicetree, Herbert Xu, Ash Logan, Emmanuel Gil Peyrot,
linux-kernel, Rob Herring, Paul Mackerras, linux-crypto,
linuxppc-dev, David S. Miller, Jonathan Neuschäfer
In-Reply-To: <YUpVyTN7MQbMShdf@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1920 bytes --]
On Tue, Sep 21, 2021 at 02:59:37PM -0700, Eric Biggers wrote:
> On Tue, Sep 21, 2021 at 11:39:26PM +0200, Emmanuel Gil Peyrot wrote:
> > This engine implements AES in CBC mode, using 128-bit keys only. It is
> > present on both the Wii and the Wii U, and is apparently identical in
> > both consoles.
> >
> > The hardware is capable of firing an interrupt when the operation is
> > done, but this driver currently uses a busy loop, I’m not too sure
> > whether it would be preferable to switch, nor how to achieve that.
> >
> > It also supports a mode where no operation is done, and thus could be
> > used as a DMA copy engine, but I don’t know how to expose that to the
> > kernel or whether it would even be useful.
> >
> > In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> > aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> > speedup.
> >
> > This driver was written based on reversed documentation, see:
> > https://wiibrew.org/wiki/Hardware/AES
> >
> > Emmanuel Gil Peyrot (4):
> > crypto: nintendo-aes - add a new AES driver
> > dt-bindings: nintendo-aes: Document the Wii and Wii U AES support
> > powerpc: wii.dts: Expose the AES engine on this platform
> > powerpc: wii_defconfig: Enable AES by default
>
> Does this pass the self-tests, including the fuzz tests which are enabled by
> CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y?
I wasn’t aware of those, and indeed it doesn’t pass them yet:
[ 0.680164] alg: skcipher: cbc-aes-nintendo encryption overran dst buffer on test vector 0, cfg="out-of-place"
[ 0.680201] fbcon: Taking over console
[ 0.680219] ------------[ cut here ]------------
[ 0.680222] alg: self-tests for cbc-aes-nintendo (cbc(aes)) failed (rc=-75)
I’ll try to figure out how to debug this and I’ll send a v2, thanks for
the hint!
>
> - Eric
--
Emmanuel Gil Peyrot
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: [PATCH] mm: Remove HARDENED_USERCOPY_FALLBACK
From: Joel Stanley @ 2021-09-21 23:50 UTC (permalink / raw)
To: Stephen Kitt
Cc: Andrew Morton, Kees Cook, James Morris, Linux Kernel Mailing List,
Pekka Enberg, linux-mm, linux-security-module, linux-hardening,
David Rientjes, Christoph Lameter, linuxppc-dev, Joonsoo Kim,
Vlastimil Babka, Serge E . Hallyn
In-Reply-To: <20210921061149.1091163-1-steve@sk2.org>
On Tue, 21 Sept 2021 at 09:50, Stephen Kitt <steve@sk2.org> wrote:
>
> This has served its purpose and is no longer used. All usercopy
> violations appear to have been handled by now, any remaining
> instances (or new bugs) will cause copies to be rejected.
>
> This isn't a direct revert of commit 2d891fbc3bb6 ("usercopy: Allow
> strict enforcement of whitelists"); since usercopy_fallback is
> effectively 0, the fallback handling is removed too.
>
> This also removes the usercopy_fallback module parameter on
> slab_common.
>
> Link: https://github.com/KSPP/linux/issues/153
> Signed-off-by: Stephen Kitt <steve@sk2.org>
> Suggested-by: Kees Cook <keescook@chromium.org>
> ---
> arch/powerpc/configs/skiroot_defconfig | 1 -
For the defconfig change:
Reviewed-by: Joel Stanley <joel@jms.id.au>
Cheers,
Joel
> include/linux/slab.h | 2 --
> mm/slab.c | 13 -------------
> mm/slab_common.c | 8 --------
> mm/slub.c | 14 --------------
> security/Kconfig | 14 --------------
> 6 files changed, 52 deletions(-)
>
> diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
> index b806a5d3a695..c3ba614c973d 100644
> --- a/arch/powerpc/configs/skiroot_defconfig
> +++ b/arch/powerpc/configs/skiroot_defconfig
> @@ -275,7 +275,6 @@ CONFIG_NLS_UTF8=y
> CONFIG_ENCRYPTED_KEYS=y
> CONFIG_SECURITY=y
> CONFIG_HARDENED_USERCOPY=y
> -# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
> CONFIG_HARDENED_USERCOPY_PAGESPAN=y
> CONFIG_FORTIFY_SOURCE=y
> CONFIG_SECURITY_LOCKDOWN_LSM=y
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 0c97d788762c..5b21515afae0 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -142,8 +142,6 @@ struct mem_cgroup;
> void __init kmem_cache_init(void);
> bool slab_is_available(void);
>
> -extern bool usercopy_fallback;
> -
> struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
> unsigned int align, slab_flags_t flags,
> void (*ctor)(void *));
> diff --git a/mm/slab.c b/mm/slab.c
> index d0f725637663..4d826394ffcb 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -4207,19 +4207,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
> n <= cachep->useroffset - offset + cachep->usersize)
> return;
>
> - /*
> - * If the copy is still within the allocated object, produce
> - * a warning instead of rejecting the copy. This is intended
> - * to be a temporary method to find any missing usercopy
> - * whitelists.
> - */
> - if (usercopy_fallback &&
> - offset <= cachep->object_size &&
> - n <= cachep->object_size - offset) {
> - usercopy_warn("SLAB object", cachep->name, to_user, offset, n);
> - return;
> - }
> -
> usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
> }
> #endif /* CONFIG_HARDENED_USERCOPY */
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index a4a571428c51..925b00c1d4e8 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -37,14 +37,6 @@ LIST_HEAD(slab_caches);
> DEFINE_MUTEX(slab_mutex);
> struct kmem_cache *kmem_cache;
>
> -#ifdef CONFIG_HARDENED_USERCOPY
> -bool usercopy_fallback __ro_after_init =
> - IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
> -module_param(usercopy_fallback, bool, 0400);
> -MODULE_PARM_DESC(usercopy_fallback,
> - "WARN instead of reject usercopy whitelist violations");
> -#endif
> -
> static LIST_HEAD(slab_caches_to_rcu_destroy);
> static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
> static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
> diff --git a/mm/slub.c b/mm/slub.c
> index 3f96e099817a..77f53e76a3c3 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4125,7 +4125,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
> {
> struct kmem_cache *s;
> unsigned int offset;
> - size_t object_size;
> bool is_kfence = is_kfence_address(ptr);
>
> ptr = kasan_reset_tag(ptr);
> @@ -4158,19 +4157,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
> n <= s->useroffset - offset + s->usersize)
> return;
>
> - /*
> - * If the copy is still within the allocated object, produce
> - * a warning instead of rejecting the copy. This is intended
> - * to be a temporary method to find any missing usercopy
> - * whitelists.
> - */
> - object_size = slab_ksize(s);
> - if (usercopy_fallback &&
> - offset <= object_size && n <= object_size - offset) {
> - usercopy_warn("SLUB object", s->name, to_user, offset, n);
> - return;
> - }
> -
> usercopy_abort("SLUB object", s->name, to_user, offset, n);
> }
> #endif /* CONFIG_HARDENED_USERCOPY */
> diff --git a/security/Kconfig b/security/Kconfig
> index 0ced7fd33e4d..d9698900c9b7 100644
> --- a/security/Kconfig
> +++ b/security/Kconfig
> @@ -163,20 +163,6 @@ config HARDENED_USERCOPY
> or are part of the kernel text. This kills entire classes
> of heap overflow exploits and similar kernel memory exposures.
>
> -config HARDENED_USERCOPY_FALLBACK
> - bool "Allow usercopy whitelist violations to fallback to object size"
> - depends on HARDENED_USERCOPY
> - default y
> - help
> - This is a temporary option that allows missing usercopy whitelists
> - to be discovered via a WARN() to the kernel log, instead of
> - rejecting the copy, falling back to non-whitelisted hardened
> - usercopy that checks the slab allocation size instead of the
> - whitelist size. This option will be removed once it seems like
> - all missing usercopy whitelists have been identified and fixed.
> - Booting with "slab_common.usercopy_fallback=Y/N" can change
> - this setting.
> -
> config HARDENED_USERCOPY_PAGESPAN
> bool "Refuse to copy allocations that span multiple pages"
> depends on HARDENED_USERCOPY
>
> base-commit: 368094df48e680fa51cedb68537408cfa64b788e
> --
> 2.30.2
>
^ permalink raw reply
* Re: [PATCH 7/7] ocxl: Use pci core's DVSEC functionality
From: Dan Williams @ 2021-09-22 0:44 UTC (permalink / raw)
To: Ben Widawsky
Cc: Alison Schofield, Andrew Donnellan, Ira Weiny, Linux PCI,
linux-cxl, Vishal Verma, Jonathan Cameron, Frederic Barrat,
linuxppc-dev
In-Reply-To: <20210921220459.2437386-8-ben.widawsky@intel.com>
On Tue, Sep 21, 2021 at 3:05 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> Reduce maintenance burden of DVSEC query implementation by using the
> centralized PCI core implementation.
>
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Frederic Barrat <fbarrat@linux.ibm.com>
> Cc: Andrew Donnellan <ajd@linux.ibm.com>
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> ---
> drivers/misc/ocxl/config.c | 13 +------------
> 1 file changed, 1 insertion(+), 12 deletions(-)
>
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index a68738f38252..e401a51596b9 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -33,18 +33,7 @@
>
> static int find_dvsec(struct pci_dev *dev, int dvsec_id)
> {
> - int vsec = 0;
> - u16 vendor, id;
> -
> - while ((vsec = pci_find_next_ext_capability(dev, vsec,
> - OCXL_EXT_CAP_ID_DVSEC))) {
> - pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
> - &vendor);
> - pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
> - if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
> - return vsec;
> - }
> - return 0;
> + return pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, dvsec_id);
> }
What about:
arch/powerpc/platforms/powernv/ocxl.c::find_dvsec_from_pos()
...? With that converted the redundant definitions below:
OCXL_EXT_CAP_ID_DVSEC
OCXL_DVSEC_VENDOR_OFFSET
OCXL_DVSEC_ID_OFFSET
...can be cleaned up in favor of the core definitions.
^ permalink raw reply
* Re: [PATCH v2 01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
From: Fabio Estevam @ 2021-09-22 0:49 UTC (permalink / raw)
To: Mark Brown
Cc: Linux-ALSA, Xiubo Li, Shengjiu Wang, Liam Girdwood, Nicolin Chen,
linuxppc-dev,
moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE
In-Reply-To: <20210921213542.31688-1-broonie@kernel.org>
On Tue, Sep 21, 2021 at 6:36 PM Mark Brown <broonie@kernel.org> wrote:
>
> As part of moving to remove the old style defines for the bus clocks update
> the eureka-tlv320 driver to use more modern terminology for clocking.
>
> Signed-off-by: Mark Brown <broonie@kernel.org>
For the whole series:
Reviewed-by: Fabio Estevam <festevam@gmail.com>
^ permalink raw reply
* Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
From: Joel Stanley @ 2021-09-22 2:02 UTC (permalink / raw)
To: Emmanuel Gil Peyrot
Cc: devicetree, Herbert Xu, Linux Kernel Mailing List, Rob Herring,
Paul Mackerras, Linux Crypto Mailing List,
Jonathan Neuschäfer, linuxppc-dev, David S. Miller,
Ash Logan
In-Reply-To: <20210921213930.10366-2-linkmauve@linkmauve.fr>
On Tue, 21 Sept 2021 at 21:47, Emmanuel Gil Peyrot
<linkmauve@linkmauve.fr> wrote:
>
> This engine implements AES in CBC mode, using 128-bit keys only. It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
> Tested-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> # on Wii U
> ---
> drivers/crypto/Kconfig | 11 ++
> drivers/crypto/Makefile | 1 +
> drivers/crypto/nintendo-aes.c | 273 ++++++++++++++++++++++++++++++++++
> 3 files changed, 285 insertions(+)
> create mode 100644 drivers/crypto/nintendo-aes.c
>
> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
> index 9a4c275a1335..adc94ad7462d 100644
> --- a/drivers/crypto/Kconfig
> +++ b/drivers/crypto/Kconfig
> @@ -871,4 +871,15 @@ config CRYPTO_DEV_SA2UL
>
> source "drivers/crypto/keembay/Kconfig"
>
> +config CRYPTO_DEV_NINTENDO
> + tristate "Support for the Nintendo Wii U AES engine"
> + depends on WII || WIIU || COMPILE_TEST
This current seteup will allow the driver to be compile tested for
non-powerpc, which will fail on the dcbf instructions.
Perhaps use this instead:
depends on WII || WIIU || (COMPILE_TEST && PPC)
> + select CRYPTO_AES
> + help
> + Say 'Y' here to use the Nintendo Wii or Wii U on-board AES
> + engine for the CryptoAPI AES algorithm.
> +
> + To compile this driver as a module, choose M here: the module
> + will be called nintendo-aes.
> +
> endif # CRYPTO_HW
> diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
> index fa22cb19e242..004dae7bbf39 100644
> --- a/drivers/crypto/Makefile
> +++ b/drivers/crypto/Makefile
> @@ -22,6 +22,7 @@ obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/
> obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
> obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
> n2_crypto-y := n2_core.o n2_asm.o
> +obj-$(CONFIG_CRYPTO_DEV_NINTENDO) += nintendo-aes.o
> obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
> obj-$(CONFIG_CRYPTO_DEV_OMAP) += omap-crypto.o
> obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes-driver.o
> diff --git a/drivers/crypto/nintendo-aes.c b/drivers/crypto/nintendo-aes.c
> new file mode 100644
> index 000000000000..79ae77500999
> --- /dev/null
> +++ b/drivers/crypto/nintendo-aes.c
> @@ -0,0 +1,273 @@
> +/*
> + * Copyright (C) 2021 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
The kernel uses the SDPX header instead of pasting the text.
> +static int
> +do_crypt(const void *src, void *dst, u32 len, u32 flags)
> +{
> + u32 blocks = ((len >> 4) - 1) & AES_CTRL_BLOCK;
> + u32 status;
> + u32 counter = OP_TIMEOUT;
> + u32 i;
> +
> + /* Flush out all of src, we can’t know whether any of it is in cache */
> + for (i = 0; i < len; i += 32)
> + __asm__("dcbf 0, %0" : : "r" (src + i));
> + __asm__("sync" : : : "memory");
This could be flush_dcache_range, from asm/cacheflush.h
> +
> + /* Set the addresses for DMA */
> + iowrite32be(virt_to_phys((void *)src), base + AES_SRC);
> + iowrite32be(virt_to_phys(dst), base + AES_DEST);
> +
> + /* Start the operation */
> + iowrite32be(flags | blocks, base + AES_CTRL);
> +
> + /* TODO: figure out how to use interrupts here, this will probably
> + * lower throughput but let the CPU do other things while the AES
> + * engine is doing its work. */
> + do {
> + status = ioread32be(base + AES_CTRL);
> + cpu_relax();
> + } while ((status & AES_CTRL_EXEC) && --counter);
You could add a msleep in here?
Consider using readl_poll_timeout().
Cheers,
Joel
> +
> + /* Do we ever get called with dst ≠ src? If so we have to invalidate
> + * dst in addition to the earlier flush of src. */
> + if (unlikely(dst != src)) {
> + for (i = 0; i < len; i += 32)
> + __asm__("dcbi 0, %0" : : "r" (dst + i));
> + __asm__("sync" : : : "memory");
> + }
> +
> + return counter ? 0 : 1;
> +}
^ permalink raw reply
* [PATCH v1] powerpc/64s: Fix unrecoverable MCE crash
From: Nicholas Piggin @ 2021-09-22 2:02 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Mahesh Salgaonkar, Ganesh Goudar, Nicholas Piggin
The machine check handler is not considered NMI on 64s. The early
handler is the true NMI handler, and then it schedules the
machine_check_exception handler to run when interrupts are enabled.
This works fine except the case of an unrecoverable MCE, where the true
NMI is taken when MSR[RI] is clear, it can not recover to schedule the
next handler, so it calls machine_check_exception directly so something
might be done about it.
Calling an async handler from NMI context can result in irq state and
other things getting corrupted. This can also trigger the BUG at
arch/powerpc/include/asm/interrupt.h:168.
Fix this by just making the 64s machine_check_exception handler an NMI
like it is on other subarchs.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 4 ----
arch/powerpc/kernel/traps.c | 23 +++++++----------------
2 files changed, 7 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 6b800d3e2681..b32ed910a8cf 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -524,11 +524,7 @@ static __always_inline long ____##func(struct pt_regs *regs)
/* Interrupt handlers */
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
-#ifdef CONFIG_PPC_BOOK3S_64
-DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception);
-#else
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
-#endif
DECLARE_INTERRUPT_HANDLER(SMIException);
DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
DECLARE_INTERRUPT_HANDLER(unknown_exception);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index aac8c0412ff9..b21450c655d2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -790,24 +790,19 @@ void die_mce(const char *str, struct pt_regs *regs, long err)
* do_exit() checks for in_interrupt() and panics in that case, so
* exit the irq/nmi before calling die.
*/
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
- irq_exit();
- else
- nmi_exit();
+ nmi_exit();
die(str, regs, err);
}
/*
- * BOOK3S_64 does not call this handler as a non-maskable interrupt
- * (it uses its own early real-mode handler to handle the MCE proper
- * and then raises irq_work to call this handler when interrupts are
- * enabled).
+ * BOOK3S_64 does not call this handler as a non-maskable interrupt (it uses
+ * its own early real-mode handler to handle the MCE proper and then raises
+ * irq_work to call this handler when interrupts are enabled), except in the
+ * case of unrecoverable_mce. If unrecoverable_mce was a separate NMI handler,
+ * then this could be ASYNC on 64s. However it should all work okay as an NMI
+ * handler (and it is NMI on other platforms) so just make it an NMI.
*/
-#ifdef CONFIG_PPC_BOOK3S_64
-DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception)
-#else
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
-#endif
{
int recover = 0;
@@ -842,11 +837,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
if (regs_is_unrecoverable(regs))
die_mce("Unrecoverable Machine check", regs, SIGBUS);
-#ifdef CONFIG_PPC_BOOK3S_64
- return;
-#else
return 0;
-#endif
}
DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
--
2.23.0
^ permalink raw reply related
* Re: [PATCH 3/3] powerpc/pseries/cpuhp: delete add/remove_by_count code
From: kernel test robot @ 2021-09-20 21:50 UTC (permalink / raw)
To: Nathan Lynch, linuxppc-dev
Cc: danielhb413, tyreld, ldufour, kbuild-all, aneesh.kumar
In-Reply-To: <20210920135504.1792219-4-nathanl@linux.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 5199 bytes --]
Hi Nathan,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on powerpc/next]
[also build test ERROR on linus/master v5.15-rc2 next-20210920]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nathan-Lynch/CPU-DLPAR-hotplug-for-v5-16/20210920-215907
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allmodconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/72ea4c8a5398a4a72da34051a66f260ab0154f57
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Nathan-Lynch/CPU-DLPAR-hotplug-for-v5-16/20210920-215907
git checkout 72ea4c8a5398a4a72da34051a66f260ab0154f57
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross ARCH=powerpc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
arch/powerpc/platforms/pseries/hotplug-cpu.c: In function 'dlpar_cpu':
>> arch/powerpc/platforms/pseries/hotplug-cpu.c:746:13: error: variable 'count' set but not used [-Werror=unused-but-set-variable]
746 | u32 count, drc_index;
| ^~~~~
cc1: all warnings being treated as errors
vim +/count +746 arch/powerpc/platforms/pseries/hotplug-cpu.c
ac71380071d19d Nathan Fontenot 2015-12-16 743
ac71380071d19d Nathan Fontenot 2015-12-16 744 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
ac71380071d19d Nathan Fontenot 2015-12-16 745 {
ac71380071d19d Nathan Fontenot 2015-12-16 @746 u32 count, drc_index;
ac71380071d19d Nathan Fontenot 2015-12-16 747 int rc;
ac71380071d19d Nathan Fontenot 2015-12-16 748
ac71380071d19d Nathan Fontenot 2015-12-16 749 count = hp_elog->_drc_u.drc_count;
ac71380071d19d Nathan Fontenot 2015-12-16 750 drc_index = hp_elog->_drc_u.drc_index;
ac71380071d19d Nathan Fontenot 2015-12-16 751
ac71380071d19d Nathan Fontenot 2015-12-16 752 lock_device_hotplug();
ac71380071d19d Nathan Fontenot 2015-12-16 753
ac71380071d19d Nathan Fontenot 2015-12-16 754 switch (hp_elog->action) {
ac71380071d19d Nathan Fontenot 2015-12-16 755 case PSERIES_HP_ELOG_ACTION_REMOVE:
72ea4c8a5398a4 Nathan Lynch 2021-09-20 756 if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
ac71380071d19d Nathan Fontenot 2015-12-16 757 rc = dlpar_cpu_remove_by_index(drc_index);
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 758 /*
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 759 * Setting the isolation state of an UNISOLATED/CONFIGURED
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 760 * device to UNISOLATE is a no-op, but the hypervisor can
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 761 * use it as a hint that the CPU removal failed.
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 762 */
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 763 if (rc)
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 764 dlpar_unisolate_drc(drc_index);
29c9a2699e71a7 Daniel Henrique Barboza 2021-04-16 765 }
ac71380071d19d Nathan Fontenot 2015-12-16 766 else
ac71380071d19d Nathan Fontenot 2015-12-16 767 rc = -EINVAL;
ac71380071d19d Nathan Fontenot 2015-12-16 768 break;
90edf184b9b727 Nathan Fontenot 2015-12-16 769 case PSERIES_HP_ELOG_ACTION_ADD:
72ea4c8a5398a4 Nathan Lynch 2021-09-20 770 if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
90edf184b9b727 Nathan Fontenot 2015-12-16 771 rc = dlpar_cpu_add(drc_index);
90edf184b9b727 Nathan Fontenot 2015-12-16 772 else
90edf184b9b727 Nathan Fontenot 2015-12-16 773 rc = -EINVAL;
90edf184b9b727 Nathan Fontenot 2015-12-16 774 break;
ac71380071d19d Nathan Fontenot 2015-12-16 775 default:
ac71380071d19d Nathan Fontenot 2015-12-16 776 pr_err("Invalid action (%d) specified\n", hp_elog->action);
ac71380071d19d Nathan Fontenot 2015-12-16 777 rc = -EINVAL;
ac71380071d19d Nathan Fontenot 2015-12-16 778 break;
ac71380071d19d Nathan Fontenot 2015-12-16 779 }
ac71380071d19d Nathan Fontenot 2015-12-16 780
ac71380071d19d Nathan Fontenot 2015-12-16 781 unlock_device_hotplug();
ac71380071d19d Nathan Fontenot 2015-12-16 782 return rc;
ac71380071d19d Nathan Fontenot 2015-12-16 783 }
ac71380071d19d Nathan Fontenot 2015-12-16 784
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 74041 bytes --]
^ permalink raw reply
* Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
From: Corentin Labbe @ 2021-09-22 6:04 UTC (permalink / raw)
To: Emmanuel Gil Peyrot
Cc: devicetree, Herbert Xu, Ash Logan, linux-kernel, Rob Herring,
Paul Mackerras, linux-crypto, linuxppc-dev, David S. Miller,
Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-2-linkmauve@linkmauve.fr>
Le Tue, Sep 21, 2021 at 11:39:27PM +0200, Emmanuel Gil Peyrot a écrit :
> This engine implements AES in CBC mode, using 128-bit keys only. It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
> Tested-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> # on Wii U
[...]
> +static int
> +do_crypt(const void *src, void *dst, u32 len, u32 flags)
> +{
> + u32 blocks = ((len >> 4) - 1) & AES_CTRL_BLOCK;
> + u32 status;
> + u32 counter = OP_TIMEOUT;
> + u32 i;
> +
> + /* Flush out all of src, we can’t know whether any of it is in cache */
> + for (i = 0; i < len; i += 32)
> + __asm__("dcbf 0, %0" : : "r" (src + i));
> + __asm__("sync" : : : "memory");
> +
> + /* Set the addresses for DMA */
> + iowrite32be(virt_to_phys((void *)src), base + AES_SRC);
> + iowrite32be(virt_to_phys(dst), base + AES_DEST);
Hello
Since you do DMA operation, I think you should use the DMA-API and call dma_map_xxx()
This will prevent the use of __asm__ and virt_to_phys().
Regards
^ permalink raw reply
* Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
From: Michael Ellerman @ 2021-09-22 6:32 UTC (permalink / raw)
To: Nathan Lynch, linuxppc-dev; +Cc: srikar, npiggin
In-Reply-To: <20210921031213.2029824-1-nathanl@linux.ibm.com>
Nathan Lynch <nathanl@linux.ibm.com> writes:
> vcpu_is_preempted() can be used outside of preempt-disabled critical
> sections, yielding warnings such as:
>
> BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/185
> caller is rwsem_spin_on_owner+0x1cc/0x2d0
> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
> Call Trace:
> [c000000012907ac0] [c000000000aa30a8] dump_stack_lvl+0xac/0x108 (unreliable)
> [c000000012907b00] [c000000001371f70] check_preemption_disabled+0x150/0x160
> [c000000012907b90] [c0000000001e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
> [c000000012907be0] [c0000000001e1408] rwsem_down_write_slowpath+0x478/0x9a0
> [c000000012907ca0] [c000000000576cf4] filename_create+0x94/0x1e0
> [c000000012907d10] [c00000000057ac08] do_symlinkat+0x68/0x1a0
> [c000000012907d70] [c00000000057ae18] sys_symlink+0x58/0x70
> [c000000012907da0] [c00000000002e448] system_call_exception+0x198/0x3c0
> [c000000012907e10] [c00000000000c54c] system_call_common+0xec/0x250
>
> The result of vcpu_is_preempted() is always subject to invalidation by
> events inside and outside of Linux; it's just a best guess at a point in
> time. Use raw_smp_processor_id() to avoid such warnings.
>
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> Fixes: ca3f969dcb11 ("powerpc/paravirt: Use is_kvm_guest() in vcpu_is_preempted()")
> ---
> arch/powerpc/include/asm/paravirt.h | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
> index bcb7b5f917be..e429aca566de 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -97,7 +97,14 @@ static inline bool vcpu_is_preempted(int cpu)
>
> #ifdef CONFIG_PPC_SPLPAR
> if (!is_kvm_guest()) {
> - int first_cpu = cpu_first_thread_sibling(smp_processor_id());
> + int first_cpu;
> +
> + /*
> + * This is only a guess at best, and this function may be
> + * called with preemption enabled. Using raw_smp_processor_id()
> + * does not damage accuracy.
> + */
> + first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
This change seems good, except I think the comment needs to be a lot
more explicit about what it's doing and why.
A casual reader is going to be confused about vcpu preemption vs
"preemption", which are basically unrelated yet use the same word.
It's not clear how raw_smp_processor_id() is related to (Linux)
preemption, unless you know that smp_processor_id() is the alternative
and it contains a preemption check.
And "this is only a guess" is not clear on what *this* is, you're
referring to the result of the whole function, but that's not obvious.
> /*
> * Preemption can only happen at core granularity. This CPU
^^^^^^^^^^
Means something different to "preemption" above.
I know you didn't write that comment, and maybe we need to rewrite some
of those existing comments to make it clear they're not talking about
Linux preemption.
cheers
^ permalink raw reply
* Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
From: Srikar Dronamraju @ 2021-09-22 7:57 UTC (permalink / raw)
To: Nathan Lynch; +Cc: linuxppc-dev, npiggin
In-Reply-To: <20210921031213.2029824-1-nathanl@linux.ibm.com>
* Nathan Lynch <nathanl@linux.ibm.com> [2021-09-20 22:12:13]:
> vcpu_is_preempted() can be used outside of preempt-disabled critical
> sections, yielding warnings such as:
>
> BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/185
> caller is rwsem_spin_on_owner+0x1cc/0x2d0
> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
> Call Trace:
> [c000000012907ac0] [c000000000aa30a8] dump_stack_lvl+0xac/0x108 (unreliable)
> [c000000012907b00] [c000000001371f70] check_preemption_disabled+0x150/0x160
> [c000000012907b90] [c0000000001e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
> [c000000012907be0] [c0000000001e1408] rwsem_down_write_slowpath+0x478/0x9a0
> [c000000012907ca0] [c000000000576cf4] filename_create+0x94/0x1e0
> [c000000012907d10] [c00000000057ac08] do_symlinkat+0x68/0x1a0
> [c000000012907d70] [c00000000057ae18] sys_symlink+0x58/0x70
> [c000000012907da0] [c00000000002e448] system_call_exception+0x198/0x3c0
> [c000000012907e10] [c00000000000c54c] system_call_common+0xec/0x250
>
> The result of vcpu_is_preempted() is always subject to invalidation by
> events inside and outside of Linux; it's just a best guess at a point in
> time. Use raw_smp_processor_id() to avoid such warnings.
Typically smp_processor_id() and raw_smp_processor_id() except for the
CONFIG_DEBUG_PREEMPT. In the CONFIG_DEBUG_PREEMPT case, smp_processor_id()
is actually debug_smp_processor_id(), which does all the checks.
I believe these checks in debug_smp_processor_id() are only valid for x86
case (aka cases were they have __smp_processor_id() defined.)
i.e x86 has a different implementation of _smp_processor_id() for stable and
unstable
>
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> Fixes: ca3f969dcb11 ("powerpc/paravirt: Use is_kvm_guest() in vcpu_is_preempted()")
> ---
> arch/powerpc/include/asm/paravirt.h | 9 ++++++++-
> 1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
> index bcb7b5f917be..e429aca566de 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -97,7 +97,14 @@ static inline bool vcpu_is_preempted(int cpu)
>
> #ifdef CONFIG_PPC_SPLPAR
> if (!is_kvm_guest()) {
> - int first_cpu = cpu_first_thread_sibling(smp_processor_id());
> + int first_cpu;
> +
> + /*
> + * This is only a guess at best, and this function may be
> + * called with preemption enabled. Using raw_smp_processor_id()
> + * does not damage accuracy.
> + */
> + first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
>
> /*
> * Preemption can only happen at core granularity. This CPU
> --
> 2.31.1
>
How about something like the below?
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 510519e8a1eb..8c669e8ceb73 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -256,12 +256,14 @@ static inline int get_boot_cpu_id(void)
*/
#ifndef __smp_processor_id
#define __smp_processor_id(x) raw_smp_processor_id(x)
-#endif
-
+#else
#ifdef CONFIG_DEBUG_PREEMPT
extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
-#else
+#endif
+#endif
+
+#ifndef smp_processor_id
# define smp_processor_id() __smp_processor_id()
#endif
--
Thanks and Regards
Srikar Dronamraju
^ permalink raw reply related
* Re: [PATCH 5/7] PCI: Add pci_find_dvsec_capability to find designated VSEC
From: Frederic Barrat @ 2021-09-22 9:33 UTC (permalink / raw)
To: Ben Widawsky, linux-cxl, linux-pci
Cc: Alison Schofield, Andrew Donnellan, Ira Weiny, Vishal Verma,
David E . Box, Jonathan Cameron, Bjorn Helgaas, Dan Williams,
linuxppc-dev
In-Reply-To: <20210921220459.2437386-6-ben.widawsky@intel.com>
On 22/09/2021 00:04, Ben Widawsky wrote:
> Add pci_find_dvsec_capability to locate a Designated Vendor-Specific
> Extended Capability with the specified DVSEC ID.
>
> The Designated Vendor-Specific Extended Capability (DVSEC) allows one or
> more vendor specific capabilities that aren't tied to the vendor ID of
> the PCI component.
>
> DVSEC is critical for both the Compute Express Link (CXL) driver as well
> as the driver for OpenCAPI coherent accelerator (OCXL).
>
> Cc: David E. Box <david.e.box@linux.intel.com>
> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: linux-pci@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Frederic Barrat <fbarrat@linux.ibm.com>
> Cc: Andrew Donnellan <ajd@linux.ibm.com>
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> ---
LGTM
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
> drivers/pci/pci.c | 32 ++++++++++++++++++++++++++++++++
> include/linux/pci.h | 1 +
> 2 files changed, 33 insertions(+)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index ce2ab62b64cf..94ac86ff28b0 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -732,6 +732,38 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap)
> }
> EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
>
> +/**
> + * pci_find_dvsec_capability - Find DVSEC for vendor
> + * @dev: PCI device to query
> + * @vendor: Vendor ID to match for the DVSEC
> + * @dvsec: Designated Vendor-specific capability ID
> + *
> + * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
> + * offset in config space; otherwise return 0.
> + */
> +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
> +{
> + int pos;
> +
> + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
> + if (!pos)
> + return 0;
> +
> + while (pos) {
> + u16 v, id;
> +
> + pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
> + pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
> + if (vendor == v && dvsec == id)
> + return pos;
> +
> + pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC);
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
> +
> /**
> * pci_find_parent_resource - return resource region of parent bus of given
> * region
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index cd8aa6fce204..c93ccfa4571b 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1130,6 +1130,7 @@ u16 pci_find_ext_capability(struct pci_dev *dev, int cap);
> u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap);
> struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
> u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap);
> +u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec);
>
> u64 pci_get_dsn(struct pci_dev *dev);
>
>
^ permalink raw reply
* Re: [PATCH 7/7] ocxl: Use pci core's DVSEC functionality
From: Frederic Barrat @ 2021-09-22 9:38 UTC (permalink / raw)
To: Dan Williams, Ben Widawsky
Cc: Alison Schofield, Andrew Donnellan, Linux PCI, linuxppc-dev,
linux-cxl, Vishal Verma, Jonathan Cameron, Ira Weiny
In-Reply-To: <CAPcyv4h4QHAQF+ogMvOXrkdyR5Jceo8yp7TQNN+836=v0QwdDw@mail.gmail.com>
On 22/09/2021 02:44, Dan Williams wrote:
> On Tue, Sep 21, 2021 at 3:05 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>>
>> Reduce maintenance burden of DVSEC query implementation by using the
>> centralized PCI core implementation.
>>
>> Cc: linuxppc-dev@lists.ozlabs.org
>> Cc: Frederic Barrat <fbarrat@linux.ibm.com>
>> Cc: Andrew Donnellan <ajd@linux.ibm.com>
>> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
>> ---
>> drivers/misc/ocxl/config.c | 13 +------------
>> 1 file changed, 1 insertion(+), 12 deletions(-)
>>
>> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
>> index a68738f38252..e401a51596b9 100644
>> --- a/drivers/misc/ocxl/config.c
>> +++ b/drivers/misc/ocxl/config.c
>> @@ -33,18 +33,7 @@
>>
>> static int find_dvsec(struct pci_dev *dev, int dvsec_id)
>> {
>> - int vsec = 0;
>> - u16 vendor, id;
>> -
>> - while ((vsec = pci_find_next_ext_capability(dev, vsec,
>> - OCXL_EXT_CAP_ID_DVSEC))) {
>> - pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
>> - &vendor);
>> - pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
>> - if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
>> - return vsec;
>> - }
>> - return 0;
>> + return pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, dvsec_id);
>> }
That looks fine, thanks for spotting it. You can add this for the next
revision:
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
>
> What about:
>
> arch/powerpc/platforms/powernv/ocxl.c::find_dvsec_from_pos()
>
> ...? With that converted the redundant definitions below:
>
> OCXL_EXT_CAP_ID_DVSEC
> OCXL_DVSEC_VENDOR_OFFSET
> OCXL_DVSEC_ID_OFFSET
>
> ...can be cleaned up in favor of the core definitions.
That would be great. Are you guys willing to do it? If not, I could have
a follow-on patch, if I don't forget :-)
Thanks,
Fred
^ permalink raw reply
* Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
From: Ard Biesheuvel @ 2021-09-22 10:10 UTC (permalink / raw)
To: Emmanuel Gil Peyrot
Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
Herbert Xu, Ash Logan, Linux Kernel Mailing List, Rob Herring,
Paul Mackerras, Linux Crypto Mailing List,
open list:LINUX FOR POWERPC (32-BIT AND 64-BIT), David S. Miller,
Jonathan Neuschäfer
In-Reply-To: <20210921213930.10366-2-linkmauve@linkmauve.fr>
On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
<linkmauve@linkmauve.fr> wrote:
>
> This engine implements AES in CBC mode, using 128-bit keys only. It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
> Tested-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> # on Wii U
This is redundant - everybody should test the code they submit.
...
> + /* TODO: figure out how to use interrupts here, this will probably
> + * lower throughput but let the CPU do other things while the AES
> + * engine is doing its work. */
So is it worthwhile like this? How much faster is it to use this
accelerator rather than the CPU?
> + do {
> + status = ioread32be(base + AES_CTRL);
> + cpu_relax();
> + } while ((status & AES_CTRL_EXEC) && --counter);
> +
> + /* Do we ever get called with dst ≠ src? If so we have to invalidate
> + * dst in addition to the earlier flush of src. */
> + if (unlikely(dst != src)) {
> + for (i = 0; i < len; i += 32)
> + __asm__("dcbi 0, %0" : : "r" (dst + i));
> + __asm__("sync" : : : "memory");
> + }
> +
> + return counter ? 0 : 1;
> +}
> +
> +static void
> +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> + bool firstchunk)
> +{
> + u32 flags = 0;
> + unsigned long iflags;
> + int ret;
> +
> + flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> +
> + if (dir == AES_DIR_DECRYPT)
> + flags |= AES_CTRL_DEC;
> +
> + if (!firstchunk)
> + flags |= AES_CTRL_IV;
> +
> + /* Start the critical section */
> + spin_lock_irqsave(&lock, iflags);
> +
> + if (firstchunk)
> + writefield(AES_IV, iv);
> +
> + ret = do_crypt(src, dst, len, flags);
> + BUG_ON(ret);
> +
> + spin_unlock_irqrestore(&lock, iflags);
> +}
> +
> +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
> + unsigned int len)
> +{
> + /* The hardware only supports AES-128 */
> + if (len != AES_KEYSIZE_128)
> + return -EINVAL;
> +
> + writefield(AES_KEY, key);
> + return 0;
> +}
> +
> +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> +{
> + struct skcipher_walk walk;
> + unsigned int nbytes;
> + int err;
> + char ivbuf[AES_BLOCK_SIZE];
> + unsigned int ivsize;
> +
> + bool firstchunk = true;
> +
> + /* Reset the engine */
> + iowrite32be(0, base + AES_CTRL);
> +
> + err = skcipher_walk_virt(&walk, req, false);
> + ivsize = min(sizeof(ivbuf), walk.ivsize);
> +
> + while ((nbytes = walk.nbytes) != 0) {
> + unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
> + unsigned int ret = nbytes % AES_BLOCK_SIZE;
> +
> + if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> + /* If this is the last chunk and we're decrypting, take
> + * note of the IV (which is the last ciphertext block)
> + */
> + memcpy(ivbuf, walk.src.virt.addr + walk.total - ivsize,
> + ivsize);
> + }
> +
> + nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
> + chunkbytes, walk.iv, dir, firstchunk);
> +
> + if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
> + /* If this is the last chunk and we're encrypting, take
> + * note of the IV (which is the last ciphertext block)
> + */
> + memcpy(walk.iv,
> + walk.dst.virt.addr + walk.total - ivsize,
> + ivsize);
> + } else if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> + memcpy(walk.iv, ivbuf, ivsize);
> + }
> +
> + err = skcipher_walk_done(&walk, ret);
> + firstchunk = false;
> + }
> +
> + return err;
> +}
> +
> +static int nintendo_cbc_encrypt(struct skcipher_request *req)
> +{
> + return nintendo_skcipher_crypt(req, AES_DIR_ENCRYPT);
> +}
> +
> +static int nintendo_cbc_decrypt(struct skcipher_request *req)
> +{
> + return nintendo_skcipher_crypt(req, AES_DIR_DECRYPT);
> +}
> +
> +static struct skcipher_alg nintendo_alg = {
> + .base.cra_name = "cbc(aes)",
> + .base.cra_driver_name = "cbc-aes-nintendo",
> + .base.cra_priority = 400,
> + .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY,
> + .base.cra_blocksize = AES_BLOCK_SIZE,
> + .base.cra_alignmask = 15,
> + .base.cra_module = THIS_MODULE,
> + .setkey = nintendo_setkey_skcipher,
> + .encrypt = nintendo_cbc_encrypt,
> + .decrypt = nintendo_cbc_decrypt,
> + .min_keysize = AES_KEYSIZE_128,
> + .max_keysize = AES_KEYSIZE_128,
> + .ivsize = AES_BLOCK_SIZE,
> +};
> +
> +static int nintendo_aes_remove(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> +
> + crypto_unregister_skcipher(&nintendo_alg);
> + devm_iounmap(dev, base);
> + base = NULL;
> +
> + return 0;
> +}
> +
> +static int nintendo_aes_probe(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct resource *res;
> + int ret;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + base = devm_ioremap_resource(dev, res);
> + if (IS_ERR(base))
> + return PTR_ERR(base);
> +
> + spin_lock_init(&lock);
> +
> + ret = crypto_register_skcipher(&nintendo_alg);
> + if (ret)
> + goto eiomap;
> +
> + dev_notice(dev, "Nintendo Wii and Wii U AES engine enabled\n");
> + return 0;
> +
> + eiomap:
> + devm_iounmap(dev, base);
> +
> + dev_err(dev, "Nintendo Wii and Wii U AES initialization failed\n");
> + return ret;
> +}
> +
> +static const struct of_device_id nintendo_aes_of_match[] = {
> + { .compatible = "nintendo,hollywood-aes", },
> + { .compatible = "nintendo,latte-aes", },
> + {/* sentinel */},
> +};
> +MODULE_DEVICE_TABLE(of, nintendo_aes_of_match);
> +
> +static struct platform_driver nintendo_aes_driver = {
> + .driver = {
> + .name = "nintendo-aes",
> + .of_match_table = nintendo_aes_of_match,
> + },
> + .probe = nintendo_aes_probe,
> + .remove = nintendo_aes_remove,
> +};
> +
> +module_platform_driver(nintendo_aes_driver);
> +
> +MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
> +MODULE_DESCRIPTION("Nintendo Wii and Wii U Hardware AES driver");
> +MODULE_LICENSE("GPL");
> --
> 2.33.0
>
^ permalink raw reply
* Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
From: Emmanuel Gil Peyrot @ 2021-09-22 10:43 UTC (permalink / raw)
To: Ard Biesheuvel
Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
Herbert Xu, Ash Logan, Emmanuel Gil Peyrot,
Linux Kernel Mailing List, Rob Herring, Paul Mackerras,
Linux Crypto Mailing List,
open list:LINUX FOR POWERPC (32-BIT AND 64-BIT), David S. Miller,
Jonathan Neuschäfer
In-Reply-To: <CAMj1kXF6RpaAsN2zUgkO0NW7gMwwhXMHEEM-wpQXxeNJbGJ79A@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 9209 bytes --]
On Wed, Sep 22, 2021 at 12:10:41PM +0200, Ard Biesheuvel wrote:
> On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
> <linkmauve@linkmauve.fr> wrote:
> >
> > This engine implements AES in CBC mode, using 128-bit keys only. It is
> > present on both the Wii and the Wii U, and is apparently identical in
> > both consoles.
> >
> > The hardware is capable of firing an interrupt when the operation is
> > done, but this driver currently uses a busy loop, I’m not too sure
> > whether it would be preferable to switch, nor how to achieve that.
> >
> > It also supports a mode where no operation is done, and thus could be
> > used as a DMA copy engine, but I don’t know how to expose that to the
> > kernel or whether it would even be useful.
> >
> > In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> > aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> > speedup.
> >
> > This driver was written based on reversed documentation, see:
> > https://wiibrew.org/wiki/Hardware/AES
> >
> > Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
> > Tested-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> # on Wii U
>
> This is redundant - everybody should test the code they submit.
Indeed, except for the comment, as I haven’t been able to test on the
Wii just yet and that’s kind of a call for doing exactly that. :)
>
> ...
> > + /* TODO: figure out how to use interrupts here, this will probably
> > + * lower throughput but let the CPU do other things while the AES
> > + * engine is doing its work. */
>
> So is it worthwhile like this? How much faster is it to use this
> accelerator rather than the CPU?
As I mentioned above, on my hardware it reaches 80.7 MiB/s using this
busy loop instead of 30.9 MiB/s using aes-generic, measured using
`cryptsetup benchmark --cipher=aes --key-size=128`. I expect the
difference would be even more pronounced on the Wii, with its CPU being
clocked lower.
I will give a try at using the interrupt, but I fully expect a lower
throughput alongside a lower CPU usage (for large requests).
>
> > + do {
> > + status = ioread32be(base + AES_CTRL);
> > + cpu_relax();
> > + } while ((status & AES_CTRL_EXEC) && --counter);
> > +
> > + /* Do we ever get called with dst ≠ src? If so we have to invalidate
> > + * dst in addition to the earlier flush of src. */
> > + if (unlikely(dst != src)) {
> > + for (i = 0; i < len; i += 32)
> > + __asm__("dcbi 0, %0" : : "r" (dst + i));
> > + __asm__("sync" : : : "memory");
> > + }
> > +
> > + return counter ? 0 : 1;
> > +}
> > +
> > +static void
> > +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> > + bool firstchunk)
> > +{
> > + u32 flags = 0;
> > + unsigned long iflags;
> > + int ret;
> > +
> > + flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> > +
> > + if (dir == AES_DIR_DECRYPT)
> > + flags |= AES_CTRL_DEC;
> > +
> > + if (!firstchunk)
> > + flags |= AES_CTRL_IV;
> > +
> > + /* Start the critical section */
> > + spin_lock_irqsave(&lock, iflags);
> > +
> > + if (firstchunk)
> > + writefield(AES_IV, iv);
> > +
> > + ret = do_crypt(src, dst, len, flags);
> > + BUG_ON(ret);
> > +
> > + spin_unlock_irqrestore(&lock, iflags);
> > +}
> > +
> > +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
> > + unsigned int len)
> > +{
> > + /* The hardware only supports AES-128 */
> > + if (len != AES_KEYSIZE_128)
> > + return -EINVAL;
> > +
> > + writefield(AES_KEY, key);
> > + return 0;
> > +}
> > +
> > +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> > +{
> > + struct skcipher_walk walk;
> > + unsigned int nbytes;
> > + int err;
> > + char ivbuf[AES_BLOCK_SIZE];
> > + unsigned int ivsize;
> > +
> > + bool firstchunk = true;
> > +
> > + /* Reset the engine */
> > + iowrite32be(0, base + AES_CTRL);
> > +
> > + err = skcipher_walk_virt(&walk, req, false);
> > + ivsize = min(sizeof(ivbuf), walk.ivsize);
> > +
> > + while ((nbytes = walk.nbytes) != 0) {
> > + unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
> > + unsigned int ret = nbytes % AES_BLOCK_SIZE;
> > +
> > + if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> > + /* If this is the last chunk and we're decrypting, take
> > + * note of the IV (which is the last ciphertext block)
> > + */
> > + memcpy(ivbuf, walk.src.virt.addr + walk.total - ivsize,
> > + ivsize);
> > + }
> > +
> > + nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
> > + chunkbytes, walk.iv, dir, firstchunk);
> > +
> > + if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
> > + /* If this is the last chunk and we're encrypting, take
> > + * note of the IV (which is the last ciphertext block)
> > + */
> > + memcpy(walk.iv,
> > + walk.dst.virt.addr + walk.total - ivsize,
> > + ivsize);
> > + } else if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> > + memcpy(walk.iv, ivbuf, ivsize);
> > + }
> > +
> > + err = skcipher_walk_done(&walk, ret);
> > + firstchunk = false;
> > + }
> > +
> > + return err;
> > +}
> > +
> > +static int nintendo_cbc_encrypt(struct skcipher_request *req)
> > +{
> > + return nintendo_skcipher_crypt(req, AES_DIR_ENCRYPT);
> > +}
> > +
> > +static int nintendo_cbc_decrypt(struct skcipher_request *req)
> > +{
> > + return nintendo_skcipher_crypt(req, AES_DIR_DECRYPT);
> > +}
> > +
> > +static struct skcipher_alg nintendo_alg = {
> > + .base.cra_name = "cbc(aes)",
> > + .base.cra_driver_name = "cbc-aes-nintendo",
> > + .base.cra_priority = 400,
> > + .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY,
> > + .base.cra_blocksize = AES_BLOCK_SIZE,
> > + .base.cra_alignmask = 15,
> > + .base.cra_module = THIS_MODULE,
> > + .setkey = nintendo_setkey_skcipher,
> > + .encrypt = nintendo_cbc_encrypt,
> > + .decrypt = nintendo_cbc_decrypt,
> > + .min_keysize = AES_KEYSIZE_128,
> > + .max_keysize = AES_KEYSIZE_128,
> > + .ivsize = AES_BLOCK_SIZE,
> > +};
> > +
> > +static int nintendo_aes_remove(struct platform_device *pdev)
> > +{
> > + struct device *dev = &pdev->dev;
> > +
> > + crypto_unregister_skcipher(&nintendo_alg);
> > + devm_iounmap(dev, base);
> > + base = NULL;
> > +
> > + return 0;
> > +}
> > +
> > +static int nintendo_aes_probe(struct platform_device *pdev)
> > +{
> > + struct device *dev = &pdev->dev;
> > + struct resource *res;
> > + int ret;
> > +
> > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > + base = devm_ioremap_resource(dev, res);
> > + if (IS_ERR(base))
> > + return PTR_ERR(base);
> > +
> > + spin_lock_init(&lock);
> > +
> > + ret = crypto_register_skcipher(&nintendo_alg);
> > + if (ret)
> > + goto eiomap;
> > +
> > + dev_notice(dev, "Nintendo Wii and Wii U AES engine enabled\n");
> > + return 0;
> > +
> > + eiomap:
> > + devm_iounmap(dev, base);
> > +
> > + dev_err(dev, "Nintendo Wii and Wii U AES initialization failed\n");
> > + return ret;
> > +}
> > +
> > +static const struct of_device_id nintendo_aes_of_match[] = {
> > + { .compatible = "nintendo,hollywood-aes", },
> > + { .compatible = "nintendo,latte-aes", },
> > + {/* sentinel */},
> > +};
> > +MODULE_DEVICE_TABLE(of, nintendo_aes_of_match);
> > +
> > +static struct platform_driver nintendo_aes_driver = {
> > + .driver = {
> > + .name = "nintendo-aes",
> > + .of_match_table = nintendo_aes_of_match,
> > + },
> > + .probe = nintendo_aes_probe,
> > + .remove = nintendo_aes_remove,
> > +};
> > +
> > +module_platform_driver(nintendo_aes_driver);
> > +
> > +MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
> > +MODULE_DESCRIPTION("Nintendo Wii and Wii U Hardware AES driver");
> > +MODULE_LICENSE("GPL");
> > --
> > 2.33.0
> >
--
Emmanuel Gil Peyrot
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply
* Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
From: Ard Biesheuvel @ 2021-09-22 10:55 UTC (permalink / raw)
To: Emmanuel Gil Peyrot
Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
Herbert Xu, Ash Logan, Linux Kernel Mailing List, Rob Herring,
Paul Mackerras, Linux Crypto Mailing List,
open list:LINUX FOR POWERPC (32-BIT AND 64-BIT), David S. Miller,
Jonathan Neuschäfer
In-Reply-To: <20210922104302.22pgaoy2vspranqj@luna>
On Wed, 22 Sept 2021 at 12:43, Emmanuel Gil Peyrot
<linkmauve@linkmauve.fr> wrote:
>
> On Wed, Sep 22, 2021 at 12:10:41PM +0200, Ard Biesheuvel wrote:
> > On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
> > <linkmauve@linkmauve.fr> wrote:
> > >
> > > This engine implements AES in CBC mode, using 128-bit keys only. It is
> > > present on both the Wii and the Wii U, and is apparently identical in
> > > both consoles.
> > >
> > > The hardware is capable of firing an interrupt when the operation is
> > > done, but this driver currently uses a busy loop, I’m not too sure
> > > whether it would be preferable to switch, nor how to achieve that.
> > >
> > > It also supports a mode where no operation is done, and thus could be
> > > used as a DMA copy engine, but I don’t know how to expose that to the
> > > kernel or whether it would even be useful.
> > >
> > > In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> > > aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> > > speedup.
> > >
> > > This driver was written based on reversed documentation, see:
> > > https://wiibrew.org/wiki/Hardware/AES
> > >
> > > Signed-off-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
> > > Tested-by: Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> # on Wii U
> >
> > This is redundant - everybody should test the code they submit.
>
> Indeed, except for the comment, as I haven’t been able to test on the
> Wii just yet and that’s kind of a call for doing exactly that. :)
>
> >
> > ...
> > > + /* TODO: figure out how to use interrupts here, this will probably
> > > + * lower throughput but let the CPU do other things while the AES
> > > + * engine is doing its work. */
> >
> > So is it worthwhile like this? How much faster is it to use this
> > accelerator rather than the CPU?
>
> As I mentioned above, on my hardware it reaches 80.7 MiB/s using this
> busy loop instead of 30.9 MiB/s using aes-generic, measured using
> `cryptsetup benchmark --cipher=aes --key-size=128`. I expect the
> difference would be even more pronounced on the Wii, with its CPU being
> clocked lower.
>
Ah apologies for not spotting that. This is a nice speedup.
> I will give a try at using the interrupt, but I fully expect a lower
> throughput alongside a lower CPU usage (for large requests).
>
You should consider latency as well. Is it really necessary to disable
interrupts as well? A scheduling blackout of ~1ms (for the worst case
of 64k of input @ 80 MB/s) may be tolerable but keeping interrupts
disabled for that long is probably not a great idea. (Just make sure
you use spin_lock_bh() to prevent deadlocks that could occur if your
code is called from softirq context)
But using the interrupt is obviously preferred. What's wrong with it?
Btw the crypto API does not permit AES-128 only - you will need to add
a fallback for other key sizes as well.
> >
> > > + do {
> > > + status = ioread32be(base + AES_CTRL);
> > > + cpu_relax();
> > > + } while ((status & AES_CTRL_EXEC) && --counter);
> > > +
> > > + /* Do we ever get called with dst ≠ src? If so we have to invalidate
> > > + * dst in addition to the earlier flush of src. */
> > > + if (unlikely(dst != src)) {
> > > + for (i = 0; i < len; i += 32)
> > > + __asm__("dcbi 0, %0" : : "r" (dst + i));
> > > + __asm__("sync" : : : "memory");
> > > + }
> > > +
> > > + return counter ? 0 : 1;
> > > +}
> > > +
> > > +static void
> > > +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> > > + bool firstchunk)
> > > +{
> > > + u32 flags = 0;
> > > + unsigned long iflags;
> > > + int ret;
> > > +
> > > + flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> > > +
> > > + if (dir == AES_DIR_DECRYPT)
> > > + flags |= AES_CTRL_DEC;
> > > +
> > > + if (!firstchunk)
> > > + flags |= AES_CTRL_IV;
> > > +
> > > + /* Start the critical section */
> > > + spin_lock_irqsave(&lock, iflags);
> > > +
> > > + if (firstchunk)
> > > + writefield(AES_IV, iv);
> > > +
> > > + ret = do_crypt(src, dst, len, flags);
> > > + BUG_ON(ret);
> > > +
> > > + spin_unlock_irqrestore(&lock, iflags);
> > > +}
> > > +
> > > +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
> > > + unsigned int len)
> > > +{
> > > + /* The hardware only supports AES-128 */
> > > + if (len != AES_KEYSIZE_128)
> > > + return -EINVAL;
> > > +
> > > + writefield(AES_KEY, key);
> > > + return 0;
> > > +}
> > > +
> > > +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> > > +{
> > > + struct skcipher_walk walk;
> > > + unsigned int nbytes;
> > > + int err;
> > > + char ivbuf[AES_BLOCK_SIZE];
> > > + unsigned int ivsize;
> > > +
> > > + bool firstchunk = true;
> > > +
> > > + /* Reset the engine */
> > > + iowrite32be(0, base + AES_CTRL);
> > > +
> > > + err = skcipher_walk_virt(&walk, req, false);
> > > + ivsize = min(sizeof(ivbuf), walk.ivsize);
> > > +
> > > + while ((nbytes = walk.nbytes) != 0) {
> > > + unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
> > > + unsigned int ret = nbytes % AES_BLOCK_SIZE;
> > > +
> > > + if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> > > + /* If this is the last chunk and we're decrypting, take
> > > + * note of the IV (which is the last ciphertext block)
> > > + */
> > > + memcpy(ivbuf, walk.src.virt.addr + walk.total - ivsize,
> > > + ivsize);
> > > + }
> > > +
> > > + nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
> > > + chunkbytes, walk.iv, dir, firstchunk);
> > > +
> > > + if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
> > > + /* If this is the last chunk and we're encrypting, take
> > > + * note of the IV (which is the last ciphertext block)
> > > + */
> > > + memcpy(walk.iv,
> > > + walk.dst.virt.addr + walk.total - ivsize,
> > > + ivsize);
> > > + } else if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> > > + memcpy(walk.iv, ivbuf, ivsize);
> > > + }
> > > +
> > > + err = skcipher_walk_done(&walk, ret);
> > > + firstchunk = false;
> > > + }
> > > +
> > > + return err;
> > > +}
> > > +
> > > +static int nintendo_cbc_encrypt(struct skcipher_request *req)
> > > +{
> > > + return nintendo_skcipher_crypt(req, AES_DIR_ENCRYPT);
> > > +}
> > > +
> > > +static int nintendo_cbc_decrypt(struct skcipher_request *req)
> > > +{
> > > + return nintendo_skcipher_crypt(req, AES_DIR_DECRYPT);
> > > +}
> > > +
> > > +static struct skcipher_alg nintendo_alg = {
> > > + .base.cra_name = "cbc(aes)",
> > > + .base.cra_driver_name = "cbc-aes-nintendo",
> > > + .base.cra_priority = 400,
> > > + .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY,
> > > + .base.cra_blocksize = AES_BLOCK_SIZE,
> > > + .base.cra_alignmask = 15,
> > > + .base.cra_module = THIS_MODULE,
> > > + .setkey = nintendo_setkey_skcipher,
> > > + .encrypt = nintendo_cbc_encrypt,
> > > + .decrypt = nintendo_cbc_decrypt,
> > > + .min_keysize = AES_KEYSIZE_128,
> > > + .max_keysize = AES_KEYSIZE_128,
> > > + .ivsize = AES_BLOCK_SIZE,
> > > +};
> > > +
> > > +static int nintendo_aes_remove(struct platform_device *pdev)
> > > +{
> > > + struct device *dev = &pdev->dev;
> > > +
> > > + crypto_unregister_skcipher(&nintendo_alg);
> > > + devm_iounmap(dev, base);
> > > + base = NULL;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static int nintendo_aes_probe(struct platform_device *pdev)
> > > +{
> > > + struct device *dev = &pdev->dev;
> > > + struct resource *res;
> > > + int ret;
> > > +
> > > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > > + base = devm_ioremap_resource(dev, res);
> > > + if (IS_ERR(base))
> > > + return PTR_ERR(base);
> > > +
> > > + spin_lock_init(&lock);
> > > +
> > > + ret = crypto_register_skcipher(&nintendo_alg);
> > > + if (ret)
> > > + goto eiomap;
> > > +
> > > + dev_notice(dev, "Nintendo Wii and Wii U AES engine enabled\n");
> > > + return 0;
> > > +
> > > + eiomap:
> > > + devm_iounmap(dev, base);
> > > +
> > > + dev_err(dev, "Nintendo Wii and Wii U AES initialization failed\n");
> > > + return ret;
> > > +}
> > > +
> > > +static const struct of_device_id nintendo_aes_of_match[] = {
> > > + { .compatible = "nintendo,hollywood-aes", },
> > > + { .compatible = "nintendo,latte-aes", },
> > > + {/* sentinel */},
> > > +};
> > > +MODULE_DEVICE_TABLE(of, nintendo_aes_of_match);
> > > +
> > > +static struct platform_driver nintendo_aes_driver = {
> > > + .driver = {
> > > + .name = "nintendo-aes",
> > > + .of_match_table = nintendo_aes_of_match,
> > > + },
> > > + .probe = nintendo_aes_probe,
> > > + .remove = nintendo_aes_remove,
> > > +};
> > > +
> > > +module_platform_driver(nintendo_aes_driver);
> > > +
> > > +MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>");
> > > +MODULE_DESCRIPTION("Nintendo Wii and Wii U Hardware AES driver");
> > > +MODULE_LICENSE("GPL");
> > > --
> > > 2.33.0
> > >
>
> --
> Emmanuel Gil Peyrot
^ permalink raw reply
* Re: [PATCH] powerpc/code-patching: Return error on patch_branch() out-of-range failure
From: Naveen N. Rao @ 2021-09-22 12:06 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
Paul Mackerras
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <4940b03de220d1dfe2c6b47a41e60925497ce125.1630657331.git.christophe.leroy@csgroup.eu>
Christophe Leroy wrote:
> Do not silentely ignore a failure of create_branch() in
> patch_branch(). Return -ERANGE.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> arch/powerpc/lib/code-patching.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
>
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index f9a3019e37b4..0bc9cc0416b8 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -202,7 +202,9 @@ int patch_branch(u32 *addr, unsigned long target, int flags)
> {
> struct ppc_inst instr;
>
> - create_branch(&instr, addr, target, flags);
> + if (create_branch(&instr, addr, target, flags))
> + return -ERANGE;
> +
> return patch_instruction(addr, instr);
> }
>
> --
> 2.25.0
>
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox