* Re: [PATCH V2 2/2] pinctrl: bcm: add driver for BCM4908 pinmux
From: Rafał Miłecki @ 2022-01-05 12:34 UTC (permalink / raw)
To: kbuild-all
In-Reply-To: <202201052056.f2jSr4cB-lkp@intel.com>
[-- Attachment #1: Type: text/plain, Size: 2627 bytes --]
Hi Andy,
On 5.01.2022 13:24, kernel test robot wrote:
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on linusw-pinctrl/devel]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/0day-ci/linux/commits/Rafa-Mi-ecki/dt-bindings-pinctrl-Add-binding-for-BCM4908-pinctrl/20211222-191252
> base: https://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git devel
> config: ia64-randconfig-r036-20220105 (https://download.01.org/0day-ci/archive/20220105/202201052056.f2jSr4cB-lkp(a)intel.com/config)
> compiler: ia64-linux-gcc (GCC) 11.2.0
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # https://github.com/0day-ci/linux/commit/337a257cc34c7f7e883bf90ccade5bf4fb71684b
> git remote add linux-review https://github.com/0day-ci/linux
> git fetch --no-tags linux-review Rafa-Mi-ecki/dt-bindings-pinctrl-Add-binding-for-BCM4908-pinctrl/20211222-191252
> git checkout 337a257cc34c7f7e883bf90ccade5bf4fb71684b
> # save the config file to linux build tree
> mkdir build_dir
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=ia64 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
> ia64-linux-ld: drivers/pinctrl/bcm/pinctrl-bcm4908.o: in function `pinconf_generic_dt_node_to_map_group':
>>> pinctrl-bcm4908.c:(.text+0x262): undefined reference to `pinconf_generic_dt_node_to_map'
>>> ia64-linux-ld: drivers/pinctrl/bcm/pinctrl-bcm4908.o:(.data.rel.ro+0x78): undefined reference to `pinconf_generic_dt_free_map'
Above answers your old question (see below) ;)
On 16.12.2021 20:55, Andy Shevchenko wrote:> On Thu, Dec 16, 2021 at 1:25 AM Rafał Miłecki <zajec5@gmail.com> wrote:
>>
>> From: Rafał Miłecki <rafal@milecki.pl>
>>
>> BCM4908 has its own pins layout so it needs a custom binding and a Linux
>> driver.
>
> ...
>
>> +config PINCTRL_BCM4908
>> + bool "Broadcom BCM4908 pinmux driver"
>
> Why not tristate?
>
>> + depends on OF && (ARCH_BCM4908 || COMPILE_TEST)
>
> Is there really dependency to OF?
Yes, without OF pinconf_generic_dt_node_to_map() is not available.
^ permalink raw reply
* Re: Linux 5.4.170
From: Greg Kroah-Hartman @ 2022-01-05 12:34 UTC (permalink / raw)
To: linux-kernel, akpm, torvalds, stable; +Cc: lwn, jslaby, Greg Kroah-Hartman
In-Reply-To: <16413860522474@kroah.com>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e7f71df9daf1..165abcb656c5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1487,6 +1487,8 @@
architectures force reset to be always executed
i8042.unlock [HW] Unlock (ignore) the keylock
i8042.kbdreset [HW] Reset device connected to KBD port
+ i8042.probe_defer
+ [HW] Allow deferred probing upon i8042 probe errors
i810= [HW,DRM]
diff --git a/Makefile b/Makefile
index 151fd2454012..7380354e4951 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 4
-SUBLEVEL = 169
+SUBLEVEL = 170
EXTRAVERSION =
NAME = Kleptomaniac Octopus
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 3526bb1488e5..b5022a7f6bae 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -613,7 +613,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc,
BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size);
if (buffer->async_transaction) {
- alloc->free_async_space += size + sizeof(struct binder_buffer);
+ alloc->free_async_space += buffer_size + sizeof(struct binder_buffer);
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
"%d: binder_free_buf size %zd async free %zd\n",
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 0cfcfb116a03..5169a38ee47a 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -149,6 +149,7 @@ config HID_APPLEIR
config HID_ASUS
tristate "Asus"
+ depends on USB_HID
depends on LEDS_CLASS
depends on ASUS_WMI || ASUS_WMI=n
select POWER_SUPPLY
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index c9ae1895cd48..7da6ca26a5f5 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -536,6 +536,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo
sizeof(rdwr_arg)))
return -EFAULT;
+ if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0)
+ return -EINVAL;
+
if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS)
return -EINVAL;
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index cf7cbcd0c29d..697141299069 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/input.h>
#include <linux/serio.h>
+#include <asm/unaligned.h>
#define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver"
@@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball)
case 'D': /* Ball data */
if (spaceball->idx != 15) return;
- for (i = 0; i < 6; i++)
+ /*
+ * Skip first three bytes; read six axes worth of data.
+ * Axis values are signed 16-bit big-endian.
+ */
+ data += 3;
+ for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) {
input_report_abs(dev, spaceball_axes[i],
- (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2]));
+ (__s16)get_unaligned_be16(&data[i * 2]));
+ }
break;
case 'K': /* Button data */
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index 3f06e8a495d8..ff9438841664 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface,
set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
set_bit(BTN_LEFT, input_dev->keybit);
+ INIT_WORK(&dev->work, atp_reinit);
+
error = input_register_device(dev->input);
if (error)
goto err_free_buffer;
@@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface,
/* save our data pointer in this interface device */
usb_set_intfdata(iface, dev);
- INIT_WORK(&dev->work, atp_reinit);
-
return 0;
err_free_buffer:
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 202e43a6ffae..0282c4c55e9d 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -995,6 +995,24 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = {
{ }
};
+static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = {
+ {
+ /* ASUS ZenBook UX425UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"),
+ },
+ },
+ {
+ /* ASUS ZenBook UM325UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"),
+ },
+ },
+ { }
+};
+
#endif /* CONFIG_X86 */
#ifdef CONFIG_PNP
@@ -1314,6 +1332,9 @@ static int __init i8042_platform_init(void)
if (dmi_check_system(i8042_dmi_kbdreset_table))
i8042_kbdreset = true;
+ if (dmi_check_system(i8042_dmi_probe_defer_table))
+ i8042_probe_defer = true;
+
/*
* A20 was already enabled during early kernel init. But some buggy
* BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 6ff6b5710dd4..bb76ff2f6b1d 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -44,6 +44,10 @@ static bool i8042_unlock;
module_param_named(unlock, i8042_unlock, bool, 0);
MODULE_PARM_DESC(unlock, "Ignore keyboard lock.");
+static bool i8042_probe_defer;
+module_param_named(probe_defer, i8042_probe_defer, bool, 0);
+MODULE_PARM_DESC(probe_defer, "Allow deferred probing.");
+
enum i8042_controller_reset_mode {
I8042_RESET_NEVER,
I8042_RESET_ALWAYS,
@@ -709,7 +713,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version)
* LCS/Telegraphics.
*/
-static int __init i8042_check_mux(void)
+static int i8042_check_mux(void)
{
unsigned char mux_version;
@@ -738,10 +742,10 @@ static int __init i8042_check_mux(void)
/*
* The following is used to test AUX IRQ delivery.
*/
-static struct completion i8042_aux_irq_delivered __initdata;
-static bool i8042_irq_being_tested __initdata;
+static struct completion i8042_aux_irq_delivered;
+static bool i8042_irq_being_tested;
-static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
+static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id)
{
unsigned long flags;
unsigned char str, data;
@@ -768,7 +772,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
* verifies success by readinng CTR. Used when testing for presence of AUX
* port.
*/
-static int __init i8042_toggle_aux(bool on)
+static int i8042_toggle_aux(bool on)
{
unsigned char param;
int i;
@@ -796,7 +800,7 @@ static int __init i8042_toggle_aux(bool on)
* the presence of an AUX interface.
*/
-static int __init i8042_check_aux(void)
+static int i8042_check_aux(void)
{
int retval = -1;
bool irq_registered = false;
@@ -1003,7 +1007,7 @@ static int i8042_controller_init(void)
if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) {
pr_err("Can't read CTR while initializing i8042\n");
- return -EIO;
+ return i8042_probe_defer ? -EPROBE_DEFER : -EIO;
}
} while (n < 2 || ctr[0] != ctr[1]);
@@ -1318,7 +1322,7 @@ static void i8042_shutdown(struct platform_device *dev)
i8042_controller_reset(false);
}
-static int __init i8042_create_kbd_port(void)
+static int i8042_create_kbd_port(void)
{
struct serio *serio;
struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO];
@@ -1346,7 +1350,7 @@ static int __init i8042_create_kbd_port(void)
return 0;
}
-static int __init i8042_create_aux_port(int idx)
+static int i8042_create_aux_port(int idx)
{
struct serio *serio;
int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx;
@@ -1383,13 +1387,13 @@ static int __init i8042_create_aux_port(int idx)
return 0;
}
-static void __init i8042_free_kbd_port(void)
+static void i8042_free_kbd_port(void)
{
kfree(i8042_ports[I8042_KBD_PORT_NO].serio);
i8042_ports[I8042_KBD_PORT_NO].serio = NULL;
}
-static void __init i8042_free_aux_ports(void)
+static void i8042_free_aux_ports(void)
{
int i;
@@ -1399,7 +1403,7 @@ static void __init i8042_free_aux_ports(void)
}
}
-static void __init i8042_register_ports(void)
+static void i8042_register_ports(void)
{
int i;
@@ -1440,7 +1444,7 @@ static void i8042_free_irqs(void)
i8042_aux_irq_registered = i8042_kbd_irq_registered = false;
}
-static int __init i8042_setup_aux(void)
+static int i8042_setup_aux(void)
{
int (*aux_enable)(void);
int error;
@@ -1482,7 +1486,7 @@ static int __init i8042_setup_aux(void)
return error;
}
-static int __init i8042_setup_kbd(void)
+static int i8042_setup_kbd(void)
{
int error;
@@ -1532,7 +1536,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb,
return 0;
}
-static int __init i8042_probe(struct platform_device *dev)
+static int i8042_probe(struct platform_device *dev)
{
int error;
@@ -1597,6 +1601,7 @@ static struct platform_driver i8042_driver = {
.pm = &i8042_pm_ops,
#endif
},
+ .probe = i8042_probe,
.remove = i8042_remove,
.shutdown = i8042_shutdown,
};
@@ -1607,7 +1612,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = {
static int __init i8042_init(void)
{
- struct platform_device *pdev;
int err;
dbg_init();
@@ -1623,17 +1627,29 @@ static int __init i8042_init(void)
/* Set this before creating the dev to allow i8042_command to work right away */
i8042_present = true;
- pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0);
- if (IS_ERR(pdev)) {
- err = PTR_ERR(pdev);
+ err = platform_driver_register(&i8042_driver);
+ if (err)
goto err_platform_exit;
+
+ i8042_platform_device = platform_device_alloc("i8042", -1);
+ if (!i8042_platform_device) {
+ err = -ENOMEM;
+ goto err_unregister_driver;
}
+ err = platform_device_add(i8042_platform_device);
+ if (err)
+ goto err_free_device;
+
bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block);
panic_blink = i8042_panic_blink;
return 0;
+err_free_device:
+ platform_device_put(i8042_platform_device);
+err_unregister_driver:
+ platform_driver_unregister(&i8042_driver);
err_platform_exit:
i8042_platform_exit();
return err;
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index 47f6fee1f396..1812434cda84 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1791,7 +1791,7 @@ static int fman_port_probe(struct platform_device *of_dev)
fman = dev_get_drvdata(&fm_pdev->dev);
if (!fman) {
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
err = of_property_read_u32(port_node, "cell-index", &val);
@@ -1799,7 +1799,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
__func__, port_node);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port_id = (u8)val;
port->dts_params.id = port_id;
@@ -1833,7 +1833,7 @@ static int fman_port_probe(struct platform_device *of_dev)
} else {
dev_err(port->dev, "%s: Illegal port type\n", __func__);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port->dts_params.type = port_type;
@@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: incorrect qman-channel-id\n",
__func__);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port->dts_params.qman_channel_id = qman_channel_id;
}
@@ -1857,7 +1857,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: of_address_to_resource() failed\n",
__func__);
err = -ENOMEM;
- goto return_err;
+ goto put_device;
}
port->dts_params.fman = fman;
@@ -1882,6 +1882,8 @@ static int fman_port_probe(struct platform_device *of_dev)
return 0;
+put_device:
+ put_device(&fm_pdev->dev);
return_err:
of_node_put(port_node);
free_port:
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 6e504854571c..94541bf889a2 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
skb->protocol = eth_type_trans(skb, net_dev);
netif_receive_skb(skb);
net_dev->stats.rx_packets++;
- net_dev->stats.rx_bytes += len - ETH_FCS_LEN;
+ net_dev->stats.rx_bytes += len;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5f4f0f61c83c..dea884c94568 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3907,12 +3907,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
static int mlx5e_handle_feature(struct net_device *netdev,
netdev_features_t *features,
- netdev_features_t wanted_features,
netdev_features_t feature,
mlx5e_feature_handler feature_handler)
{
- netdev_features_t changes = wanted_features ^ netdev->features;
- bool enable = !!(wanted_features & feature);
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
int err;
if (!(changes & feature))
@@ -3920,22 +3919,22 @@ static int mlx5e_handle_feature(struct net_device *netdev,
err = feature_handler(netdev, enable);
if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
netdev_err(netdev, "%s feature %pNF failed, err %d\n",
enable ? "Enable" : "Disable", &feature, err);
return err;
}
- MLX5E_SET_FEATURE(features, feature, enable);
return 0;
}
int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
- netdev_features_t oper_features = netdev->features;
+ netdev_features_t oper_features = features;
int err = 0;
#define MLX5E_HANDLE_FEATURE(feature, handler) \
- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 56bf900eb753..dbdb6a9592f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/mlx5/eswitch.h>
+#include <linux/err.h>
#include "dr_types.h"
static int dr_domain_init_cache(struct mlx5dr_domain *dmn)
@@ -64,9 +65,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
}
dmn->uar = mlx5_get_uars_page(dmn->mdev);
- if (!dmn->uar) {
+ if (IS_ERR(dmn->uar)) {
mlx5dr_err(dmn, "Couldn't allocate UAR\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(dmn->uar);
goto clean_pd;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 99ba3551458f..f9c303d76658 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1995,7 +1995,7 @@ static int ionic_lif_init(struct ionic_lif *lif)
return -EINVAL;
}
- lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
+ lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
if (!lif->dbid_inuse) {
dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
return -ENOMEM;
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index b744c09346a7..dda051c94fb4 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -495,11 +495,11 @@ static void read_bulk_callback(struct urb *urb)
goto goon;
rx_status = buf[count - 2];
- if (rx_status & 0x1e) {
+ if (rx_status & 0x1c) {
netif_dbg(pegasus, rx_err, net,
"RX packet error %x\n", rx_status);
net->stats.rx_errors++;
- if (rx_status & 0x06) /* long or runt */
+ if (rx_status & 0x04) /* runt */
net->stats.rx_length_errors++;
if (rx_status & 0x08)
net->stats.rx_crc_errors++;
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 23ed11f91213..6ea59426ab0b 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -533,7 +533,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
if (IS_ERR(phy->gpiod_ena)) {
nfc_err(dev, "Unable to get ENABLE GPIO\n");
- return PTR_ERR(phy->gpiod_ena);
+ r = PTR_ERR(phy->gpiod_ena);
+ goto out_free;
}
phy->se_status.is_ese_present =
@@ -544,7 +545,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
r = st21nfca_hci_platform_init(phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to reboot st21nfca\n");
- return r;
+ goto out_free;
}
r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
@@ -553,15 +554,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
ST21NFCA_HCI_DRIVER_NAME, phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to register IRQ handler\n");
- return r;
+ goto out_free;
}
- return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
- ST21NFCA_FRAME_HEADROOM,
- ST21NFCA_FRAME_TAILROOM,
- ST21NFCA_HCI_LLC_MAX_PAYLOAD,
- &phy->hdev,
- &phy->se_status);
+ r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+ ST21NFCA_FRAME_HEADROOM,
+ ST21NFCA_FRAME_TAILROOM,
+ ST21NFCA_HCI_LLC_MAX_PAYLOAD,
+ &phy->hdev,
+ &phy->se_status);
+ if (r)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ kfree_skb(phy->pending_skb);
+ return r;
}
static int st21nfca_hci_i2c_remove(struct i2c_client *client)
@@ -574,6 +583,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
if (phy->powered)
st21nfca_hci_i2c_disable(phy);
+ if (phy->pending_skb)
+ kfree_skb(phy->pending_skb);
return 0;
}
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 7e3083deb1c5..1b86005c0f5f 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
}
gmux_data->iostart = res->start;
- gmux_data->iolen = res->end - res->start;
+ gmux_data->iolen = resource_size(res);
if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
pr_err("gmux I/O region too small (%lu < %u)\n",
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 3c9248d2435e..e15bb3dfe995 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -2757,8 +2757,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf,
char mybuf[64];
char *pbuf;
- if (nbytes > 64)
- nbytes = 64;
+ if (nbytes > 63)
+ nbytes = 63;
memset(mybuf, 0, sizeof(mybuf));
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 0ac342b1deb9..0370ff6dd2ef 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -578,9 +578,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
* Commands like INQUIRY may transfer less data than
* requested by the initiator via bufflen. Set residual
* count to make upper layer aware of the actual amount
- * of data returned.
+ * of data returned. There are cases when controller
+ * returns zero dataLen with non zero data - do not set
+ * residual count in that case.
*/
- scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+ if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
+ scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
cmd->result = (DID_OK << 16);
break;
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index d6491e973fa4..0d5ae8053049 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -1,26 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) 2015-2016, Linaro Limited
+ * Copyright (c) 2015-2017, 2019-2021 Linaro Limited
*/
+#include <linux/anon_inodes.h>
#include <linux/device.h>
-#include <linux/dma-buf.h>
-#include <linux/fdtable.h>
#include <linux/idr.h>
+#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/tee_drv.h>
#include "tee_private.h"
-static void tee_shm_release(struct tee_shm *shm)
+static void tee_shm_release(struct tee_device *teedev, struct tee_shm *shm)
{
- struct tee_device *teedev = shm->teedev;
-
- mutex_lock(&teedev->mutex);
- idr_remove(&teedev->idr, shm->id);
- if (shm->ctx)
- list_del(&shm->link);
- mutex_unlock(&teedev->mutex);
-
if (shm->flags & TEE_SHM_POOL) {
struct tee_shm_pool_mgr *poolm;
@@ -52,51 +44,6 @@ static void tee_shm_release(struct tee_shm *shm)
tee_device_put(teedev);
}
-static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment
- *attach, enum dma_data_direction dir)
-{
- return NULL;
-}
-
-static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach,
- struct sg_table *table,
- enum dma_data_direction dir)
-{
-}
-
-static void tee_shm_op_release(struct dma_buf *dmabuf)
-{
- struct tee_shm *shm = dmabuf->priv;
-
- tee_shm_release(shm);
-}
-
-static void *tee_shm_op_map(struct dma_buf *dmabuf, unsigned long pgnum)
-{
- return NULL;
-}
-
-static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
-{
- struct tee_shm *shm = dmabuf->priv;
- size_t size = vma->vm_end - vma->vm_start;
-
- /* Refuse sharing shared memory provided by application */
- if (shm->flags & TEE_SHM_REGISTER)
- return -EINVAL;
-
- return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
- size, vma->vm_page_prot);
-}
-
-static const struct dma_buf_ops tee_shm_dma_buf_ops = {
- .map_dma_buf = tee_shm_op_map_dma_buf,
- .unmap_dma_buf = tee_shm_op_unmap_dma_buf,
- .release = tee_shm_op_release,
- .map = tee_shm_op_map,
- .mmap = tee_shm_op_mmap,
-};
-
static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx,
struct tee_device *teedev,
size_t size, u32 flags)
@@ -137,6 +84,7 @@ static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx,
goto err_dev_put;
}
+ refcount_set(&shm->refcount, 1);
shm->flags = flags | TEE_SHM_POOL;
shm->teedev = teedev;
shm->ctx = ctx;
@@ -159,21 +107,6 @@ static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx,
goto err_pool_free;
}
- if (flags & TEE_SHM_DMA_BUF) {
- DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-
- exp_info.ops = &tee_shm_dma_buf_ops;
- exp_info.size = shm->size;
- exp_info.flags = O_RDWR;
- exp_info.priv = shm;
-
- shm->dmabuf = dma_buf_export(&exp_info);
- if (IS_ERR(shm->dmabuf)) {
- ret = ERR_CAST(shm->dmabuf);
- goto err_rem;
- }
- }
-
if (ctx) {
teedev_ctx_get(ctx);
mutex_lock(&teedev->mutex);
@@ -182,10 +115,6 @@ static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx,
}
return shm;
-err_rem:
- mutex_lock(&teedev->mutex);
- idr_remove(&teedev->idr, shm->id);
- mutex_unlock(&teedev->mutex);
err_pool_free:
poolm->ops->free(poolm, shm);
err_kfree:
@@ -268,6 +197,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
goto err;
}
+ refcount_set(&shm->refcount, 1);
shm->flags = flags | TEE_SHM_REGISTER;
shm->teedev = teedev;
shm->ctx = ctx;
@@ -309,22 +239,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
goto err;
}
- if (flags & TEE_SHM_DMA_BUF) {
- DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-
- exp_info.ops = &tee_shm_dma_buf_ops;
- exp_info.size = shm->size;
- exp_info.flags = O_RDWR;
- exp_info.priv = shm;
-
- shm->dmabuf = dma_buf_export(&exp_info);
- if (IS_ERR(shm->dmabuf)) {
- ret = ERR_CAST(shm->dmabuf);
- teedev->desc->ops->shm_unregister(ctx, shm);
- goto err;
- }
- }
-
mutex_lock(&teedev->mutex);
list_add_tail(&shm->link, &ctx->list_shm);
mutex_unlock(&teedev->mutex);
@@ -352,6 +266,35 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
}
EXPORT_SYMBOL_GPL(tee_shm_register);
+static int tee_shm_fop_release(struct inode *inode, struct file *filp)
+{
+ tee_shm_put(filp->private_data);
+ return 0;
+}
+
+static int tee_shm_fop_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct tee_shm *shm = filp->private_data;
+ size_t size = vma->vm_end - vma->vm_start;
+
+ /* Refuse sharing shared memory provided by application */
+ if (shm->flags & TEE_SHM_USER_MAPPED)
+ return -EINVAL;
+
+ /* check for overflowing the buffer's size */
+ if (vma->vm_pgoff + vma_pages(vma) > shm->size >> PAGE_SHIFT)
+ return -EINVAL;
+
+ return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
+ size, vma->vm_page_prot);
+}
+
+static const struct file_operations tee_shm_fops = {
+ .owner = THIS_MODULE,
+ .release = tee_shm_fop_release,
+ .mmap = tee_shm_fop_mmap,
+};
+
/**
* tee_shm_get_fd() - Increase reference count and return file descriptor
* @shm: Shared memory handle
@@ -364,10 +307,11 @@ int tee_shm_get_fd(struct tee_shm *shm)
if (!(shm->flags & TEE_SHM_DMA_BUF))
return -EINVAL;
- get_dma_buf(shm->dmabuf);
- fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
+ /* matched by tee_shm_put() in tee_shm_op_release() */
+ refcount_inc(&shm->refcount);
+ fd = anon_inode_getfd("tee_shm", &tee_shm_fops, shm, O_RDWR);
if (fd < 0)
- dma_buf_put(shm->dmabuf);
+ tee_shm_put(shm);
return fd;
}
@@ -377,17 +321,7 @@ int tee_shm_get_fd(struct tee_shm *shm)
*/
void tee_shm_free(struct tee_shm *shm)
{
- /*
- * dma_buf_put() decreases the dmabuf reference counter and will
- * call tee_shm_release() when the last reference is gone.
- *
- * In the case of driver private memory we call tee_shm_release
- * directly instead as it doesn't have a reference counter.
- */
- if (shm->flags & TEE_SHM_DMA_BUF)
- dma_buf_put(shm->dmabuf);
- else
- tee_shm_release(shm);
+ tee_shm_put(shm);
}
EXPORT_SYMBOL_GPL(tee_shm_free);
@@ -494,10 +428,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id)
teedev = ctx->teedev;
mutex_lock(&teedev->mutex);
shm = idr_find(&teedev->idr, id);
+ /*
+ * If the tee_shm was found in the IDR it must have a refcount
+ * larger than 0 due to the guarantee in tee_shm_put() below. So
+ * it's safe to use refcount_inc().
+ */
if (!shm || shm->ctx != ctx)
shm = ERR_PTR(-EINVAL);
- else if (shm->flags & TEE_SHM_DMA_BUF)
- get_dma_buf(shm->dmabuf);
+ else
+ refcount_inc(&shm->refcount);
mutex_unlock(&teedev->mutex);
return shm;
}
@@ -509,7 +448,25 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id);
*/
void tee_shm_put(struct tee_shm *shm)
{
- if (shm->flags & TEE_SHM_DMA_BUF)
- dma_buf_put(shm->dmabuf);
+ struct tee_device *teedev = shm->teedev;
+ bool do_release = false;
+
+ mutex_lock(&teedev->mutex);
+ if (refcount_dec_and_test(&shm->refcount)) {
+ /*
+ * refcount has reached 0, we must now remove it from the
+ * IDR before releasing the mutex. This will guarantee that
+ * the refcount_inc() in tee_shm_get_from_id() never starts
+ * from 0.
+ */
+ idr_remove(&teedev->idr, shm->id);
+ if (shm->ctx)
+ list_del(&shm->link);
+ do_release = true;
+ }
+ mutex_unlock(&teedev->mutex);
+
+ if (do_release)
+ tee_shm_release(teedev, shm);
}
EXPORT_SYMBOL_GPL(tee_shm_put);
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 94ccf43368df..3f5c21f7f990 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1791,11 +1791,15 @@ static void ffs_data_clear(struct ffs_data *ffs)
BUG_ON(ffs->gadget);
- if (ffs->epfiles)
+ if (ffs->epfiles) {
ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
+ ffs->epfiles = NULL;
+ }
- if (ffs->ffs_eventfd)
+ if (ffs->ffs_eventfd) {
eventfd_ctx_put(ffs->ffs_eventfd);
+ ffs->ffs_eventfd = NULL;
+ }
kfree(ffs->raw_descs_data);
kfree(ffs->raw_strings);
@@ -1808,7 +1812,6 @@ static void ffs_data_reset(struct ffs_data *ffs)
ffs_data_clear(ffs);
- ffs->epfiles = NULL;
ffs->raw_descs_data = NULL;
ffs->raw_descs = NULL;
ffs->raw_strings = NULL;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index ded05c39e4d1..e7533787db41 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -108,7 +108,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
/* Look for vendor-specific quirks */
if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
(pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK ||
- pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 ||
pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) {
if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK &&
pdev->revision == 0x0) {
@@ -143,6 +142,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
xhci->quirks |= XHCI_BROKEN_STREAMS;
+ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c
index 619c4598e64e..253c8b71d3c4 100644
--- a/drivers/usb/mtu3/mtu3_gadget.c
+++ b/drivers/usb/mtu3/mtu3_gadget.c
@@ -100,6 +100,13 @@ static int mtu3_ep_enable(struct mtu3_ep *mep)
interval = clamp_val(interval, 1, 16) - 1;
mult = usb_endpoint_maxp_mult(desc) - 1;
}
+ break;
+ case USB_SPEED_FULL:
+ if (usb_endpoint_xfer_isoc(desc))
+ interval = clamp_val(desc->bInterval, 1, 16);
+ else if (usb_endpoint_xfer_int(desc))
+ interval = clamp_val(desc->bInterval, 1, 255);
+
break;
default:
break; /*others are ignored */
@@ -245,6 +252,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags)
mreq->request.dma = DMA_ADDR_INVALID;
mreq->epnum = mep->epnum;
mreq->mep = mep;
+ INIT_LIST_HEAD(&mreq->list);
trace_mtu3_alloc_request(mreq);
return &mreq->request;
diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c
index 3f414f91b589..2ea3157ddb6e 100644
--- a/drivers/usb/mtu3/mtu3_qmu.c
+++ b/drivers/usb/mtu3/mtu3_qmu.c
@@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP);
}
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
mreq->gpd = gpd;
@@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma));
ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma));
gpd->dw3_info = cpu_to_le32(ext_addr);
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
mreq->gpd = gpd;
@@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum)
return;
}
mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY);
-
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
/* by pass the current GDP */
gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO);
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f491690d54c6..64b971b2542d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -351,8 +351,8 @@ phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
phys_addr_t start, phys_addr_t end);
phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
-static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
- phys_addr_t align)
+static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
+ phys_addr_t align)
{
return memblock_phys_alloc_range(size, align, 0,
MEMBLOCK_ALLOC_ACCESSIBLE);
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index cd15c1b7fae0..e08ace76eba6 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -178,7 +178,7 @@ void tee_device_unregister(struct tee_device *teedev);
* @offset: offset of buffer in user space
* @pages: locked pages from userspace
* @num_pages: number of locked pages
- * @dmabuf: dmabuf used to for exporting to user space
+ * @refcount: reference counter
* @flags: defined by TEE_SHM_* in tee_drv.h
* @id: unique id of a shared memory object on this device
*
@@ -195,7 +195,7 @@ struct tee_shm {
unsigned int offset;
struct page **pages;
size_t num_pages;
- struct dma_buf *dmabuf;
+ refcount_t refcount;
u32 flags;
int id;
};
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3ab5c6bbb90b..35c108a6b872 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -103,6 +103,7 @@ extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
@@ -113,9 +114,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr, void *p);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p);
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p);
int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
struct sctp_info *info);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fd7c3f76040c..cb05e503c9cd 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1345,6 +1345,7 @@ struct sctp_endpoint {
u32 secid;
u32 peer_secid;
+ struct rcu_head rcu;
};
/* Recover the outter endpoint structure. */
@@ -1360,7 +1361,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
void sctp_endpoint_free(struct sctp_endpoint *);
void sctp_endpoint_put(struct sctp_endpoint *);
-void sctp_endpoint_hold(struct sctp_endpoint *);
+int sctp_endpoint_hold(struct sctp_endpoint *ep);
void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index f6e3c8c9c744..4fa4e979e948 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -263,7 +263,7 @@ enum nfc_sdp_attr {
#define NFC_SE_ENABLED 0x1
struct sockaddr_nfc {
- sa_family_t sa_family;
+ __kernel_sa_family_t sa_family;
__u32 dev_idx;
__u32 target_idx;
__u32 nfc_protocol;
@@ -271,14 +271,14 @@ struct sockaddr_nfc {
#define NFC_LLCP_MAX_SERVICE_NAME 63
struct sockaddr_nfc_llcp {
- sa_family_t sa_family;
+ __kernel_sa_family_t sa_family;
__u32 dev_idx;
__u32 target_idx;
__u32 nfc_protocol;
__u8 dsap; /* Destination SAP, if known */
__u8 ssap; /* Source SAP to be bound to */
char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
- size_t service_name_len;
+ __kernel_size_t service_name_len;
};
/* NFC socket protocols */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70f92aaca411..c800220c404d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1974,6 +1974,10 @@ static int __init inet_init(void)
ip_init();
+ /* Initialise per-cpu ipv4 mibs */
+ if (init_ipv4_mibs())
+ panic("%s: Cannot init ipv4 mibs\n", __func__);
+
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -2004,12 +2008,6 @@ static int __init inet_init(void)
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
- /*
- * Initialise per-cpu ipv4 mibs
- */
-
- if (init_ipv4_mibs())
- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
ipv4_proc_init();
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 818fc9975625..a71bfa5b0277 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1132,7 +1132,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index a33ea45dec05..27700887c321 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb,
pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR);
if (!pnest)
return -ENOMEM;
- nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ if (rc) {
+ nla_nest_cancel(skb, pnest);
+ return rc;
+ }
if ((0x1 << np->id) == ndp->package_whitelist)
nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index ba9f64fdfd23..7921e77fa55a 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -292,9 +292,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
return err;
}
-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
@@ -304,6 +303,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
int err = 0;
lock_sock(sk);
+ if (ep != tsp->asoc->ep)
+ goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -346,9 +347,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
return err;
}
-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
@@ -506,8 +506,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
- net, &pos, &commp);
+ sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump,
+ net, &pos, &commp);
cb->args[2] = pos;
done:
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3067deb0fbec..665a22d5c725 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
}
/* Final destructor for endpoint. */
+static void sctp_endpoint_destroy_rcu(struct rcu_head *head)
+{
+ struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu);
+ struct sock *sk = ep->base.sk;
+
+ sctp_sk(sk)->ep = NULL;
+ sock_put(sk);
+
+ kfree(ep);
+ SCTP_DBG_OBJCNT_DEC(ep);
+}
+
static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
{
struct sock *sk;
@@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
if (sctp_sk(sk)->bind_hash)
sctp_put_port(sk);
- sctp_sk(sk)->ep = NULL;
- /* Give up our hold on the sock */
- sock_put(sk);
-
- kfree(ep);
- SCTP_DBG_OBJCNT_DEC(ep);
+ call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu);
}
/* Hold a reference to an endpoint. */
-void sctp_endpoint_hold(struct sctp_endpoint *ep)
+int sctp_endpoint_hold(struct sctp_endpoint *ep)
{
- refcount_inc(&ep->base.refcnt);
+ return refcount_inc_not_zero(&ep->base.refcnt);
}
/* Release a reference to an endpoint and clean up if there are
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2146372adff4..565aa77fe5cb 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5395,11 +5395,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p) {
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p)
+{
struct rhashtable_iter hti;
struct sctp_transport *tsp;
+ struct sctp_endpoint *ep;
int ret;
again:
@@ -5408,26 +5409,32 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
- ret = cb(tsp, p);
- if (ret)
- break;
+ ep = tsp->asoc->ep;
+ if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
+ ret = cb(ep, tsp, p);
+ if (ret)
+ break;
+ sctp_endpoint_put(ep);
+ }
(*pos)++;
sctp_transport_put(tsp);
}
sctp_transport_walk_stop(&hti);
if (ret) {
- if (cb_done && !cb_done(tsp, p)) {
+ if (cb_done && !cb_done(ep, tsp, p)) {
(*pos)++;
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
goto again;
}
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
}
return ret;
}
-EXPORT_SYMBOL_GPL(sctp_for_each_transport);
+EXPORT_SYMBOL_GPL(sctp_transport_traverse_process);
/* 7.2.1 Association Status (SCTP_STATUS)
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index a4ca050815ab..dc1d3696af6b 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -252,7 +252,7 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "s390" && $bits == 64) {
if ($cc =~ /-DCC_USING_HOTPATCH/) {
- $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
$mcount_adjust = 0;
} else {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8b9cbe1e8ee2..91f2ba0b225b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5734,7 +5734,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
char *addrp;
- u8 proto;
+ u8 proto = 0;
if (sk == NULL)
return NF_ACCEPT;
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index eba0b3395851..861fc6f4ebfb 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -1029,6 +1029,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
return false;
if (!domain)
return true;
+ if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED]))
+ return false;
list_for_each_entry_rcu(ptr, &domain->acl_info_list, list,
srcu_read_lock_held(&tomoyo_ss)) {
u16 perm;
@@ -1074,14 +1076,12 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
if (count < tomoyo_profile(domain->ns, domain->profile)->
pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
return true;
- if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
- domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
- /* r->granted = false; */
- tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+ WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true);
+ /* r->granted = false; */
+ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
#ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING
- pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
- domain->domainname->name);
+ pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
+ domain->domainname->name);
#endif
- }
return false;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3a169a026635..bbf1f2d3387e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2308,7 +2308,7 @@ static int process_switch_event(struct perf_tool *tool,
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
- if (scripting_ops && scripting_ops->process_switch)
+ if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample))
scripting_ops->process_switch(event, sample, machine);
if (!script->show_switch_events)
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index c66da6ffd6d8..7badaf215de2 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 17512a43885e..f1fdaa270291 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -419,6 +419,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ const char *bind_addr = NULL;
int max_len, hdrlen;
int c;
@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv)
cfg_cpu = strtol(optarg, NULL, 0);
break;
case 'D':
- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ bind_addr = optarg;
break;
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv)
}
}
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
if (optind != argc)
usage(argv[0]);
^ permalink raw reply related
* Re: [PATCH] ALSA: hda/realtek: Add quirk for Legion Y9000X 2020
From: Greg Kroah-Hartman @ 2022-01-05 12:33 UTC (permalink / raw)
To: Baole Fang
Cc: alsa-devel, Kailang Yang, Jeremy Szu, Takashi Iwai, linux-kernel,
Elia Devito, Takashi Iwai, Werner Sembach, Hui Wang, Sami Loone,
Cameron Berkenpas
In-Reply-To: <6bf35d26-73d4-ba14-f931-8d379c623482@163.com>
A: http://en.wikipedia.org/wiki/Top_post
Q: Were do I find info about this thing called top-posting?
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?
A: No.
Q: Should I include quotations after my reply?
http://daringfireball.net/2007/07/on_top
On Wed, Jan 05, 2022 at 08:29:26PM +0800, Baole Fang wrote:
> Thank you for your explanation! I shouldn't have written that line and I
> supposed it can be ignored. Is there anything else I could do?
We can not just "ignore" it, you need to fix your change up and resend
it in a proper format so that it can be applied.
As-is, it is not acceptable, sorry.
thanks,
greg k-h
^ permalink raw reply
* Linux 5.10.90
From: Greg Kroah-Hartman @ 2022-01-05 12:34 UTC (permalink / raw)
To: linux-kernel, akpm, torvalds, stable; +Cc: lwn, jslaby, Greg Kroah-Hartman
I'm announcing the release of the 5.10.90 kernel.
All users of the 5.10 kernel series must upgrade.
The updated 5.10.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.10.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/admin-guide/kernel-parameters.txt | 2
Documentation/admin-guide/sysctl/kernel.rst | 17 ++
Makefile | 2
arch/parisc/kernel/traps.c | 2
drivers/android/binder_alloc.c | 2
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 76 +++++++----
drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 7 +
drivers/gpu/drm/amd/include/discovery.h | 49 +++++++
drivers/i2c/i2c-dev.c | 3
drivers/input/joystick/spaceball.c | 11 +
drivers/input/mouse/appletouch.c | 4
drivers/input/serio/i8042-x86ia64io.h | 21 +++
drivers/input/serio/i8042.c | 54 +++++--
drivers/net/ethernet/atheros/ag71xx.c | 23 +--
drivers/net/ethernet/freescale/fman/fman_port.c | 12 +
drivers/net/ethernet/intel/igc/igc_main.c | 6
drivers/net/ethernet/lantiq_xrx200.c | 2
drivers/net/ethernet/mellanox/mlx5/core/en.h | 3
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 41 +++--
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5
drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2
drivers/net/usb/pegasus.c | 4
drivers/nfc/st21nfca/i2c.c | 29 ++--
drivers/platform/x86/apple-gmux.c | 2
drivers/scsi/lpfc/lpfc_debugfs.c | 4
drivers/scsi/vmw_pvscsi.c | 7 -
drivers/usb/gadget/function/f_fs.c | 9 -
drivers/usb/host/xhci-pci.c | 5
drivers/usb/mtu3/mtu3_gadget.c | 8 +
drivers/usb/mtu3/mtu3_qmu.c | 7 -
include/linux/memblock.h | 4
include/net/sctp/sctp.h | 6
include/net/sctp/structs.h | 3
include/uapi/linux/nfc.h | 6
init/Kconfig | 10 +
kernel/bpf/syscall.c | 3
kernel/sysctl.c | 29 +++-
net/ipv4/af_inet.c | 10 -
net/ipv6/udp.c | 2
net/ncsi/ncsi-netlink.c | 6
net/sctp/diag.c | 12 -
net/sctp/endpointola.c | 23 ++-
net/sctp/socket.c | 23 ++-
net/smc/smc.h | 5
net/smc/smc_cdc.c | 59 ++++----
net/smc/smc_cdc.h | 2
net/smc/smc_core.c | 47 ++++--
net/smc/smc_core.h | 6
net/smc/smc_ib.c | 4
net/smc/smc_ib.h | 1
net/smc/smc_llc.c | 65 +++++++--
net/smc/smc_tx.c | 22 ---
net/smc/smc_wr.c | 51 +------
net/smc/smc_wr.h | 17 ++
scripts/recordmcount.pl | 2
security/selinux/hooks.c | 2
security/tomoyo/util.c | 31 ++--
tools/perf/builtin-script.c | 2
tools/testing/selftests/net/udpgso.c | 12 -
tools/testing/selftests/net/udpgso_bench_tx.c | 8 +
61 files changed, 594 insertions(+), 308 deletions(-)
Adrian Hunter (1):
perf script: Fix CPU filtering of a script's switch events
Aleksander Jan Bajkowski (1):
net: lantiq_xrx200: fix statistics of received bytes
Alex Deucher (1):
drm/amdgpu: add support for IP discovery gc_info table v2
Alexey Makhalov (1):
scsi: vmw_pvscsi: Set residual data length conditionally
Amir Tzin (1):
net/mlx5e: Wrap the tx reporter dump callback to extract the sq
Christophe JAILLET (2):
net: ag71xx: Fix a potential double free in error handling paths
ionic: Initialize the 'lif->dbid_inuse' bitmap
Chunfeng Yun (3):
usb: mtu3: add memory barrier before set GPD's HWO
usb: mtu3: fix list_head check warning
usb: mtu3: set interval of FS intr and isoc endpoint
Coco Li (2):
udp: using datalen to cap ipv6 udp max gso segments
selftests: Calculate udpgso segment count without header adjustment
Dan Carpenter (1):
scsi: lpfc: Terminate string in lpfc_debugfs_nvmeio_trc_write()
Daniel Borkmann (1):
bpf: Add kconfig knob for disabling unpriv bpf by default
Dmitry V. Levin (1):
uapi: fix linux/nfc.h userspace compilation errors
Dmitry Vyukov (1):
tomoyo: Check exceeded quota early in tomoyo_domain_quota_is_ok().
Dust Li (2):
net/smc: don't send CDC/LLC message if link not ready
net/smc: fix kernel panic caused by race of smc_sock
Gal Pressman (1):
net/mlx5e: Fix wrong features assignment in case of error
Greg Kroah-Hartman (1):
Linux 5.10.90
Heiko Carstens (1):
recordmcount.pl: fix typo in s390 mcount regex
Helge Deller (1):
parisc: Clear stale IIR value on instruction access rights trap
Jackie Liu (1):
memblock: fix memblock_phys_alloc() section mismatch error
James McLaughlin (1):
igc: Fix TX timestamp support for non-MSI-X platforms
Jiasheng Jiang (1):
net/ncsi: check for error return from call to nla_put_u32
Karsten Graul (2):
net/smc: fix using of uninitialized completions
net/smc: improved fix wait on already cleared link
Krzysztof Kozlowski (1):
nfc: uapi: use kernel size_t to fix user-space builds
Leo L. Schwab (1):
Input: spaceball - fix parsing of movement data packets
Mathias Nyman (1):
xhci: Fresco FL1100 controller should not have BROKEN_MSI quirk set.
Matthias-Christian Ott (1):
net: usb: pegasus: Do not drop long Ethernet frames
Maxim Mikityanskiy (1):
net/mlx5e: Fix ICOSQ recovery flow for XSK
Miaoqian Lin (2):
net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources
fsl/fman: Fix missing put_device() call in fman_port_probe
Muchun Song (1):
net: fix use-after-free in tw_timer_handler
Pavel Skripkin (2):
i2c: validate user data in compat ioctl
Input: appletouch - initialize work before device registration
Samuel Čavoj (1):
Input: i8042 - enable deferred probe quirk for ASUS UM325UA
Takashi Iwai (1):
Input: i8042 - add deferred probe support
Tetsuo Handa (1):
tomoyo: use hwight16() in tomoyo_domain_quota_is_ok()
Todd Kjos (1):
binder: fix async_free_space accounting for empty parcels
Tom Rix (1):
selinux: initialize proto variable in selinux_ip_postroute_compat()
Vincent Pelletier (1):
usb: gadget: f_fs: Clear ffs_eventfd in ffs_data_clear.
Wang Qing (1):
platform/x86: apple-gmux: use resource_size() with res
Wei Yongjun (1):
NFC: st21nfca: Fix memory leak in device probe and remove
Xin Long (1):
sctp: use call_rcu to free endpoint
chen gong (1):
drm/amdgpu: When the VCN(1.0) block is suspended, powergating is explicitly enabled
wujianguo (1):
selftests/net: udpgso_bench_tx: fix dst ip argument
^ permalink raw reply
* [PATCH v2] rtl8723bs: fix memory leak error
From: F.A.Sulaiman @ 2022-01-05 12:34 UTC (permalink / raw)
To: gregkh, fabioaiuto83, marcocesati, dan.carpenter, hdegoede
Cc: F.A.Sulaiman, linux-staging, linux-kernel
In-Reply-To: <20210830193355.11338-1-asha.16@itfac.mrt.ac.lk>
Smatch reported memory leak bug in rtl8723b_FirmwareDownload function.
The problem is pFirmware memory is not released in 'release_fw1'.
Instead of redirecting to 'release_fw1', we can turn it into 'exit'
and free the memory.
Signed-off-by: F.A. SULAIMAN <asha.16@itfac.mrt.ac.lk>
---
drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c b/drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c
index f1fc077ed29c..5f09b3ef9459 100644
--- a/drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c
+++ b/drivers/staging/rtl8723bs/hal/rtl8723b_hal_init.c
@@ -361,7 +361,7 @@ s32 rtl8723b_FirmwareDownload(struct adapter *padapter, bool bUsedWoWLANFw)
netdev_emerg(padapter->pnetdev,
"Firmware size:%u exceed %u\n",
pFirmware->fw_length, FW_8723B_SIZE);
- goto release_fw1;
+ goto exit;
}
pFirmwareBuf = pFirmware->fw_buffer_sz;
--
2.17.1
^ permalink raw reply related
* Re: Linux 5.10.90
From: Greg Kroah-Hartman @ 2022-01-05 12:34 UTC (permalink / raw)
To: linux-kernel, akpm, torvalds, stable; +Cc: lwn, jslaby, Greg Kroah-Hartman
In-Reply-To: <164138605832170@kroah.com>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ccaa72562538..d00618967854 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1617,6 +1617,8 @@
architectures force reset to be always executed
i8042.unlock [HW] Unlock (ignore) the keylock
i8042.kbdreset [HW] Reset device connected to KBD port
+ i8042.probe_defer
+ [HW] Allow deferred probing upon i8042 probe errors
i810= [HW,DRM]
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d4b32cc32bb7..7d5e8a67c775 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled
=========================
Writing 1 to this entry will disable unprivileged calls to ``bpf()``;
-once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return
-``-EPERM``.
+once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF``
+will return ``-EPERM``. Once set to 1, this can't be cleared from the
+running kernel anymore.
-Once set, this can't be cleared.
+Writing 2 to this entry will also disable unprivileged calls to ``bpf()``,
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
+If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+= =============================================================
+0 Unprivileged calls to ``bpf()`` are enabled
+1 Unprivileged calls to ``bpf()`` are disabled without recovery
+2 Unprivileged calls to ``bpf()`` are disabled
+= =============================================================
watchdog
========
diff --git a/Makefile b/Makefile
index 1500ea340424..556241a10821 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 10
-SUBLEVEL = 89
+SUBLEVEL = 90
EXTRAVERSION =
NAME = Dare mighty things
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index a52c7abf2ca4..43f56335759a 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -729,6 +729,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
}
mmap_read_unlock(current->mm);
}
+ /* CPU could not fetch instruction, so clear stale IIR value. */
+ regs->iir = 0xbaadf00d;
fallthrough;
case 27:
/* Data memory protection ID trap */
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7caf74ad2405..95ca4f934d28 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -662,7 +662,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc,
BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size);
if (buffer->async_transaction) {
- alloc->free_async_space += size + sizeof(struct binder_buffer);
+ alloc->free_async_space += buffer_size + sizeof(struct binder_buffer);
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
"%d: binder_free_buf size %zd async free %zd\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index bfb95143ba5e..ec6bfa316daa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -372,10 +372,15 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
return -EINVAL;
}
+union gc_info {
+ struct gc_info_v1_0 v1;
+ struct gc_info_v2_0 v2;
+};
+
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
- struct gc_info_v1_0 *gc_info;
+ union gc_info *gc_info;
if (!adev->mman.discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
@@ -383,27 +388,54 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
}
bhdr = (struct binary_header *)adev->mman.discovery_bin;
- gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
+ gc_info = (union gc_info *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[GC].offset));
-
- adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
- adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
- le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
- adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
- adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
- adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
- adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
- adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
- adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
- adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
- adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
- adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
- le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
-
+ switch (gc_info->v1.header.version_major) {
+ case 1:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ break;
+ case 2:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled GC info table %d.%d\n",
+ gc_info->v1.header.version_major,
+ gc_info->v1.header.version_minor);
+ return -EINVAL;
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index e8737fa438f0..7115f6dbb137 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -254,6 +254,13 @@ static int vcn_v1_0_suspend(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool idle_work_unexecuted;
+
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (idle_work_unexecuted) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+ }
r = vcn_v1_0_hw_fini(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h
index 7ec4331e67f2..a486769b66c6 100644
--- a/drivers/gpu/drm/amd/include/discovery.h
+++ b/drivers/gpu/drm/amd/include/discovery.h
@@ -143,6 +143,55 @@ struct gc_info_v1_0 {
uint32_t gc_num_gl2a;
};
+struct gc_info_v1_1 {
+ struct gpu_info_header header;
+
+ uint32_t gc_num_se;
+ uint32_t gc_num_wgp0_per_sa;
+ uint32_t gc_num_wgp1_per_sa;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_gl2c;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+ uint32_t gc_num_sc_per_se;
+ uint32_t gc_num_sa_per_se;
+ uint32_t gc_num_packer_per_sc;
+ uint32_t gc_num_gl2a;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+};
+
+struct gc_info_v2_0 {
+ struct gpu_info_header header;
+
+ uint32_t gc_num_se;
+ uint32_t gc_num_cu_per_sh;
+ uint32_t gc_num_sh_per_se;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_tccs;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+ uint32_t gc_num_sc_per_se;
+ uint32_t gc_num_packer_per_sc;
+};
+
typedef struct harvest_info_header {
uint32_t signature; /* Table Signature */
uint32_t version; /* Table Version */
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index f358120d59b3..dafad891998e 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -536,6 +536,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo
sizeof(rdwr_arg)))
return -EFAULT;
+ if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0)
+ return -EINVAL;
+
if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS)
return -EINVAL;
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index 429411c6c0a8..a85a4f33aea8 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/input.h>
#include <linux/serio.h>
+#include <asm/unaligned.h>
#define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver"
@@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball)
case 'D': /* Ball data */
if (spaceball->idx != 15) return;
- for (i = 0; i < 6; i++)
+ /*
+ * Skip first three bytes; read six axes worth of data.
+ * Axis values are signed 16-bit big-endian.
+ */
+ data += 3;
+ for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) {
input_report_abs(dev, spaceball_axes[i],
- (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2]));
+ (__s16)get_unaligned_be16(&data[i * 2]));
+ }
break;
case 'K': /* Button data */
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index bfa26651c0be..627048bc6a12 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface,
set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
set_bit(BTN_LEFT, input_dev->keybit);
+ INIT_WORK(&dev->work, atp_reinit);
+
error = input_register_device(dev->input);
if (error)
goto err_free_buffer;
@@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface,
/* save our data pointer in this interface device */
usb_set_intfdata(iface, dev);
- INIT_WORK(&dev->work, atp_reinit);
-
return 0;
err_free_buffer:
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index aedd05541044..148a7c5fd0e2 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -995,6 +995,24 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = {
{ }
};
+static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = {
+ {
+ /* ASUS ZenBook UX425UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"),
+ },
+ },
+ {
+ /* ASUS ZenBook UM325UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"),
+ },
+ },
+ { }
+};
+
#endif /* CONFIG_X86 */
#ifdef CONFIG_PNP
@@ -1315,6 +1333,9 @@ static int __init i8042_platform_init(void)
if (dmi_check_system(i8042_dmi_kbdreset_table))
i8042_kbdreset = true;
+ if (dmi_check_system(i8042_dmi_probe_defer_table))
+ i8042_probe_defer = true;
+
/*
* A20 was already enabled during early kernel init. But some buggy
* BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index abae23af0791..a9f68f535b72 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -45,6 +45,10 @@ static bool i8042_unlock;
module_param_named(unlock, i8042_unlock, bool, 0);
MODULE_PARM_DESC(unlock, "Ignore keyboard lock.");
+static bool i8042_probe_defer;
+module_param_named(probe_defer, i8042_probe_defer, bool, 0);
+MODULE_PARM_DESC(probe_defer, "Allow deferred probing.");
+
enum i8042_controller_reset_mode {
I8042_RESET_NEVER,
I8042_RESET_ALWAYS,
@@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version)
* LCS/Telegraphics.
*/
-static int __init i8042_check_mux(void)
+static int i8042_check_mux(void)
{
unsigned char mux_version;
@@ -740,10 +744,10 @@ static int __init i8042_check_mux(void)
/*
* The following is used to test AUX IRQ delivery.
*/
-static struct completion i8042_aux_irq_delivered __initdata;
-static bool i8042_irq_being_tested __initdata;
+static struct completion i8042_aux_irq_delivered;
+static bool i8042_irq_being_tested;
-static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
+static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id)
{
unsigned long flags;
unsigned char str, data;
@@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
* verifies success by readinng CTR. Used when testing for presence of AUX
* port.
*/
-static int __init i8042_toggle_aux(bool on)
+static int i8042_toggle_aux(bool on)
{
unsigned char param;
int i;
@@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on)
* the presence of an AUX interface.
*/
-static int __init i8042_check_aux(void)
+static int i8042_check_aux(void)
{
int retval = -1;
bool irq_registered = false;
@@ -1005,7 +1009,7 @@ static int i8042_controller_init(void)
if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) {
pr_err("Can't read CTR while initializing i8042\n");
- return -EIO;
+ return i8042_probe_defer ? -EPROBE_DEFER : -EIO;
}
} while (n < 2 || ctr[0] != ctr[1]);
@@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev)
i8042_controller_reset(false);
}
-static int __init i8042_create_kbd_port(void)
+static int i8042_create_kbd_port(void)
{
struct serio *serio;
struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO];
@@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void)
return 0;
}
-static int __init i8042_create_aux_port(int idx)
+static int i8042_create_aux_port(int idx)
{
struct serio *serio;
int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx;
@@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx)
return 0;
}
-static void __init i8042_free_kbd_port(void)
+static void i8042_free_kbd_port(void)
{
kfree(i8042_ports[I8042_KBD_PORT_NO].serio);
i8042_ports[I8042_KBD_PORT_NO].serio = NULL;
}
-static void __init i8042_free_aux_ports(void)
+static void i8042_free_aux_ports(void)
{
int i;
@@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void)
}
}
-static void __init i8042_register_ports(void)
+static void i8042_register_ports(void)
{
int i;
@@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void)
i8042_aux_irq_registered = i8042_kbd_irq_registered = false;
}
-static int __init i8042_setup_aux(void)
+static int i8042_setup_aux(void)
{
int (*aux_enable)(void);
int error;
@@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void)
return error;
}
-static int __init i8042_setup_kbd(void)
+static int i8042_setup_kbd(void)
{
int error;
@@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb,
return 0;
}
-static int __init i8042_probe(struct platform_device *dev)
+static int i8042_probe(struct platform_device *dev)
{
int error;
@@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = {
.pm = &i8042_pm_ops,
#endif
},
+ .probe = i8042_probe,
.remove = i8042_remove,
.shutdown = i8042_shutdown,
};
@@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = {
static int __init i8042_init(void)
{
- struct platform_device *pdev;
int err;
dbg_init();
@@ -1626,17 +1630,29 @@ static int __init i8042_init(void)
/* Set this before creating the dev to allow i8042_command to work right away */
i8042_present = true;
- pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0);
- if (IS_ERR(pdev)) {
- err = PTR_ERR(pdev);
+ err = platform_driver_register(&i8042_driver);
+ if (err)
goto err_platform_exit;
+
+ i8042_platform_device = platform_device_alloc("i8042", -1);
+ if (!i8042_platform_device) {
+ err = -ENOMEM;
+ goto err_unregister_driver;
}
+ err = platform_device_add(i8042_platform_device);
+ if (err)
+ goto err_free_device;
+
bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block);
panic_blink = i8042_panic_blink;
return 0;
+err_free_device:
+ platform_device_put(i8042_platform_device);
+err_unregister_driver:
+ platform_driver_unregister(&i8042_driver);
err_platform_exit:
i8042_platform_exit();
return err;
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index a60ce9030581..c26c9b0c00d8 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1904,15 +1904,12 @@ static int ag71xx_probe(struct platform_device *pdev)
ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
if (IS_ERR(ag->mac_reset)) {
netif_err(ag, probe, ndev, "missing mac reset\n");
- err = PTR_ERR(ag->mac_reset);
- goto err_free;
+ return PTR_ERR(ag->mac_reset);
}
ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
- if (!ag->mac_base) {
- err = -ENOMEM;
- goto err_free;
- }
+ if (!ag->mac_base)
+ return -ENOMEM;
ndev->irq = platform_get_irq(pdev, 0);
err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
@@ -1920,7 +1917,7 @@ static int ag71xx_probe(struct platform_device *pdev)
if (err) {
netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
ndev->irq);
- goto err_free;
+ return err;
}
ndev->netdev_ops = &ag71xx_netdev_ops;
@@ -1948,10 +1945,8 @@ static int ag71xx_probe(struct platform_device *pdev)
ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
sizeof(struct ag71xx_desc),
&ag->stop_desc_dma, GFP_KERNEL);
- if (!ag->stop_desc) {
- err = -ENOMEM;
- goto err_free;
- }
+ if (!ag->stop_desc)
+ return -ENOMEM;
ag->stop_desc->data = 0;
ag->stop_desc->ctrl = 0;
@@ -1968,7 +1963,7 @@ static int ag71xx_probe(struct platform_device *pdev)
err = of_get_phy_mode(np, &ag->phy_if_mode);
if (err) {
netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
- goto err_free;
+ return err;
}
netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
@@ -1976,7 +1971,7 @@ static int ag71xx_probe(struct platform_device *pdev)
err = clk_prepare_enable(ag->clk_eth);
if (err) {
netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
- goto err_free;
+ return err;
}
ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
@@ -2012,8 +2007,6 @@ static int ag71xx_probe(struct platform_device *pdev)
ag71xx_mdio_remove(ag);
err_put_clk:
clk_disable_unprepare(ag->clk_eth);
-err_free:
- free_netdev(ndev);
return err;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index d9baac0dbc7d..4c9d05c45c03 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev)
fman = dev_get_drvdata(&fm_pdev->dev);
if (!fman) {
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
err = of_property_read_u32(port_node, "cell-index", &val);
@@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
__func__, port_node);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port_id = (u8)val;
port->dts_params.id = port_id;
@@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev)
} else {
dev_err(port->dev, "%s: Illegal port type\n", __func__);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port->dts_params.type = port_type;
@@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: incorrect qman-channel-id\n",
__func__);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port->dts_params.qman_channel_id = qman_channel_id;
}
@@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: of_address_to_resource() failed\n",
__func__);
err = -ENOMEM;
- goto return_err;
+ goto put_device;
}
port->dts_params.fman = fman;
@@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev)
return 0;
+put_device:
+ put_device(&fm_pdev->dev);
return_err:
of_node_put(port_node);
free_port:
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index cae090a07252..61cebb7df6bc 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4422,6 +4422,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
+ if (icr & IGC_ICR_TS)
+ igc_tsync_interrupt(adapter);
+
napi_schedule(&q_vector->napi);
return IRQ_HANDLED;
@@ -4465,6 +4468,9 @@ static irqreturn_t igc_intr(int irq, void *data)
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
+ if (icr & IGC_ICR_TS)
+ igc_tsync_interrupt(adapter);
+
napi_schedule(&q_vector->napi);
return IRQ_HANDLED;
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 072075bc60ee..500511b72ac6 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
skb->protocol = eth_type_trans(skb, net_dev);
netif_receive_skb(skb);
net_dev->stats.rx_packets++;
- net_dev->stats.rx_bytes += len - ETH_FCS_LEN;
+ net_dev->stats.rx_bytes += len;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9da34f82d466..73060b30fece 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -916,9 +916,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_close_rq(struct mlx5e_rq *rq);
struct mlx5e_sq_param;
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
-void mlx5e_close_icosq(struct mlx5e_icosq *sq);
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 8be6eaa3eeb1..13dd34c571b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -335,6 +335,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
struct devlink_fmsg *fmsg)
{
@@ -418,7 +426,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
to_ctx.sq = sq;
err_ctx.ctx = &to_ctx;
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
- err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
snprintf(err_str, sizeof(err_str),
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6ec4b96497ff..2f6c3a5813ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1051,9 +1051,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_icosq_cqe_err(sq);
}
+static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+ recover_work);
+
+ /* Not implemented yet. */
+
+ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
+}
+
static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
struct mlx5e_sq_param *param,
- struct mlx5e_icosq *sq)
+ struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
struct mlx5_core_dev *mdev = c->mdev;
@@ -1073,7 +1084,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
if (err)
goto err_sq_wq_destroy;
- INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
+ INIT_WORK(&sq->recover_work, recover_work_func);
return 0;
@@ -1423,13 +1434,14 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_tx_err_cqe(sq);
}
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
+static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_icosq(c, param, sq);
+ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
if (err)
return err;
@@ -1459,7 +1471,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
synchronize_net(); /* Sync with NAPI. */
}
-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1862,11 +1874,13 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
spin_lock_init(&c->async_icosq_lock);
- err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq);
+ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
+ mlx5e_async_icosq_err_cqe_work);
if (err)
goto err_disable_napi;
- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
+ mlx5e_icosq_err_cqe_work);
if (err)
goto err_close_async_icosq;
@@ -3921,12 +3935,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
static int mlx5e_handle_feature(struct net_device *netdev,
netdev_features_t *features,
- netdev_features_t wanted_features,
netdev_features_t feature,
mlx5e_feature_handler feature_handler)
{
- netdev_features_t changes = wanted_features ^ netdev->features;
- bool enable = !!(wanted_features & feature);
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
int err;
if (!(changes & feature))
@@ -3934,22 +3947,22 @@ static int mlx5e_handle_feature(struct net_device *netdev,
err = feature_handler(netdev, enable);
if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
netdev_err(netdev, "%s feature %pNF failed, err %d\n",
enable ? "Enable" : "Disable", &feature, err);
return err;
}
- MLX5E_SET_FEATURE(features, feature, enable);
return 0;
}
int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
- netdev_features_t oper_features = netdev->features;
+ netdev_features_t oper_features = features;
int err = 0;
#define MLX5E_HANDLE_FEATURE(feature, handler) \
- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 00d861361428..16a7c7ec5e13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/mlx5/eswitch.h>
+#include <linux/err.h>
#include "dr_types.h"
#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \
@@ -69,9 +70,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
}
dmn->uar = mlx5_get_uars_page(dmn->mdev);
- if (!dmn->uar) {
+ if (IS_ERR(dmn->uar)) {
mlx5dr_err(dmn, "Couldn't allocate UAR\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(dmn->uar);
goto clean_pd;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1b44155fa24b..e95c09dc2c30 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -2836,7 +2836,7 @@ int ionic_lif_init(struct ionic_lif *lif)
return -EINVAL;
}
- lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
+ lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
if (!lif->dbid_inuse) {
dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
return -ENOMEM;
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 2a748a924f83..138279bbb544 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -518,11 +518,11 @@ static void read_bulk_callback(struct urb *urb)
goto goon;
rx_status = buf[count - 2];
- if (rx_status & 0x1e) {
+ if (rx_status & 0x1c) {
netif_dbg(pegasus, rx_err, net,
"RX packet error %x\n", rx_status);
net->stats.rx_errors++;
- if (rx_status & 0x06) /* long or runt */
+ if (rx_status & 0x04) /* runt */
net->stats.rx_length_errors++;
if (rx_status & 0x08)
net->stats.rx_crc_errors++;
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 23ed11f91213..6ea59426ab0b 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -533,7 +533,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
if (IS_ERR(phy->gpiod_ena)) {
nfc_err(dev, "Unable to get ENABLE GPIO\n");
- return PTR_ERR(phy->gpiod_ena);
+ r = PTR_ERR(phy->gpiod_ena);
+ goto out_free;
}
phy->se_status.is_ese_present =
@@ -544,7 +545,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
r = st21nfca_hci_platform_init(phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to reboot st21nfca\n");
- return r;
+ goto out_free;
}
r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
@@ -553,15 +554,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
ST21NFCA_HCI_DRIVER_NAME, phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to register IRQ handler\n");
- return r;
+ goto out_free;
}
- return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
- ST21NFCA_FRAME_HEADROOM,
- ST21NFCA_FRAME_TAILROOM,
- ST21NFCA_HCI_LLC_MAX_PAYLOAD,
- &phy->hdev,
- &phy->se_status);
+ r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+ ST21NFCA_FRAME_HEADROOM,
+ ST21NFCA_FRAME_TAILROOM,
+ ST21NFCA_HCI_LLC_MAX_PAYLOAD,
+ &phy->hdev,
+ &phy->se_status);
+ if (r)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ kfree_skb(phy->pending_skb);
+ return r;
}
static int st21nfca_hci_i2c_remove(struct i2c_client *client)
@@ -574,6 +583,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
if (phy->powered)
st21nfca_hci_i2c_disable(phy);
+ if (phy->pending_skb)
+ kfree_skb(phy->pending_skb);
return 0;
}
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 9aae45a45200..57553f9b4d1d 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
}
gmux_data->iostart = res->start;
- gmux_data->iolen = res->end - res->start;
+ gmux_data->iolen = resource_size(res);
if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
pr_err("gmux I/O region too small (%lu < %u)\n",
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index b89c5513243e..beaf3a8d206f 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -2956,8 +2956,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf,
char mybuf[64];
char *pbuf;
- if (nbytes > 64)
- nbytes = 64;
+ if (nbytes > 63)
+ nbytes = 63;
memset(mybuf, 0, sizeof(mybuf));
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 1421b1394d81..7d51ff4672d7 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -591,9 +591,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
* Commands like INQUIRY may transfer less data than
* requested by the initiator via bufflen. Set residual
* count to make upper layer aware of the actual amount
- * of data returned.
+ * of data returned. There are cases when controller
+ * returns zero dataLen with non zero data - do not set
+ * residual count in that case.
*/
- scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+ if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
+ scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
cmd->result = (DID_OK << 16);
break;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 725e35167837..cbb7947f366f 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1772,11 +1772,15 @@ static void ffs_data_clear(struct ffs_data *ffs)
BUG_ON(ffs->gadget);
- if (ffs->epfiles)
+ if (ffs->epfiles) {
ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
+ ffs->epfiles = NULL;
+ }
- if (ffs->ffs_eventfd)
+ if (ffs->ffs_eventfd) {
eventfd_ctx_put(ffs->ffs_eventfd);
+ ffs->ffs_eventfd = NULL;
+ }
kfree(ffs->raw_descs_data);
kfree(ffs->raw_strings);
@@ -1789,7 +1793,6 @@ static void ffs_data_reset(struct ffs_data *ffs)
ffs_data_clear(ffs);
- ffs->epfiles = NULL;
ffs->raw_descs_data = NULL;
ffs->raw_descs = NULL;
ffs->raw_strings = NULL;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index c9133df71e52..dafb58f05c9f 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -122,7 +122,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
/* Look for vendor-specific quirks */
if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
(pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK ||
- pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 ||
pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) {
if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK &&
pdev->revision == 0x0) {
@@ -157,6 +156,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
xhci->quirks |= XHCI_BROKEN_STREAMS;
+ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c
index 0b3aa7c65857..a3e1105c5c66 100644
--- a/drivers/usb/mtu3/mtu3_gadget.c
+++ b/drivers/usb/mtu3/mtu3_gadget.c
@@ -92,6 +92,13 @@ static int mtu3_ep_enable(struct mtu3_ep *mep)
interval = clamp_val(interval, 1, 16) - 1;
mult = usb_endpoint_maxp_mult(desc) - 1;
}
+ break;
+ case USB_SPEED_FULL:
+ if (usb_endpoint_xfer_isoc(desc))
+ interval = clamp_val(desc->bInterval, 1, 16);
+ else if (usb_endpoint_xfer_int(desc))
+ interval = clamp_val(desc->bInterval, 1, 255);
+
break;
default:
break; /*others are ignored */
@@ -235,6 +242,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags)
mreq->request.dma = DMA_ADDR_INVALID;
mreq->epnum = mep->epnum;
mreq->mep = mep;
+ INIT_LIST_HEAD(&mreq->list);
trace_mtu3_alloc_request(mreq);
return &mreq->request;
diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c
index 3f414f91b589..2ea3157ddb6e 100644
--- a/drivers/usb/mtu3/mtu3_qmu.c
+++ b/drivers/usb/mtu3/mtu3_qmu.c
@@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP);
}
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
mreq->gpd = gpd;
@@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma));
ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma));
gpd->dw3_info = cpu_to_le32(ext_addr);
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
mreq->gpd = gpd;
@@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum)
return;
}
mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY);
-
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
/* by pass the current GDP */
gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO);
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 1a8d25f2e041..3baea2ef33fb 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -387,8 +387,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t end, int nid, bool exact_nid);
phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
-static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
- phys_addr_t align)
+static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
+ phys_addr_t align)
{
return memblock_phys_alloc_range(size, align, 0,
MEMBLOCK_ALLOC_ACCESSIBLE);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 4fc747b778eb..33475d061823 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -103,6 +103,7 @@ extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
@@ -113,9 +114,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr, void *p);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p);
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p);
int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
struct sctp_info *info);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 51d698f2656f..be9ff0422c16 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1339,6 +1339,7 @@ struct sctp_endpoint {
u32 secid;
u32 peer_secid;
+ struct rcu_head rcu;
};
/* Recover the outter endpoint structure. */
@@ -1354,7 +1355,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
void sctp_endpoint_free(struct sctp_endpoint *);
void sctp_endpoint_put(struct sctp_endpoint *);
-void sctp_endpoint_hold(struct sctp_endpoint *);
+int sctp_endpoint_hold(struct sctp_endpoint *ep);
void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index f6e3c8c9c744..4fa4e979e948 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -263,7 +263,7 @@ enum nfc_sdp_attr {
#define NFC_SE_ENABLED 0x1
struct sockaddr_nfc {
- sa_family_t sa_family;
+ __kernel_sa_family_t sa_family;
__u32 dev_idx;
__u32 target_idx;
__u32 nfc_protocol;
@@ -271,14 +271,14 @@ struct sockaddr_nfc {
#define NFC_LLCP_MAX_SERVICE_NAME 63
struct sockaddr_nfc_llcp {
- sa_family_t sa_family;
+ __kernel_sa_family_t sa_family;
__u32 dev_idx;
__u32 target_idx;
__u32 nfc_protocol;
__u8 dsap; /* Destination SAP, if known */
__u8 ssap; /* Source SAP to be bound to */
char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
- size_t service_name_len;
+ __kernel_size_t service_name_len;
};
/* NFC socket protocols */
diff --git a/init/Kconfig b/init/Kconfig
index fc4c9f416fad..13685bffef37 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1722,6 +1722,16 @@ config BPF_JIT_DEFAULT_ON
def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
depends on HAVE_EBPF_JIT && BPF_JIT
+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no other transition to
+ 0 is possible anymore).
+
source "kernel/bpf/preload/Kconfig"
config USERFAULTFD
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bb9a9cb1f321..209e6567cdab 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+ IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b9306d2bb426..72ceb19574d0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -233,7 +233,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
mutex_unlock(&bpf_stats_enabled_mutex);
return ret;
}
-#endif
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+ return ret;
+}
+#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
/*
* /proc/sys support
@@ -2626,10 +2646,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_ONE,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
},
{
.procname = "bpf_stats_enabled",
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8267349afe23..e2f85a16fad9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -2003,6 +2003,10 @@ static int __init inet_init(void)
ip_init();
+ /* Initialise per-cpu ipv4 mibs */
+ if (init_ipv4_mibs())
+ panic("%s: Cannot init ipv4 mibs\n", __func__);
+
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -2033,12 +2037,6 @@ static int __init inet_init(void)
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
- /*
- * Initialise per-cpu ipv4 mibs
- */
-
- if (init_ipv4_mibs())
- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
ipv4_proc_init();
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8a1863146f34..069551a04369 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1189,7 +1189,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index bb5f1650f11c..c189b4c8a182 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb,
pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR);
if (!pnest)
return -ENOMEM;
- nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ if (rc) {
+ nla_nest_cancel(skb, pnest);
+ return rc;
+ }
if ((0x1 << np->id) == ndp->package_whitelist)
nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 493fc01e5d2b..babadd6720a2 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -292,9 +292,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
return err;
}
-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
@@ -304,6 +303,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
int err = 0;
lock_sock(sk);
+ if (ep != tsp->asoc->ep)
+ goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -346,9 +347,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
return err;
}
-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
@@ -507,8 +507,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
- net, &pos, &commp);
+ sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump,
+ net, &pos, &commp);
cb->args[2] = pos;
done:
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 48c9c2c7602f..efffde7f2328 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
}
/* Final destructor for endpoint. */
+static void sctp_endpoint_destroy_rcu(struct rcu_head *head)
+{
+ struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu);
+ struct sock *sk = ep->base.sk;
+
+ sctp_sk(sk)->ep = NULL;
+ sock_put(sk);
+
+ kfree(ep);
+ SCTP_DBG_OBJCNT_DEC(ep);
+}
+
static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
{
struct sock *sk;
@@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
if (sctp_sk(sk)->bind_hash)
sctp_put_port(sk);
- sctp_sk(sk)->ep = NULL;
- /* Give up our hold on the sock */
- sock_put(sk);
-
- kfree(ep);
- SCTP_DBG_OBJCNT_DEC(ep);
+ call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu);
}
/* Hold a reference to an endpoint. */
-void sctp_endpoint_hold(struct sctp_endpoint *ep)
+int sctp_endpoint_hold(struct sctp_endpoint *ep)
{
- refcount_inc(&ep->base.refcnt);
+ return refcount_inc_not_zero(&ep->base.refcnt);
}
/* Release a reference to an endpoint and clean up if there are
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e872bc50bbe6..0a9e2c7d8e5f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5223,11 +5223,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p) {
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p)
+{
struct rhashtable_iter hti;
struct sctp_transport *tsp;
+ struct sctp_endpoint *ep;
int ret;
again:
@@ -5236,26 +5237,32 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
- ret = cb(tsp, p);
- if (ret)
- break;
+ ep = tsp->asoc->ep;
+ if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
+ ret = cb(ep, tsp, p);
+ if (ret)
+ break;
+ sctp_endpoint_put(ep);
+ }
(*pos)++;
sctp_transport_put(tsp);
}
sctp_transport_walk_stop(&hti);
if (ret) {
- if (cb_done && !cb_done(tsp, p)) {
+ if (cb_done && !cb_done(ep, tsp, p)) {
(*pos)++;
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
goto again;
}
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
}
return ret;
}
-EXPORT_SYMBOL_GPL(sctp_for_each_transport);
+EXPORT_SYMBOL_GPL(sctp_transport_traverse_process);
/* 7.2.1 Association Status (SCTP_STATUS)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d65e15f0c944..e6919fe31617 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -170,6 +170,11 @@ struct smc_connection {
u16 tx_cdc_seq; /* sequence # for CDC send */
u16 tx_cdc_seq_fin; /* sequence # - tx completed */
spinlock_t send_lock; /* protect wr_sends */
+ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe
+ * - inc when post wqe,
+ * - dec on polled tx cqe
+ */
+ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index b1ce6ccbfaec..0c490cdde6a4 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
struct smc_sock *smc;
int diff;
- if (!conn)
- /* already dismissed */
- return;
-
smc = container_of(conn, struct smc_sock, conn);
bh_lock_sock(&smc->sk);
if (!wc_status) {
@@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
conn);
conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
}
+
+ if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
+ unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
+ wake_up(&conn->cdc_pend_tx_wq);
+ WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);
+
smc_tx_sndbuf_nonfull(smc);
bh_unlock_sock(&smc->sk);
}
@@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
conn->tx_cdc_seq++;
conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
+
+ atomic_inc(&conn->cdc_pend_tx_wr);
+ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
+
rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
if (!rc) {
smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
@@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
} else {
conn->tx_cdc_seq--;
conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
+ atomic_dec(&conn->cdc_pend_tx_wr);
}
return rc;
@@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn,
peer->token = htonl(local->token);
peer->prod_flags.failover_validation = 1;
+ /* We need to set pend->conn here to make sure smc_cdc_tx_handler()
+ * can handle properly
+ */
+ smc_cdc_add_pending_send(conn, pend);
+
+ atomic_inc(&conn->cdc_pend_tx_wr);
+ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
+
rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+ if (unlikely(rc))
+ atomic_dec(&conn->cdc_pend_tx_wr);
+
return rc;
}
@@ -150,9 +168,11 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
again:
link = conn->lnk;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
if (rc)
- return rc;
+ goto put_out;
spin_lock_bh(&conn->send_lock);
if (link != conn->lnk) {
@@ -160,6 +180,7 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
spin_unlock_bh(&conn->send_lock);
smc_wr_tx_put_slot(link,
(struct smc_wr_tx_pend_priv *)pend);
+ smc_wr_tx_link_put(link);
if (again)
return -ENOLINK;
again = true;
@@ -167,6 +188,8 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
}
rc = smc_cdc_msg_send(conn, wr_buf, pend);
spin_unlock_bh(&conn->send_lock);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -188,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
return rc;
}
-static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
- unsigned long data)
+void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn)
{
- struct smc_connection *conn = (struct smc_connection *)data;
- struct smc_cdc_tx_pend *cdc_pend =
- (struct smc_cdc_tx_pend *)tx_pend;
-
- return cdc_pend->conn == conn;
-}
-
-static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
-{
- struct smc_cdc_tx_pend *cdc_pend =
- (struct smc_cdc_tx_pend *)tx_pend;
-
- cdc_pend->conn = NULL;
-}
-
-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
-{
- struct smc_link *link = conn->lnk;
-
- smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, smc_cdc_tx_dismisser,
- (unsigned long)conn);
+ wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr));
}
/* Send a SMC-D CDC header.
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 0a0a89abd38b..696cc11f2303 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend);
-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
+void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3f1343dfa16b..2a22dc85951e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -226,7 +226,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
- if (smc_link_usable(lnk))
+ if (smc_link_sendable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
wake_up_all(&lgr->llc_msg_waiter);
@@ -550,7 +550,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
to_lnk = &lgr->lnk[i];
break;
}
- if (!to_lnk) {
+ if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
smc_lgr_terminate_sched(lgr);
return NULL;
}
@@ -582,24 +582,26 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
read_unlock_bh(&lgr->conns_lock);
/* pre-fetch buffer outside of send_lock, might sleep */
rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
/* avoid race with smcr_tx_sndbuf_nonempty() */
spin_lock_bh(&conn->send_lock);
conn->lnk = to_lnk;
rc = smc_switch_cursor(smc, pend, wr_buf);
spin_unlock_bh(&conn->send_lock);
sock_put(&smc->sk);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
goto again;
}
read_unlock_bh(&lgr->conns_lock);
+ smc_wr_tx_link_put(to_lnk);
return to_lnk;
+
+err_out:
+ smcr_link_down_cond_sched(to_lnk);
+ smc_wr_tx_link_put(to_lnk);
+ return NULL;
}
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
@@ -655,7 +657,7 @@ void smc_conn_free(struct smc_connection *conn)
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
- smc_cdc_tx_dismiss_slots(conn);
+ smc_cdc_wait_pend_tx_wr(conn);
if (current_work() != &conn->abort_work)
cancel_work_sync(&conn->abort_work);
}
@@ -732,7 +734,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
smc_llc_link_clear(lnk, log);
smcr_buf_unmap_lgr(lnk);
smcr_rtoken_clear_link(lnk);
- smc_ib_modify_qp_reset(lnk);
+ smc_ib_modify_qp_error(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
@@ -876,7 +878,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
else
tasklet_unlock_wait(&conn->rx_tsklet);
} else {
- smc_cdc_tx_dismiss_slots(conn);
+ smc_cdc_wait_pend_tx_wr(conn);
}
smc_lgr_unregister_conn(conn);
smc_close_active_abort(smc);
@@ -1000,11 +1002,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
/* Called when an SMCR device is removed or the smc module is unloaded.
* If smcibdev is given, all SMCR link groups using this device are terminated.
* If smcibdev is NULL, all SMCR link groups are terminated.
+ *
+ * We must wait here for QPs been destroyed before we destroy the CQs,
+ * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
+ * smc_sock cannot be released.
*/
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
+ LIST_HEAD(lgr_linkdown_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
@@ -1016,7 +1023,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev)
- smcr_link_down_cond_sched(&lgr->lnk[i]);
+ list_move_tail(&lgr->list, &lgr_linkdown_list);
}
}
}
@@ -1028,6 +1035,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
__smc_lgr_terminate(lgr, false);
}
+ list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].smcibdev == smcibdev) {
+ mutex_lock(&lgr->llc_conf_mutex);
+ smcr_link_down_cond(&lgr->lnk[i]);
+ mutex_unlock(&lgr->llc_conf_mutex);
+ }
+ }
+ }
+
if (smcibdev) {
if (atomic_read(&smcibdev->lnk_cnt))
wait_event(smcibdev->lnks_deleted,
@@ -1127,7 +1144,6 @@ static void smcr_link_down(struct smc_link *lnk)
if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
return;
- smc_ib_modify_qp_reset(lnk);
to_lnk = smc_switch_conns(lgr, lnk, true);
if (!to_lnk) { /* no backup link available */
smcr_link_clear(lnk, true);
@@ -1355,6 +1371,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
+ init_waitqueue_head(&conn->cdc_pend_tx_wq);
INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 4745a9a5a28f..9364d0f35cce 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -359,6 +359,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
return true;
}
+static inline bool smc_link_sendable(struct smc_link *lnk)
+{
+ return smc_link_usable(lnk) &&
+ lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
+}
+
static inline bool smc_link_active(struct smc_link *lnk)
{
return lnk->state == SMC_LNK_ACTIVE;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index fc766b537ac7..f1ffbd414602 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -100,12 +100,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk)
IB_QP_MAX_QP_RD_ATOMIC);
}
-int smc_ib_modify_qp_reset(struct smc_link *lnk)
+int smc_ib_modify_qp_error(struct smc_link *lnk)
{
struct ib_qp_attr qp_attr;
memset(&qp_attr, 0, sizeof(qp_attr));
- qp_attr.qp_state = IB_QPS_RESET;
+ qp_attr.qp_state = IB_QPS_ERR;
return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE);
}
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 2ce481187dd0..f90d15eae2aa 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -74,6 +74,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
+int smc_ib_modify_qp_error(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot, u8 link_idx);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index d8fe4e1f24d1..ee1f0fdba085 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -383,9 +383,11 @@ int smc_llc_send_confirm_link(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
@@ -402,6 +404,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -415,9 +419,11 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
struct smc_link *link;
int i, rc, rtok_ix;
+ if (!smc_wr_tx_link_hold(send_link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
@@ -444,6 +450,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
(u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(send_link, pend);
+put_out:
+ smc_wr_tx_link_put(send_link);
return rc;
}
@@ -456,9 +464,11 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
@@ -467,6 +477,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -480,9 +492,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
memset(addllc, 0, sizeof(*addllc));
@@ -504,6 +518,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
}
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -517,9 +533,11 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
memset(delllc, 0, sizeof(*delllc));
@@ -536,6 +554,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -547,9 +567,11 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
testllc = (struct smc_llc_msg_test_link *)wr_buf;
memset(testllc, 0, sizeof(*testllc));
testllc->hd.common.type = SMC_LLC_TEST_LINK;
@@ -557,6 +579,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -567,13 +591,16 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/* schedule an llc send on link, may wait for buffers,
@@ -586,13 +613,16 @@ static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+ rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/********************************* receive ***********************************/
@@ -672,9 +702,11 @@ static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_buf_desc *rmb;
u8 n;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
memset(addc_llc, 0, sizeof(*addc_llc));
@@ -706,7 +738,10 @@ static int smc_llc_add_link_cont(struct smc_link *link,
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
if (lgr->role == SMC_CLNT)
addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
static int smc_llc_cli_rkey_exchange(struct smc_link *link,
@@ -1323,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
delllc.reason = htonl(rsn);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_usable(&lgr->lnk[i]))
+ if (!smc_link_sendable(&lgr->lnk[i]))
continue;
if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
break;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index ff02952b3d03..52ef1fca0b60 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -479,7 +479,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -488,8 +488,11 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
+ if (!link || !smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
+ smc_wr_tx_link_put(link);
if (rc == -EBUSY) {
struct smc_sock *smc =
container_of(conn, struct smc_sock, conn);
@@ -530,22 +533,7 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
out_unlock:
spin_unlock_bh(&conn->send_lock);
- return rc;
-}
-
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- struct smc_link *link = conn->lnk;
- int rc = -ENOLINK;
-
- if (!link)
- return rc;
-
- atomic_inc(&link->wr_tx_refcnt);
- if (smc_link_usable(link))
- rc = _smcr_tx_sndbuf_nonempty(conn);
- if (atomic_dec_and_test(&link->wr_tx_refcnt))
- wake_up_all(&link->wr_tx_wait);
+ smc_wr_tx_link_put(link);
return rc;
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 9dbe4804853e..5a81f8c9ebf9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link)
}
/* wait till all pending tx work requests on the given link are completed */
-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
{
- if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
- SMC_WR_TX_WAIT_PENDING_TIME))
- return 0;
- else /* timeout */
- return -EPIPE;
+ wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link));
}
static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
@@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
struct smc_wr_tx_pend pnd_snd;
struct smc_link *link;
u32 pnd_snd_idx;
- int i;
link = wc->qp->qp_context;
@@ -115,14 +110,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
return;
if (wc->status) {
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- /* clear full struct smc_wr_tx_pend including .priv */
- memset(&link->wr_tx_pends[i], 0,
- sizeof(link->wr_tx_pends[i]));
- memset(&link->wr_tx_bufs[i], 0,
- sizeof(link->wr_tx_bufs[i]));
- clear_bit(i, link->wr_tx_mask);
- }
/* terminate link */
smcr_link_down_cond_sched(link);
}
@@ -169,7 +156,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
*idx = link->wr_tx_cnt;
- if (!smc_link_usable(link))
+ if (!smc_link_sendable(link))
return -ENOLINK;
for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
if (!test_and_set_bit(*idx, link->wr_tx_mask))
@@ -212,7 +199,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
} else {
rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
- !smc_link_usable(link) ||
+ !smc_link_sendable(link) ||
lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
@@ -288,18 +275,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout)
{
struct smc_wr_tx_pend *pend;
+ u32 pnd_idx;
int rc;
pend = container_of(priv, struct smc_wr_tx_pend, priv);
pend->compl_requested = 1;
- init_completion(&link->wr_tx_compl[pend->idx]);
+ pnd_idx = pend->idx;
+ init_completion(&link->wr_tx_compl[pnd_idx]);
rc = smc_wr_tx_send(link, priv);
if (rc)
return rc;
/* wait for completion by smc_wr_tx_process_cqe() */
rc = wait_for_completion_interruptible_timeout(
- &link->wr_tx_compl[pend->idx], timeout);
+ &link->wr_tx_compl[pnd_idx], timeout);
if (rc <= 0)
rc = -ENODATA;
if (rc > 0)
@@ -349,25 +338,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
- smc_wr_tx_filter filter,
- smc_wr_tx_dismisser dismisser,
- unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_tx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
- if (wr_tx->type != wr_tx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- dismisser(tx_pend);
- }
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
@@ -572,10 +542,7 @@ void smc_wr_free_link(struct smc_link *lnk)
smc_wr_wakeup_reg_wait(lnk);
smc_wr_wakeup_tx_wait(lnk);
- if (smc_wr_tx_wait_no_pending_sends(lnk))
- memset(lnk->wr_tx_mask, 0,
- BITS_TO_LONGS(SMC_WR_BUF_CNT) *
- sizeof(*lnk->wr_tx_mask));
+ smc_wr_tx_wait_no_pending_sends(lnk);
wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 423b8709f1c9..cb58e60078f5 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -22,7 +22,6 @@
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
-#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ)
#define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */
@@ -60,6 +59,20 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
atomic_long_set(wr_tx_id, val);
}
+static inline bool smc_wr_tx_link_hold(struct smc_link *link)
+{
+ if (!smc_link_sendable(link))
+ return false;
+ atomic_inc(&link->wr_tx_refcnt);
+ return true;
+}
+
+static inline void smc_wr_tx_link_put(struct smc_link *link)
+{
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+}
+
static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk)
{
wake_up_all(&lnk->wr_tx_wait);
@@ -108,7 +121,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data);
-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
+void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
int smc_wr_rx_post_init(struct smc_link *link);
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index a4ca050815ab..dc1d3696af6b 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -252,7 +252,7 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "s390" && $bits == 64) {
if ($cc =~ /-DCC_USING_HOTPATCH/) {
- $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
$mcount_adjust = 0;
} else {
$mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f32026bc96b4..ff2191ae5352 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5665,7 +5665,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
char *addrp;
- u8 proto;
+ u8 proto = 0;
if (sk == NULL)
return NF_ACCEPT;
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index cd458e10cf2a..11dd8260c9cc 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -1046,10 +1046,11 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
return false;
if (!domain)
return true;
+ if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED]))
+ return false;
list_for_each_entry_rcu(ptr, &domain->acl_info_list, list,
srcu_read_lock_held(&tomoyo_ss)) {
u16 perm;
- u8 i;
if (ptr->is_deleted)
continue;
@@ -1060,23 +1061,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
*/
switch (ptr->type) {
case TOMOYO_TYPE_PATH_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm);
break;
case TOMOYO_TYPE_PATH2_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm);
break;
case TOMOYO_TYPE_PATH_NUMBER_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head)
+ perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head)
->perm);
break;
case TOMOYO_TYPE_MKDEV_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm);
break;
case TOMOYO_TYPE_INET_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm);
break;
case TOMOYO_TYPE_UNIX_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm);
break;
case TOMOYO_TYPE_MANUAL_TASK_ACL:
perm = 0;
@@ -1084,21 +1085,17 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
default:
perm = 1;
}
- for (i = 0; i < 16; i++)
- if (perm & (1 << i))
- count++;
+ count += hweight16(perm);
}
if (count < tomoyo_profile(domain->ns, domain->profile)->
pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
return true;
- if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
- domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
- /* r->granted = false; */
- tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+ WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true);
+ /* r->granted = false; */
+ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
#ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING
- pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
- domain->domainname->name);
+ pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
+ domain->domainname->name);
#endif
- }
return false;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1d727387cb20..5109d01619ee 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2354,7 +2354,7 @@ static int process_switch_event(struct perf_tool *tool,
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
- if (scripting_ops && scripting_ops->process_switch)
+ if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample))
scripting_ops->process_switch(event, sample, machine);
if (!script->show_switch_events)
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index c66da6ffd6d8..7badaf215de2 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 17512a43885e..f1fdaa270291 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -419,6 +419,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ const char *bind_addr = NULL;
int max_len, hdrlen;
int c;
@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv)
cfg_cpu = strtol(optarg, NULL, 0);
break;
case 'D':
- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ bind_addr = optarg;
break;
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv)
}
}
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
if (optind != argc)
usage(argv[0]);
^ permalink raw reply related
* Linux 5.15.13
From: Greg Kroah-Hartman @ 2022-01-05 12:34 UTC (permalink / raw)
To: linux-kernel, akpm, torvalds, stable; +Cc: lwn, jslaby, Greg Kroah-Hartman
I'm announcing the release of the 5.15.13 kernel.
All users of the 5.15 kernel series must upgrade.
The updated 5.15.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.15.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/admin-guide/kernel-parameters.txt | 2
Makefile | 2
arch/arm/include/asm/efi.h | 1
arch/arm64/include/asm/efi.h | 1
arch/parisc/kernel/traps.c | 2
arch/powerpc/mm/ptdump/ptdump.c | 2
arch/riscv/include/asm/efi.h | 1
arch/x86/include/asm/efi.h | 2
drivers/android/binder_alloc.c | 2
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 76 +++++++----
drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 7 +
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 1
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 2
drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 2
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 2
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2
drivers/gpu/drm/amd/include/discovery.h | 49 +++++++
drivers/gpu/drm/nouveau/nouveau_fence.c | 28 ++--
drivers/i2c/i2c-dev.c | 3
drivers/input/joystick/spaceball.c | 11 +
drivers/input/mouse/appletouch.c | 4
drivers/input/serio/i8042-x86ia64io.h | 21 +++
drivers/input/serio/i8042.c | 54 +++++--
drivers/net/ethernet/atheros/ag71xx.c | 23 +--
drivers/net/ethernet/freescale/fman/fman_port.c | 12 +
drivers/net/ethernet/intel/igc/igc_main.c | 6
drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++
drivers/net/ethernet/lantiq_xrx200.c | 2
drivers/net/ethernet/mellanox/mlx5/core/en.h | 5
drivers/net/ethernet/mellanox/mlx5/core/en/health.h | 2
drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 35 ++++-
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +
drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h | 27 +++
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 16 ++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 48 ++++--
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 29 ----
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3
drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 -
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5
drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2
drivers/net/usb/pegasus.c | 4
drivers/nfc/st21nfca/i2c.c | 29 ++--
drivers/platform/mellanox/mlxbf-pmc.c | 4
drivers/platform/x86/apple-gmux.c | 2
drivers/scsi/lpfc/lpfc_debugfs.c | 4
drivers/scsi/vmw_pvscsi.c | 7 -
drivers/usb/gadget/function/f_fs.c | 9 -
drivers/usb/host/xhci-pci.c | 5
drivers/usb/mtu3/mtu3_gadget.c | 8 +
drivers/usb/mtu3/mtu3_qmu.c | 7 -
drivers/virt/nitro_enclaves/ne_misc_dev.c | 5
fs/namespace.c | 9 -
include/linux/efi.h | 6
include/linux/memblock.h | 4
include/net/pkt_sched.h | 15 ++
include/net/sch_generic.h | 2
include/net/sctp/sctp.h | 6
include/net/sctp/structs.h | 3
include/uapi/linux/nfc.h | 6
mm/damon/dbgfs.c | 8 +
net/bridge/br_multicast.c | 32 ++++
net/bridge/br_netlink.c | 4
net/bridge/br_private.h | 12 +
net/bridge/br_sysfs_br.c | 4
net/bridge/br_vlan_options.c | 4
net/core/dev.c | 8 -
net/ipv4/af_inet.c | 10 -
net/ipv6/udp.c | 2
net/ncsi/ncsi-netlink.c | 6
net/sched/act_ct.c | 14 +-
net/sched/cls_api.c | 6
net/sched/cls_flower.c | 3
net/sched/sch_frag.c | 3
net/sctp/diag.c | 12 -
net/sctp/endpointola.c | 23 ++-
net/sctp/socket.c | 23 ++-
net/smc/smc.h | 5
net/smc/smc_cdc.c | 52 +++----
net/smc/smc_cdc.h | 2
net/smc/smc_core.c | 27 +++
net/smc/smc_core.h | 6
net/smc/smc_ib.c | 4
net/smc/smc_ib.h | 1
net/smc/smc_llc.c | 2
net/smc/smc_wr.c | 51 +------
net/smc/smc_wr.h | 5
scripts/recordmcount.pl | 2
security/selinux/hooks.c | 2
security/tomoyo/util.c | 31 ++--
sound/hda/intel-sdw-acpi.c | 13 +
tools/perf/builtin-script.c | 2
tools/perf/scripts/python/intel-pt-events.py | 23 +--
tools/perf/util/intel-pt.c | 1
tools/testing/selftests/net/udpgro_fwd.sh | 6
tools/testing/selftests/net/udpgso.c | 12 -
tools/testing/selftests/net/udpgso_bench_tx.c | 8 +
102 files changed, 733 insertions(+), 372 deletions(-)
Adrian Hunter (3):
perf intel-pt: Fix parsing of VM time correlation arguments
perf script: Fix CPU filtering of a script's switch events
perf scripts python: intel-pt-events.py: Fix printing of switch events
Aleksander Jan Bajkowski (1):
net: lantiq_xrx200: fix statistics of received bytes
Alex Deucher (1):
drm/amdgpu: add support for IP discovery gc_info table v2
Alexey Makhalov (1):
scsi: vmw_pvscsi: Set residual data length conditionally
Amir Tzin (1):
net/mlx5e: Wrap the tx reporter dump callback to extract the sq
Andra Paraschiv (1):
nitro_enclaves: Use get_user_pages_unlocked() call to handle mmap assert
Angus Wang (1):
drm/amd/display: Changed pipe split policy to allow for multi-display pipe split
Chris Mi (2):
net/mlx5: Fix tc max supported prio for nic mode
net/mlx5e: Delete forward rule for ct or sample action
Christian Brauner (1):
fs/mount_setattr: always cleanup mount_kattr
Christian König (1):
drm/nouveau: wait for the exclusive fence after the shared ones v2
Christophe JAILLET (2):
net: ag71xx: Fix a potential double free in error handling paths
ionic: Initialize the 'lif->dbid_inuse' bitmap
Chunfeng Yun (3):
usb: mtu3: add memory barrier before set GPD's HWO
usb: mtu3: fix list_head check warning
usb: mtu3: set interval of FS intr and isoc endpoint
Coco Li (2):
udp: using datalen to cap ipv6 udp max gso segments
selftests: Calculate udpgso segment count without header adjustment
Dan Carpenter (1):
scsi: lpfc: Terminate string in lpfc_debugfs_nvmeio_trc_write()
Dmitry V. Levin (1):
uapi: fix linux/nfc.h userspace compilation errors
Dmitry Vyukov (1):
tomoyo: Check exceeded quota early in tomoyo_domain_quota_is_ok().
Dust Li (2):
net/smc: don't send CDC/LLC message if link not ready
net/smc: fix kernel panic caused by race of smc_sock
Gal Pressman (1):
net/mlx5e: Fix wrong features assignment in case of error
Greg Kroah-Hartman (1):
Linux 5.15.13
Heiko Carstens (1):
recordmcount.pl: fix typo in s390 mcount regex
Helge Deller (1):
parisc: Clear stale IIR value on instruction access rights trap
Jackie Liu (1):
memblock: fix memblock_phys_alloc() section mismatch error
James McLaughlin (1):
igc: Fix TX timestamp support for non-MSI-X platforms
Javier Martinez Canillas (1):
efi: Move efifb_setup_from_dmi() prototype from arch headers
Jianguo Wu (2):
selftests: net: Fix a typo in udpgro_fwd.sh
selftests: net: using ping6 for IPv6 in udpgro_fwd.sh
Jiasheng Jiang (1):
net/ncsi: check for error return from call to nla_put_u32
Karsten Graul (1):
net/smc: fix using of uninitialized completions
Krzysztof Kozlowski (1):
nfc: uapi: use kernel size_t to fix user-space builds
Leo L. Schwab (1):
Input: spaceball - fix parsing of movement data packets
Libin Yang (2):
ALSA: hda: intel-sdw-acpi: harden detection of controller
ALSA: hda: intel-sdw-acpi: go through HDAS ACPI at max depth of 2
Mathias Nyman (1):
xhci: Fresco FL1100 controller should not have BROKEN_MSI quirk set.
Matthias-Christian Ott (1):
net: usb: pegasus: Do not drop long Ethernet frames
Maxim Mikityanskiy (2):
net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow
net/mlx5e: Fix ICOSQ recovery flow for XSK
Miaoqian Lin (3):
platform/mellanox: mlxbf-pmc: Fix an IS_ERR() vs NULL bug in mlxbf_pmc_map_counters
net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources
fsl/fman: Fix missing put_device() call in fman_port_probe
Michael Ellerman (1):
powerpc/ptdump: Fix DEBUG_WX since generic ptdump conversion
Moshe Shemesh (1):
net/mlx5: Fix SF health recovery flow
Muchun Song (1):
net: fix use-after-free in tw_timer_handler
Nicholas Kazlauskas (2):
drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization
drm/amd/display: Set optimize_pwr_state for DCN31
Nikolay Aleksandrov (3):
net: bridge: mcast: add and enforce query interval minimum
net: bridge: mcast: add and enforce startup query interval minimum
net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper
Paul Blakey (1):
net/sched: Extend qdisc control block with tc control block
Pavel Skripkin (2):
i2c: validate user data in compat ioctl
Input: appletouch - initialize work before device registration
Roi Dayan (1):
net/mlx5e: Use tc sample stubs instead of ifdefs in source file
Samuel Čavoj (1):
Input: i8042 - enable deferred probe quirk for ASUS UM325UA
SeongJae Park (1):
mm/damon/dbgfs: fix 'struct pid' leaks in 'dbgfs_target_ids_write()'
Shay Drory (1):
net/mlx5: Fix error print in case of IRQ request failed
Takashi Iwai (1):
Input: i8042 - add deferred probe support
Tetsuo Handa (1):
tomoyo: use hwight16() in tomoyo_domain_quota_is_ok()
Todd Kjos (1):
binder: fix async_free_space accounting for empty parcels
Tom Rix (1):
selinux: initialize proto variable in selinux_ip_postroute_compat()
Vincent Pelletier (1):
usb: gadget: f_fs: Clear ffs_eventfd in ffs_data_clear.
Vinicius Costa Gomes (1):
igc: Do not enable crosstimestamping for i225-V models
Wang Qing (1):
platform/x86: apple-gmux: use resource_size() with res
Wei Yongjun (1):
NFC: st21nfca: Fix memory leak in device probe and remove
Xin Long (1):
sctp: use call_rcu to free endpoint
chen gong (1):
drm/amdgpu: When the VCN(1.0) block is suspended, powergating is explicitly enabled
wujianguo (1):
selftests/net: udpgso_bench_tx: fix dst ip argument
^ permalink raw reply
* Re: Linux 5.15.13
From: Greg Kroah-Hartman @ 2022-01-05 12:34 UTC (permalink / raw)
To: linux-kernel, akpm, torvalds, stable; +Cc: lwn, jslaby, Greg Kroah-Hartman
In-Reply-To: <164138606435252@kroah.com>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a454f438bd62..8ff6dafafdf8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1690,6 +1690,8 @@
architectures force reset to be always executed
i8042.unlock [HW] Unlock (ignore) the keylock
i8042.kbdreset [HW] Reset device connected to KBD port
+ i8042.probe_defer
+ [HW] Allow deferred probing upon i8042 probe errors
i810= [HW,DRM]
diff --git a/Makefile b/Makefile
index 474b2a2292ca..0964b940b889 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 15
-SUBLEVEL = 12
+SUBLEVEL = 13
EXTRAVERSION =
NAME = Trick or Treat
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index a6f3b179e8a9..27218eabbf9a 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -17,7 +17,6 @@
#ifdef CONFIG_EFI
void efi_init(void);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index d3e1825337be..ad55079abe47 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -14,7 +14,6 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
#else
#define efi_init()
#endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 747c328fb886..197cb8480350 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -729,6 +729,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
}
mmap_read_unlock(current->mm);
}
+ /* CPU could not fetch instruction, so clear stale IIR value. */
+ regs->iir = 0xbaadf00d;
fallthrough;
case 27:
/* Data memory protection ID trap */
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index bf251191e78d..32bfb215c485 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -183,7 +183,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
pte_t pte = __pte(st->current_flags);
- if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx)
+ if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx)
return;
if (!pte_write(pte) || !pte_exec(pte))
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index 49b398fe99f1..cc4f6787f937 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -13,7 +13,6 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
#else
#define efi_init()
#endif
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 4d0b126835b8..63158fd55856 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -197,8 +197,6 @@ static inline bool efi_runtime_supported(void)
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
-
extern void efi_thunk_runtime_setup(void);
efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
unsigned long descriptor_size,
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 340515f54498..47bc74a8c7b6 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -671,7 +671,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc,
BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size);
if (buffer->async_transaction) {
- alloc->free_async_space += size + sizeof(struct binder_buffer);
+ alloc->free_async_space += buffer_size + sizeof(struct binder_buffer);
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
"%d: binder_free_buf size %zd async free %zd\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ada7bc19118a..a919f5daacd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -415,10 +415,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
}
}
+union gc_info {
+ struct gc_info_v1_0 v1;
+ struct gc_info_v2_0 v2;
+};
+
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
- struct gc_info_v1_0 *gc_info;
+ union gc_info *gc_info;
if (!adev->mman.discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
@@ -426,27 +431,54 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
}
bhdr = (struct binary_header *)adev->mman.discovery_bin;
- gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
+ gc_info = (union gc_info *)(adev->mman.discovery_bin +
le16_to_cpu(bhdr->table_list[GC].offset));
-
- adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
- adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
- le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
- adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
- adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
- adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
- adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
- adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
- adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
- adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
- adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
- adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
- le32_to_cpu(gc_info->gc_num_sa_per_se);
- adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
-
+ switch (gc_info->v1.header.version_major) {
+ case 1:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ break;
+ case 2:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled GC info table %d.%d\n",
+ gc_info->v1.header.version_major,
+ gc_info->v1.header.version_minor);
+ return -EINVAL;
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 121ee9f2b8d1..462008d50690 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -253,6 +253,13 @@ static int vcn_v1_0_suspend(void *handle)
{
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool idle_work_unexecuted;
+
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (idle_work_unexecuted) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+ }
r = vcn_v1_0_hw_fini(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 377c4e53a2b3..407e19412a94 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -157,6 +157,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
union display_idle_optimization_u idle_info = { 0 };
idle_info.idle_info.df_request_disabled = 1;
idle_info.idle_info.phy_ref_clk_off = 1;
+ idle_info.idle_info.s0i2_rdy = 1;
dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index ede11eb120d4..b01a21d8336c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1067,7 +1067,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index fbbdf9976183..92a308ad1213 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -874,7 +874,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.clock_trace = true,
.disable_pplib_clock_request = true,
.min_disp_clk_khz = 100000,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index a0de309475a9..89a237b5864c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 912285fdce18..9e2f18a0c948 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -863,7 +863,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_clock_gate = true,
.disable_pplib_clock_request = true,
.disable_pplib_wm_range = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index 7d3ff5d44402..2292bb82026e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index dd38796ba30a..589ddab61c2a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
index ac8fb202fd5e..4e9fe090b770 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
@@ -100,6 +100,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.z10_save_init = dcn31_z10_save_init,
.is_abm_supported = dcn31_is_abm_supported,
.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+ .optimize_pwr_state = dcn21_optimize_pwr_state,
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.update_visual_confirm_color = dcn20_update_visual_confirm_color,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 79e92ecca96c..6d8f26dada72 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -923,7 +923,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = false,
- .pipe_split_policy = MPC_SPLIT_AVOID,
+ .pipe_split_policy = MPC_SPLIT_DYNAMIC,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h
index 7ec4331e67f2..a486769b66c6 100644
--- a/drivers/gpu/drm/amd/include/discovery.h
+++ b/drivers/gpu/drm/amd/include/discovery.h
@@ -143,6 +143,55 @@ struct gc_info_v1_0 {
uint32_t gc_num_gl2a;
};
+struct gc_info_v1_1 {
+ struct gpu_info_header header;
+
+ uint32_t gc_num_se;
+ uint32_t gc_num_wgp0_per_sa;
+ uint32_t gc_num_wgp1_per_sa;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_gl2c;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+ uint32_t gc_num_sc_per_se;
+ uint32_t gc_num_sa_per_se;
+ uint32_t gc_num_packer_per_sc;
+ uint32_t gc_num_gl2a;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+};
+
+struct gc_info_v2_0 {
+ struct gpu_info_header header;
+
+ uint32_t gc_num_se;
+ uint32_t gc_num_cu_per_sh;
+ uint32_t gc_num_sh_per_se;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_tccs;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+ uint32_t gc_num_sc_per_se;
+ uint32_t gc_num_packer_per_sc;
+};
+
typedef struct harvest_info_header {
uint32_t signature; /* Table Signature */
uint32_t version; /* Table Version */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 05d0b3eb3690..0ae416aa76dc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -353,15 +353,22 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
if (ret)
return ret;
- }
- fobj = dma_resv_shared_list(resv);
- fence = dma_resv_excl_fence(resv);
+ fobj = NULL;
+ } else {
+ fobj = dma_resv_shared_list(resv);
+ }
- if (fence) {
+ /* Waiting for the exclusive fence first causes performance regressions
+ * under some circumstances. So manually wait for the shared ones first.
+ */
+ for (i = 0; i < (fobj ? fobj->shared_count : 0) && !ret; ++i) {
struct nouveau_channel *prev = NULL;
bool must_wait = true;
+ fence = rcu_dereference_protected(fobj->shared[i],
+ dma_resv_held(resv));
+
f = nouveau_local_fence(fence, chan->drm);
if (f) {
rcu_read_lock();
@@ -373,20 +380,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
if (must_wait)
ret = dma_fence_wait(fence, intr);
-
- return ret;
}
- if (!exclusive || !fobj)
- return ret;
-
- for (i = 0; i < fobj->shared_count && !ret; ++i) {
+ fence = dma_resv_excl_fence(resv);
+ if (fence) {
struct nouveau_channel *prev = NULL;
bool must_wait = true;
- fence = rcu_dereference_protected(fobj->shared[i],
- dma_resv_held(resv));
-
f = nouveau_local_fence(fence, chan->drm);
if (f) {
rcu_read_lock();
@@ -398,6 +398,8 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
if (must_wait)
ret = dma_fence_wait(fence, intr);
+
+ return ret;
}
return ret;
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index bce0e8bb7852..cf5d049342ea 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -535,6 +535,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo
sizeof(rdwr_arg)))
return -EFAULT;
+ if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0)
+ return -EINVAL;
+
if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS)
return -EINVAL;
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index 429411c6c0a8..a85a4f33aea8 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/input.h>
#include <linux/serio.h>
+#include <asm/unaligned.h>
#define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver"
@@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball)
case 'D': /* Ball data */
if (spaceball->idx != 15) return;
- for (i = 0; i < 6; i++)
+ /*
+ * Skip first three bytes; read six axes worth of data.
+ * Axis values are signed 16-bit big-endian.
+ */
+ data += 3;
+ for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) {
input_report_abs(dev, spaceball_axes[i],
- (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2]));
+ (__s16)get_unaligned_be16(&data[i * 2]));
+ }
break;
case 'K': /* Button data */
diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c
index bfa26651c0be..627048bc6a12 100644
--- a/drivers/input/mouse/appletouch.c
+++ b/drivers/input/mouse/appletouch.c
@@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface,
set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
set_bit(BTN_LEFT, input_dev->keybit);
+ INIT_WORK(&dev->work, atp_reinit);
+
error = input_register_device(dev->input);
if (error)
goto err_free_buffer;
@@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface,
/* save our data pointer in this interface device */
usb_set_intfdata(iface, dev);
- INIT_WORK(&dev->work, atp_reinit);
-
return 0;
err_free_buffer:
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index aedd05541044..148a7c5fd0e2 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -995,6 +995,24 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = {
{ }
};
+static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = {
+ {
+ /* ASUS ZenBook UX425UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"),
+ },
+ },
+ {
+ /* ASUS ZenBook UM325UA */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"),
+ },
+ },
+ { }
+};
+
#endif /* CONFIG_X86 */
#ifdef CONFIG_PNP
@@ -1315,6 +1333,9 @@ static int __init i8042_platform_init(void)
if (dmi_check_system(i8042_dmi_kbdreset_table))
i8042_kbdreset = true;
+ if (dmi_check_system(i8042_dmi_probe_defer_table))
+ i8042_probe_defer = true;
+
/*
* A20 was already enabled during early kernel init. But some buggy
* BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 0b9f1d0a8f8b..3fc0a89cc785 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -45,6 +45,10 @@ static bool i8042_unlock;
module_param_named(unlock, i8042_unlock, bool, 0);
MODULE_PARM_DESC(unlock, "Ignore keyboard lock.");
+static bool i8042_probe_defer;
+module_param_named(probe_defer, i8042_probe_defer, bool, 0);
+MODULE_PARM_DESC(probe_defer, "Allow deferred probing.");
+
enum i8042_controller_reset_mode {
I8042_RESET_NEVER,
I8042_RESET_ALWAYS,
@@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version)
* LCS/Telegraphics.
*/
-static int __init i8042_check_mux(void)
+static int i8042_check_mux(void)
{
unsigned char mux_version;
@@ -740,10 +744,10 @@ static int __init i8042_check_mux(void)
/*
* The following is used to test AUX IRQ delivery.
*/
-static struct completion i8042_aux_irq_delivered __initdata;
-static bool i8042_irq_being_tested __initdata;
+static struct completion i8042_aux_irq_delivered;
+static bool i8042_irq_being_tested;
-static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
+static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id)
{
unsigned long flags;
unsigned char str, data;
@@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id)
* verifies success by readinng CTR. Used when testing for presence of AUX
* port.
*/
-static int __init i8042_toggle_aux(bool on)
+static int i8042_toggle_aux(bool on)
{
unsigned char param;
int i;
@@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on)
* the presence of an AUX interface.
*/
-static int __init i8042_check_aux(void)
+static int i8042_check_aux(void)
{
int retval = -1;
bool irq_registered = false;
@@ -1005,7 +1009,7 @@ static int i8042_controller_init(void)
if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) {
pr_err("Can't read CTR while initializing i8042\n");
- return -EIO;
+ return i8042_probe_defer ? -EPROBE_DEFER : -EIO;
}
} while (n < 2 || ctr[0] != ctr[1]);
@@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev)
i8042_controller_reset(false);
}
-static int __init i8042_create_kbd_port(void)
+static int i8042_create_kbd_port(void)
{
struct serio *serio;
struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO];
@@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void)
return 0;
}
-static int __init i8042_create_aux_port(int idx)
+static int i8042_create_aux_port(int idx)
{
struct serio *serio;
int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx;
@@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx)
return 0;
}
-static void __init i8042_free_kbd_port(void)
+static void i8042_free_kbd_port(void)
{
kfree(i8042_ports[I8042_KBD_PORT_NO].serio);
i8042_ports[I8042_KBD_PORT_NO].serio = NULL;
}
-static void __init i8042_free_aux_ports(void)
+static void i8042_free_aux_ports(void)
{
int i;
@@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void)
}
}
-static void __init i8042_register_ports(void)
+static void i8042_register_ports(void)
{
int i;
@@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void)
i8042_aux_irq_registered = i8042_kbd_irq_registered = false;
}
-static int __init i8042_setup_aux(void)
+static int i8042_setup_aux(void)
{
int (*aux_enable)(void);
int error;
@@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void)
return error;
}
-static int __init i8042_setup_kbd(void)
+static int i8042_setup_kbd(void)
{
int error;
@@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb,
return 0;
}
-static int __init i8042_probe(struct platform_device *dev)
+static int i8042_probe(struct platform_device *dev)
{
int error;
@@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = {
.pm = &i8042_pm_ops,
#endif
},
+ .probe = i8042_probe,
.remove = i8042_remove,
.shutdown = i8042_shutdown,
};
@@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = {
static int __init i8042_init(void)
{
- struct platform_device *pdev;
int err;
dbg_init();
@@ -1626,17 +1630,29 @@ static int __init i8042_init(void)
/* Set this before creating the dev to allow i8042_command to work right away */
i8042_present = true;
- pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0);
- if (IS_ERR(pdev)) {
- err = PTR_ERR(pdev);
+ err = platform_driver_register(&i8042_driver);
+ if (err)
goto err_platform_exit;
+
+ i8042_platform_device = platform_device_alloc("i8042", -1);
+ if (!i8042_platform_device) {
+ err = -ENOMEM;
+ goto err_unregister_driver;
}
+ err = platform_device_add(i8042_platform_device);
+ if (err)
+ goto err_free_device;
+
bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block);
panic_blink = i8042_panic_blink;
return 0;
+err_free_device:
+ platform_device_put(i8042_platform_device);
+err_unregister_driver:
+ platform_driver_unregister(&i8042_driver);
err_platform_exit:
i8042_platform_exit();
return err;
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 02ae98aabf91..416a5c99db5a 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1915,15 +1915,12 @@ static int ag71xx_probe(struct platform_device *pdev)
ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
if (IS_ERR(ag->mac_reset)) {
netif_err(ag, probe, ndev, "missing mac reset\n");
- err = PTR_ERR(ag->mac_reset);
- goto err_free;
+ return PTR_ERR(ag->mac_reset);
}
ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
- if (!ag->mac_base) {
- err = -ENOMEM;
- goto err_free;
- }
+ if (!ag->mac_base)
+ return -ENOMEM;
ndev->irq = platform_get_irq(pdev, 0);
err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
@@ -1931,7 +1928,7 @@ static int ag71xx_probe(struct platform_device *pdev)
if (err) {
netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
ndev->irq);
- goto err_free;
+ return err;
}
ndev->netdev_ops = &ag71xx_netdev_ops;
@@ -1959,10 +1956,8 @@ static int ag71xx_probe(struct platform_device *pdev)
ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
sizeof(struct ag71xx_desc),
&ag->stop_desc_dma, GFP_KERNEL);
- if (!ag->stop_desc) {
- err = -ENOMEM;
- goto err_free;
- }
+ if (!ag->stop_desc)
+ return -ENOMEM;
ag->stop_desc->data = 0;
ag->stop_desc->ctrl = 0;
@@ -1977,7 +1972,7 @@ static int ag71xx_probe(struct platform_device *pdev)
err = of_get_phy_mode(np, &ag->phy_if_mode);
if (err) {
netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
- goto err_free;
+ return err;
}
netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
@@ -1985,7 +1980,7 @@ static int ag71xx_probe(struct platform_device *pdev)
err = clk_prepare_enable(ag->clk_eth);
if (err) {
netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
- goto err_free;
+ return err;
}
ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
@@ -2021,8 +2016,6 @@ static int ag71xx_probe(struct platform_device *pdev)
ag71xx_mdio_remove(ag);
err_put_clk:
clk_disable_unprepare(ag->clk_eth);
-err_free:
- free_netdev(ndev);
return err;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index d9baac0dbc7d..4c9d05c45c03 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev)
fman = dev_get_drvdata(&fm_pdev->dev);
if (!fman) {
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
err = of_property_read_u32(port_node, "cell-index", &val);
@@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
__func__, port_node);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port_id = (u8)val;
port->dts_params.id = port_id;
@@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev)
} else {
dev_err(port->dev, "%s: Illegal port type\n", __func__);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port->dts_params.type = port_type;
@@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: incorrect qman-channel-id\n",
__func__);
err = -EINVAL;
- goto return_err;
+ goto put_device;
}
port->dts_params.qman_channel_id = qman_channel_id;
}
@@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev)
dev_err(port->dev, "%s: of_address_to_resource() failed\n",
__func__);
err = -ENOMEM;
- goto return_err;
+ goto put_device;
}
port->dts_params.fman = fman;
@@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev)
return 0;
+put_device:
+ put_device(&fm_pdev->dev);
return_err:
of_node_put(port_node);
free_port:
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 0e19b4d02e62..0a96627391a8 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -5466,6 +5466,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
+ if (icr & IGC_ICR_TS)
+ igc_tsync_interrupt(adapter);
+
napi_schedule(&q_vector->napi);
return IRQ_HANDLED;
@@ -5509,6 +5512,9 @@ static irqreturn_t igc_intr(int irq, void *data)
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
+ if (icr & IGC_ICR_TS)
+ igc_tsync_interrupt(adapter);
+
napi_schedule(&q_vector->napi);
return IRQ_HANDLED;
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 30568e3544cd..4f9245aa79a1 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
*/
static bool igc_is_crosststamp_supported(struct igc_adapter *adapter)
{
- return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false;
+ if (!IS_ENABLED(CONFIG_X86_TSC))
+ return false;
+
+ /* FIXME: it was noticed that enabling support for PCIe PTM in
+ * some i225-V models could cause lockups when bringing the
+ * interface up/down. There should be no downsides to
+ * disabling crosstimestamping support for i225-V, as it
+ * doesn't have any PTP support. That way we gain some time
+ * while root causing the issue.
+ */
+ if (adapter->pdev->device == IGC_DEV_ID_I225_V)
+ return false;
+
+ return pcie_ptm_enabled(adapter->pdev);
}
static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp)
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index fb78f17d734f..b02f796b5422 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
skb->protocol = eth_type_trans(skb, net_dev);
netif_receive_skb(skb);
net_dev->stats.rx_packets++;
- net_dev->stats.rx_bytes += len - ETH_FCS_LEN;
+ net_dev->stats.rx_bytes += len;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c10a107a3ea5..7204bc86e474 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -727,6 +727,8 @@ struct mlx5e_channel {
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix;
int cpu;
+ /* Sync between icosq recovery and XSK enable/disable. */
+ struct mutex icosq_recovery_lock;
};
struct mlx5e_ptp;
@@ -954,9 +956,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
void mlx5e_destroy_rq(struct mlx5e_rq *rq);
struct mlx5e_sq_param;
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq);
-void mlx5e_close_icosq(struct mlx5e_icosq *sq);
int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
struct mlx5e_xdpsq *sq, bool is_redirect);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 018262d0164b..3aaf3c2752fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index de03684528bb..8451940c16ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -647,9 +647,7 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk
"Failed to restore tunnel info for sampled packet\n");
return;
}
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
mlx5e_tc_sample_skb(skb, mapped_obj);
-#endif /* CONFIG_MLX5_TC_SAMPLE */
mlx5_rep_tc_post_napi_receive(tc_priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 0eb125316fe2..e329158fdc55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -59,6 +59,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
{
+ struct mlx5e_rq *xskrq = NULL;
struct mlx5_core_dev *mdev;
struct mlx5e_icosq *icosq;
struct net_device *dev;
@@ -67,7 +68,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
int err;
icosq = ctx;
+
+ mutex_lock(&icosq->channel->icosq_recovery_lock);
+
+ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
rq = &icosq->channel->rq;
+ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
+ xskrq = &icosq->channel->xskrq;
mdev = icosq->channel->mdev;
dev = icosq->channel->netdev;
err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
@@ -81,6 +88,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_deactivate_rq(rq);
+ if (xskrq)
+ mlx5e_deactivate_rq(xskrq);
+
err = mlx5e_wait_for_icosq_flush(icosq);
if (err)
goto out;
@@ -94,15 +104,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_reset_icosq_cc_pc(icosq);
+
mlx5e_free_rx_in_progress_descs(rq);
+ if (xskrq)
+ mlx5e_free_rx_in_progress_descs(xskrq);
+
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
- mlx5e_activate_rq(rq);
+ mlx5e_activate_rq(rq);
rq->stats->recover++;
+
+ if (xskrq) {
+ mlx5e_activate_rq(xskrq);
+ xskrq->stats->recover++;
+ }
+
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+
return 0;
out:
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
return err;
}
@@ -703,6 +726,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
+{
+ mutex_lock(&c->icosq_recovery_lock);
+}
+
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
+{
+ mutex_unlock(&c->icosq_recovery_lock);
+}
+
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
.name = "rx",
.recover = mlx5e_rx_reporter_recover,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index bb682fd751c9..802459999464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -463,6 +463,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
struct devlink_fmsg *fmsg)
{
@@ -558,7 +566,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
to_ctx.sq = sq;
err_ctx.ctx = &to_ctx;
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
- err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
snprintf(err_str, sizeof(err_str),
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
index db0146df9b30..9ef8a49d7801 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -19,6 +19,8 @@ struct mlx5e_sample_attr {
struct mlx5e_sample_flow *sample_flow;
};
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+
void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
struct mlx5_flow_handle *
@@ -38,4 +40,29 @@ mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
void
mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+#else /* CONFIG_MLX5_TC_SAMPLE */
+
+static inline struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ u32 tunnel_id)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr) {}
+
+static inline struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{ return ERR_PTR(-EOPNOTSUPP); }
+
+static inline void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample) {}
+
+static inline void
+mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) {}
+
+#endif /* CONFIG_MLX5_TC_SAMPLE */
#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 538bc2419bd8..8526a5fbbf0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -4,6 +4,7 @@
#include "setup.h"
#include "en/params.h"
#include "en/txrx.h"
+#include "en/health.h"
/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
* change unexpectedly, and mlx5e has a minimum valid stride size for striding
@@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
void mlx5e_activate_xsk(struct mlx5e_channel *c)
{
+ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
+ * activating XSKRQ in the middle of recovery.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+
/* TX queue is created active. */
spin_lock_bh(&c->async_icosq_lock);
@@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
{
- mlx5e_deactivate_rq(&c->xskrq);
+ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
+ * middle of recovery. Suspend the recovery to avoid it.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+
/* TX queue is disabled on close. */
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8cf5fbebd674..baa0d7d48fc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -911,8 +911,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
void mlx5e_close_rq(struct mlx5e_rq *rq)
{
cancel_work_sync(&rq->dim.work);
- if (rq->icosq)
- cancel_work_sync(&rq->icosq->recover_work);
cancel_work_sync(&rq->recover_work);
mlx5e_destroy_rq(rq);
mlx5e_free_rx_descs(rq);
@@ -1038,9 +1036,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_icosq_cqe_err(sq);
}
+static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
+{
+ struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
+ recover_work);
+
+ /* Not implemented yet. */
+
+ netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
+}
+
static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
struct mlx5e_sq_param *param,
- struct mlx5e_icosq *sq)
+ struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
struct mlx5_core_dev *mdev = c->mdev;
@@ -1061,7 +1070,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
if (err)
goto err_sq_wq_destroy;
- INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
+ INIT_WORK(&sq->recover_work, recover_work_func);
return 0;
@@ -1399,13 +1408,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_tx_err_cqe(sq);
}
-int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
+static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
+ struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
+ work_func_t recover_work_func)
{
struct mlx5e_create_sq_param csp = {};
int err;
- err = mlx5e_alloc_icosq(c, param, sq);
+ err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
if (err)
return err;
@@ -1444,7 +1454,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
synchronize_net(); /* Sync with NAPI. */
}
-void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
{
struct mlx5e_channel *c = sq->channel;
@@ -1871,11 +1881,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
spin_lock_init(&c->async_icosq_lock);
- err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq);
+ err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
+ mlx5e_async_icosq_err_cqe_work);
if (err)
goto err_close_xdpsq_cq;
- err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+ mutex_init(&c->icosq_recovery_lock);
+
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
+ mlx5e_icosq_err_cqe_work);
if (err)
goto err_close_async_icosq;
@@ -1943,9 +1957,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
mlx5e_close_xdpsq(&c->xdpsq);
if (c->xdp)
mlx5e_close_xdpsq(&c->rq_xdpsq);
+ /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
+ cancel_work_sync(&c->icosq.recover_work);
mlx5e_close_rq(&c->rq);
mlx5e_close_sqs(c);
mlx5e_close_icosq(&c->icosq);
+ mutex_destroy(&c->icosq_recovery_lock);
mlx5e_close_icosq(&c->async_icosq);
if (c->xdp)
mlx5e_close_cq(&c->rq_xdpsq.cq);
@@ -3433,12 +3450,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
static int mlx5e_handle_feature(struct net_device *netdev,
netdev_features_t *features,
- netdev_features_t wanted_features,
netdev_features_t feature,
mlx5e_feature_handler feature_handler)
{
- netdev_features_t changes = wanted_features ^ netdev->features;
- bool enable = !!(wanted_features & feature);
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
int err;
if (!(changes & feature))
@@ -3446,22 +3462,22 @@ static int mlx5e_handle_feature(struct net_device *netdev,
err = feature_handler(netdev, enable);
if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
netdev_err(netdev, "%s feature %pNF failed, err %d\n",
enable ? "Enable" : "Disable", &feature, err);
return err;
}
- MLX5E_SET_FEATURE(features, feature, enable);
return 0;
}
int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
{
- netdev_features_t oper_features = netdev->features;
+ netdev_features_t oper_features = features;
int err = 0;
#define MLX5E_HANDLE_FEATURE(feature, handler) \
- mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e3b320b6d85b..fa461bc57bae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -248,7 +248,6 @@ get_ct_priv(struct mlx5e_priv *priv)
return priv->fs.tc.ct;
}
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
@@ -265,7 +264,6 @@ get_sample_priv(struct mlx5e_priv *priv)
return NULL;
}
-#endif
struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
@@ -1148,11 +1146,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
flow, spec, attr,
mod_hdr_acts);
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
} else if (flow_flag_test(flow, SAMPLE)) {
rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
mlx5e_tc_get_flow_tun_id(flow));
-#endif
} else {
rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}
@@ -1183,23 +1179,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
goto offload_rule_0;
- if (flow_flag_test(flow, CT)) {
- mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
- return;
- }
-
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- if (flow_flag_test(flow, SAMPLE)) {
- mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
- return;
- }
-#endif
-
if (attr->esw_attr->split_count)
mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+ if (flow_flag_test(flow, CT))
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+ else if (flow_flag_test(flow, SAMPLE))
+ mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+ else
offload_rule_0:
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}
struct mlx5_flow_handle *
@@ -5014,9 +5003,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
MLX5_FLOW_NAMESPACE_FDB,
uplink_priv->post_act);
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
-#endif
mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
@@ -5060,9 +5047,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
err_enc_opts_mapping:
mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
-#endif
mlx5_tc_ct_clean(uplink_priv->ct_priv);
netdev_warn(priv->netdev,
"Failed to initialize tc (eswitch), err: %d", err);
@@ -5082,9 +5067,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
mapping_destroy(uplink_priv->tunnel_mapping);
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
-#endif
mlx5_tc_ct_clean(uplink_priv->ct_priv);
mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 97e5845b4cfd..d5e47630e284 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
{
+ if (!mlx5_chains_prios_supported(chains))
+ return 1;
+
if (mlx5_chains_ignore_flow_level_supported(chains))
return UINT_MAX;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 92b08fa07efa..92b01858d7f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1775,12 +1775,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
int mlx5_recover_device(struct mlx5_core_dev *dev)
{
- int ret = -EIO;
+ if (!mlx5_core_is_sf(dev)) {
+ mlx5_pci_disable_device(dev);
+ if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
+ return -EIO;
+ }
- mlx5_pci_disable_device(dev);
- if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
- ret = mlx5_load_one(dev);
- return ret;
+ return mlx5_load_one(dev);
}
static struct pci_driver mlx5_core_driver = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 763c83a02380..11f3649fdaab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -346,8 +346,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
new_irq = irq_pool_create_irq(pool, affinity);
if (IS_ERR(new_irq)) {
if (!least_loaded_irq) {
- mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
- cpumask_first(affinity));
+ mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
+ PTR_ERR(new_irq));
mutex_unlock(&pool->lock);
return new_irq;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 0fe159809ba1..ea1b8ca5bf3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/mlx5/eswitch.h>
+#include <linux/err.h>
#include "dr_types.h"
#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \
@@ -75,9 +76,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn)
}
dmn->uar = mlx5_get_uars_page(dmn->mdev);
- if (!dmn->uar) {
+ if (IS_ERR(dmn->uar)) {
mlx5dr_err(dmn, "Couldn't allocate UAR\n");
- ret = -ENOMEM;
+ ret = PTR_ERR(dmn->uar);
goto clean_pd;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 7f3322ce044c..6ac507ddf09a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -3283,7 +3283,7 @@ int ionic_lif_init(struct ionic_lif *lif)
return -EINVAL;
}
- lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL);
+ lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL);
if (!lif->dbid_inuse) {
dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n");
return -ENOMEM;
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 6a92a3fef75e..cd063f45785b 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb)
goto goon;
rx_status = buf[count - 2];
- if (rx_status & 0x1e) {
+ if (rx_status & 0x1c) {
netif_dbg(pegasus, rx_err, net,
"RX packet error %x\n", rx_status);
net->stats.rx_errors++;
- if (rx_status & 0x06) /* long or runt */
+ if (rx_status & 0x04) /* runt */
net->stats.rx_length_errors++;
if (rx_status & 0x08)
net->stats.rx_crc_errors++;
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 279d88128b2e..d56bc24709b5 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -528,7 +528,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
if (IS_ERR(phy->gpiod_ena)) {
nfc_err(dev, "Unable to get ENABLE GPIO\n");
- return PTR_ERR(phy->gpiod_ena);
+ r = PTR_ERR(phy->gpiod_ena);
+ goto out_free;
}
phy->se_status.is_ese_present =
@@ -539,7 +540,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
r = st21nfca_hci_platform_init(phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to reboot st21nfca\n");
- return r;
+ goto out_free;
}
r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
@@ -548,15 +549,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
ST21NFCA_HCI_DRIVER_NAME, phy);
if (r < 0) {
nfc_err(&client->dev, "Unable to register IRQ handler\n");
- return r;
+ goto out_free;
}
- return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
- ST21NFCA_FRAME_HEADROOM,
- ST21NFCA_FRAME_TAILROOM,
- ST21NFCA_HCI_LLC_MAX_PAYLOAD,
- &phy->hdev,
- &phy->se_status);
+ r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+ ST21NFCA_FRAME_HEADROOM,
+ ST21NFCA_FRAME_TAILROOM,
+ ST21NFCA_HCI_LLC_MAX_PAYLOAD,
+ &phy->hdev,
+ &phy->se_status);
+ if (r)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ kfree_skb(phy->pending_skb);
+ return r;
}
static int st21nfca_hci_i2c_remove(struct i2c_client *client)
@@ -567,6 +576,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
if (phy->powered)
st21nfca_hci_i2c_disable(phy);
+ if (phy->pending_skb)
+ kfree_skb(phy->pending_skb);
return 0;
}
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index 04bc3b50aa7a..65b4a819f1bd 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -1374,8 +1374,8 @@ static int mlxbf_pmc_map_counters(struct device *dev)
pmc->block[i].counters = info[2];
pmc->block[i].type = info[3];
- if (IS_ERR(pmc->block[i].mmio_base))
- return PTR_ERR(pmc->block[i].mmio_base);
+ if (!pmc->block[i].mmio_base)
+ return -ENOMEM;
ret = mlxbf_pmc_create_groups(dev, i);
if (ret)
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 9aae45a45200..57553f9b4d1d 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
}
gmux_data->iostart = res->start;
- gmux_data->iolen = res->end - res->start;
+ gmux_data->iolen = resource_size(res);
if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
pr_err("gmux I/O region too small (%lu < %u)\n",
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index bd6d459afce5..08b2e85dcd7d 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -2954,8 +2954,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf,
char mybuf[64];
char *pbuf;
- if (nbytes > 64)
- nbytes = 64;
+ if (nbytes > 63)
+ nbytes = 63;
memset(mybuf, 0, sizeof(mybuf));
diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index ce1ba1b93629..9419d6d1d8d2 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -586,9 +586,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
* Commands like INQUIRY may transfer less data than
* requested by the initiator via bufflen. Set residual
* count to make upper layer aware of the actual amount
- * of data returned.
+ * of data returned. There are cases when controller
+ * returns zero dataLen with non zero data - do not set
+ * residual count in that case.
*/
- scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+ if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
+ scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
cmd->result = (DID_OK << 16);
break;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 8260f38025b7..aac343f7d7d3 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1773,11 +1773,15 @@ static void ffs_data_clear(struct ffs_data *ffs)
BUG_ON(ffs->gadget);
- if (ffs->epfiles)
+ if (ffs->epfiles) {
ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
+ ffs->epfiles = NULL;
+ }
- if (ffs->ffs_eventfd)
+ if (ffs->ffs_eventfd) {
eventfd_ctx_put(ffs->ffs_eventfd);
+ ffs->ffs_eventfd = NULL;
+ }
kfree(ffs->raw_descs_data);
kfree(ffs->raw_strings);
@@ -1790,7 +1794,6 @@ static void ffs_data_reset(struct ffs_data *ffs)
ffs_data_clear(ffs);
- ffs->epfiles = NULL;
ffs->raw_descs_data = NULL;
ffs->raw_descs = NULL;
ffs->raw_strings = NULL;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 8c04a7d73388..de9a9ea2cabc 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -123,7 +123,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
/* Look for vendor-specific quirks */
if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
(pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK ||
- pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 ||
pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) {
if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK &&
pdev->revision == 0x0) {
@@ -158,6 +157,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
xhci->quirks |= XHCI_BROKEN_STREAMS;
+ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+ pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+
if (pdev->vendor == PCI_VENDOR_ID_NEC)
xhci->quirks |= XHCI_NEC_HOST;
diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c
index a9a65b4bbfed..0b21da4ee183 100644
--- a/drivers/usb/mtu3/mtu3_gadget.c
+++ b/drivers/usb/mtu3/mtu3_gadget.c
@@ -92,6 +92,13 @@ static int mtu3_ep_enable(struct mtu3_ep *mep)
interval = clamp_val(interval, 1, 16) - 1;
mult = usb_endpoint_maxp_mult(desc) - 1;
}
+ break;
+ case USB_SPEED_FULL:
+ if (usb_endpoint_xfer_isoc(desc))
+ interval = clamp_val(desc->bInterval, 1, 16);
+ else if (usb_endpoint_xfer_int(desc))
+ interval = clamp_val(desc->bInterval, 1, 255);
+
break;
default:
break; /*others are ignored */
@@ -235,6 +242,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags)
mreq->request.dma = DMA_ADDR_INVALID;
mreq->epnum = mep->epnum;
mreq->mep = mep;
+ INIT_LIST_HEAD(&mreq->list);
trace_mtu3_alloc_request(mreq);
return &mreq->request;
diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c
index 3f414f91b589..2ea3157ddb6e 100644
--- a/drivers/usb/mtu3/mtu3_qmu.c
+++ b/drivers/usb/mtu3/mtu3_qmu.c
@@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP);
}
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
mreq->gpd = gpd;
@@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq)
gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma));
ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma));
gpd->dw3_info = cpu_to_le32(ext_addr);
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO);
mreq->gpd = gpd;
@@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum)
return;
}
mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY);
-
+ /* prevent reorder, make sure GPD's HWO is set last */
+ mb();
/* by pass the current GDP */
gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO);
diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c
index e21e1e86ad15..fe7a8e403409 100644
--- a/drivers/virt/nitro_enclaves/ne_misc_dev.c
+++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c
@@ -886,8 +886,9 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave,
goto put_pages;
}
- gup_rc = get_user_pages(mem_region.userspace_addr + memory_size, 1, FOLL_GET,
- ne_mem_region->pages + i, NULL);
+ gup_rc = get_user_pages_unlocked(mem_region.userspace_addr + memory_size, 1,
+ ne_mem_region->pages + i, FOLL_GET);
+
if (gup_rc < 0) {
rc = gup_rc;
diff --git a/fs/namespace.c b/fs/namespace.c
index 659a8f39c61a..b696543adab8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4263,12 +4263,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
return err;
err = user_path_at(dfd, path, kattr.lookup_flags, &target);
- if (err)
- return err;
-
- err = do_mount_setattr(&target, &kattr);
+ if (!err) {
+ err = do_mount_setattr(&target, &kattr);
+ path_put(&target);
+ }
finish_mount_kattr(&kattr);
- path_put(&target);
return err;
}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6b5d36babfcc..3d8ddc5eca8c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1282,4 +1282,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find(
}
#endif
+#ifdef CONFIG_SYSFB
+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+#else
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { }
+#endif
+
#endif /* _LINUX_EFI_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 34de69b3b8ba..5df38332e413 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -388,8 +388,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t end, int nid, bool exact_nid);
phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
-static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
- phys_addr_t align)
+static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
+ phys_addr_t align)
{
return memblock_phys_alloc_range(size, align, 0,
MEMBLOCK_ALLOC_ACCESSIBLE);
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bf79f3a890af..05f18e81f3e8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
}
+struct tc_skb_cb {
+ struct qdisc_skb_cb qdisc_cb;
+
+ u16 mru;
+ bool post_ct;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+ return cb;
+}
+
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 8c2d611639fc..6e7cd0033357 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -440,8 +440,6 @@ struct qdisc_skb_cb {
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
- u16 mru;
- bool post_ct;
};
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 189fdb9db162..d314a180ab93 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
void sctp_transport_walk_start(struct rhashtable_iter *iter);
void sctp_transport_walk_stop(struct rhashtable_iter *iter);
struct sctp_transport *sctp_transport_get_next(struct net *net,
@@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr, void *p);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p);
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p);
int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
struct sctp_info *info);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 651bba654d77..8d2c3dd9f595 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1365,6 +1365,7 @@ struct sctp_endpoint {
u32 secid;
u32 peer_secid;
+ struct rcu_head rcu;
};
/* Recover the outter endpoint structure. */
@@ -1380,7 +1381,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
void sctp_endpoint_free(struct sctp_endpoint *);
void sctp_endpoint_put(struct sctp_endpoint *);
-void sctp_endpoint_hold(struct sctp_endpoint *);
+int sctp_endpoint_hold(struct sctp_endpoint *ep);
void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
struct sctp_association *sctp_endpoint_lookup_assoc(
const struct sctp_endpoint *ep,
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index f6e3c8c9c744..4fa4e979e948 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -263,7 +263,7 @@ enum nfc_sdp_attr {
#define NFC_SE_ENABLED 0x1
struct sockaddr_nfc {
- sa_family_t sa_family;
+ __kernel_sa_family_t sa_family;
__u32 dev_idx;
__u32 target_idx;
__u32 nfc_protocol;
@@ -271,14 +271,14 @@ struct sockaddr_nfc {
#define NFC_LLCP_MAX_SERVICE_NAME 63
struct sockaddr_nfc_llcp {
- sa_family_t sa_family;
+ __kernel_sa_family_t sa_family;
__u32 dev_idx;
__u32 target_idx;
__u32 nfc_protocol;
__u8 dsap; /* Destination SAP, if known */
__u8 ssap; /* Source SAP to be bound to */
char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
- size_t service_name_len;
+ __kernel_size_t service_name_len;
};
/* NFC socket protocols */
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index d3bc110430f9..36624990b577 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -185,6 +185,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
struct damon_ctx *ctx = file->private_data;
+ struct damon_target *t, *next_t;
char *kbuf, *nrs;
unsigned long *targets;
ssize_t nr_targets;
@@ -224,6 +225,13 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
goto unlock_out;
}
+ /* remove previously set targets */
+ damon_for_each_target_safe(t, next_t, ctx) {
+ if (targetid_is_pid(ctx))
+ put_pid((struct pid *)t->id);
+ damon_destroy_target(t);
+ }
+
err = damon_set_targets(ctx, targets, nr_targets);
if (err) {
if (targetid_is_pid(ctx))
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f3d751105343..de2409889489 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4522,6 +4522,38 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
}
#endif
+void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
+ unsigned long val)
+{
+ unsigned long intvl_jiffies = clock_t_to_jiffies(val);
+
+ if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) {
+ br_info(brmctx->br,
+ "trying to set multicast query interval below minimum, setting to %lu (%ums)\n",
+ jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN),
+ jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN));
+ intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN;
+ }
+
+ brmctx->multicast_query_interval = intvl_jiffies;
+}
+
+void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx,
+ unsigned long val)
+{
+ unsigned long intvl_jiffies = clock_t_to_jiffies(val);
+
+ if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) {
+ br_info(brmctx->br,
+ "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n",
+ jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN),
+ jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN));
+ intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN;
+ }
+
+ brmctx->multicast_startup_query_interval = intvl_jiffies;
+}
+
/**
* br_multicast_list_adjacent - Returns snooped multicast addresses
* @dev: The bridge port adjacent to which to retrieve addresses
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 5c6c4305ed23..e365cf82f061 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
- br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
+ br_multicast_set_query_intvl(&br->multicast_ctx, val);
}
if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
@@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
- br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br_multicast_set_startup_query_intvl(&br->multicast_ctx, val);
}
if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fd5e7e74573c..bd218c2b2cd9 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -28,6 +28,8 @@
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+#define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000)
+#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN
#define BR_HWDOM_MAX BITS_PER_LONG
@@ -968,6 +970,10 @@ int br_multicast_dump_querier_state(struct sk_buff *skb,
int nest_attr);
size_t br_multicast_querier_state_size(void);
size_t br_rports_size(const struct net_bridge_mcast *brmctx);
+void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx,
+ unsigned long val);
+void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx,
+ unsigned long val);
static inline bool br_group_is_l2(const struct br_ip *group)
{
@@ -1152,9 +1158,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
static inline bool
br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
{
- return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
- br_multicast_ctx_is_vlan(brmctx) &&
- !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
+ return br_multicast_ctx_is_vlan(brmctx) &&
+ (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) ||
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED));
}
static inline bool
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index d9a89ddd0331..7b0c19772111 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -658,7 +658,7 @@ static ssize_t multicast_query_interval_show(struct device *d,
static int set_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
+ br_multicast_set_query_intvl(&br->multicast_ctx, val);
return 0;
}
@@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show(
static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br_multicast_set_startup_query_intvl(&br->multicast_ctx, val);
return 0;
}
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index 8ffd4ed2563c..a6382973b3e7 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
u64 val;
val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]);
- v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
+ br_multicast_set_query_intvl(&v->br_mcast_ctx, val);
*changed = true;
}
if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) {
@@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br,
u64 val;
val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]);
- v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val);
*changed = true;
}
if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 91f53eeb0e79..e0878a500aa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3934,8 +3934,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_skb_cb(skb)->mru = 0;
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->mru = 0;
+ tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
@@ -5088,8 +5088,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
- qdisc_skb_cb(skb)->mru = 0;
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->mru = 0;
+ tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3a9422a5873e..dcea653a5204 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -2004,6 +2004,10 @@ static int __init inet_init(void)
ip_init();
+ /* Initialise per-cpu ipv4 mibs */
+ if (init_ipv4_mibs())
+ panic("%s: Cannot init ipv4 mibs\n", __func__);
+
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -2034,12 +2038,6 @@ static int __init inet_init(void)
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
- /*
- * Initialise per-cpu ipv4 mibs
- */
-
- if (init_ipv4_mibs())
- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
ipv4_proc_init();
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7bee95d8d2df..8cd8c0bce098 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index bb5f1650f11c..c189b4c8a182 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb,
pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR);
if (!pnest)
return -ENOMEM;
- nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+ if (rc) {
+ nla_nest_cancel(skb, pnest);
+ return rc;
+ }
if ((0x1 << np->id) == ndp->package_whitelist)
nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 90866ae45573..98e248b9c0b1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
u8 family, u16 zone, bool *defrag)
{
enum ip_conntrack_info ctinfo;
- struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
+ u16 mru;
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, &ctinfo);
@@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
- cb = *qdisc_skb_cb(skb);
+ mru = tc_skb_cb(skb)->mru;
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (!err) {
*defrag = true;
- cb.mru = IPCB(skb)->frag_max_size;
+ mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (!err) {
*defrag = true;
- cb.mru = IP6CB(skb)->frag_max_size;
+ mru = IP6CB(skb)->frag_max_size;
}
#else
err = -EOPNOTSUPP;
@@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
}
if (err != -EINPROGRESS)
- *qdisc_skb_cb(skb) = cb;
+ tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tcf_action_update_bstats(&c->common, skb);
if (clear) {
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->post_ct = false;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
nf_conntrack_put(&ct->ct_general);
@@ -1048,7 +1048,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
out_push:
skb_push_rcsum(skb, nh_ofs);
- qdisc_skb_cb(skb)->post_ct = true;
+ tc_skb_cb(skb)->post_ct = true;
out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e54f0a42270c..ff8a9383bf1c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1617,12 +1617,14 @@ int tcf_classify(struct sk_buff *skb,
/* If we missed on some chain */
if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+ struct tc_skb_cb *cb = tc_skb_cb(skb);
+
ext = tc_skb_ext_alloc(skb);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = last_executed_chain;
- ext->mru = qdisc_skb_cb(skb)->mru;
- ext->post_ct = qdisc_skb_cb(skb)->post_ct;
+ ext->mru = cb->mru;
+ ext->post_ct = cb->post_ct;
}
return ret;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index eb6345a027e1..161bd91c8c6b 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -19,6 +19,7 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
#include <net/geneve.h>
@@ -309,7 +310,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
- bool post_ct = qdisc_skb_cb(skb)->post_ct;
+ bool post_ct = tc_skb_cb(skb)->post_ct;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index 8c06381391d6..5ded4c8672a6 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
#include <net/netlink.h>
#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/ip6_fib.h>
@@ -137,7 +138,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
{
- u16 mru = qdisc_skb_cb(skb)->mru;
+ u16 mru = tc_skb_cb(skb)->mru;
int err;
if (mru && skb->len > mru + skb->dev->hard_header_len)
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 760b367644c1..a7d623171501 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -290,9 +290,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
return err;
}
-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
@@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
int err = 0;
lock_sock(sk);
+ if (ep != tsp->asoc->ep)
+ goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -344,9 +345,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
return err;
}
-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
@@ -505,8 +505,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
- net, &pos, &commp);
+ sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump,
+ net, &pos, &commp);
cb->args[2] = pos;
done:
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 48c9c2c7602f..efffde7f2328 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
}
/* Final destructor for endpoint. */
+static void sctp_endpoint_destroy_rcu(struct rcu_head *head)
+{
+ struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu);
+ struct sock *sk = ep->base.sk;
+
+ sctp_sk(sk)->ep = NULL;
+ sock_put(sk);
+
+ kfree(ep);
+ SCTP_DBG_OBJCNT_DEC(ep);
+}
+
static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
{
struct sock *sk;
@@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
if (sctp_sk(sk)->bind_hash)
sctp_put_port(sk);
- sctp_sk(sk)->ep = NULL;
- /* Give up our hold on the sock */
- sock_put(sk);
-
- kfree(ep);
- SCTP_DBG_OBJCNT_DEC(ep);
+ call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu);
}
/* Hold a reference to an endpoint. */
-void sctp_endpoint_hold(struct sctp_endpoint *ep)
+int sctp_endpoint_hold(struct sctp_endpoint *ep)
{
- refcount_inc(&ep->base.refcnt);
+ return refcount_inc_not_zero(&ep->base.refcnt);
}
/* Release a reference to an endpoint and clean up if there are
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6b937bfd4751..d2215d24634e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p) {
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p)
+{
struct rhashtable_iter hti;
struct sctp_transport *tsp;
+ struct sctp_endpoint *ep;
int ret;
again:
@@ -5351,26 +5352,32 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
- ret = cb(tsp, p);
- if (ret)
- break;
+ ep = tsp->asoc->ep;
+ if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
+ ret = cb(ep, tsp, p);
+ if (ret)
+ break;
+ sctp_endpoint_put(ep);
+ }
(*pos)++;
sctp_transport_put(tsp);
}
sctp_transport_walk_stop(&hti);
if (ret) {
- if (cb_done && !cb_done(tsp, p)) {
+ if (cb_done && !cb_done(ep, tsp, p)) {
(*pos)++;
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
goto again;
}
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
}
return ret;
}
-EXPORT_SYMBOL_GPL(sctp_for_each_transport);
+EXPORT_SYMBOL_GPL(sctp_transport_traverse_process);
/* 7.2.1 Association Status (SCTP_STATUS)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d65e15f0c944..e6919fe31617 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -170,6 +170,11 @@ struct smc_connection {
u16 tx_cdc_seq; /* sequence # for CDC send */
u16 tx_cdc_seq_fin; /* sequence # - tx completed */
spinlock_t send_lock; /* protect wr_sends */
+ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe
+ * - inc when post wqe,
+ * - dec on polled tx cqe
+ */
+ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 99acd337ba90..84c8a4374fdd 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
struct smc_sock *smc;
int diff;
- if (!conn)
- /* already dismissed */
- return;
-
smc = container_of(conn, struct smc_sock, conn);
bh_lock_sock(&smc->sk);
if (!wc_status) {
@@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
conn);
conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
}
+
+ if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) &&
+ unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
+ wake_up(&conn->cdc_pend_tx_wq);
+ WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0);
+
smc_tx_sndbuf_nonfull(smc);
bh_unlock_sock(&smc->sk);
}
@@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
conn->tx_cdc_seq++;
conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed);
+
+ atomic_inc(&conn->cdc_pend_tx_wr);
+ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
+
rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
if (!rc) {
smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
@@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
} else {
conn->tx_cdc_seq--;
conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
+ atomic_dec(&conn->cdc_pend_tx_wr);
}
return rc;
@@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn,
peer->token = htonl(local->token);
peer->prod_flags.failover_validation = 1;
+ /* We need to set pend->conn here to make sure smc_cdc_tx_handler()
+ * can handle properly
+ */
+ smc_cdc_add_pending_send(conn, pend);
+
+ atomic_inc(&conn->cdc_pend_tx_wr);
+ smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */
+
rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+ if (unlikely(rc))
+ atomic_dec(&conn->cdc_pend_tx_wr);
+
return rc;
}
@@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
return rc;
}
-static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
- unsigned long data)
+void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn)
{
- struct smc_connection *conn = (struct smc_connection *)data;
- struct smc_cdc_tx_pend *cdc_pend =
- (struct smc_cdc_tx_pend *)tx_pend;
-
- return cdc_pend->conn == conn;
-}
-
-static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
-{
- struct smc_cdc_tx_pend *cdc_pend =
- (struct smc_cdc_tx_pend *)tx_pend;
-
- cdc_pend->conn = NULL;
-}
-
-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
-{
- struct smc_link *link = conn->lnk;
-
- smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, smc_cdc_tx_dismisser,
- (unsigned long)conn);
+ wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr));
}
/* Send a SMC-D CDC header.
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 0a0a89abd38b..696cc11f2303 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend);
-void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
+void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 5a9c22ee75fa..506b8498623b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -604,7 +604,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
- if (smc_link_usable(lnk))
+ if (smc_link_sendable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
wake_up_all(&lgr->llc_msg_waiter);
@@ -1056,7 +1056,7 @@ void smc_conn_free(struct smc_connection *conn)
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
- smc_cdc_tx_dismiss_slots(conn);
+ smc_cdc_wait_pend_tx_wr(conn);
if (current_work() != &conn->abort_work)
cancel_work_sync(&conn->abort_work);
}
@@ -1133,7 +1133,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
smc_llc_link_clear(lnk, log);
smcr_buf_unmap_lgr(lnk);
smcr_rtoken_clear_link(lnk);
- smc_ib_modify_qp_reset(lnk);
+ smc_ib_modify_qp_error(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
@@ -1264,7 +1264,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
else
tasklet_unlock_wait(&conn->rx_tsklet);
} else {
- smc_cdc_tx_dismiss_slots(conn);
+ smc_cdc_wait_pend_tx_wr(conn);
}
smc_lgr_unregister_conn(conn);
smc_close_active_abort(smc);
@@ -1387,11 +1387,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
/* Called when an SMCR device is removed or the smc module is unloaded.
* If smcibdev is given, all SMCR link groups using this device are terminated.
* If smcibdev is NULL, all SMCR link groups are terminated.
+ *
+ * We must wait here for QPs been destroyed before we destroy the CQs,
+ * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
+ * smc_sock cannot be released.
*/
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
+ LIST_HEAD(lgr_linkdown_list);
int i;
spin_lock_bh(&smc_lgr_list.lock);
@@ -1403,7 +1408,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].smcibdev == smcibdev)
- smcr_link_down_cond_sched(&lgr->lnk[i]);
+ list_move_tail(&lgr->list, &lgr_linkdown_list);
}
}
}
@@ -1415,6 +1420,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
__smc_lgr_terminate(lgr, false);
}
+ list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].smcibdev == smcibdev) {
+ mutex_lock(&lgr->llc_conf_mutex);
+ smcr_link_down_cond(&lgr->lnk[i]);
+ mutex_unlock(&lgr->llc_conf_mutex);
+ }
+ }
+ }
+
if (smcibdev) {
if (atomic_read(&smcibdev->lnk_cnt))
wait_event(smcibdev->lnks_deleted,
@@ -1514,7 +1529,6 @@ static void smcr_link_down(struct smc_link *lnk)
if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
return;
- smc_ib_modify_qp_reset(lnk);
to_lnk = smc_switch_conns(lgr, lnk, true);
if (!to_lnk) { /* no backup link available */
smcr_link_clear(lnk, true);
@@ -1742,6 +1756,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
+ init_waitqueue_head(&conn->cdc_pend_tx_wq);
INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c043ecdca5c4..51a3e8248ade 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -366,6 +366,12 @@ static inline bool smc_link_usable(struct smc_link *lnk)
return true;
}
+static inline bool smc_link_sendable(struct smc_link *lnk)
+{
+ return smc_link_usable(lnk) &&
+ lnk->qp_attr.cur_qp_state == IB_QPS_RTS;
+}
+
static inline bool smc_link_active(struct smc_link *lnk)
{
return lnk->state == SMC_LNK_ACTIVE;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a8845343d183..f0ec1f1d50fa 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -101,12 +101,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk)
IB_QP_MAX_QP_RD_ATOMIC);
}
-int smc_ib_modify_qp_reset(struct smc_link *lnk)
+int smc_ib_modify_qp_error(struct smc_link *lnk)
{
struct ib_qp_attr qp_attr;
memset(&qp_attr, 0, sizeof(qp_attr));
- qp_attr.qp_state = IB_QPS_RESET;
+ qp_attr.qp_state = IB_QPS_ERR;
return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE);
}
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 3085f5180da7..6967c3d52b03 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -79,6 +79,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
+int smc_ib_modify_qp_error(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
struct smc_buf_desc *buf_slot, u8 link_idx);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index f1d323439a2a..ee1f0fdba085 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1358,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
delllc.reason = htonl(rsn);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
- if (!smc_link_usable(&lgr->lnk[i]))
+ if (!smc_link_sendable(&lgr->lnk[i]))
continue;
if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
break;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index a419e9af36b9..59ca1a2d5c65 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link)
}
/* wait till all pending tx work requests on the given link are completed */
-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+void smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
{
- if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
- SMC_WR_TX_WAIT_PENDING_TIME))
- return 0;
- else /* timeout */
- return -EPIPE;
+ wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link));
}
static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
@@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
struct smc_wr_tx_pend pnd_snd;
struct smc_link *link;
u32 pnd_snd_idx;
- int i;
link = wc->qp->qp_context;
@@ -115,14 +110,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
return;
if (wc->status) {
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- /* clear full struct smc_wr_tx_pend including .priv */
- memset(&link->wr_tx_pends[i], 0,
- sizeof(link->wr_tx_pends[i]));
- memset(&link->wr_tx_bufs[i], 0,
- sizeof(link->wr_tx_bufs[i]));
- clear_bit(i, link->wr_tx_mask);
- }
/* terminate link */
smcr_link_down_cond_sched(link);
}
@@ -169,7 +156,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
{
*idx = link->wr_tx_cnt;
- if (!smc_link_usable(link))
+ if (!smc_link_sendable(link))
return -ENOLINK;
for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
if (!test_and_set_bit(*idx, link->wr_tx_mask))
@@ -212,7 +199,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
} else {
rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
- !smc_link_usable(link) ||
+ !smc_link_sendable(link) ||
lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
@@ -288,18 +275,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
unsigned long timeout)
{
struct smc_wr_tx_pend *pend;
+ u32 pnd_idx;
int rc;
pend = container_of(priv, struct smc_wr_tx_pend, priv);
pend->compl_requested = 1;
- init_completion(&link->wr_tx_compl[pend->idx]);
+ pnd_idx = pend->idx;
+ init_completion(&link->wr_tx_compl[pnd_idx]);
rc = smc_wr_tx_send(link, priv);
if (rc)
return rc;
/* wait for completion by smc_wr_tx_process_cqe() */
rc = wait_for_completion_interruptible_timeout(
- &link->wr_tx_compl[pend->idx], timeout);
+ &link->wr_tx_compl[pnd_idx], timeout);
if (rc <= 0)
rc = -ENODATA;
if (rc > 0)
@@ -349,25 +338,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
- smc_wr_tx_filter filter,
- smc_wr_tx_dismisser dismisser,
- unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_tx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
- if (wr_tx->type != wr_tx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- dismisser(tx_pend);
- }
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
@@ -572,10 +542,7 @@ void smc_wr_free_link(struct smc_link *lnk)
smc_wr_wakeup_reg_wait(lnk);
smc_wr_wakeup_tx_wait(lnk);
- if (smc_wr_tx_wait_no_pending_sends(lnk))
- memset(lnk->wr_tx_mask, 0,
- BITS_TO_LONGS(SMC_WR_BUF_CNT) *
- sizeof(*lnk->wr_tx_mask));
+ smc_wr_tx_wait_no_pending_sends(lnk);
wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2bc626f230a5..cb58e60078f5 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -22,7 +22,6 @@
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
-#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ)
#define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */
@@ -62,7 +61,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
static inline bool smc_wr_tx_link_hold(struct smc_link *link)
{
- if (!smc_link_usable(link))
+ if (!smc_link_sendable(link))
return false;
atomic_inc(&link->wr_tx_refcnt);
return true;
@@ -122,7 +121,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data);
-int smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
+void smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
int smc_wr_rx_post_init(struct smc_link *link);
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 52a000b057a5..3ccb2c70add4 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -219,7 +219,7 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "s390" && $bits == 64) {
if ($cc =~ /-DCC_USING_HOTPATCH/) {
- $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$";
$mcount_adjust = 0;
}
$alignment = 8;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 51432ea74044..9309e62d46ed 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5812,7 +5812,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
char *addrp;
- u8 proto;
+ u8 proto = 0;
if (sk == NULL)
return NF_ACCEPT;
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 1da2e3722b12..6799b1122c9d 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -1051,10 +1051,11 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
return false;
if (!domain)
return true;
+ if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED]))
+ return false;
list_for_each_entry_rcu(ptr, &domain->acl_info_list, list,
srcu_read_lock_held(&tomoyo_ss)) {
u16 perm;
- u8 i;
if (ptr->is_deleted)
continue;
@@ -1065,23 +1066,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
*/
switch (ptr->type) {
case TOMOYO_TYPE_PATH_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm);
break;
case TOMOYO_TYPE_PATH2_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm);
break;
case TOMOYO_TYPE_PATH_NUMBER_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head)
+ perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head)
->perm);
break;
case TOMOYO_TYPE_MKDEV_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm);
break;
case TOMOYO_TYPE_INET_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm);
break;
case TOMOYO_TYPE_UNIX_ACL:
- data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm);
+ perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm);
break;
case TOMOYO_TYPE_MANUAL_TASK_ACL:
perm = 0;
@@ -1089,21 +1090,17 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
default:
perm = 1;
}
- for (i = 0; i < 16; i++)
- if (perm & (1 << i))
- count++;
+ count += hweight16(perm);
}
if (count < tomoyo_profile(domain->ns, domain->profile)->
pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
return true;
- if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
- domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
- /* r->granted = false; */
- tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+ WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true);
+ /* r->granted = false; */
+ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
#ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING
- pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
- domain->domainname->name);
+ pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n",
+ domain->domainname->name);
#endif
- }
return false;
}
diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
index c0123bc31c0d..b7758dbe2371 100644
--- a/sound/hda/intel-sdw-acpi.c
+++ b/sound/hda/intel-sdw-acpi.c
@@ -132,8 +132,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
return AE_NOT_FOUND;
}
- info->handle = handle;
-
/*
* On some Intel platforms, multiple children of the HDAS
* device can be found, but only one of them is the SoundWire
@@ -144,6 +142,9 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
return AE_OK; /* keep going */
+ /* found the correct SoundWire controller */
+ info->handle = handle;
+
/* device found, stop namespace walk */
return AE_CTRL_TERMINATE;
}
@@ -164,8 +165,14 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle,
acpi_status status;
info->handle = NULL;
+ /*
+ * In the HDAS ACPI scope, 'SNDW' may be either the child of
+ * 'HDAS' or the grandchild of 'HDAS'. So let's go through
+ * the ACPI from 'HDAS' at max depth of 2 to find the 'SNDW'
+ * device.
+ */
status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
- parent_handle, 1,
+ parent_handle, 2,
sdw_intel_acpi_cb,
NULL, info, NULL);
if (ACPI_FAILURE(status) || info->handle == NULL)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c32c2eb16d7d..18b56256bb6f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2463,7 +2463,7 @@ static int process_switch_event(struct perf_tool *tool,
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
- if (scripting_ops && scripting_ops->process_switch)
+ if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample))
scripting_ops->process_switch(event, sample, machine);
if (!script->show_switch_events)
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
index 1d3a189a9a54..66452a8ec358 100644
--- a/tools/perf/scripts/python/intel-pt-events.py
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -32,8 +32,7 @@ try:
except:
broken_pipe_exception = IOError
-glb_switch_str = None
-glb_switch_printed = True
+glb_switch_str = {}
glb_insn = False
glb_disassembler = None
glb_src = False
@@ -70,6 +69,7 @@ def trace_begin():
ap = argparse.ArgumentParser(usage = "", add_help = False)
ap.add_argument("--insn-trace", action='store_true')
ap.add_argument("--src-trace", action='store_true')
+ ap.add_argument("--all-switch-events", action='store_true')
global glb_args
global glb_insn
global glb_src
@@ -256,10 +256,6 @@ def print_srccode(comm, param_dict, sample, symbol, dso, with_insn):
print(start_str, src_str)
def do_process_event(param_dict):
- global glb_switch_printed
- if not glb_switch_printed:
- print(glb_switch_str)
- glb_switch_printed = True
event_attr = param_dict["attr"]
sample = param_dict["sample"]
raw_buf = param_dict["raw_buf"]
@@ -274,6 +270,11 @@ def do_process_event(param_dict):
dso = get_optional(param_dict, "dso")
symbol = get_optional(param_dict, "symbol")
+ cpu = sample["cpu"]
+ if cpu in glb_switch_str:
+ print(glb_switch_str[cpu])
+ del glb_switch_str[cpu]
+
if name[0:12] == "instructions":
if glb_src:
print_srccode(comm, param_dict, sample, symbol, dso, True)
@@ -336,8 +337,6 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
sys.exit(1)
def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
- global glb_switch_printed
- global glb_switch_str
if out:
out_str = "Switch out "
else:
@@ -350,6 +349,10 @@ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_pree
machine_str = ""
else:
machine_str = "machine PID %d" % machine_pid
- glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
+ switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \
(out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str)
- glb_switch_printed = False
+ if glb_args.all_switch_events:
+ print(switch_str);
+ else:
+ global glb_switch_str
+ glb_switch_str[cpu] = switch_str
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 824bceb063bf..c3ceac138810 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -3540,6 +3540,7 @@ static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args)
*args = p;
return 0;
}
+ p += 1;
while (1) {
vmcs = strtoull(p, &p, 0);
if (errno)
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 7f26591f236b..3ea73013d956 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -132,7 +132,7 @@ run_test() {
local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
sed -e 's/\[//' -e 's/:.*//'`
if [ $rcv != $pkts ]; then
- echo " fail - received $rvs packets, expected $pkts"
+ echo " fail - received $rcv packets, expected $pkts"
ret=1
return
fi
@@ -185,6 +185,7 @@ for family in 4 6; do
IPT=iptables
SUFFIX=24
VXDEV=vxlan
+ PING=ping
if [ $family = 6 ]; then
BM_NET=$BM_NET_V6
@@ -192,6 +193,7 @@ for family in 4 6; do
SUFFIX="64 nodad"
VXDEV=vxlan6
IPT=ip6tables
+ PING="ping6"
fi
echo "IPv$family"
@@ -237,7 +239,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
- ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
+ ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
cleanup
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index c66da6ffd6d8..7badaf215de2 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -156,13 +156,13 @@ struct testcase testcases_v4[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
@@ -259,13 +259,13 @@ struct testcase testcases_v6[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 17512a43885e..f1fdaa270291 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -419,6 +419,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ const char *bind_addr = NULL;
int max_len, hdrlen;
int c;
@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv)
cfg_cpu = strtol(optarg, NULL, 0);
break;
case 'D':
- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ bind_addr = optarg;
break;
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv)
}
}
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
if (optind != argc)
usage(argv[0]);
^ permalink raw reply related
* [PATCH v5 00/21] AMX Support in KVM
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
Thanks a lot for all the review comments and guidance! Hope this
version is in a good state now. :)
(Jing is temporarily leave for family reason, Yang helped work out
this version)
----
v4->v5:
- Directly call fpu core to expand fpstate buffer in kvm_check_cpuid()
and remove duplicated permission check there (Sean)
- Accordingly remove Thomas's reviewed-by as a different wrapper is
introduced now (patch-7)
- Properly queue #NM exception in nested scenario (Sean)
- Verify non-XFD related #NM usage in nested scenario (Sean)
- Hide XFD in kvm_cpu_cap on 32bit host kernels (Sean)
- Use xstate_required_size() in KVM_CAP_XSAVE2 which may be called
before any vcpu is created (Sean/Paolo)
- Replace boot_cpu_has with kvm_cpu_cap_has when disabling RDMSR
interception for xfd_err (Sean)
v3->v4:
- Verify kvm selftest for AMX (Paolo)
- Expand fpstate buffer in kvm_check_cpuid() and improve patch
description (Sean)
- Drop 'preemption' word in #NM interception patch (Sean)
- Remove 'trap_nm' flag. Replace it by: (Sean)
* Trapping #NM according to guest_fpu::xfd when write to xfd is
intercepted.
* Always trapping #NM when xfd write interception is disabled
- Use better name for #NM related functions (Sean)
- Drop '#ifdef CONFIG_X86_64' in __kvm_set_xcr (Sean)
- Update description for KVM_CAP_XSAVE2 and prevent the guest from
using the wrong ioctl (Sean)
- Replace 'xfd_out_of_sync' with a better name (Sean)
v2->v3:
- Trap #NM until write IA32_XFD with a non-zero value (Thomas)
- Revise return value in __xstate_request_perm() (Thomas)
- Revise doc for KVM_GET_SUPPORTED_CPUID (Paolo)
- Add Thomas's reviewed-by on one patch
- Reorder disabling read interception of XFD_ERR patch (Paolo)
- Move disabling r/w interception of XFD from x86.c to vmx.c (Paolo)
- Provide the API doc together with the new KVM_GET_XSAVE2 ioctl (Paolo)
- Make KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) return minimum size of struct
kvm_xsave (4K) (Paolo)
- Request permission at the start of vm_create_with_vcpus() in selftest
- Request permission conditionally when XFD is supported (Paolo)
v1->v2:
- Live migration supported and verified with a selftest
- Rebase to Thomas's new series for guest fpstate reallocation [1]
- Expand fpstate at KVM_SET_CPUID2 instead of when emulating XCR0
and IA32_XFD (Thomas/Paolo)
- Accordingly remove all exit-to-userspace stuff
- Intercept #NM to save guest XFD_ERR and restore host/guest value
at preemption on/off boundary (Thomas)
- Accordingly remove all xfd_err logic in preemption callback and
fpu_swap_kvm_fpstate()
- Reuse KVM_SET_XSAVE to handle both legacy and expanded buffer (Paolo)
- Don't return dynamic bits w/o prctl() in KVM_GET_SUPPORTED_CPUID (Paolo)
- Check guest permissions for dynamic features in CPUID[0xD] instead
of only for AMX at KVM_SET_CPUID (Paolo)
- Remove dynamic bit check for 32-bit guest in __kvm_set_xcr() (Paolo)
- Fix CPUID emulation for 0x1d and 0x1e (Paolo)
- Move "disable interception" to the end of the series (Paolo)
This series brings AMX (Advanced Matrix eXtensions) virtualization support to
KVM. The preparatory series from Thomas [1] is also included.
A large portion of the changes in this series is to deal with eXtended Feature
Disable (XFD) which allows resizing of the fpstate buffer to support
dynamically-enabled XSTATE features with large state component (e.g. 8K for AMX).
There are a lot of simplications when comparing v5 to the original proposal [2]
and the first version [3]. Thanks to Thomas, Paolo and Sean for many good
suggestions.
The support is based on following key changes:
- Guest permissions for dynamically-enabled XSAVE features
Native tasks have to request permission via prctl() before touching
a dynamic-resized XSTATE compoenent. Introduce guest permissions
for the similar purpose. Userspace VMM is expected to request guest
permission only once when the first vCPU is created.
KVM checks guest permission in KVM_SET_CPUID2. Setting XFD in guest
cpuid w/o proper permissions fails this operation. In the meantime,
unpermitted features are also excluded in KVM_GET_SUPPORTED_CPUID.
- Extend fpstate reallocation mechanism to cover guest fpu
Unlike native tasks which have reallocation triggered from #NM
handler, guest fpstate reallocation is requested by KVM when it
identifies the intention on using dynamically-enabled XSAVE
features inside guest.
Extend fpu core to allow KVM request fpstate buffer expansion
for a guest fpu containter.
- Trigger fpstate reallocation in KVM
This could be done either before guest runs or until xfd is updated
in the emulation path. According to discussion [1] we decide to
go the former option in KVM_SET_CPUID2, with fpstate buffer sized
accordingly. This spares a lot of code and also avoid imposing an
ordered restore sequence (XCR0, XFD and XSTATE) to userspace VMM.
- RDMSR/WRMSR emulation for IA32_XFD
Because fpstate expansion is completed in KVM_SET_CPUID2, emulating
r/w access to IA32_XFD simply involves the xfd field in the guest
fpu container. If write and guest fpu is currently active, the
software state (guest_fpstate::xfd and per-cpu xfd cache) is also
updated.
- RDMSR/WRMSR emulation for XFD_ERR
When XFD causes an instruction to generate #NM, XFD_ERR contains
information about which disabled state components are being accessed.
It'd be problematic if the XFD_ERR value generated in guest is
consumed/clobbered by the host before the guest itself doing so.
Intercept #NM exception to save the guest XFD_ERR value when write
IA32_XFD with a non-zero value for 1st time. There is at most one
interception per guest task given a dynamic feature.
RDMSR/WRMSR emulation uses the saved value. The host value (always
ZERO outside of the host #NM handler) is restored before enabling
interrupts. The saved guest value is restored right before entering
the guest (with interrupts disabled).
- Get/set dynamic xfeature state for migration
Introduce new capability (KVM_CAP_XSAVE2) to deal with >4KB fpstate
buffer. Reading this capability returns the size of the current
guest fpstate (e.g. after expansion). Userspace VMM uses a new ioctl
(KVM_GET_XSAVE2) to read guest fpstate from the kernel and reuses
the existing ioctl (KVM_SET_XSAVE) to update guest fpsate to the
kernel. KVM_SET_XSAVE is extended to do properly_sized memdup_user()
based on the guest fpstate.
- Expose related cpuid bits to guest
The last step is to allow exposing XFD, AMX_TILE, AMX_INT8 and
AMX_BF16 in guest cpuid. Adding those bits into kvm_cpu_caps finally
activates all previous logics in this series
- Optimization: disable interception for IA32_XFD
IA32_XFD can be frequently updated by the guest, as it is part of
the task state and swapped in context switch when prev and next have
different XFD setting. Always intercepting WRMSR can easily cause
non-negligible overhead.
Disable r/w emulation for IA32_XFD after intercepting the first
WRMSR(IA32_XFD) with a non-zero value. However MSR passthrough
implies the software state (guest_fpstate::xfd and per-cpu xfd
cache) might be out of sync with MSR. This suggests KVM needs to
re-sync them at VM-exit before preemption is enabled.
Thanks Jun Nakajima and Kevin Tian for the design suggestions when this was
being internally worked on.
[1] https://lore.kernel.org/all/20211214022825.563892248@linutronix.de/
[2] https://www.spinics.net/lists/kvm/msg259015.html
[3] https://lore.kernel.org/lkml/20211208000359.2853257-1-yang.zhong@intel.com/
Thanks,
Yang
----
Guang Zeng (1):
kvm: x86: Add support for getting/setting expanded xstate buffer
Jing Liu (11):
kvm: x86: Fix xstate_required_size() to follow XSTATE alignment rule
kvm: x86: Exclude unpermitted xfeatures at KVM_GET_SUPPORTED_CPUID
x86/fpu: Make XFD initialization in __fpstate_reset() a function
argument
kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2
kvm: x86: Add emulation for IA32_XFD
x86/fpu: Prepare xfd_err in struct fpu_guest
kvm: x86: Intercept #NM for saving IA32_XFD_ERR
kvm: x86: Emulate IA32_XFD_ERR for guest
kvm: x86: Disable RDMSR interception of IA32_XFD_ERR
kvm: x86: Add XCR0 support for Intel AMX
kvm: x86: Add CPUID support for Intel AMX
Kevin Tian (2):
x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation
kvm: x86: Disable interception for IA32_XFD on demand
Sean Christopherson (1):
x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM
Thomas Gleixner (5):
x86/fpu: Extend fpu_xstate_prctl() with guest permissions
x86/fpu: Prepare guest FPU for dynamically enabled FPU features
x86/fpu: Add guest support to xfd_enable_feature()
x86/fpu: Add uabi_size to guest_fpu
x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state()
Wei Wang (1):
kvm: selftests: Add support for KVM_CAP_XSAVE2
Documentation/virt/kvm/api.rst | 46 +++++-
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/fpu/api.h | 11 ++
arch/x86/include/asm/fpu/types.h | 32 ++++
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/uapi/asm/kvm.h | 16 +-
arch/x86/include/uapi/asm/prctl.h | 26 ++--
arch/x86/kernel/fpu/core.c | 94 ++++++++++-
arch/x86/kernel/fpu/xstate.c | 147 +++++++++++-------
arch/x86/kernel/fpu/xstate.h | 15 +-
arch/x86/kernel/process.c | 2 +
arch/x86/kvm/cpuid.c | 86 +++++++---
arch/x86/kvm/cpuid.h | 2 +
arch/x86/kvm/vmx/vmcs.h | 5 +
arch/x86/kvm/vmx/vmx.c | 68 ++++++++
arch/x86/kvm/vmx/vmx.h | 2 +-
arch/x86/kvm/x86.c | 112 ++++++++++++-
include/uapi/linux/kvm.h | 4 +
tools/arch/x86/include/uapi/asm/kvm.h | 16 +-
tools/include/uapi/linux/kvm.h | 3 +
.../testing/selftests/kvm/include/kvm_util.h | 2 +
.../selftests/kvm/include/x86_64/processor.h | 10 ++
tools/testing/selftests/kvm/lib/kvm_util.c | 32 ++++
.../selftests/kvm/lib/x86_64/processor.c | 67 +++++++-
.../testing/selftests/kvm/x86_64/evmcs_test.c | 2 +-
tools/testing/selftests/kvm/x86_64/smm_test.c | 2 +-
.../testing/selftests/kvm/x86_64/state_test.c | 2 +-
.../kvm/x86_64/vmx_preemption_timer_test.c | 2 +-
28 files changed, 702 insertions(+), 107 deletions(-)
^ permalink raw reply
* [PATCH v5 02/21] x86/fpu: Prepare guest FPU for dynamically enabled FPU features
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Thomas Gleixner <tglx@linutronix.de>
To support dynamically enabled FPU features for guests prepare the guest
pseudo FPU container to keep track of the currently enabled xfeatures and
the guest permissions.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/fpu/types.h | 13 +++++++++++++
arch/x86/kernel/fpu/core.c | 26 +++++++++++++++++++++++++-
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 6ddf80637697..c752d0aa23a4 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -504,6 +504,19 @@ struct fpu {
* Guest pseudo FPU container
*/
struct fpu_guest {
+ /*
+ * @xfeatures: xfeature bitmap of features which are
+ * currently enabled for the guest vCPU.
+ */
+ u64 xfeatures;
+
+ /*
+ * @perm: xfeature bitmap of features which are
+ * permitted to be enabled for the guest
+ * vCPU.
+ */
+ u64 perm;
+
/*
* @fpstate: Pointer to the allocated guest fpstate
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ab19b3d8b2f7..eddeeb4ed2f5 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -201,6 +201,26 @@ void fpu_reset_from_exception_fixup(void)
#if IS_ENABLED(CONFIG_KVM)
static void __fpstate_reset(struct fpstate *fpstate);
+static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
+{
+ struct fpu_state_perm *fpuperm;
+ u64 perm;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ spin_lock_irq(¤t->sighand->siglock);
+ fpuperm = ¤t->group_leader->thread.fpu.guest_perm;
+ perm = fpuperm->__state_perm;
+
+ /* First fpstate allocation locks down permissions. */
+ WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
+
+ spin_unlock_irq(¤t->sighand->siglock);
+
+ gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
+}
+
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
{
struct fpstate *fpstate;
@@ -216,7 +236,11 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
fpstate->is_valloc = true;
fpstate->is_guest = true;
- gfpu->fpstate = fpstate;
+ gfpu->fpstate = fpstate;
+ gfpu->xfeatures = fpu_user_cfg.default_features;
+ gfpu->perm = fpu_user_cfg.default_features;
+ fpu_init_guest_permissions(gfpu);
+
return true;
}
EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
^ permalink raw reply related
* [PATCH v5 01/21] x86/fpu: Extend fpu_xstate_prctl() with guest permissions
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Thomas Gleixner <tglx@linutronix.de>
KVM requires a clear separation of host user space and guest permissions
for dynamic XSTATE components.
Add a guest permissions member to struct fpu and a separate set of prctl()
arguments: ARCH_GET_XCOMP_GUEST_PERM and ARCH_REQ_XCOMP_GUEST_PERM.
The semantics are equivalent to the host user space permission control
except for the following constraints:
1) Permissions have to be requested before the first vCPU is created
2) Permissions are frozen when the first vCPU is created to ensure
consistency. Any attempt to expand permissions via the prctl() after
that point is rejected.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/fpu/api.h | 2 ++
arch/x86/include/asm/fpu/types.h | 9 ++++++
arch/x86/include/uapi/asm/prctl.h | 26 ++++++++-------
arch/x86/kernel/fpu/core.c | 3 ++
arch/x86/kernel/fpu/xstate.c | 53 +++++++++++++++++++++++--------
arch/x86/kernel/fpu/xstate.h | 13 ++++++--
arch/x86/kernel/process.c | 2 ++
7 files changed, 80 insertions(+), 28 deletions(-)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index c2767a6a387e..d8c222290e68 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -132,6 +132,8 @@ static inline void fpstate_free(struct fpu *fpu) { }
/* fpstate-related functions which are exported to KVM */
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
+extern inline u64 xstate_get_guest_group_perm(void);
+
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c06c82ab355..6ddf80637697 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -387,6 +387,8 @@ struct fpstate {
/* @regs is dynamically sized! Don't add anything after @regs! */
} __aligned(64);
+#define FPU_GUEST_PERM_LOCKED BIT_ULL(63)
+
struct fpu_state_perm {
/*
* @__state_perm:
@@ -476,6 +478,13 @@ struct fpu {
*/
struct fpu_state_perm perm;
+ /*
+ * @guest_perm:
+ *
+ * Permission related information for guest pseudo FPUs
+ */
+ struct fpu_state_perm guest_perm;
+
/*
* @__fpstate:
*
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 754a07856817..500b96e71f18 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -2,20 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
-#define ARCH_GET_CPUID 0x1011
-#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
-#define ARCH_GET_XCOMP_SUPP 0x1021
-#define ARCH_GET_XCOMP_PERM 0x1022
-#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
-#define ARCH_MAP_VDSO_X32 0x2001
-#define ARCH_MAP_VDSO_32 0x2002
-#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8ea306b1bf8e..ab19b3d8b2f7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -450,6 +450,8 @@ void fpstate_reset(struct fpu *fpu)
fpu->perm.__state_perm = fpu_kernel_cfg.default_features;
fpu->perm.__state_size = fpu_kernel_cfg.default_size;
fpu->perm.__user_state_size = fpu_user_cfg.default_size;
+ /* Same defaults for guests */
+ fpu->guest_perm = fpu->perm;
}
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
@@ -460,6 +462,7 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
spin_lock_irq(¤t->sighand->siglock);
/* Fork also inherits the permissions of the parent */
dst_fpu->perm = src_fpu->perm;
+ dst_fpu->guest_perm = src_fpu->guest_perm;
spin_unlock_irq(¤t->sighand->siglock);
}
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d28829403ed0..5f01d463859d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1595,7 +1595,7 @@ static int validate_sigaltstack(unsigned int usize)
return 0;
}
-static int __xstate_request_perm(u64 permitted, u64 requested)
+static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
/*
* This deliberately does not exclude !XSAVES as we still might
@@ -1605,9 +1605,10 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
*/
bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
struct fpu *fpu = ¤t->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
- int ret;
+ int ret = 0;
/* Check whether fully enabled */
if ((permitted & requested) == requested)
@@ -1621,15 +1622,18 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
mask &= XFEATURE_MASK_USER_SUPPORTED;
usize = xstate_calculate_size(mask, false);
- ret = validate_sigaltstack(usize);
- if (ret)
- return ret;
+ if (!guest) {
+ ret = validate_sigaltstack(usize);
+ if (ret)
+ return ret;
+ }
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
- WRITE_ONCE(fpu->perm.__state_perm, requested);
+ WRITE_ONCE(perm->__state_perm, requested);
/* Protected by sighand lock */
- fpu->perm.__state_size = ksize;
- fpu->perm.__user_state_size = usize;
+ perm->__state_size = ksize;
+ perm->__user_state_size = usize;
return ret;
}
@@ -1640,7 +1644,7 @@ static const u64 xstate_prctl_req[XFEATURE_MAX] = {
[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
-static int xstate_request_perm(unsigned long idx)
+static int xstate_request_perm(unsigned long idx, bool guest)
{
u64 permitted, requested;
int ret;
@@ -1661,14 +1665,19 @@ static int xstate_request_perm(unsigned long idx)
return -EOPNOTSUPP;
/* Lockless quick check */
- permitted = xstate_get_host_group_perm();
+ permitted = xstate_get_group_perm(guest);
if ((permitted & requested) == requested)
return 0;
/* Protect against concurrent modifications */
spin_lock_irq(¤t->sighand->siglock);
- permitted = xstate_get_host_group_perm();
- ret = __xstate_request_perm(permitted, requested);
+ permitted = xstate_get_group_perm(guest);
+
+ /* First vCPU allocation locks the permissions. */
+ if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
+ ret = -EBUSY;
+ else
+ ret = __xstate_request_perm(permitted, requested, guest);
spin_unlock_irq(¤t->sighand->siglock);
return ret;
}
@@ -1713,12 +1722,18 @@ int xfd_enable_feature(u64 xfd_err)
return 0;
}
#else /* CONFIG_X86_64 */
-static inline int xstate_request_perm(unsigned long idx)
+static inline int xstate_request_perm(unsigned long idx, bool guest)
{
return -EPERM;
}
#endif /* !CONFIG_X86_64 */
+inline u64 xstate_get_guest_group_perm(void)
+{
+ return xstate_get_group_perm(true);
+}
+EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
+
/**
* fpu_xstate_prctl - xstate permission operations
* @tsk: Redundant pointer to current
@@ -1742,6 +1757,7 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
u64 __user *uptr = (u64 __user *)arg2;
u64 permitted, supported;
unsigned long idx = arg2;
+ bool guest = false;
if (tsk != current)
return -EPERM;
@@ -1760,11 +1776,20 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
permitted &= XFEATURE_MASK_USER_SUPPORTED;
return put_user(permitted, uptr);
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ permitted = xstate_get_guest_group_perm();
+ permitted &= XFEATURE_MASK_USER_SUPPORTED;
+ return put_user(permitted, uptr);
+
+ case ARCH_REQ_XCOMP_GUEST_PERM:
+ guest = true;
+ fallthrough;
+
case ARCH_REQ_XCOMP_PERM:
if (!IS_ENABLED(CONFIG_X86_64))
return -EOPNOTSUPP;
- return xstate_request_perm(idx);
+ return xstate_request_perm(idx, guest);
default:
return -EINVAL;
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 86ea7c0fa2f6..98a472775c97 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -20,10 +20,19 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
}
-static inline u64 xstate_get_host_group_perm(void)
+static inline u64 xstate_get_group_perm(bool guest)
{
+ struct fpu *fpu = ¤t->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
+
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
- return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm);
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
+ return READ_ONCE(perm->__state_perm);
+}
+
+static inline u64 xstate_get_host_group_perm(void)
+{
+ return xstate_get_group_perm(false);
}
enum xstate_copy_mode {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 04143a653a8a..d7bc23589062 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -993,6 +993,8 @@ long do_arch_prctl_common(struct task_struct *task, int option,
case ARCH_GET_XCOMP_SUPP:
case ARCH_GET_XCOMP_PERM:
case ARCH_REQ_XCOMP_PERM:
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ case ARCH_REQ_XCOMP_GUEST_PERM:
return fpu_xstate_prctl(task, option, arg2);
}
^ permalink raw reply related
* [PATCH v5 09/21] x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Kevin Tian <kevin.tian@intel.com>
Guest XFD can be updated either in the emulation path or in the
restore path.
Provide a wrapper to update guest_fpu::fpstate::xfd. If the guest
fpstate is currently in-use, also update the per-cpu xfd cache and
the actual MSR.
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/fpu/api.h | 6 ++++++
arch/x86/kernel/fpu/core.c | 12 ++++++++++++
2 files changed, 18 insertions(+)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1ed2a247a84e..e4d10155290b 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -140,6 +140,12 @@ extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures);
+#ifdef CONFIG_X86_64
+extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
+#else
+static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
+#endif
+
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 5b08d20a4005..8f7f9bfc2506 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -278,6 +278,18 @@ int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)
}
EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features);
+#ifdef CONFIG_X86_64
+void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
+{
+ fpregs_lock();
+ guest_fpu->fpstate->xfd = xfd;
+ if (guest_fpu->fpstate->in_use)
+ xfd_update_state(guest_fpu->fpstate);
+ fpregs_unlock();
+}
+EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
+#endif /* CONFIG_X86_64 */
+
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
^ permalink raw reply related
* [PATCH v5 12/21] kvm: x86: Intercept #NM for saving IA32_XFD_ERR
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
Guest IA32_XFD_ERR is generally modified in two places:
- Set by CPU when #NM is triggered;
- Cleared by guest in its #NM handler;
Intercept #NM for the first case when a nonzero value is written
to IA32_XFD. Nonzero indicates that the guest is willing to do
dynamic fpstate expansion for certain xfeatures, thus KVM needs to
manage and virtualize guest XFD_ERR properly. The vcpu exception
bitmap is updated in XFD write emulation according to guest_fpu::xfd.
Save the current XFD_ERR value to the guest_fpu container in the #NM
VM-exit handler. This must be done with interrupt disabled, otherwise
the unsaved MSR value may be clobbered by host activity.
The saving operation is conducted conditionally only when guest_fpu:xfd
includes a non-zero value. Doing so also avoids misread on a platform
which doesn't support XFD but #NM is triggered due to L1 interception.
Queueing #NM to the guest is postponed to handle_exception_nmi(). This
goes through the nested_vmx check so a virtual vmexit is queued instead
when #NM is triggered in L2 but L1 wants to intercept it.
Restore the host value (always ZERO outside of the host #NM
handler) before enabling interrupt.
Restore the guest value from the guest_fpu container right before
entering the guest (with interrupt disabled).
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/kvm/vmx/vmcs.h | 5 +++++
arch/x86/kvm/vmx/vmx.c | 48 +++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 6 ++++++
3 files changed, 59 insertions(+)
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 6e5de2e2b0da..e325c290a816 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -129,6 +129,11 @@ static inline bool is_machine_check(u32 intr_info)
return is_exception_n(intr_info, MC_VECTOR);
}
+static inline bool is_nm_fault(u32 intr_info)
+{
+ return is_exception_n(intr_info, NM_VECTOR);
+}
+
/* Undocumented: icebp/int1 */
static inline bool is_icebp(u32 intr_info)
{
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fe06b02994e6..939e6aad128c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -36,6 +36,7 @@
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/api.h>
+#include <asm/fpu/xstate.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
@@ -761,6 +762,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
}
+ /*
+ * Trap #NM if guest xfd contains a non-zero value so guest XFD_ERR
+ * can be saved timely.
+ */
+ if (vcpu->arch.guest_fpu.fpstate->xfd)
+ eb |= (1u << NM_VECTOR);
+
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -1967,6 +1975,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KERNEL_GS_BASE:
vmx_write_guest_kernel_gs_base(vmx, data);
break;
+ case MSR_IA32_XFD:
+ ret = kvm_set_msr_common(vcpu, msr_info);
+ /* Update #NM interception according to guest xfd */
+ if (!ret)
+ vmx_update_exception_bitmap(vcpu);
+ break;
#endif
case MSR_IA32_SYSENTER_CS:
if (is_guest_mode(vcpu))
@@ -4798,6 +4812,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info) || is_nmi(intr_info))
return 1; /* handled by handle_exception_nmi_irqoff() */
+ /*
+ * Queue the exception here instead of in handle_nm_fault_irqoff().
+ * This ensures the nested_vmx check is not skipped so vmexit can
+ * be reflected to L1 (when it intercepts #NM) before reaching this
+ * point.
+ */
+ if (is_nm_fault(intr_info)) {
+ kvm_queue_exception(vcpu, NM_VECTOR);
+ return 1;
+ }
+
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
@@ -6399,6 +6424,26 @@ static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
kvm_after_interrupt(vcpu);
}
+static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Save xfd_err to guest_fpu before interrupt is enabled, so the
+ * MSR value is not clobbered by the host activity before the guest
+ * has chance to consume it.
+ *
+ * We should not blindly read xfd_err here, since this exception
+ * might be caused by L1 interception on a platform which doesn't
+ * support xfd at all.
+ *
+ * Do it conditionally upon guest_fpu::xfd. xfd_err matters
+ * only when xfd contains a non-zero value.
+ *
+ * Queuing exception is done in vmx_handle_exit. See comment there.
+ */
+ if (vcpu->arch.guest_fpu.fpstate->xfd)
+ rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+}
+
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
@@ -6407,6 +6452,9 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
/* if exit due to PF check for async PF */
if (is_page_fault(intr_info))
vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
+ /* if exit due to NM, handle before interrupts are enabled */
+ else if (is_nm_fault(intr_info))
+ handle_nm_fault_irqoff(&vmx->vcpu);
/* Handle machine checks before interrupts are enabled */
else if (is_machine_check(intr_info))
kvm_machine_check();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21ce65220e38..2c988f8ca616 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9953,6 +9953,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_return();
+ if (vcpu->arch.guest_fpu.xfd_err)
+ wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
+
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -10016,6 +10019,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
static_call(kvm_x86_handle_exit_irqoff)(vcpu);
+ if (vcpu->arch.guest_fpu.xfd_err)
+ wrmsrl(MSR_IA32_XFD_ERR, 0);
+
/*
* Consume any pending interrupts, including the possible source of
* VM-Exit on SVM and any ticks that occur between VM-Exit and now.
^ permalink raw reply related
* [PATCH v5 10/21] kvm: x86: Add emulation for IA32_XFD
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
Intel's eXtended Feature Disable (XFD) feature allows the software
to dynamically adjust fpstate buffer size for XSAVE features which
have large state.
Because guest fpstate has been expanded for all possible dynamic
xstates at KVM_SET_CPUID2, emulation of the IA32_XFD MSR is
straightforward. For write just call fpu_update_guest_xfd() to
update the guest fpu container once all the sanity checks are passed.
For read simply return the cached value in the container.
Signed-off-by: Zeng Guang <guang.zeng@intel.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/kvm/x86.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c194a8cbd25f..21ce65220e38 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1377,6 +1377,7 @@ static const u32 msrs_to_save_all[] = {
MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+ MSR_IA32_XFD,
};
static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
@@ -3686,6 +3687,19 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.msr_misc_features_enables = data;
break;
+#ifdef CONFIG_X86_64
+ case MSR_IA32_XFD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ return 1;
+
+ if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
+ vcpu->arch.guest_supported_xcr0))
+ return 1;
+
+ fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
+ break;
+#endif
default:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
@@ -4006,6 +4020,15 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K7_HWCR:
msr_info->data = vcpu->arch.msr_hwcr;
break;
+#ifdef CONFIG_X86_64
+ case MSR_IA32_XFD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ return 1;
+
+ msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
+ break;
+#endif
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
@@ -6441,6 +6464,10 @@ static void kvm_init_msr_list(void)
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
break;
+ case MSR_IA32_XFD:
+ if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
+ continue;
+ break;
default:
break;
}
^ permalink raw reply related
* [PATCH v5 14/21] kvm: x86: Disable RDMSR interception of IA32_XFD_ERR
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
This saves one unnecessary VM-exit in guest #NM handler, given that the
MSR is already restored with the guest value before the guest is resumed.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/kvm/vmx/vmx.c | 6 ++++++
arch/x86/kvm/vmx/vmx.h | 2 +-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 939e6aad128c..b601dc8a7f13 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -162,6 +162,7 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_FS_BASE,
MSR_GS_BASE,
MSR_KERNEL_GS_BASE,
+ MSR_IA32_XFD_ERR,
#endif
MSR_IA32_SYSENTER_CS,
MSR_IA32_SYSENTER_ESP,
@@ -7290,6 +7291,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
}
}
+ if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+ !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+
+
set_cr4_guest_host_mask(vmx);
vmx_write_encls_bitmap(vcpu, NULL);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index acb874db02da..e0922c80c9d1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -352,7 +352,7 @@ struct vcpu_vmx {
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 13
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 14
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
^ permalink raw reply related
* [PATCH v5 15/21] kvm: x86: Add XCR0 support for Intel AMX
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
Two XCR0 bits are defined for AMX to support XSAVE mechanism. Bit 17
is for tilecfg and bit 18 is for tiledata.
The value of XCR0[17:18] is always either 00b or 11b. Also, SDM
recommends that only 64-bit operating systems enable Intel AMX by
setting XCR0[18:17]. 32-bit host kernel never sets the tile bits in
vcpu->arch.guest_supported_xcr0.
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/kvm/x86.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 434986928552..553e4af12edc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -211,7 +211,7 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
- | XFEATURE_MASK_PKRU)
+ | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
@@ -1010,6 +1010,11 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
return 1;
}
+
+ if ((xcr0 & XFEATURE_MASK_XTILE) &&
+ ((xcr0 & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE))
+ return 1;
+
vcpu->arch.xcr0 = xcr0;
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
^ permalink raw reply related
* [PATCH v5 16/21] kvm: x86: Add CPUID support for Intel AMX
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
Extend CPUID emulation to support XFD, AMX_TILE, AMX_INT8 and
AMX_BF16. Adding those bits into kvm_cpu_caps finally activates all
previous logics in this series.
Hide XFD on 32bit host kernels. Otherwise it leads to a weird situation
where KVM tells userspace to migrate MSR_IA32_XFD and then rejects
attempts to read/write the MSR.
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/kvm/cpuid.c | 27 +++++++++++++++++++++++++--
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index d5b5f2ab87a0..da872b6f8d8b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -299,7 +299,9 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0fedf1514ab..ba4c3d5d2386 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -442,9 +442,11 @@ void kvm_set_cpu_caps(void)
#ifdef CONFIG_X86_64
unsigned int f_gbpages = F(GBPAGES);
unsigned int f_lm = F(LM);
+ unsigned int f_xfd = F(XFD);
#else
unsigned int f_gbpages = 0;
unsigned int f_lm = 0;
+ unsigned int f_xfd = 0;
#endif
memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));
@@ -512,7 +514,8 @@ void kvm_set_cpu_caps(void)
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
- F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16)
+ F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -531,7 +534,7 @@ void kvm_set_cpu_caps(void)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
- F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
+ F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
kvm_cpu_cap_init_scattered(CPUID_12_EAX,
@@ -657,6 +660,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
case 0x14:
case 0x17:
case 0x18:
+ case 0x1d:
+ case 0x1e:
case 0x1f:
case 0x8000001d:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -929,6 +934,24 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
}
break;
+ /* Intel AMX TILE */
+ case 0x1d:
+ if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+
+ for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
+ if (!do_host_cpuid(array, function, i))
+ goto out;
+ }
+ break;
+ case 0x1e: /* TMUL information */
+ if (!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE)) {
+ entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+ break;
+ }
+ break;
case KVM_CPUID_SIGNATURE: {
const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
^ permalink raw reply related
* [PATCH v5 21/21] kvm: x86: Disable interception for IA32_XFD on demand
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Kevin Tian <kevin.tian@intel.com>
Always intercepting IA32_XFD causes non-negligible overhead when this
register is updated frequently in the guest.
Disable r/w emulation after intercepting the first WRMSR(IA32_XFD)
with a non-zero value.
Disable WRMSR emulation implies that IA32_XFD becomes out-of-sync
with the software states in fpstate and the per-cpu xfd cache. This
leads to two additional changes accordingly:
- Call fpu_sync_guest_vmexit_xfd_state() after vm-exit to bring
software states back in-sync with the MSR, before handle_exit_irqoff()
is called.
- Always trap #NM once write interception is disabled for IA32_XFD.
The #NM exception is rare if the guest doesn't use dynamic
features. Otherwise, there is at most one exception per guest
task given a dynamic feature.
p.s. We have confirmed that SDM is being revised to say that
when setting IA32_XFD[18] the AMX register state is not guaranteed
to be preserved. This clarification avoids adding mess for a creative
guest which sets IA32_XFD[18]=1 before saving active AMX state to
its own storage.
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/vmx/vmx.c | 24 +++++++++++++++++++-----
arch/x86/kvm/vmx/vmx.h | 2 +-
arch/x86/kvm/x86.c | 8 ++++++++
4 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5d97f4adc1cb..2e8ffe269c23 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -647,6 +647,7 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool tpr_access_reporting;
bool xsaves_enabled;
+ bool xfd_no_write_intercept;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b601dc8a7f13..a3be4b02df2c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -162,6 +162,7 @@ static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_FS_BASE,
MSR_GS_BASE,
MSR_KERNEL_GS_BASE,
+ MSR_IA32_XFD,
MSR_IA32_XFD_ERR,
#endif
MSR_IA32_SYSENTER_CS,
@@ -764,10 +765,11 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
}
/*
- * Trap #NM if guest xfd contains a non-zero value so guest XFD_ERR
- * can be saved timely.
+ * Disabling xfd interception indicates that dynamic xfeatures
+ * might be used in the guest. Always trap #NM in this case
+ * to save guest xfd_err timely.
*/
- if (vcpu->arch.guest_fpu.fpstate->xfd)
+ if (vcpu->arch.xfd_no_write_intercept)
eb |= (1u << NM_VECTOR);
vmcs_write32(EXCEPTION_BITMAP, eb);
@@ -1978,9 +1980,21 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_XFD:
ret = kvm_set_msr_common(vcpu, msr_info);
- /* Update #NM interception according to guest xfd */
- if (!ret)
+ /*
+ * Always intercepting WRMSR could incur non-negligible
+ * overhead given xfd might be changed frequently in
+ * guest context switch. Disable write interception
+ * upon the first write with a non-zero value (indicating
+ * potential usage on dynamic xfeatures). Also update
+ * exception bitmap to trap #NM for proper virtualization
+ * of guest xfd_err.
+ */
+ if (!ret && data) {
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD,
+ MSR_TYPE_RW);
+ vcpu->arch.xfd_no_write_intercept = true;
vmx_update_exception_bitmap(vcpu);
+ }
break;
#endif
case MSR_IA32_SYSENTER_CS:
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e0922c80c9d1..7f2c82e7f38f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -352,7 +352,7 @@ struct vcpu_vmx {
struct lbr_desc lbr_desc;
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 14
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 15
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92a1ea13ad51..df0150671b98 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10084,6 +10084,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
+ /*
+ * Sync xfd before calling handle_exit_irqoff() which may
+ * rely on the fact that guest_fpu::xfd is up-to-date (e.g.
+ * in #NM irqoff handler).
+ */
+ if (vcpu->arch.xfd_no_write_intercept)
+ fpu_sync_guest_vmexit_xfd_state();
+
static_call(kvm_x86_handle_exit_irqoff)(vcpu);
if (vcpu->arch.guest_fpu.xfd_err)
^ permalink raw reply related
* [PATCH v5 11/21] x86/fpu: Prepare xfd_err in struct fpu_guest
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
When XFD causes an instruction to generate #NM, IA32_XFD_ERR
contains information about which disabled state components are
being accessed. The #NM handler is expected to check this
information and then enable the state components by clearing
IA32_XFD for the faulting task (if having permission).
If the XFD_ERR value generated in guest is consumed/clobbered
by the host before the guest itself doing so, it may lead to
non-XFD-related #NM treated as XFD #NM in host (due to non-zero
value in XFD_ERR), or XFD-related #NM treated as non-XFD #NM in
guest (XFD_ERR cleared by the host #NM handler).
Introduce a new field in fpu_guest to save the guest xfd_err value.
KVM is expected to save guest xfd_err before interrupt is enabled
and restore it right before entering the guest (with interrupt
disabled).
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/fpu/types.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c752d0aa23a4..3795d0573773 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -517,6 +517,11 @@ struct fpu_guest {
*/
u64 perm;
+ /*
+ * @xfd_err: Save the guest value.
+ */
+ u64 xfd_err;
+
/*
* @fpstate: Pointer to the allocated guest fpstate
*/
^ permalink raw reply related
* [PATCH v5 06/21] x86/fpu: Add guest support to xfd_enable_feature()
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Thomas Gleixner <tglx@linutronix.de>
Guest support for dynamically enabled FPU features requires a few
modifications to the enablement function which is currently invoked from
the #NM handler:
1) Use guest permissions and sizes for the update
2) Update fpu_guest state accordingly
3) Take into account that the enabling can be triggered either from a
running guest via XSETBV and MSR_IA32_XFD write emulation or from
a guest restore. In the latter case the guests fpstate is not the
current tasks active fpstate.
Split the function and implement the guest mechanics throughout the
callchain.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/kernel/fpu/xstate.c | 93 +++++++++++++++++++++---------------
arch/x86/kernel/fpu/xstate.h | 2 +
2 files changed, 56 insertions(+), 39 deletions(-)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 5f01d463859d..0c0b2323cdec 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1499,29 +1499,6 @@ void fpstate_free(struct fpu *fpu)
vfree(fpu->fpstate);
}
-/**
- * fpu_install_fpstate - Update the active fpstate in the FPU
- *
- * @fpu: A struct fpu * pointer
- * @newfps: A struct fpstate * pointer
- *
- * Returns: A null pointer if the last active fpstate is the embedded
- * one or the new fpstate is already installed;
- * otherwise, a pointer to the old fpstate which has to
- * be freed by the caller.
- */
-static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
- struct fpstate *newfps)
-{
- struct fpstate *oldfps = fpu->fpstate;
-
- if (fpu->fpstate == newfps)
- return NULL;
-
- fpu->fpstate = newfps;
- return oldfps != &fpu->__fpstate ? oldfps : NULL;
-}
-
/**
* fpstate_realloc - Reallocate struct fpstate for the requested new features
*
@@ -1529,6 +1506,7 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
* of that task
* @ksize: The required size for the kernel buffer
* @usize: The required size for user space buffers
+ * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
*
* Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
* terminates quickly, vfree()-induced IPIs may be a concern, but tasks
@@ -1537,13 +1515,13 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
* Returns: 0 on success, -ENOMEM on allocation error.
*/
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
- unsigned int usize)
+ unsigned int usize, struct fpu_guest *guest_fpu)
{
struct fpu *fpu = ¤t->thread.fpu;
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
+ bool in_use;
- curfps = fpu->fpstate;
fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
newfps = vzalloc(fpsize);
@@ -1553,28 +1531,55 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
newfps->user_size = usize;
newfps->is_valloc = true;
+ /*
+ * When a guest FPU is supplied, use @guest_fpu->fpstate
+ * as reference independent whether it is in use or not.
+ */
+ curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
+
+ /* Determine whether @curfps is the active fpstate */
+ in_use = fpu->fpstate == curfps;
+
+ if (guest_fpu) {
+ newfps->is_guest = true;
+ newfps->is_confidential = curfps->is_confidential;
+ newfps->in_use = curfps->in_use;
+ guest_fpu->xfeatures |= xfeatures;
+ }
+
fpregs_lock();
/*
- * Ensure that the current state is in the registers before
- * swapping fpstate as that might invalidate it due to layout
- * changes.
+ * If @curfps is in use, ensure that the current state is in the
+ * registers before swapping fpstate as that might invalidate it
+ * due to layout changes.
*/
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
- curfps = fpu_install_fpstate(fpu, newfps);
-
/* Do the final updates within the locked region */
xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
- xfd_update_state(newfps);
+ if (guest_fpu) {
+ guest_fpu->fpstate = newfps;
+ /* If curfps is active, update the FPU fpstate pointer */
+ if (in_use)
+ fpu->fpstate = newfps;
+ } else {
+ fpu->fpstate = newfps;
+ }
+
+ if (in_use)
+ xfd_update_state(fpu->fpstate);
fpregs_unlock();
- vfree(curfps);
+ /* Only free valloc'ed state */
+ if (curfps && curfps->is_valloc)
+ vfree(curfps);
+
return 0;
}
@@ -1682,14 +1687,16 @@ static int xstate_request_perm(unsigned long idx, bool guest)
return ret;
}
-int xfd_enable_feature(u64 xfd_err)
+int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
struct fpu *fpu;
if (!xfd_event) {
- pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
+ if (!guest_fpu)
+ pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
return 0;
}
@@ -1697,14 +1704,16 @@ int xfd_enable_feature(u64 xfd_err)
spin_lock_irq(¤t->sighand->siglock);
/* If not permitted let it die */
- if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) {
+ if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
spin_unlock_irq(¤t->sighand->siglock);
return -EPERM;
}
fpu = ¤t->group_leader->thread.fpu;
- ksize = fpu->perm.__state_size;
- usize = fpu->perm.__user_state_size;
+ perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
+ ksize = perm->__state_size;
+ usize = perm->__user_state_size;
+
/*
* The feature is permitted. State size is sufficient. Dropping
* the lock is safe here even if more features are added from
@@ -1717,10 +1726,16 @@ int xfd_enable_feature(u64 xfd_err)
* Try to allocate a new fpstate. If that fails there is no way
* out.
*/
- if (fpstate_realloc(xfd_event, ksize, usize))
+ if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
return -EFAULT;
return 0;
}
+
+int xfd_enable_feature(u64 xfd_err)
+{
+ return __xfd_enable_feature(xfd_err, NULL);
+}
+
#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 98a472775c97..3e63f723525b 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -55,6 +55,8 @@ extern void fpu__init_system_xstate(unsigned int legacy_size);
extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
+
static inline u64 xfeatures_mask_supervisor(void)
{
return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
^ permalink raw reply related
* [PATCH v5 20/21] x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state()
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Thomas Gleixner <tglx@linutronix.de>
KVM can disable the write emulation for the XFD MSR when the vCPU's fpstate
is already correctly sized to reduce the overhead.
When write emulation is disabled the XFD MSR state after a VMEXIT is
unknown and therefore not in sync with the software states in fpstate and
the per CPU XFD cache.
Provide fpu_sync_guest_vmexit_xfd_state() which has to be invoked after a
VMEXIT before enabling interrupts when write emulation is disabled for the
XFD MSR.
It could be invoked unconditionally even when write emulation is enabled
for the price of a pointless MSR read.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/fpu/api.h | 2 ++
arch/x86/kernel/fpu/core.c | 24 ++++++++++++++++++++++++
2 files changed, 26 insertions(+)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index e4d10155290b..a467eb80f9ed 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -142,8 +142,10 @@ extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatu
#ifdef CONFIG_X86_64
extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd);
+extern void fpu_sync_guest_vmexit_xfd_state(void);
#else
static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { }
+static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 5e7de79c1fe1..e5dfd8b9825a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -289,6 +289,30 @@ void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
fpregs_unlock();
}
EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
+
+/**
+ * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state
+ *
+ * Must be invoked from KVM after a VMEXIT before enabling interrupts when
+ * XFD write emulation is disabled. This is required because the guest can
+ * freely modify XFD and the state at VMEXIT is not guaranteed to be the
+ * same as the state on VMENTER. So software state has to be udpated before
+ * any operation which depends on it can take place.
+ *
+ * Note: It can be invoked unconditionally even when write emulation is
+ * enabled for the price of a then pointless MSR read.
+ */
+void fpu_sync_guest_vmexit_xfd_state(void)
+{
+ struct fpstate *fps = current->thread.fpu.fpstate;
+
+ lockdep_assert_irqs_disabled();
+ if (fpu_state_size_dynamic()) {
+ rdmsrl(MSR_IA32_XFD, fps->xfd);
+ __this_cpu_write(xfd_state, fps->xfd);
+ }
+}
+EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
#endif /* CONFIG_X86_64 */
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
^ permalink raw reply related
* [PATCH v5 18/21] kvm: x86: Add support for getting/setting expanded xstate buffer
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Guang Zeng <guang.zeng@intel.com>
With KVM_CAP_XSAVE, userspace uses a hardcoded 4KB buffer to get/set
xstate data from/to KVM. This doesn't work when dynamic xfeatures
(e.g. AMX) are exposed to the guest as they require a larger buffer
size.
Introduce a new capability (KVM_CAP_XSAVE2). Userspace VMM gets the
required xstate buffer size via KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2).
KVM_SET_XSAVE is extended to work with both legacy and new capabilities
by doing properly-sized memdup_user() based on the guest fpu container.
KVM_GET_XSAVE is kept for backward-compatible reason. Instead,
KVM_GET_XSAVE2 is introduced under KVM_CAP_XSAVE2 as the preferred
interface for getting xstate buffer (4KB or larger size) from KVM
(Link: https://lkml.org/lkml/2021/12/15/510)
Also, update the api doc with the new KVM_GET_XSAVE2 ioctl.
Signed-off-by: Guang Zeng <guang.zeng@intel.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
Documentation/virt/kvm/api.rst | 42 ++++++++++++++++++++++++++++--
arch/x86/include/uapi/asm/kvm.h | 16 +++++++++++-
arch/x86/kvm/cpuid.c | 2 +-
arch/x86/kvm/cpuid.h | 2 ++
arch/x86/kvm/x86.c | 45 ++++++++++++++++++++++++++++++++-
include/uapi/linux/kvm.h | 4 +++
6 files changed, 106 insertions(+), 5 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index f4ea5e41a4d0..d3791a14eb9a 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1569,6 +1569,7 @@ otherwise it will return EBUSY error.
struct kvm_xsave {
__u32 region[1024];
+ __u32 extra[0];
};
This ioctl would copy current vcpu's xsave struct to the userspace.
@@ -1577,7 +1578,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
4.43 KVM_SET_XSAVE
------------------
-:Capability: KVM_CAP_XSAVE
+:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2
:Architectures: x86
:Type: vcpu ioctl
:Parameters: struct kvm_xsave (in)
@@ -1588,9 +1589,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
struct kvm_xsave {
__u32 region[1024];
+ __u32 extra[0];
};
-This ioctl would copy userspace's xsave struct to the kernel.
+This ioctl would copy userspace's xsave struct to the kernel. It copies
+as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2),
+when invoked on the vm file descriptor. The size value returned by
+KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
+Currently, it is only greater than 4096 if a dynamic feature has been
+enabled with ``arch_prctl()``, but this may change in the future.
+
+The offsets of the state save areas in struct kvm_xsave follow the
+contents of CPUID leaf 0xD on the host.
4.44 KVM_GET_XCRS
@@ -5535,6 +5545,34 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header.
The Stats Data block contains an array of 64-bit values in the same order
as the descriptors in Descriptors block.
+4.42 KVM_GET_XSAVE2
+------------------
+
+:Capability: KVM_CAP_XSAVE2
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xsave (out)
+:Returns: 0 on success, -1 on error
+
+
+::
+
+ struct kvm_xsave {
+ __u32 region[1024];
+ __u32 extra[0];
+ };
+
+This ioctl would copy current vcpu's xsave struct to the userspace. It
+copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+when invoked on the vm file descriptor. The size value returned by
+KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
+Currently, it is only greater than 4096 if a dynamic feature has been
+enabled with ``arch_prctl()``, but this may change in the future.
+
+The offsets of the state save areas in struct kvm_xsave follow the contents
+of CPUID leaf 0xD on the host.
+
+
5. The kvm_run structure
========================
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..2da3316bb559 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[0];
};
#define KVM_MAX_XCRS 16
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ba4c3d5d2386..c55e57b30e81 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -32,7 +32,7 @@
u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
EXPORT_SYMBOL_GPL(kvm_cpu_caps);
-static u32 xstate_required_size(u64 xstate_bv, bool compacted)
+u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c99edfff7f82..8a770b481d9d 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -30,6 +30,8 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only);
+u32 xstate_required_size(u64 xstate_bv, bool compacted);
+
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 553e4af12edc..92a1ea13ad51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4314,6 +4314,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
else
r = 0;
break;
+ case KVM_CAP_XSAVE2: {
+ u64 guest_perm = xstate_get_guest_group_perm();
+
+ r = xstate_required_size(supported_xcr0 & guest_perm, false);
+ if (r < sizeof(struct kvm_xsave))
+ r = sizeof(struct kvm_xsave);
+ break;
+ }
default:
break;
}
@@ -4917,6 +4925,16 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
vcpu->arch.pkru);
}
+static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
+ u8 *state, unsigned int size)
+{
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
+ return;
+
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+ state, size, vcpu->arch.pkru);
+}
+
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
@@ -5370,6 +5388,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_XSAVE: {
+ r = -EINVAL;
+ if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
+ break;
+
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
if (!u.xsave)
@@ -5384,7 +5406,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_XSAVE: {
- u.xsave = memdup_user(argp, sizeof(*u.xsave));
+ int size = vcpu->arch.guest_fpu.uabi_size;
+
+ u.xsave = memdup_user(argp, size);
if (IS_ERR(u.xsave)) {
r = PTR_ERR(u.xsave);
goto out_nofree;
@@ -5393,6 +5417,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
}
+
+ case KVM_GET_XSAVE2: {
+ int size = vcpu->arch.guest_fpu.uabi_size;
+
+ u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ r = -ENOMEM;
+ if (!u.xsave)
+ break;
+
+ kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
+
+ r = -EFAULT;
+ if (copy_to_user(argp, u.xsave, size))
+ break;
+
+ r = 0;
+ break;
+ }
+
case KVM_GET_XCRS: {
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
r = -ENOMEM;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 12421e76adcb..6305426f1bc0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1131,6 +1131,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_XSAVE2 207
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1621,6 +1622,9 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
+
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
^ permalink raw reply related
* [PATCH v5 19/21] kvm: selftests: Add support for KVM_CAP_XSAVE2
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Wei Wang <wei.w.wang@intel.com>
When KVM_CAP_XSAVE2 is supported, userspace is expected to allocate
buffer for KVM_GET_XSAVE2 and KVM_SET_XSAVE using the size returned
by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2).
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Guang Zeng <guang.zeng@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
tools/arch/x86/include/uapi/asm/kvm.h | 16 ++++-
tools/include/uapi/linux/kvm.h | 3 +
.../testing/selftests/kvm/include/kvm_util.h | 2 +
.../selftests/kvm/include/x86_64/processor.h | 10 +++
tools/testing/selftests/kvm/lib/kvm_util.c | 32 +++++++++
.../selftests/kvm/lib/x86_64/processor.c | 67 ++++++++++++++++++-
.../testing/selftests/kvm/x86_64/evmcs_test.c | 2 +-
tools/testing/selftests/kvm/x86_64/smm_test.c | 2 +-
.../testing/selftests/kvm/x86_64/state_test.c | 2 +-
.../kvm/x86_64/vmx_preemption_timer_test.c | 2 +-
10 files changed, 130 insertions(+), 8 deletions(-)
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..2da3316bb559 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[0];
};
#define KVM_MAX_XCRS 16
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 1daa45268de2..f066637ee206 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1131,6 +1131,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_XSAVE2 207
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1551,6 +1552,8 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_XSAVE */
#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave)
#define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
/* Available with KVM_CAP_XCRS */
#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2d62edc49d67..65ace3f01fad 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -85,6 +85,7 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
int kvm_check_cap(long cap);
+int vm_check_cap(struct kvm_vm *vm, long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_enable_cap *cap);
@@ -316,6 +317,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
* guest_code - The vCPU's entry point
*/
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+void vm_xsave_req_perm(void);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 05e65ca1c30c..58633e51960f 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -10,8 +10,10 @@
#include <assert.h>
#include <stdint.h>
+#include <syscall.h>
#include <asm/msr-index.h>
+#include <asm/prctl.h>
#include "../kvm_util.h"
@@ -352,6 +354,7 @@ struct kvm_x86_state;
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
struct kvm_msr_list *kvm_get_msr_index_list(void);
uint64_t kvm_get_feature_msr(uint64_t msr_index);
@@ -443,4 +446,11 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
+#define XSTATE_XTILE_CFG_BIT 17
+#define XSTATE_XTILE_DATA_BIT 18
+
+#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
+#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \
+ XSTATE_XTILE_DATA_MASK)
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 53d2b5d04b82..318ac76fd2f1 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -85,6 +85,33 @@ int kvm_check_cap(long cap)
return ret;
}
+/* VM Check Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
/* VM Enable Capability
*
* Input Args:
@@ -344,6 +371,11 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
struct kvm_vm *vm;
int i;
+ /*
+ * Permission needs to be requested before KVM_SET_CPUID2.
+ */
+ vm_xsave_req_perm();
+
/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index eef7b34756d5..f19d6d201977 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -650,6 +650,45 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
vcpu_sregs_set(vm, vcpuid, &sregs);
}
+#define CPUID_XFD_BIT (1 << 4)
+static bool is_xfd_supported(void)
+{
+ int eax, ebx, ecx, edx;
+ const int leaf = 0xd, subleaf = 0x1;
+
+ __asm__ __volatile__(
+ "cpuid"
+ : /* output */ "=a"(eax), "=b"(ebx),
+ "=c"(ecx), "=d"(edx)
+ : /* input */ "0"(leaf), "2"(subleaf));
+
+ return !!(eax & CPUID_XFD_BIT);
+}
+
+void vm_xsave_req_perm(void)
+{
+ unsigned long bitmask;
+ long rc;
+
+ if (!is_xfd_supported())
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
+ XSTATE_XTILE_DATA_BIT);
+ /*
+ * The older kernel version(<5.15) can't support
+ * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
+ */
+ if (rc)
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK,
+ "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
+ bitmask);
+}
+
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
struct kvm_mp_state mp_state;
@@ -1018,10 +1057,10 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
}
struct kvm_x86_state {
+ struct kvm_xsave *xsave;
struct kvm_vcpu_events events;
struct kvm_mp_state mp_state;
struct kvm_regs regs;
- struct kvm_xsave xsave;
struct kvm_xcrs xcrs;
struct kvm_sregs sregs;
struct kvm_debugregs debugregs;
@@ -1069,6 +1108,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
return list;
}
+static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu,
+ struct kvm_x86_state *state)
+{
+ int size;
+
+ size = vm_check_cap(vm, KVM_CAP_XSAVE2);
+ if (!size)
+ size = sizeof(struct kvm_xsave);
+
+ state->xsave = malloc(size);
+ if (size == sizeof(struct kvm_xsave))
+ return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave);
+ else
+ return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave);
+}
+
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1112,7 +1167,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
r);
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+ r = vcpu_save_xsave_state(vm, vcpu, state);
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
r);
@@ -1157,7 +1212,7 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
+ r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
@@ -1198,6 +1253,12 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
}
}
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+ free(state->xsave);
+ free(state);
+}
+
bool is_intel_cpu(void)
{
int eax, ebx, ecx, edx;
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 2b46dcca86a8..4c7841dfd481 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -129,7 +129,7 @@ static void save_restore_vm(struct kvm_vm *vm)
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
vcpu_load_state(vm, VCPU_ID, state);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(®s2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, ®s2);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index d0fe2fdce58c..2da8eb8e2d96 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -212,7 +212,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 32854c1462ad..2e0a92da8ff5 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -218,7 +218,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(®s2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, ®s2);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a07480aed397..ff92e25b6f1e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(®s2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, ®s2);
^ permalink raw reply related
* [PATCH v5 17/21] x86/fpu: Add uabi_size to guest_fpu
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Thomas Gleixner <tglx@linutronix.de>
Userspace needs to inquire KVM about the buffer size to work
with the new KVM_SET_XSAVE and KVM_GET_XSAVE2. Add the size info
to guest_fpu for KVM to access.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/include/asm/fpu/types.h | 5 +++++
arch/x86/kernel/fpu/core.c | 1 +
arch/x86/kernel/fpu/xstate.c | 1 +
3 files changed, 7 insertions(+)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3795d0573773..eb7cd1139d97 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -522,6 +522,11 @@ struct fpu_guest {
*/
u64 xfd_err;
+ /*
+ * @uabi_size: Size required for save/restore
+ */
+ unsigned int uabi_size;
+
/*
* @fpstate: Pointer to the allocated guest fpstate
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8f7f9bfc2506..5e7de79c1fe1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -240,6 +240,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
gfpu->fpstate = fpstate;
gfpu->xfeatures = fpu_user_cfg.default_features;
gfpu->perm = fpu_user_cfg.default_features;
+ gfpu->uabi_size = fpu_user_cfg.default_size;
fpu_init_guest_permissions(gfpu);
return true;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0c0b2323cdec..10fe072f1c92 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1545,6 +1545,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
newfps->is_confidential = curfps->is_confidential;
newfps->in_use = curfps->in_use;
guest_fpu->xfeatures |= xfeatures;
+ guest_fpu->uabi_size = usize;
}
fpregs_lock();
^ permalink raw reply related
* [PATCH v5 08/21] kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2
From: Yang Zhong @ 2022-01-05 12:35 UTC (permalink / raw)
To: x86, kvm, linux-kernel, linux-doc, linux-kselftest, tglx, mingo,
bp, dave.hansen, pbonzini, corbet, shuah, seanjc
Cc: jun.nakajima, kevin.tian, jing2.liu, jing2.liu, guang.zeng,
wei.w.wang, yang.zhong
In-Reply-To: <20220105123532.12586-1-yang.zhong@intel.com>
From: Jing Liu <jing2.liu@intel.com>
KVM can request fpstate expansion in two approaches:
1) When intercepting guest updates to XCR0 and XFD MSR;
2) Before vcpu runs (e.g. at KVM_SET_CPUID2);
The first option doesn't waste memory for legacy guest if it doesn't
support XFD. However doing so introduces more complexity and also
imposes an order requirement in the restoring path, i.e. XCR0/XFD
must be restored before XSTATE.
Given that the agreement is to do the static approach. This is
considered a better tradeoff though it does waste 8K memory for
legacy guest if its CPUID includes dynamically-enabled xfeatures.
Successful fpstate expansion requires userspace VMM to acquire
guest xstate permissions before calling KVM_SET_CPUID2.
Also take the chance to adjust the indent in kvm_set_cpuid().
Signed-off-by: Jing Liu <jing2.liu@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yang Zhong <yang.zhong@intel.com>
---
arch/x86/kvm/cpuid.c | 42 +++++++++++++++++++++++++++++-------------
1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index eb52dde5deec..a0fedf1514ab 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -84,9 +84,12 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
return NULL;
}
-static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entries,
+ int nent)
{
struct kvm_cpuid_entry2 *best;
+ u64 xfeatures;
/*
* The existing code assumes virtual address is 48-bit or 57-bit in the
@@ -100,7 +103,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return -EINVAL;
}
- return 0;
+ /*
+ * Exposing dynamic xfeatures to the guest requires additional
+ * enabling in the FPU, e.g. to expand the guest XSAVE state size.
+ */
+ best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ if (!best)
+ return 0;
+
+ xfeatures = best->eax | ((u64)best->edx << 32);
+ xfeatures &= XFEATURE_MASK_USER_DYNAMIC;
+ if (!xfeatures)
+ return 0;
+
+ return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures);
}
static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
@@ -280,21 +296,21 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
- int r;
+ int r;
- r = kvm_check_cpuid(e2, nent);
- if (r)
- return r;
+ r = kvm_check_cpuid(vcpu, e2, nent);
+ if (r)
+ return r;
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = nent;
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = nent;
- kvm_update_kvm_cpuid_base(vcpu);
- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);
+ kvm_update_kvm_cpuid_base(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
- return 0;
+ return 0;
}
/* when an old userspace process fills a new kernel module */
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.