* [PATCH v2 5/7] pci: make pci_match_one_device match on ID instead of device
From: Gary Guo @ 2026-06-30 11:09 UTC (permalink / raw)
To: Bjorn Helgaas, Zhenzhong Duan, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Damien Le Moal,
Niklas Cassel, GOTO Masanori, YOKOTA Hiroshi,
James E.J. Bottomley, Martin K. Petersen, Vaibhav Gupta,
Jens Taprogge, Ido Schimmel, Petr Machata, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-pci, driver-core, linux-kernel, linux-ide, linux-scsi,
industrypack-devel, netdev, Gary Guo
In-Reply-To: <20260630-pci_id_fix-v2-0-b834a98c0af2@garyguo.net>
There is a need to match against bare IDs rather than against devices. Thus
rename pci_match_one_device to pci_match_one_id, and add a pci_id_from_device
helper to make it easy to convert users.
Similarly, add do_pci_match_id, an ID-based variant of pci_match_id; the
existing pci_match_id API is kept as a wrapper because it has quite a few users.
Signed-off-by: Gary Guo <gary@garyguo.net>
---
drivers/pci/pci-driver.c | 38 ++++++++++++++++++++++++++++----------
drivers/pci/pci.h | 36 ++++++++++++++++++++++++++----------
drivers/pci/search.c | 6 ++++--
3 files changed, 58 insertions(+), 22 deletions(-)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index f36778e62ac1..0507cb801310 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -90,6 +90,27 @@ static void pci_free_dynids(struct pci_driver *drv)
spin_unlock(&drv->dynids.lock);
}
+/**
+ * do_pci_match_id - See if a PCI ID matches a given pci_id table
+ * @ids: array of PCI device ID structures to search in
+ * @dev_id: the actual PCI device ID structure to match against.
+ *
+ * Returns the matching pci_device_id structure or
+ * %NULL if there is no match.
+ */
+static const struct pci_device_id *do_pci_match_id(const struct pci_device_id *ids,
+ const struct pci_device_id *dev_id)
+{
+ if (ids) {
+ while (ids->vendor || ids->subvendor || ids->class_mask) {
+ if (pci_match_one_id(ids, dev_id))
+ return ids;
+ ids++;
+ }
+ }
+ return NULL;
+}
+
/**
* pci_match_id - See if a PCI device matches a given pci_id table
* @ids: array of PCI device ID structures to search in
@@ -105,14 +126,9 @@ static void pci_free_dynids(struct pci_driver *drv)
const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
struct pci_dev *dev)
{
- if (ids) {
- while (ids->vendor || ids->subvendor || ids->class_mask) {
- if (pci_match_one_device(ids, dev))
- return ids;
- ids++;
- }
- }
- return NULL;
+ struct pci_device_id dev_id = pci_id_from_device(dev);
+
+ return do_pci_match_id(ids, &dev_id);
}
EXPORT_SYMBOL(pci_match_id);
@@ -138,6 +154,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
{
struct pci_dynid *dynid;
const struct pci_device_id *found_id = NULL, *ids;
+ struct pci_device_id dev_id;
int ret;
/* When driver_override is set, only bind to the matching driver */
@@ -145,10 +162,11 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
if (ret == 0)
return NULL;
+ dev_id = pci_id_from_device(dev);
/* Look at the dynamic ids first, before the static ones */
spin_lock(&drv->dynids.lock);
list_for_each_entry(dynid, &drv->dynids.list, node) {
- if (pci_match_one_device(&dynid->id, dev)) {
+ if (pci_match_one_id(&dynid->id, &dev_id)) {
found_id = &dynid->id;
break;
}
@@ -158,7 +176,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
if (found_id)
return found_id;
- for (ids = drv->id_table; (found_id = pci_match_id(ids, dev));
+ for (ids = drv->id_table; (found_id = do_pci_match_id(ids, &dev_id));
ids = found_id + 1) {
/*
* The match table is split based on driver_override.
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4469e1a77f3c..0567a8762baa 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -442,21 +442,37 @@ static inline int pci_setup_cardbus(char *str) { return -ENOENT; }
#endif /* CONFIG_CARDBUS */
/**
- * pci_match_one_device - Tell if a PCI device structure has a matching
- * PCI device id structure
- * @id: single PCI device id structure to match
- * @dev: the PCI device structure to match against
+ * pci_id_from_device - Obtain a pci_device_id from a PCI device
+ * @dev: the PCI device
+ *
+ * Returns a pci_device_id filled in from @dev's vendor, device, subsystem and class fields.
+ */
+static inline struct pci_device_id pci_id_from_device(const struct pci_dev *dev)
+{
+ return (struct pci_device_id) {
+ .vendor = dev->vendor,
+ .device = dev->device,
+ .subvendor = dev->subsystem_vendor,
+ .subdevice = dev->subsystem_device,
+ .class = dev->class,
+ };
+}
+
+/**
+ * pci_match_one_id - Tell if a PCI device ID matches a needle PCI device id
+ * @id: single PCI device id structure to match against (needle)
+ * @dev_id: the actual ID from the PCI device (can be created via pci_id_from_device)
*
* Returns the matching pci_device_id structure or %NULL if there is no match.
*/
static inline const struct pci_device_id *
-pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+pci_match_one_id(const struct pci_device_id *id, const struct pci_device_id *dev_id)
{
- if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
- (id->device == PCI_ANY_ID || id->device == dev->device) &&
- (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) &&
- (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) &&
- !((id->class ^ dev->class) & id->class_mask))
+ if ((id->vendor == PCI_ANY_ID || id->vendor == dev_id->vendor) &&
+ (id->device == PCI_ANY_ID || id->device == dev_id->device) &&
+ (id->subvendor == PCI_ANY_ID || id->subvendor == dev_id->subvendor) &&
+ (id->subdevice == PCI_ANY_ID || id->subdevice == dev_id->subdevice) &&
+ !((id->class ^ dev_id->class) & id->class_mask))
return id;
return NULL;
}
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index e3d3177fce54..c8c4bfe7817b 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -245,8 +245,10 @@ static int match_pci_dev_by_id(struct device *dev, const void *data)
{
struct pci_dev *pdev = to_pci_dev(dev);
const struct pci_device_id *id = data;
+ struct pci_device_id dev_id;
- if (pci_match_one_device(id, pdev))
+ dev_id = pci_id_from_device(pdev);
+ if (pci_match_one_id(id, &dev_id))
return 1;
return 0;
}
@@ -418,7 +420,7 @@ EXPORT_SYMBOL(pci_get_class);
*
* Iterates through the list of known PCI devices. If a PCI device is found
* with a matching base class code, the reference count to the device is
- * incremented. See pci_match_one_device() to figure out how does this works.
+ * incremented. See pci_match_one_id() to figure out how this works.
* A new search is initiated by passing %NULL as the @from argument.
* Otherwise if @from is not %NULL, searches continue from next device on the
* global list. The reference count for @from is always decremented if it is
--
2.54.0
^ permalink raw reply related
* [PATCH v2 7/7] pci: fix UAF when probe runs concurrent to dyn ID removal
From: Gary Guo @ 2026-06-30 11:09 UTC (permalink / raw)
To: Bjorn Helgaas, Zhenzhong Duan, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Damien Le Moal,
Niklas Cassel, GOTO Masanori, YOKOTA Hiroshi,
James E.J. Bottomley, Martin K. Petersen, Vaibhav Gupta,
Jens Taprogge, Ido Schimmel, Petr Machata, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-pci, driver-core, linux-kernel, linux-ide, linux-scsi,
industrypack-devel, netdev, Sashiko, Gary Guo
In-Reply-To: <20260630-pci_id_fix-v2-0-b834a98c0af2@garyguo.net>
Dynamic IDs are only guaranteed to be valid when dynids.lock is held,
as remove_id_store can free the node. Thus, make a copy in
pci_match_device. Also, clarify that the id parameter is only valid during
probe.
Reported-by: Sashiko <sashiko-bot@kernel.org>
Link: https://lore.kernel.org/all/20260619170503.518F61F00A3A@smtp.kernel.org/
Fixes: 0994375e9614 ("PCI: add remove_id sysfs entry")
Signed-off-by: Gary Guo <gary@garyguo.net>
---
drivers/pci/pci-driver.c | 58 ++++++++++++++++++++++++------------------------
include/linux/pci.h | 1 +
2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index df1be7ea2bde..fad028b9dc53 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -179,14 +179,16 @@ static const struct pci_device_id pci_device_id_any = {
* pci_match_device - See if a device matches a driver's list of IDs
* @drv: the PCI driver to match against
* @dev: the PCI device structure to match against
+ * @id: Matched pci_device_id
*
* Used by a driver to check whether a PCI device is in its list of
* supported devices or in the dynids list, which may have been augmented
- * via the sysfs "new_id" file. Returns the matching pci_device_id
- * structure or %NULL if there is no match.
+ * via the sysfs "new_id" file. Returns true if there is a match, the matched
+ * ID is stored in @id.
*/
-static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
- struct pci_dev *dev)
+static bool pci_match_device(struct pci_driver *drv,
+ struct pci_dev *dev,
+ struct pci_device_id *id)
{
struct pci_dynid *dynid;
const struct pci_device_id *found_id = NULL;
@@ -196,30 +198,33 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
/* When driver_override is set, only bind to the matching driver */
ret = device_match_driver_override(&dev->dev, &drv->driver);
if (ret == 0)
- return NULL;
+ return false;
dev_id = pci_id_from_device(dev);
/* Look at the dynamic ids first, before the static ones */
- spin_lock(&drv->dynids.lock);
- list_for_each_entry(dynid, &drv->dynids.list, node) {
- if (pci_match_one_id(&dynid->id, &dev_id)) {
- found_id = &dynid->id;
- break;
+ {
+ guard(spinlock)(&drv->dynids.lock);
+ list_for_each_entry(dynid, &drv->dynids.list, node) {
+ if (pci_match_one_id(&dynid->id, &dev_id)) {
+ *id = dynid->id;
+ return true;
+ }
}
}
- spin_unlock(&drv->dynids.lock);
-
- if (found_id)
- return found_id;
found_id = do_pci_match_id(drv->id_table, &dev_id, ret > 0);
- if (found_id)
- return found_id;
+ if (found_id) {
+ *id = *found_id;
+ return true;
+ }
/* driver_override will always match, send a dummy id */
- if (ret > 0)
- return &pci_device_id_any;
- return NULL;
+ if (ret > 0) {
+ *id = pci_device_id_any;
+ return true;
+ }
+
+ return false;
}
/**
@@ -465,15 +470,14 @@ void pci_probe_flush_workqueue(void)
*/
static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
{
- const struct pci_device_id *id;
+ struct pci_device_id id;
int error = 0;
if (drv->probe) {
error = -ENODEV;
- id = pci_match_device(drv, pci_dev);
- if (id)
- error = pci_call_probe(drv, pci_dev, id);
+ if (pci_match_device(drv, pci_dev, &id))
+ error = pci_call_probe(drv, pci_dev, &id);
}
return error;
}
@@ -1558,17 +1562,13 @@ static int pci_bus_match(struct device *dev, const struct device_driver *drv)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
struct pci_driver *pci_drv;
- const struct pci_device_id *found_id;
+ struct pci_device_id id;
if (pci_dev_binding_disallowed(pci_dev))
return 0;
pci_drv = (struct pci_driver *)to_pci_driver(drv);
- found_id = pci_match_device(pci_drv, pci_dev);
- if (found_id)
- return 1;
-
- return 0;
+ return pci_match_device(pci_drv, pci_dev, &id);
}
/**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ebb5b9d76360..f128d8c0cbb6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -979,6 +979,7 @@ struct module;
* function returns zero when the driver chooses to
* take "ownership" of the device or an error code
* (negative number) otherwise.
+ * The pci_device_id parameter is only valid during probe.
* The probe function always gets called from process
* context, so it can sleep.
* @remove: The remove() function gets called whenever a device
--
2.54.0
^ permalink raw reply related
* [PATCH v2 4/7] mlxsw: don't keep pci_device_id
From: Gary Guo @ 2026-06-30 11:09 UTC (permalink / raw)
To: Bjorn Helgaas, Zhenzhong Duan, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Damien Le Moal,
Niklas Cassel, GOTO Masanori, YOKOTA Hiroshi,
James E.J. Bottomley, Martin K. Petersen, Vaibhav Gupta,
Jens Taprogge, Ido Schimmel, Petr Machata, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-pci, driver-core, linux-kernel, linux-ide, linux-scsi,
industrypack-devel, netdev, Gary Guo
In-Reply-To: <20260630-pci_id_fix-v2-0-b834a98c0af2@garyguo.net>
pci_device_id is not guaranteed to live longer than probe due to the presence
of dynamic IDs. The stored ID is unused, so remove it.
Signed-off-by: Gary Guo <gary@garyguo.net>
---
drivers/net/ethernet/mellanox/mlxsw/pci.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 0da85d36647d..bfe3268dfdc1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -130,7 +130,6 @@ struct mlxsw_pci {
} comp;
} cmd;
struct mlxsw_bus_info bus_info;
- const struct pci_device_id *id;
enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
u8 num_cqs; /* Number of CQs */
u8 num_sdqs; /* Number of SDQs */
@@ -1768,7 +1767,6 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
}
static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
- const struct pci_device_id *id,
u32 *p_sys_status)
{
unsigned long end;
@@ -1839,7 +1837,7 @@ static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
}
static int
-mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
+mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
bool pci_reset_sbr_supported = false;
@@ -1848,7 +1846,7 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
u32 sys_status;
int err;
- err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
+ err = mlxsw_pci_sys_ready_wait(mlxsw_pci, &sys_status);
if (err) {
dev_err(&pdev->dev, "Failed to reach system ready status before reset. Status is 0x%x\n",
sys_status);
@@ -1880,7 +1878,7 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
if (err)
return err;
- err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
+ err = mlxsw_pci_sys_ready_wait(mlxsw_pci, &sys_status);
if (err) {
dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
sys_status);
@@ -1932,7 +1930,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (!mbox)
return -ENOMEM;
- err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id);
+ err = mlxsw_pci_reset(mlxsw_pci);
if (err)
goto err_reset;
@@ -2464,7 +2462,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
mlxsw_pci->bus_info.dev = &pdev->dev;
mlxsw_pci->bus_info.read_clock_capable = true;
- mlxsw_pci->id = id;
err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
&mlxsw_pci_bus, mlxsw_pci, false,
--
2.54.0
^ permalink raw reply related
* [PATCH v2 1/7] ata: don't keep pci_device_id
From: Gary Guo @ 2026-06-30 11:09 UTC (permalink / raw)
To: Bjorn Helgaas, Zhenzhong Duan, Greg Kroah-Hartman,
Rafael J. Wysocki, Danilo Krummrich, Damien Le Moal,
Niklas Cassel, GOTO Masanori, YOKOTA Hiroshi,
James E.J. Bottomley, Martin K. Petersen, Vaibhav Gupta,
Jens Taprogge, Ido Schimmel, Petr Machata, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-pci, driver-core, linux-kernel, linux-ide, linux-scsi,
industrypack-devel, netdev, Gary Guo
In-Reply-To: <20260630-pci_id_fix-v2-0-b834a98c0af2@garyguo.net>
pci_device_id is not guaranteed to live longer than probe due to the presence
of dynamic IDs. All information apart from driver_data can be easily
retrieved from pci_dev, so just store driver_data.
Signed-off-by: Gary Guo <gary@garyguo.net>
---
drivers/ata/ata_generic.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index e70b6c089cf1..18ea740ca582 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -51,11 +51,11 @@ enum {
static int generic_set_mode(struct ata_link *link, struct ata_device **unused)
{
struct ata_port *ap = link->ap;
- const struct pci_device_id *id = ap->host->private_data;
+ unsigned long driver_data = (unsigned long)ap->host->private_data;
int dma_enabled = 0;
struct ata_device *dev;
- if (id->driver_data & ATA_GEN_FORCE_DMA) {
+ if (driver_data & ATA_GEN_FORCE_DMA) {
dma_enabled = 0xff;
} else if (ap->ioaddr.bmdma_addr) {
/* Bits 5 and 6 indicate if DMA is active on master/slave */
@@ -206,7 +206,7 @@ static int ata_generic_init_one(struct pci_dev *dev, const struct pci_device_id
return rc;
pcim_pin_device(dev);
}
- return ata_pci_bmdma_init_one(dev, ppi, &generic_sht, (void *)id, 0);
+ return ata_pci_bmdma_init_one(dev, ppi, &generic_sht, (void *)id->driver_data, 0);
}
static const struct pci_device_id ata_generic[] = {
--
2.54.0
^ permalink raw reply related
* Re: [PATCH v4 net 1/3] i40e: unregister netdev before clearing VSI on reinit failure
From: Maciej Fijalkowski @ 2026-06-30 11:11 UTC (permalink / raw)
To: intel-wired-lan
Cc: netdev, magnus.karlsson, kuba, pabeni, horms, przemyslaw.kitszel,
jacob.e.keller
In-Reply-To: <20260625151431.1102838-2-maciej.fijalkowski@intel.com>
On Thu, Jun 25, 2026 at 05:14:29PM +0200, Maciej Fijalkowski wrote:
> i40e_vsi_reinit_setup() tears down the existing VSI queue/ring backing
> state before allocating replacement arrays and queue tracking. If one of
> these early allocations fails, the function jumps directly to err_vsi
> and calls i40e_vsi_clear().
>
> For a registered netdev, this frees the VSI while
> netdev_priv(netdev)->vsi can still point at it, leaving the registered
> netdev with dangling private driver state.
>
> Split the error path so failures after destructive reinit teardown first
> unregister and free the netdev before clearing the VSI.
>
> Fixes: d2a69fefd756 ("i40e: Fix changing previously set num_queue_pairs for PFs")
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> ---
> drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index a04683004a56..471fa7f7b643 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -14274,7 +14274,7 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
> i40e_set_num_rings_in_vsi(vsi);
> ret = i40e_vsi_alloc_arrays(vsi, false);
> if (ret)
> - goto err_vsi;
> + goto err_netdev;
>
> alloc_queue_pairs = vsi->alloc_queue_pairs *
> (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
> @@ -14284,7 +14284,7 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
> dev_info(&pf->pdev->dev,
> "failed to get tracking for %d queues for VSI %d err %d\n",
> alloc_queue_pairs, vsi->seid, ret);
> - goto err_vsi;
> + goto err_netdev;
> }
> vsi->base_queue = ret;
>
> @@ -14309,6 +14309,7 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
>
> err_rings:
> i40e_vsi_free_q_vectors(vsi);
> +err_netdev:
> if (vsi->netdev_registered) {
> vsi->netdev_registered = false;
> unregister_netdev(vsi->netdev);
Sashiko says:
---
Could this result in a deadlock when called during a device rebuild?
Looking at i40e_rebuild(), it explicitly acquires the RTNL lock before
proceeding:
drivers/net/ethernet/intel/i40e/i40e_main.c:i40e_rebuild() {
...
if (!lock_acquired)
rtnl_lock();
ret = i40e_setup_pf_switch(pf, reinit, true);
...
}
If i40e_setup_pf_switch() calls i40e_vsi_reinit_setup() and takes this new
err_netdev path, unregister_netdev() will unconditionally attempt to acquire
rtnl_lock(), leading to a deadlock on the non-recursive mutex.
---
which is another valid concern. I'll take a stab at addressing this, but
looking at the bigger picture, we don't propagate errors from the rebuild path,
so I wouldn't be surprised if Sashiko pointed that out in the next iteration.
I'd say that would be too big a refactor for this series.
> @@ -14318,7 +14319,6 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
> if (vsi->type == I40E_VSI_MAIN)
> i40e_devlink_destroy_port(pf);
> i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
> -err_vsi:
> i40e_vsi_clear(vsi);
> return NULL;
> }
> --
> 2.43.0
>
^ permalink raw reply
* Re: [PATCH v4 net-next] bonding: no longer rely on RTNL in bond_fill_info()
From: Nikolay Aleksandrov @ 2026-06-30 11:14 UTC (permalink / raw)
To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, netdev, eric.dumazet, Jay Vosburgh, Andrew Lunn
In-Reply-To: <20260629173200.469953-1-edumazet@google.com>
On 29/06/2026 20:32, Eric Dumazet wrote:
> Add READ_ONCE()/WRITE_ONCE() annotations on port->is_enabled.
> While this field is written under bond->mode_lock protection,
> it is read without this lock being held.
>
> Change bond_fill_info() to acquire RCU and use READ_ONCE()
> to read bond->params fields that can be updated concurrently
> from sysfs/procfs/rtnetlink.
>
> Add const qualifiers to bond_uses_primary(), __agg_active_ports(),
> bond_option_active_slave_get_rcu(), bond_3ad_get_active_agg_info(),
> __bond_3ad_get_active_agg_info() helpers.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jay Vosburgh <jv@jvosburgh.net>
> Cc: Andrew Lunn <andrew+netdev@lunn.ch>
> ---
> v4: addressed Sashiko/Jakub feedback
>
> drivers/net/bonding/bond_3ad.c | 24 ++++---
> drivers/net/bonding/bond_netlink.c | 109 ++++++++++++++++-------------
> drivers/net/bonding/bond_options.c | 8 +--
> include/net/bond_3ad.h | 4 +-
> include/net/bonding.h | 8 +--
> 5 files changed, 85 insertions(+), 68 deletions(-)
>
A bit late to the party but fwiw:
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Cheers,
Nik
^ permalink raw reply
* Re: [PATCH net v3 1/1] net/sched: sch_teql: Introduce slaves_lock to avoid race condition and UAF
From: Paolo Abeni @ 2026-06-30 11:15 UTC (permalink / raw)
To: Jamal Hadi Salim, netdev
Cc: davem, edumazet, kuba, horms, victor, jiri, security,
zdi-disclosures, stable
In-Reply-To: <20260628111229.669751-1-jhs@mojatatu.com>
On 6/28/26 1:12 PM, Jamal Hadi Salim wrote:
> The teql master->slaves singly linked list is not protected against
> multiple writes. It can be modified concurrently from teql_master_xmit(),
> teql_dequeue(), teql_init() and teql_destroy() without holding any list
> lock or RCU protection.
>
> zdi-disclosures@trendmicro.com has demonstrated that the qdisc is freed
> after an RCU grace period, but teql_master_xmit() running on another
> CPU can still hold a stale pointer into the list, resulting in a
> slab-use-after-free:
>
> BUG: KASAN: slab-use-after-free in teql_master_xmit+0xf0f/0x16b0
> Read of size 8 at addr ffff888013fb0440 by task poc/332
> Freed 512-byte region [ffff888013fb0400, ffff888013fb0600) (kmalloc-512)
>
> The fix?
> Add a per-master slaves_lock spinlock that serializes all mutations of
> master->slaves and the NEXT_SLAVE() links in teql_destroy() and
> teql_qdisc_init(). teql_master_xmit() also takes the same slaves_lock
> around those updates.
> Annotate master->slaves and the per-slave ->next pointer with __rcu and
> use the appropriate RCU accessors everywhere they are touched:
> rcu_assign_pointer() on the writer side (under slaves_lock),
> rcu_dereference_protected() for the writer-side loads (also under
> slaves_lock), rcu_dereference_bh() for the loads in teql_master_xmit() and
> rtnl_dereference() for the loads in teql_master_open()/teql_master_mtu(),
> which run under RTNL.
> Pair this with rcu_read_lock_bh()/rcu_read_unlock_bh() around the list
> traversal in teql_master_xmit(), so that readers either observe a fully
> linked list or are deferred until the in-flight mutation completes. The two
> early-return paths in teql_master_xmit() are updated to release the RCU-bh
> read-side critical section before returning, since leaving it held would
> disable BH on that CPU for good.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Reported-by: zdi-disclosures@trendmicro.com
> Tested-by: Victor Nogueira <victor@mojatatu.com>
> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Looks good, thanks!
Please note that sashiko/gemini found a pre-existing issue which may
require a follow-up/separate fix:
https://sashiko.dev/#/patchset/20260628111229.669751-1-jhs%40mojatatu.com
(the 2nd one in the above link, IDK how to generate a direct link to a
specific comment)
/P
^ permalink raw reply
* [PATCH] net: airoha: fix MIB stats collection to be lossless
From: Aniket Negi @ 2026-06-30 11:18 UTC (permalink / raw)
To: Lorenzo Bianconi
Cc: Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Christian Marangi, Simon Horman, linux-arm-kernel,
linux-mediatek, netdev, linux-kernel, Aniket Negi
The airoha_dev_get_hw_stats() function had two correctness issues in the
way it collects hardware MIB counters.
Bug 1: Read-clear race causes silent packet loss in statistics
airoha_update_hw_stats() read all MIB registers and then cleared them
via REG_FE_GDM_MIB_CLEAR. There is a time window between the last
register read and the hardware clear. Any packet that the hardware
counts during this window is lost: the register is incremented, then
cleared, without the increment ever being read by software. Under
sustained traffic this causes a permanent and growing undercount in all
reported statistics.
This is particularly misleading for tx_ok_pkts and tx_ok_bytes, which
routers and traffic monitors use to detect packet forwarding loss
between two points in a hardware-accelerated path (e.g., between two
netdevs in the QDMA/PPE fast-path). An inaccurate count makes it
impossible to reliably attribute drops in the forwarding pipeline
without capturing traffic at both ends independently.
Bug 2: 32-bit counter overflow causes stat corruption
Several MIB registers are only 32 bits wide: tx_drops, tx_broadcast,
tx_multicast, rx_drops, rx_broadcast, rx_multicast, rx_errors,
rx_crc_error, rx_over_errors, rx_fragment, rx_jabber, and the runt and
long buckets of the tx_len[]/rx_len[].
The original code relied on MIB_CLEAR to keep register values small
enough that a simple '+= val' per cycle did not lose data across a
wrap. Once clearing is removed (to fix Bug 1), raw '+= val' silently
corrupts the accumulated software counter on overflow.
Fix both issues together:
- 64-bit H+L register pairs (tx_ok_pkts, tx_ok_bytes, tx_len[1..5],
rx_ok_pkts, rx_ok_bytes, rx_len[1..5]): read directly from hardware
without clearing. Hardware accumulates the full running total; a
single direct assignment per poll is correct and lossless.
- 32-bit registers (tx_drops, tx_broadcast, tx_multicast, rx_drops,
rx_broadcast, rx_multicast, rx_errors, rx_crc_error, rx_over_errors,
rx_fragment, rx_jabber, and the runt/long buckets in tx_len[0]/[6]
and rx_len[0]/[6]): track the previous hardware value in a new
hw_prev_stats sub-struct inside airoha_hw_stats and accumulate
(u32)(curr - prev) into the 64-bit software counter. Unsigned
subtraction handles wrap-around transparently:
prev=0xFFFFFF00, curr=0x00000010 -> delta=(u32)(0x10-0xFFFFFF00)=0x110
Remove the REG_FE_GDM_MIB_CLEAR write from airoha_update_hw_stats()
entirely. Because the driver no longer clears hardware counters, the
read-clear race window is eliminated.
The hw_prev_stats fields are zero-initialised by the existing
devm_kzalloc() call in airoha_alloc_gdm_device().
Fixes: 8f4695fb67b2 ("net: airoha: better handle MIBs for GDM ports with multiple devs attached")
Signed-off-by: Aniket Negi <aniket.negi03@gmail.com>
---
drivers/net/ethernet/airoha/airoha_eth.c | 132 +++++++++++------------
drivers/net/ethernet/airoha/airoha_eth.h | 22 ++++
2 files changed, 86 insertions(+), 68 deletions(-)
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index 1caf6766f2c0..7ae4e294478e 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -1696,133 +1696,133 @@ static void airoha_dev_get_hw_stats(struct airoha_gdm_dev *dev)
u64_stats_update_begin(&dev->stats.syncp);
- /* TX */
+ /* TX - 64-bit H+L registers: hw accumulates the total, read directly. */
val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_H(port->id));
- dev->stats.tx_ok_pkts += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_L(port->id));
- dev->stats.tx_ok_pkts += val;
+ dev->stats.tx_ok_pkts = (u64)val << 32;
+ dev->stats.tx_ok_pkts += airoha_fe_rr(eth, REG_FE_GDM_TX_OK_PKT_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_BYTE_CNT_H(port->id));
- dev->stats.tx_ok_bytes += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_OK_BYTE_CNT_L(port->id));
- dev->stats.tx_ok_bytes += val;
+ dev->stats.tx_ok_bytes = (u64)val << 32;
+ dev->stats.tx_ok_bytes += airoha_fe_rr(eth, REG_FE_GDM_TX_OK_BYTE_CNT_L(port->id));
+ /* TX - 32-bit registers: accumulate delta to handle wrap-around. */
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_DROP_CNT(port->id));
- dev->stats.tx_drops += val;
+ dev->stats.tx_drops += (u32)(val - dev->stats.hw_prev_stats.tx_drops);
+ dev->stats.hw_prev_stats.tx_drops = val;
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_BC_CNT(port->id));
- dev->stats.tx_broadcast += val;
+ dev->stats.tx_broadcast += (u32)(val - dev->stats.hw_prev_stats.tx_broadcast);
+ dev->stats.hw_prev_stats.tx_broadcast = val;
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_MC_CNT(port->id));
- dev->stats.tx_multicast += val;
+ dev->stats.tx_multicast += (u32)(val - dev->stats.hw_prev_stats.tx_multicast);
+ dev->stats.hw_prev_stats.tx_multicast = val;
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_RUNT_CNT(port->id));
- dev->stats.tx_len[i] += val;
+ dev->stats.tx_len[i] += (u32)(val - dev->stats.hw_prev_stats.tx_len[i]);
+ dev->stats.hw_prev_stats.tx_len[i] = val;
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_E64_CNT_H(port->id));
- dev->stats.tx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_E64_CNT_L(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] += (u64)val << 32;
+ dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_E64_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L64_CNT_H(port->id));
- dev->stats.tx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L64_CNT_L(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] = (u64)val << 32;
+ dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L64_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L127_CNT_H(port->id));
- dev->stats.tx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L127_CNT_L(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] = (u64)val << 32;
+ dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L127_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L255_CNT_H(port->id));
- dev->stats.tx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L255_CNT_L(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] = (u64)val << 32;
+ dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L255_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L511_CNT_H(port->id));
- dev->stats.tx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L511_CNT_L(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] = (u64)val << 32;
+ dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L511_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L1023_CNT_H(port->id));
- dev->stats.tx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L1023_CNT_L(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] = (u64)val << 32;
+ dev->stats.tx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_L1023_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_TX_ETH_LONG_CNT(port->id));
- dev->stats.tx_len[i++] += val;
+ dev->stats.tx_len[i] += (u32)(val - dev->stats.hw_prev_stats.tx_len[i]);
+ dev->stats.hw_prev_stats.tx_len[i++] = val;
/* RX */
val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_PKT_CNT_H(port->id));
- dev->stats.rx_ok_pkts += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_PKT_CNT_L(port->id));
- dev->stats.rx_ok_pkts += val;
+ dev->stats.rx_ok_pkts = (u64)val << 32;
+ dev->stats.rx_ok_pkts += airoha_fe_rr(eth, REG_FE_GDM_RX_OK_PKT_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_BYTE_CNT_H(port->id));
- dev->stats.rx_ok_bytes += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_OK_BYTE_CNT_L(port->id));
- dev->stats.rx_ok_bytes += val;
+ dev->stats.rx_ok_bytes = (u64)val << 32;
+ dev->stats.rx_ok_bytes += airoha_fe_rr(eth, REG_FE_GDM_RX_OK_BYTE_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_DROP_CNT(port->id));
- dev->stats.rx_drops += val;
+ dev->stats.rx_drops += (u32)(val - dev->stats.hw_prev_stats.rx_drops);
+ dev->stats.hw_prev_stats.rx_drops = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_BC_CNT(port->id));
- dev->stats.rx_broadcast += val;
+ dev->stats.rx_broadcast += (u32)(val - dev->stats.hw_prev_stats.rx_broadcast);
+ dev->stats.hw_prev_stats.rx_broadcast = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_MC_CNT(port->id));
- dev->stats.rx_multicast += val;
+ dev->stats.rx_multicast += (u32)(val - dev->stats.hw_prev_stats.rx_multicast);
+ dev->stats.hw_prev_stats.rx_multicast = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ERROR_DROP_CNT(port->id));
- dev->stats.rx_errors += val;
+ dev->stats.rx_errors += (u32)(val - dev->stats.hw_prev_stats.rx_errors);
+ dev->stats.hw_prev_stats.rx_errors = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_CRC_ERR_CNT(port->id));
- dev->stats.rx_crc_error += val;
+ dev->stats.rx_crc_error += (u32)(val - dev->stats.hw_prev_stats.rx_crc_error);
+ dev->stats.hw_prev_stats.rx_crc_error = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_OVERFLOW_DROP_CNT(port->id));
- dev->stats.rx_over_errors += val;
+ dev->stats.rx_over_errors += (u32)(val - dev->stats.hw_prev_stats.rx_over_errors);
+ dev->stats.hw_prev_stats.rx_over_errors = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_FRAG_CNT(port->id));
- dev->stats.rx_fragment += val;
+ dev->stats.rx_fragment += (u32)(val - dev->stats.hw_prev_stats.rx_fragment);
+ dev->stats.hw_prev_stats.rx_fragment = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_JABBER_CNT(port->id));
- dev->stats.rx_jabber += val;
+ dev->stats.rx_jabber += (u32)(val - dev->stats.hw_prev_stats.rx_jabber);
+ dev->stats.hw_prev_stats.rx_jabber = val;
i = 0;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_RUNT_CNT(port->id));
- dev->stats.rx_len[i] += val;
+ dev->stats.rx_len[i] += (u32)(val - dev->stats.hw_prev_stats.rx_len[i]);
+ dev->stats.hw_prev_stats.rx_len[i] = val;
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_E64_CNT_H(port->id));
- dev->stats.rx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_E64_CNT_L(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] += (u64)val << 32;
+ dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_E64_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L64_CNT_H(port->id));
- dev->stats.rx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L64_CNT_L(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] = (u64)val << 32;
+ dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L64_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L127_CNT_H(port->id));
- dev->stats.rx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L127_CNT_L(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] = (u64)val << 32;
+ dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L127_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L255_CNT_H(port->id));
- dev->stats.rx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L255_CNT_L(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] = (u64)val << 32;
+ dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L255_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L511_CNT_H(port->id));
- dev->stats.rx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L511_CNT_L(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] = (u64)val << 32;
+ dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L511_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L1023_CNT_H(port->id));
- dev->stats.rx_len[i] += ((u64)val << 32);
- val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L1023_CNT_L(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] = (u64)val << 32;
+ dev->stats.rx_len[i++] += airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_L1023_CNT_L(port->id));
val = airoha_fe_rr(eth, REG_FE_GDM_RX_ETH_LONG_CNT(port->id));
- dev->stats.rx_len[i++] += val;
+ dev->stats.rx_len[i] += (u32)(val - dev->stats.hw_prev_stats.rx_len[i]);
+ dev->stats.hw_prev_stats.rx_len[i++] = val;
u64_stats_update_end(&dev->stats.syncp);
}
@@ -1839,10 +1839,6 @@ static void airoha_update_hw_stats(struct airoha_gdm_dev *dev)
airoha_dev_get_hw_stats(port->devs[i]);
}
- /* Reset MIB counters */
- airoha_fe_set(dev->eth, REG_FE_GDM_MIB_CLEAR(port->id),
- FE_GDM_MIB_RX_CLEAR_MASK | FE_GDM_MIB_TX_CLEAR_MASK);
-
spin_unlock(&port->stats_lock);
}
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index 2765244d937c..af12ad6eac17 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -244,6 +244,28 @@ struct airoha_hw_stats {
u64 rx_fragment;
u64 rx_jabber;
u64 rx_len[7];
+
+ struct {
+ /* Previous HW register values for 32-bit counter delta tracking.
+ * Storing the last seen value and accumulating (u32)(curr - prev)
+ * into the 64-bit software counter handles wrap-around transparently
+ * via unsigned arithmetic. These fields are never reported to
+ * userspace.
+ */
+ u32 tx_drops;
+ u32 tx_broadcast;
+ u32 tx_multicast;
+ u32 tx_len[7];
+ u32 rx_drops;
+ u32 rx_broadcast;
+ u32 rx_multicast;
+ u32 rx_errors;
+ u32 rx_crc_error;
+ u32 rx_over_errors;
+ u32 rx_fragment;
+ u32 rx_jabber;
+ u32 rx_len[7];
+ } hw_prev_stats;
};
enum {
--
2.43.0
^ permalink raw reply related
* [PATCH net-next v2 0/5] net: dsa: realtek: rtl8366rb: Use generic RTL83xx code
From: Linus Walleij @ 2026-06-30 11:19 UTC (permalink / raw)
To: Luiz Angelo Daros de Luca, Alvin Šipraga, Andrew Lunn,
Vladimir Oltean, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, Linus Walleij
As a follow-up to Luiz's and Alvin's series improving the
generic handling of the Realtek DSA switches, this small
series brings the RTL8366RB closer to the way things are done
in the RTL8365MB driver.
This patch series switches over to using the generic helpers
for:
- Bridge joining and leaving (isolation)
- STP handling
- Learning enable/disable
It would be appreciated if this doesn't lead to AI-automated
requests to fix the entire universe (hi Sashiko, I'm looking
at you but I bet you will do your compulsive C3P0-style review
anyway) since I'm just moving code around so some helper
functions come before their new users. The code itself is
pretty straight-forward.
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
Changes in v2:
- Rebase on v7.2-rc1
- Prepend a patch making the learning callbacks optional so we can add
functionality one patch at a time.
- Link to v1: https://patch.msgid.link/20260612-rtl8366rb-improvements-v1-0-9232286fc20c@kernel.org
---
Linus Walleij (5):
net: dsa: realtek: rtl83xx: Make learning optional in join/leave
net: dsa: realtek: rtl8366rb: Switch to generic port_bridge* handlers
net: dsa: realtek: rtl8366rb: Use DSA port iterators
net: dsa: realtek: rtl8366rb: Disable STP learning on all ports in setup
net: dsa: realtek: rtl8366rb: Switch to generic learning enablement
drivers/net/dsa/realtek/rtl8366rb.c | 268 ++++++++++++++++++------------------
drivers/net/dsa/realtek/rtl83xx.c | 26 ++--
2 files changed, 143 insertions(+), 151 deletions(-)
---
base-commit: dc59e4fea9d83f03bad6bddf3fa2e52491777482
change-id: 20260611-rtl8366rb-improvements-d69f2145219d
Best regards,
--
Linus Walleij <linusw@kernel.org>
^ permalink raw reply
* [PATCH net-next v2 1/5] net: dsa: realtek: rtl83xx: Make learning optional in join/leave
From: Linus Walleij @ 2026-06-30 11:19 UTC (permalink / raw)
To: Luiz Angelo Daros de Luca, Alvin Šipraga, Andrew Lunn,
Vladimir Oltean, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, Linus Walleij
In-Reply-To: <20260630-rtl8366rb-improvements-v2-0-05eb9d6a37f5@kernel.org>
Mostly to make it possible to add rtl83xx support piece by piece,
make the port learning callback optional in rtl83xx_port_bridge_join()
and rtl83xx_port_bridge_leave().
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
drivers/net/dsa/realtek/rtl83xx.c | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/drivers/net/dsa/realtek/rtl83xx.c b/drivers/net/dsa/realtek/rtl83xx.c
index 71124ecca92f..90843d52c5a8 100644
--- a/drivers/net/dsa/realtek/rtl83xx.c
+++ b/drivers/net/dsa/realtek/rtl83xx.c
@@ -356,9 +356,6 @@ int rtl83xx_port_bridge_join(struct dsa_switch *ds, int port,
if (!priv->ops->port_add_isolation)
return -EOPNOTSUPP;
- if (!priv->ops->port_set_learning)
- return -EOPNOTSUPP;
-
dev_dbg(priv->dev, "bridge %d join port %d\n", bridge.num, port);
/* Add this port to the isolation group of every other port
@@ -396,9 +393,11 @@ int rtl83xx_port_bridge_join(struct dsa_switch *ds, int port,
goto undo_self_isolation;
}
- ret = priv->ops->port_set_learning(priv, port, true);
- if (ret)
- goto undo_efid;
+ if (priv->ops->port_set_learning) {
+ ret = priv->ops->port_set_learning(priv, port, true);
+ if (ret)
+ goto undo_efid;
+ }
return 0;
@@ -443,9 +442,6 @@ void rtl83xx_port_bridge_leave(struct dsa_switch *ds, int port,
if (!priv->ops->port_remove_isolation)
return;
- if (!priv->ops->port_set_learning)
- return;
-
dev_dbg(priv->dev, "bridge %d leave port %d\n", bridge.num, port);
/* Remove this port from the isolation group of every other
@@ -474,11 +470,13 @@ void rtl83xx_port_bridge_leave(struct dsa_switch *ds, int port,
* downstream DSA ports from the isolation group.
*/
- ret = priv->ops->port_set_learning(priv, port, false);
- if (ret)
- dev_err(priv->dev,
- "failed to disable learning on port %d: %pe\n",
- port, ERR_PTR(ret));
+ if (priv->ops->port_set_learning) {
+ ret = priv->ops->port_set_learning(priv, port, false);
+ if (ret)
+ dev_err(priv->dev,
+ "failed to disable learning on port %d: %pe\n",
+ port, ERR_PTR(ret));
+ }
/* Remove those ports from the isolation group of this port */
ret = priv->ops->port_remove_isolation(priv, port, mask);
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v2 2/5] net: dsa: realtek: rtl8366rb: Switch to generic port_bridge* handlers
From: Linus Walleij @ 2026-06-30 11:19 UTC (permalink / raw)
To: Luiz Angelo Daros de Luca, Alvin Šipraga, Andrew Lunn,
Vladimir Oltean, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, Linus Walleij
In-Reply-To: <20260630-rtl8366rb-improvements-v2-0-05eb9d6a37f5@kernel.org>
The RTL8366RB is using its own sub-standard port isolation code.
Implement the required isolation helpers, use these directly in
the port setup callback, and switch over to the standard port
isolation code.
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
drivers/net/dsa/realtek/rtl8366rb.c | 108 ++++++++++++------------------------
1 file changed, 36 insertions(+), 72 deletions(-)
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 103039fe3086..8b57ef3bf03a 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -791,6 +791,35 @@ static int rtl8366rb_setup_all_leds_off(struct realtek_priv *priv)
return ret;
}
+static int rtl8366rb_port_set_isolation(struct realtek_priv *priv, int port,
+ u32 mask)
+{
+ /* Bit 0 enables isolation so set this if we enable isolation
+ * on any of the ports and clear it if we disable on all of them.
+ */
+ if (mask)
+ mask = RTL8366RB_PORT_ISO_PORTS(mask) | RTL8366RB_PORT_ISO_EN;
+
+ return regmap_write(priv->map, RTL8366RB_PORT_ISO(port),
+ mask);
+}
+
+static int rtl8366rb_port_add_isolation(struct realtek_priv *priv, int port,
+ u32 mask)
+{
+ /* We assume isolation bit is on */
+ return regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
+ RTL8366RB_PORT_ISO_PORTS(mask),
+ RTL8366RB_PORT_ISO_PORTS(mask));
+}
+
+static int rtl8366rb_port_remove_isolation(struct realtek_priv *priv, int port,
+ u32 mask)
+{
+ return regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
+ RTL8366RB_PORT_ISO_PORTS(mask), 0);
+}
+
static int rtl8366rb_setup(struct dsa_switch *ds)
{
struct realtek_priv *priv = ds->priv;
@@ -868,16 +897,13 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
/* Isolate all user ports so they can only send packets to itself and the CPU port */
for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) {
- ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(i),
- RTL8366RB_PORT_ISO_PORTS(BIT(RTL8366RB_PORT_NUM_CPU)) |
- RTL8366RB_PORT_ISO_EN);
+ ret = rtl8366rb_port_set_isolation(priv, i, BIT(RTL8366RB_PORT_NUM_CPU));
if (ret)
return ret;
}
/* CPU port can send packets to all ports */
- ret = regmap_write(priv->map, RTL8366RB_PORT_ISO(RTL8366RB_PORT_NUM_CPU),
- RTL8366RB_PORT_ISO_PORTS(dsa_user_ports(ds)) |
- RTL8366RB_PORT_ISO_EN);
+ ret = rtl8366rb_port_set_isolation(priv, RTL8366RB_PORT_NUM_CPU,
+ dsa_user_ports(ds));
if (ret)
return ret;
@@ -1184,70 +1210,6 @@ rtl8366rb_port_disable(struct dsa_switch *ds, int port)
return;
}
-static int
-rtl8366rb_port_bridge_join(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge,
- bool *tx_fwd_offload,
- struct netlink_ext_ack *extack)
-{
- struct realtek_priv *priv = ds->priv;
- unsigned int port_bitmap = 0;
- int ret, i;
-
- /* Loop over all other ports than the current one */
- for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) {
- /* Current port handled last */
- if (i == port)
- continue;
- /* Not on this bridge */
- if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge))
- continue;
- /* Join this port to each other port on the bridge */
- ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i),
- RTL8366RB_PORT_ISO_PORTS(BIT(port)),
- RTL8366RB_PORT_ISO_PORTS(BIT(port)));
- if (ret)
- dev_err(priv->dev, "failed to join port %d\n", port);
-
- port_bitmap |= BIT(i);
- }
-
- /* Set the bits for the ports we can access */
- return regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
- RTL8366RB_PORT_ISO_PORTS(port_bitmap),
- RTL8366RB_PORT_ISO_PORTS(port_bitmap));
-}
-
-static void
-rtl8366rb_port_bridge_leave(struct dsa_switch *ds, int port,
- struct dsa_bridge bridge)
-{
- struct realtek_priv *priv = ds->priv;
- unsigned int port_bitmap = 0;
- int ret, i;
-
- /* Loop over all other ports than this one */
- for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) {
- /* Current port handled last */
- if (i == port)
- continue;
- /* Not on this bridge */
- if (!dsa_port_offloads_bridge(dsa_to_port(ds, i), &bridge))
- continue;
- /* Remove this port from any other port on the bridge */
- ret = regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(i),
- RTL8366RB_PORT_ISO_PORTS(BIT(port)), 0);
- if (ret)
- dev_err(priv->dev, "failed to leave port %d\n", port);
-
- port_bitmap |= BIT(i);
- }
-
- /* Clear the bits for the ports we can not access, leave ourselves */
- regmap_update_bits(priv->map, RTL8366RB_PORT_ISO(port),
- RTL8366RB_PORT_ISO_PORTS(port_bitmap), 0);
-}
-
/**
* rtl8366rb_drop_untagged() - make the switch drop untagged and C-tagged frames
* @priv: SMI state container
@@ -1801,8 +1763,8 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops = {
.get_strings = rtl8366_get_strings,
.get_ethtool_stats = rtl8366_get_ethtool_stats,
.get_sset_count = rtl8366_get_sset_count,
- .port_bridge_join = rtl8366rb_port_bridge_join,
- .port_bridge_leave = rtl8366rb_port_bridge_leave,
+ .port_bridge_join = rtl83xx_port_bridge_join,
+ .port_bridge_leave = rtl83xx_port_bridge_leave,
.port_vlan_filtering = rtl8366rb_vlan_filtering,
.port_vlan_add = rtl8366_vlan_add,
.port_vlan_del = rtl8366_vlan_del,
@@ -1830,6 +1792,8 @@ static const struct realtek_ops rtl8366rb_ops = {
.is_vlan_valid = rtl8366rb_is_vlan_valid,
.enable_vlan = rtl8366rb_enable_vlan,
.enable_vlan4k = rtl8366rb_enable_vlan4k,
+ .port_add_isolation = rtl8366rb_port_add_isolation,
+ .port_remove_isolation = rtl8366rb_port_remove_isolation,
.phy_read = rtl8366rb_phy_read,
.phy_write = rtl8366rb_phy_write,
};
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v2 3/5] net: dsa: realtek: rtl8366rb: Use DSA port iterators
From: Linus Walleij @ 2026-06-30 11:19 UTC (permalink / raw)
To: Luiz Angelo Daros de Luca, Alvin Šipraga, Andrew Lunn,
Vladimir Oltean, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, Linus Walleij
In-Reply-To: <20260630-rtl8366rb-improvements-v2-0-05eb9d6a37f5@kernel.org>
Instead of custom loops for initializing the ports (including the
CPU port) use the DSA helpers dsa_switch_for_each_port() and
dsa_switch_for_each_cpu_port() following the pattern in RTL8365MB by
accumulating masks for the upstream and downstream ports.
This gives us similar enough code to the RTL8365MB that we
can start using more generic rtl83xx helpers.
Reviewed-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
drivers/net/dsa/realtek/rtl8366rb.c | 49 +++++++++++++++++++++++++++++++------
1 file changed, 41 insertions(+), 8 deletions(-)
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 8b57ef3bf03a..64215a0d5d6d 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -824,7 +824,10 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
{
struct realtek_priv *priv = ds->priv;
const struct rtl8366rb_jam_tbl_entry *jam_table;
+ u32 downports_mask = 0;
struct rtl8366rb *rb;
+ u32 upports_mask = 0;
+ struct dsa_port *dp;
u32 chip_ver = 0;
u32 chip_id = 0;
int jam_size;
@@ -895,17 +898,47 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
if (ret)
return ret;
- /* Isolate all user ports so they can only send packets to itself and the CPU port */
- for (i = 0; i < RTL8366RB_PORT_NUM_CPU; i++) {
- ret = rtl8366rb_port_set_isolation(priv, i, BIT(RTL8366RB_PORT_NUM_CPU));
+ /* Start with all ports blocked, including unused ports */
+ dsa_switch_for_each_port(dp, ds) {
+ /* Start with all ports completely isolated */
+ ret = rtl8366rb_port_set_isolation(priv, dp->index, 0);
+ if (ret)
+ return ret;
+
+ /* Collect CPU ports. If we support cascade switches, it should
+ * also include the upstream DSA ports.
+ */
+ if (!dsa_port_is_cpu(dp))
+ continue;
+
+ upports_mask |= BIT(dp->index);
+ }
+
+ /* Configure user ports */
+ dsa_switch_for_each_port(dp, ds) {
+ if (!dsa_port_is_user(dp))
+ continue;
+
+ /* Forward only to the CPU */
+ ret = rtl8366rb_port_set_isolation(priv, dp->index, upports_mask);
+ if (ret)
+ return ret;
+
+ /* If we support cascade switches, it should also include the
+ * downstream DSA ports.
+ */
+ downports_mask |= BIT(dp->index);
+ }
+
+ /* Configure CPU ports. If we support cascade switches, this will also
+ * include DSA ports.
+ */
+ dsa_switch_for_each_cpu_port(dp, ds) {
+ /* Forward to all user ports */
+ ret = rtl8366rb_port_set_isolation(priv, dp->index, downports_mask);
if (ret)
return ret;
}
- /* CPU port can send packets to all ports */
- ret = rtl8366rb_port_set_isolation(priv, RTL8366RB_PORT_NUM_CPU,
- dsa_user_ports(ds));
- if (ret)
- return ret;
/* Set up the "green ethernet" feature */
ret = rtl8366rb_jam_table(rtl8366rb_green_jam,
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v2 4/5] net: dsa: realtek: rtl8366rb: Disable STP learning on all ports in setup
From: Linus Walleij @ 2026-06-30 11:19 UTC (permalink / raw)
To: Luiz Angelo Daros de Luca, Alvin Šipraga, Andrew Lunn,
Vladimir Oltean, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, Linus Walleij
In-Reply-To: <20260630-rtl8366rb-improvements-v2-0-05eb9d6a37f5@kernel.org>
When we loop over all ports in the switch .setup() callback,
make sure to disable learning on all user ports. This is what
is normally expected and what the RTL8365MB is doing.
Move the code around to accommodate for the new call.
Reviewed-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
drivers/net/dsa/realtek/rtl8366rb.c | 74 ++++++++++++++++++++-----------------
1 file changed, 40 insertions(+), 34 deletions(-)
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 64215a0d5d6d..155bf0010d5f 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -820,6 +820,40 @@ static int rtl8366rb_port_remove_isolation(struct realtek_priv *priv, int port,
RTL8366RB_PORT_ISO_PORTS(mask), 0);
}
+static void
+rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+ struct realtek_priv *priv = ds->priv;
+ u32 val;
+ int i;
+
+ switch (state) {
+ case BR_STATE_DISABLED:
+ val = RTL8366RB_STP_STATE_DISABLED;
+ break;
+ case BR_STATE_BLOCKING:
+ case BR_STATE_LISTENING:
+ val = RTL8366RB_STP_STATE_BLOCKING;
+ break;
+ case BR_STATE_LEARNING:
+ val = RTL8366RB_STP_STATE_LEARNING;
+ break;
+ case BR_STATE_FORWARDING:
+ val = RTL8366RB_STP_STATE_FORWARDING;
+ break;
+ default:
+ dev_err(priv->dev, "unknown bridge state requested\n");
+ return;
+ }
+
+ /* Set the same status for the port on all the FIDs */
+ for (i = 0; i < RTL8366RB_NUM_FIDS; i++) {
+ regmap_update_bits(priv->map, RTL8366RB_STP_STATE_BASE + i,
+ RTL8366RB_STP_STATE_MASK(port),
+ RTL8366RB_STP_STATE(port, val));
+ }
+}
+
static int rtl8366rb_setup(struct dsa_switch *ds)
{
struct realtek_priv *priv = ds->priv;
@@ -900,6 +934,12 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
/* Start with all ports blocked, including unused ports */
dsa_switch_for_each_port(dp, ds) {
+ /* Set the initial STP state of all ports to DISABLED, otherwise
+ * ports will still forward frames to the CPU despite being
+ * administratively down by default.
+ */
+ rtl8366rb_port_stp_state_set(ds, dp->index, BR_STATE_DISABLED);
+
/* Start with all ports completely isolated */
ret = rtl8366rb_port_set_isolation(priv, dp->index, 0);
if (ret)
@@ -1320,40 +1360,6 @@ rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port,
return 0;
}
-static void
-rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
-{
- struct realtek_priv *priv = ds->priv;
- u32 val;
- int i;
-
- switch (state) {
- case BR_STATE_DISABLED:
- val = RTL8366RB_STP_STATE_DISABLED;
- break;
- case BR_STATE_BLOCKING:
- case BR_STATE_LISTENING:
- val = RTL8366RB_STP_STATE_BLOCKING;
- break;
- case BR_STATE_LEARNING:
- val = RTL8366RB_STP_STATE_LEARNING;
- break;
- case BR_STATE_FORWARDING:
- val = RTL8366RB_STP_STATE_FORWARDING;
- break;
- default:
- dev_err(priv->dev, "unknown bridge state requested\n");
- return;
- }
-
- /* Set the same status for the port on all the FIDs */
- for (i = 0; i < RTL8366RB_NUM_FIDS; i++) {
- regmap_update_bits(priv->map, RTL8366RB_STP_STATE_BASE + i,
- RTL8366RB_STP_STATE_MASK(port),
- RTL8366RB_STP_STATE(port, val));
- }
-}
-
static void
rtl8366rb_port_fast_age(struct dsa_switch *ds, int port)
{
--
2.54.0
^ permalink raw reply related
* [PATCH net-next v2 5/5] net: dsa: realtek: rtl8366rb: Switch to generic learning enablement
From: Linus Walleij @ 2026-06-30 11:19 UTC (permalink / raw)
To: Luiz Angelo Daros de Luca, Alvin Šipraga, Andrew Lunn,
Vladimir Oltean, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: netdev, Linus Walleij
In-Reply-To: <20260630-rtl8366rb-improvements-v2-0-05eb9d6a37f5@kernel.org>
Instead of just writing the learning disablement register in setup
and a custom handling of BR_LEARNING, implement the generic RTL83xx
.port_set_learning() callback for setting learning on a port, and
call this in the per-port loop in .setup().
Instead of the custom rtl8366rb_port_bridge_flags() function for
setting learning mode on each port, use the RTL83xx generic
rtl83xx_port_bridge_flags() callback.
Signed-off-by: Linus Walleij <linusw@kernel.org>
---
drivers/net/dsa/realtek/rtl8366rb.c | 43 +++++++++++++++----------------------
1 file changed, 17 insertions(+), 26 deletions(-)
diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c
index 155bf0010d5f..d2fa8ff6a5d0 100644
--- a/drivers/net/dsa/realtek/rtl8366rb.c
+++ b/drivers/net/dsa/realtek/rtl8366rb.c
@@ -854,6 +854,16 @@ rtl8366rb_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
}
}
+static int rtl8366rb_port_set_learning(struct realtek_priv *priv, int port,
+ bool enable)
+{
+ /* Notice inverted semantics in this register: setting a bit disables
+ * learning instead of enabling it.
+ */
+ return regmap_update_bits(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
+ BIT(port), enable ? 0 : BIT(port));
+}
+
static int rtl8366rb_setup(struct dsa_switch *ds)
{
struct realtek_priv *priv = ds->priv;
@@ -945,6 +955,11 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
if (ret)
return ret;
+ /* Disable learning */
+ ret = rtl8366rb_port_set_learning(priv, dp->index, false);
+ if (ret)
+ return ret;
+
/* Collect CPU ports. If we support cascade switches, it should
* also include the upstream DSA ports.
*/
@@ -1037,12 +1052,6 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
rb->max_mtu[i] = ETH_DATA_LEN;
}
- /* Disable learning for all ports */
- ret = regmap_write(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
- RTL8366RB_PORT_ALL);
- if (ret)
- return ret;
-
/* Enable auto ageing for all ports */
ret = regmap_write(priv->map, RTL8366RB_SECURITY_CTRL, 0);
if (ret)
@@ -1341,25 +1350,6 @@ rtl8366rb_port_pre_bridge_flags(struct dsa_switch *ds, int port,
return 0;
}
-static int
-rtl8366rb_port_bridge_flags(struct dsa_switch *ds, int port,
- struct switchdev_brport_flags flags,
- struct netlink_ext_ack *extack)
-{
- struct realtek_priv *priv = ds->priv;
- int ret;
-
- if (flags.mask & BR_LEARNING) {
- ret = regmap_update_bits(priv->map, RTL8366RB_PORT_LEARNDIS_CTRL,
- BIT(port),
- (flags.val & BR_LEARNING) ? 0 : BIT(port));
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
static void
rtl8366rb_port_fast_age(struct dsa_switch *ds, int port)
{
@@ -1810,7 +1800,7 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops = {
.port_enable = rtl8366rb_port_enable,
.port_disable = rtl8366rb_port_disable,
.port_pre_bridge_flags = rtl8366rb_port_pre_bridge_flags,
- .port_bridge_flags = rtl8366rb_port_bridge_flags,
+ .port_bridge_flags = rtl83xx_port_bridge_flags,
.port_stp_state_set = rtl8366rb_port_stp_state_set,
.port_fast_age = rtl8366rb_port_fast_age,
.port_change_mtu = rtl8366rb_change_mtu,
@@ -1833,6 +1823,7 @@ static const struct realtek_ops rtl8366rb_ops = {
.enable_vlan4k = rtl8366rb_enable_vlan4k,
.port_add_isolation = rtl8366rb_port_add_isolation,
.port_remove_isolation = rtl8366rb_port_remove_isolation,
+ .port_set_learning = rtl8366rb_port_set_learning,
.phy_read = rtl8366rb_phy_read,
.phy_write = rtl8366rb_phy_write,
};
--
2.54.0
^ permalink raw reply related
* [PATCH net V2 0/3] net/mlx5: LAG bug fixes
From: Tariq Toukan @ 2026-06-30 11:29 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
netdev, Paolo Abeni
Cc: Edward Srouji, Jacob Keller, Kees Cook, Leon Romanovsky,
linux-kernel, linux-rdma, Maher Sanalla, Mark Bloch,
Moshe Shemesh, Or Har-Toov, Rongwei Liu, Saeed Mahameed,
Shay Drori, Simon Horman, Tariq Toukan
Hi,
Three bug fixes by Shay in the mlx5 LAG subsystem.
Patch 1 fixes an off-by-one in the error rollback path of
mlx5_lag_create_single_fdb_filter(): the loop started from the
failed index i, potentially operating on uninitialized state or
double-tearing-down an entry that had already self-rolled-back.
The rollback should start from i - 1.
Patch 2 fixes a hang in mlx5_mpesw_work(): when
mlx5_lag_get_devcom_comp() returns NULL the function returned
early without calling complete(), blocking any caller waiting on
mpesww->comp indefinitely.
Patch 3 fixes a kernel crash during teardown when
mlx5_lag_get_dev_seq() returns an error because no device is
marked as master or the peer is no longer in the LAG. The peer
flow cleanup is now skipped instead of proceeding with a bad
pointer.
This series by Shay fixes three bugs in the mlx5 LAG subsystem.
Regards,
Tariq
V2:
- Rebase.
- Patch 3: simplify to a single 'continue' on seq lookup failure.
V1:
https://lore.kernel.org/all/20260617063204.547427-2-tariqt@nvidia.com/
Find replies to previous Sashiko comments here:
https://lore.kernel.org/all/e18662ac-413e-43f6-ac65-a4e15fd47bb7@nvidia.com/
Shay Drory (3):
net/mlx5: LAG, Fix off-by-one in single-FDB error rollback
net/mlx5: LAG, MPESW, Fix missing complete() on devcom error
net/mlx5e: TC, skip peer flow cleanup when LAG seq is unavailable
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++
drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 7 +++++--
drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c | 2 +-
3 files changed, 9 insertions(+), 3 deletions(-)
base-commit: dbf803bc4a8b0522c9a12560c20905a5952d1cb9
--
2.44.0
^ permalink raw reply
* [PATCH net V2 1/3] net/mlx5: LAG, Fix off-by-one in single-FDB error rollback
From: Tariq Toukan @ 2026-06-30 11:29 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
netdev, Paolo Abeni
Cc: Edward Srouji, Jacob Keller, Kees Cook, Leon Romanovsky,
linux-kernel, linux-rdma, Maher Sanalla, Mark Bloch,
Moshe Shemesh, Or Har-Toov, Rongwei Liu, Saeed Mahameed,
Shay Drori, Simon Horman, Tariq Toukan
In-Reply-To: <20260630112917.698313-1-tariqt@nvidia.com>
From: Shay Drory <shayd@nvidia.com>
On failure at index i, the reverse cleanup loop in
mlx5_lag_create_single_fdb_filter() starts from i, so the failed index
itself is rolled back. That can operate on uninitialized state or
double-tear-down a rule the add_one path already self-rolled-back.
Start the rollback from i - 1 so only successfully-installed entries
are undone.
Fixes: ddbb5ddc43ad ("net/mlx5: LAG, Refactor lag logic")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c
index 113866494d16..6b4ad3c53f2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c
@@ -78,7 +78,7 @@ static int mlx5_lag_create_single_fdb_filter(struct mlx5_lag *ldev, u32 filter)
}
return 0;
err:
- mlx5_lag_for_each_reverse(j, i, 0, ldev, filter) {
+ mlx5_lag_for_each_reverse(j, i - 1, 0, ldev, filter) {
struct mlx5_eswitch *slave_esw;
if (j == master_idx)
--
2.44.0
^ permalink raw reply related
* [PATCH net V2 2/3] net/mlx5: LAG, MPESW, Fix missing complete() on devcom error
From: Tariq Toukan @ 2026-06-30 11:29 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
netdev, Paolo Abeni
Cc: Edward Srouji, Jacob Keller, Kees Cook, Leon Romanovsky,
linux-kernel, linux-rdma, Maher Sanalla, Mark Bloch,
Moshe Shemesh, Or Har-Toov, Rongwei Liu, Saeed Mahameed,
Shay Drori, Simon Horman, Tariq Toukan
In-Reply-To: <20260630112917.698313-1-tariqt@nvidia.com>
From: Shay Drory <shayd@nvidia.com>
mlx5_mpesw_work() returned without calling complete() when
mlx5_lag_get_devcom_comp() returned NULL. A caller that queued the
work and waited on mpesww->comp would block indefinitely.
Funnel the early-return path through a new "complete" label so the
waiter is always woken.
Fixes: b430c1b4f63b ("net/mlx5: Replace global mlx5_intf_lock with HCA devcom component lock")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 50bfb450c71e..abf72026c751 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -194,8 +194,10 @@ static void mlx5_mpesw_work(struct work_struct *work)
struct mlx5_lag *ldev = mpesww->lag;
devcom = mlx5_lag_get_devcom_comp(ldev);
- if (!devcom)
- return;
+ if (!devcom) {
+ mpesww->result = -ENODEV;
+ goto complete;
+ }
mlx5_devcom_comp_lock(devcom);
mlx5_mpesw_sd_devcoms_lock(ldev);
@@ -213,6 +215,7 @@ static void mlx5_mpesw_work(struct work_struct *work)
mutex_unlock(&ldev->lock);
mlx5_mpesw_sd_devcoms_unlock(ldev);
mlx5_devcom_comp_unlock(devcom);
+complete:
complete(&mpesww->comp);
}
--
2.44.0
^ permalink raw reply related
* [PATCH net V2 3/3] net/mlx5e: TC, skip peer flow cleanup when LAG seq is unavailable
From: Tariq Toukan @ 2026-06-30 11:29 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
netdev, Paolo Abeni
Cc: Edward Srouji, Jacob Keller, Kees Cook, Leon Romanovsky,
linux-kernel, linux-rdma, Maher Sanalla, Mark Bloch,
Moshe Shemesh, Or Har-Toov, Rongwei Liu, Saeed Mahameed,
Shay Drori, Simon Horman, Tariq Toukan
In-Reply-To: <20260630112917.698313-1-tariqt@nvidia.com>
From: Shay Drory <shayd@nvidia.com>
mlx5_lag_get_dev_seq() returns an error when the peer isn't in the LAG
or when no device is marked as master. The result is a bad memory access
and a kernel crash [1].
Hence, skip the peer when lookup fails.
Note: If there are peer flows, they are cleaned up before LAG clears
the master mark.
[1]
RIP: 0010:mlx5e_tc_del_fdb_peers_flow+0x3d/0x350 [mlx5_core]
Call Trace:
<TASK>
mlx5e_tc_clean_fdb_peer_flows+0xc1/0x130 [mlx5_core]
mlx5_esw_offloads_unpair+0x3a/0x400 [mlx5_core]
mlx5_esw_offloads_devcom_event+0xee/0x360 [mlx5_core]
mlx5_devcom_send_event+0x7a/0x140 [mlx5_core]
mlx5_esw_offloads_devcom_cleanup+0x2f/0x90 [mlx5_core]
mlx5e_tc_esw_cleanup+0x28/0xf0 [mlx5_core]
mlx5e_rep_tc_cleanup+0x19/0x30 [mlx5_core]
mlx5e_cleanup_uplink_rep_tx+0x36/0x40 [mlx5_core]
mlx5e_cleanup_rep_tx+0x55/0x60 [mlx5_core]
mlx5e_detach_netdev+0x96/0xf0 [mlx5_core]
mlx5e_netdev_change_profile+0x5b/0x120 [mlx5_core]
mlx5e_netdev_attach_nic_profile+0x1b/0x30 [mlx5_core]
mlx5e_vport_rep_unload+0xdd/0x110 [mlx5_core]
__esw_offloads_unload_rep+0x81/0xb0 [mlx5_core]
mlx5_eswitch_unregister_vport_reps+0x1d7/0x220 [mlx5_core]
mlx5e_rep_remove+0x22/0x30 [mlx5_core]
device_release_driver_internal+0x194/0x1f0
bus_remove_device+0xe8/0x1b0
device_del+0x159/0x3c0
mlx5_rescan_drivers_locked+0xbc/0x2d0 [mlx5_core]
mlx5_unregister_device+0x54/0x80 [mlx5_core]
mlx5_uninit_one+0x73/0x130 [mlx5_core]
remove_one+0x78/0xe0 [mlx5_core]
pci_device_remove+0x39/0xa0
Fixes: 971b28accc09 ("net/mlx5: LAG, replace mlx5_get_dev_index with LAG sequence number")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 910492eb51f2..1bc7b9019124 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5547,6 +5547,9 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) {
i = mlx5_lag_get_dev_seq(peer_esw->dev);
+ if (i < 0)
+ continue;
+
list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
mlx5e_tc_del_fdb_peers_flow(flow);
}
--
2.44.0
^ permalink raw reply related
* [Intel-wired-lan] [PATCH net v2] igc: Fix RX HW timestamp reporting when NET_RX_BUSY_POLL is disabled
From: Ding Meng @ 2026-06-30 11:15 UTC (permalink / raw)
To: anthony.l.nguyen, przemyslaw.kitszel, andrew+netdev, davem,
edumazet, kuba, pabeni, jan.kiszka, florian.bezdeka
Cc: intel-wired-lan, linux-kernel, netdev, meng.ding, wq.wang,
pmenzel, stable, Aleksandr Loktionov, Piotr Kwapulinski
When CONFIG_NET_RX_BUSY_POLL is deactivated, fetching RX HW timestamps
from the NIC no longer works as expected, often resulting in incorrect
or negative values such as "HW raw -121948.050407424".
This occurs because disabling CONFIG_NET_RX_BUSY_POLL disables the
SKB NAPI mapping in __skb_mark_napi_id(). Consequently, get_timestamp()
fails to perform its driver lookup, and the igc driver's struct
net_device_ops::ndo_get_tstamp is never invoked.
Instead, get_timestamp() falls back to using skb_hwtstamps(skb)->hwtstamp,
a field that the driver has not populated. This results in incorrect
timestamps.
Fix this by populating the hwtstamp field with the correct timestamp
taken from the default timer (timer0) when CONFIG_NET_RX_BUSY_POLL is
disabled.
The "igc_adapter" is passed to igc_construct_skb() to enable
igc_ptp_rx_pktstamp() to access the necessary adapter details for
adjusting the timestamp.
Test case:
Disable CONFIG_NET_RX_BUSY_POLL.
Sender:
# tools/testing/selftests/net/timestamping en0 \
SOF_TIMESTAMPING_TX_HARDWARE PTPV2 IP_MULTICAST_LOOP
Receiver:
# tools/testing/selftests/net/timestamping en0 \
SOF_TIMESTAMPING_RX_HARDWARE SOF_TIMESTAMPING_RAW_HARDWARE PTPV2
Before patch, receiver prints
HW raw -121948.050407424
After patch, receiver prints
HW raw 1760648763.746974064
Fixes: 069b142f5819 ("igc: Add support for PTP .getcyclesx64()")
Cc: stable@vger.kernel.org
Co-developed-by: Florian Bezdeka <florian.bezdeka@siemens.com>
Signed-off-by: Florian Bezdeka <florian.bezdeka@siemens.com>
Signed-off-by: Ding Meng <meng.ding@siemens.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
---
V2:
- update commit message(suggested by Paul Menzel):
add error log
explain why need to pass igc_adapter
add test case
- move variable declarations on top of the function
- Cc stable@vger.kernel.org
V1: https://lore.kernel.org/intel-wired-lan/20260622041718.6106-1-meng.ding@siemens.com/
---
drivers/net/ethernet/intel/igc/igc_main.c | 41 ++++++++++++++++-------
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 8ac16808023..5c4beb8b5d4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1992,7 +1992,29 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
return skb;
}
-static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
+static void igc_construct_skb_timestamps(struct igc_adapter *adapter,
+ struct sk_buff *skb,
+ struct igc_xdp_buff *ctx)
+{
+#ifndef CONFIG_NET_RX_BUSY_POLL
+ struct igc_inline_rx_tstamps *tstamps;
+#endif
+
+ if (!ctx->rx_ts)
+ return;
+
+#ifndef CONFIG_NET_RX_BUSY_POLL
+ tstamps = ctx->rx_ts;
+ skb_hwtstamps(skb)->hwtstamp = igc_ptp_rx_pktstamp(adapter,
+ tstamps->timer0);
+#else
+ skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
+ skb_hwtstamps(skb)->netdev_data = ctx->rx_ts;
+#endif
+}
+
+static struct sk_buff *igc_construct_skb(struct igc_adapter *adapter,
+ struct igc_ring *rx_ring,
struct igc_rx_buffer *rx_buffer,
struct igc_xdp_buff *ctx)
{
@@ -2013,10 +2035,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
if (unlikely(!skb))
return NULL;
- if (ctx->rx_ts) {
- skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
- skb_hwtstamps(skb)->netdev_data = ctx->rx_ts;
- }
+ igc_construct_skb_timestamps(adapter, skb, ctx);
/* Determine available headroom for copy */
headlen = size;
@@ -2686,7 +2705,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
else if (ring_uses_build_skb(rx_ring))
skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
else
- skb = igc_construct_skb(rx_ring, rx_buffer, &ctx);
+ skb = igc_construct_skb(adapter, rx_ring, rx_buffer, &ctx);
/* exit if we failed to retrieve a buffer */
if (!xdp_res && !skb) {
@@ -2738,7 +2757,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
return total_packets;
}
-static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
+static struct sk_buff *igc_construct_skb_zc(struct igc_adapter *adapter,
+ struct igc_ring *ring,
struct igc_xdp_buff *ctx)
{
struct xdp_buff *xdp = &ctx->xdp;
@@ -2760,10 +2780,7 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
__skb_pull(skb, metasize);
}
- if (ctx->rx_ts) {
- skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
- skb_hwtstamps(skb)->netdev_data = ctx->rx_ts;
- }
+ igc_construct_skb_timestamps(adapter, skb, ctx);
return skb;
}
@@ -2775,7 +2792,7 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
struct igc_ring *ring = q_vector->rx.ring;
struct sk_buff *skb;
- skb = igc_construct_skb_zc(ring, ctx);
+ skb = igc_construct_skb_zc(q_vector->adapter, ring, ctx);
if (!skb) {
ring->rx_stats.alloc_failed++;
set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags);
base-commit: 4549871118cf616eecdd2d939f78e3b9e1dddc48
--
2.47.3
^ permalink raw reply related
* Re: [PATCH net] selftests: drv-net: tso: don't touch dangerous feature bits
From: Daniel Zahka @ 2026-06-30 11:38 UTC (permalink / raw)
To: Jakub Kicinski, davem
Cc: netdev, edumazet, pabeni, andrew+netdev, horms, shuah,
linux-kselftest
In-Reply-To: <20260629233923.2151144-1-kuba@kernel.org>
On 6/29/26 7:39 PM, Jakub Kicinski wrote:
> query_nic_features() detects which offloads depend on tx-gso-partial
> by enabling everything, turning tx-gso-partial off, and seeing which
> active features drop out. Enabling all hw features is dangerous:
> we may end up enabling rx-fcs and loopback for example. For the
> ice driver we end up getting into problems with feature dependencies
> so the cleanup isn't successful either, and the test exits with
> rx-fcs and loopback enabled.
>
> Scope the feature probing just to segmentation bits.
>
> Fixes: 266b835e5e84 ("selftests: drv-net: tso: enable test cases based on hw_features")
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Daniel Zahka <daniel.zahka@gmail.com>
^ permalink raw reply
* Re: [PATCH v2 02/19] driver core: platform: provide platform_device_set_of_node()
From: Manuel Ebner @ 2026-06-30 11:37 UTC (permalink / raw)
To: Bartosz Golaszewski, Lee Jones, Thierry Reding,
Sebastian Hesselbarth, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Srinivas Kandagatla,
Greg Kroah-Hartman, Vinod Koul, Rafael J. Wysocki,
Danilo Krummrich, Rob Herring, Saravana Kannan,
Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
Christophe Leroy (CS GROUP), Andi Shyti, Andy Shevchenko,
Joerg Roedel, Will Deacon, Robin Murphy, Doug Berger,
Florian Fainelli, Broadcom internal kernel review list,
Ulf Hansson, Frank Li, Sascha Hauer, Pengutronix Kernel Team,
Fabio Estevam, Matthew Brost, Thomas Hellström, Rodrigo Vivi,
David Airlie, Simona Vetter, Peter Chen, Paul Cercueil, Bin Liu,
Philipp Zabel, Maximilian Luz, Hans de Goede, Ilpo Järvinen,
Krzysztof Kozlowski, Benjamin Herrenschmidt
Cc: brgl, linux-kernel, netdev, linux-arm-msm, linux-sound,
driver-core, devicetree, linuxppc-dev, linux-i2c, iommu, linux-pm,
imx, linux-arm-kernel, intel-xe, dri-devel, linux-usb, linux-mips,
platform-driver-x86
In-Reply-To: <20260629-pdev-fwnode-ref-v2-2-8abe2513f96e@oss.qualcomm.com>
On Mon, 2026-06-29 at 11:12 +0200, Bartosz Golaszewski wrote:
> [...]
>
> +/**
> + * platform_device_set_of_node - assign an OF node to device
> + * @pdev: platform device to add the node for
> + * @np: new device node
> + *
> + * Assign an OF node to this platform device. Internally keep track of the
> + * reference count. Devices created with platform_device_alloc() must use this
> + * function instead of assigning the node manually.
Doesn't it make sense to add a remark to the kernel doc of platform_device_alloc()?
Thanks
Manuel
> [...]
^ permalink raw reply
* Re: [PATCH v2 5/6] arm64: dts: qcom: ipq5018: add nodes required for Bluetooth support
From: Konrad Dybcio @ 2026-06-30 11:40 UTC (permalink / raw)
To: george.moussalem, Jens Axboe, Ulf Hansson, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Johannes Berg, Jeff Johnson,
Bartosz Golaszewski, Marcel Holtmann, Luiz Augusto von Dentz,
Balakrishna Godavarthi, Rocky Liao, Saravana Kannan, Andrew Lunn,
Heiner Kallweit, Russell King, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Simon Horman, Bjorn Andersson,
Konrad Dybcio, Mathieu Poirier, Philipp Zabel
Cc: linux-block, linux-kernel, linux-mmc, devicetree, linux-wireless,
ath10k, linux-arm-msm, linux-bluetooth, netdev, linux-remoteproc
In-Reply-To: <20260629-ipq5018-bluetooth-v2-5-02770f03b6bb@outlook.com>
On 6/29/26 3:01 PM, George Moussalem via B4 Relay wrote:
> From: George Moussalem <george.moussalem@outlook.com>
>
> Add nodes for the reserved memory carveout and Bluetooth.
>
> Signed-off-by: George Moussalem <george.moussalem@outlook.com>
> ---
> arch/arm64/boot/dts/qcom/ipq5018.dtsi | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/boot/dts/qcom/ipq5018.dtsi b/arch/arm64/boot/dts/qcom/ipq5018.dtsi
> index 6f8004a22a1f..65a47ba7d3a3 100644
> --- a/arch/arm64/boot/dts/qcom/ipq5018.dtsi
> +++ b/arch/arm64/boot/dts/qcom/ipq5018.dtsi
> @@ -17,6 +17,23 @@ / {
> #address-cells = <2>;
> #size-cells = <2>;
>
> + bluetooth: bluetooth {
> + compatible = "qcom,ipq5018-bt";
> +
> + firmware-name = "qca/bt_fw_patch.mbn";
Is this fw vendor-signed?
Konrad
^ permalink raw reply
* [PATCH iproute2-next] ss: stop displaying dccp sockets
From: Yafang Shao @ 2026-06-30 11:41 UTC (permalink / raw)
To: stephen, kuniyu; +Cc: netdev, Yafang Shao
DCCP support was retired in kernel commit 2a63dd0edf38 ("net: Retire
DCCP socket."). However, ss still attempts to query DCCP sockets via
netlink, which triggers repeated SELinux warnings in dmesg:
SELinux: unrecognized netlink message: protocol=4 nlmsg_type=19 \
sclass=netlink_tcpdiag_socket pid=188945 comm=ss
Stop sending DCCPDIAG_GETSOCK netlink messages to suppress these
warnings and align ss with the kernel change.
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Kuniyuki Iwashima <kuniyu@google.com>
---
man/man8/ss.8 | 5 +++--
misc/ss.c | 40 ++++++++--------------------------------
2 files changed, 11 insertions(+), 34 deletions(-)
diff --git a/man/man8/ss.8 b/man/man8/ss.8
index 70e0a566..37dd75a0 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -378,7 +378,8 @@ Display TCP sockets.
Display UDP sockets.
.TP
.B \-d, \-\-dccp
-Display DCCP sockets.
+[Deprecated] DCCP is no longer supported since kernel 6.16.
+This option is ignored.
.TP
.B \-w, \-\-raw
Display RAW sockets.
@@ -411,7 +412,7 @@ supported: unix, inet, inet6, link, netlink, vsock, tipc, xdp.
.B \-A QUERY, \-\-query=QUERY, \-\-socket=QUERY
List of socket tables to dump, separated by commas. The following identifiers
are understood: all, inet, tcp, udp, raw, unix, packet, netlink, unix_dgram,
-unix_stream, unix_seqpacket, packet_raw, packet_dgram, dccp, sctp, tipc,
+unix_stream, unix_seqpacket, packet_raw, packet_dgram, sctp, tipc,
vsock_stream, vsock_dgram, xdp, mptcp. Any item in the list may optionally be
prefixed by an exclamation mark
.RB ( ! )
diff --git a/misc/ss.c b/misc/ss.c
index 14e9f27a..dae5f282 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -195,7 +195,6 @@ static const char *dg_proto;
enum {
TCP_DB,
MPTCP_DB,
- DCCP_DB,
UDP_DB,
RAW_DB,
UNIX_DG_DB,
@@ -215,7 +214,7 @@ enum {
#define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB))
#define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB)|(1<<UNIX_SQ_DB))
#define ALL_DB ((1<<MAX_DB)-1)
-#define INET_L4_DBM ((1<<TCP_DB)|(1<<MPTCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<SCTP_DB))
+#define INET_L4_DBM ((1<<TCP_DB)|(1<<MPTCP_DB)|(1<<UDP_DB)|(1<<SCTP_DB))
#define INET_DBM (INET_L4_DBM | (1<<RAW_DB))
#define VSOCK_DBM ((1<<VSOCK_ST_DB)|(1<<VSOCK_DG_DB))
@@ -274,10 +273,6 @@ static const struct filter default_dbs[MAX_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
- [DCCP_DB] = {
- .states = SS_CONN,
- .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
- },
[UDP_DB] = {
.states = (1 << SS_ESTABLISHED),
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
@@ -388,13 +383,12 @@ static int filter_db_parse(struct filter *f, const char *s)
int dbs[MAX_DB + 1];
} db_name_tbl[] = {
#define ENTRY(name, ...) { #name, { __VA_ARGS__, MAX_DB } }
- ENTRY(all, UDP_DB, DCCP_DB, TCP_DB, MPTCP_DB, RAW_DB,
+ ENTRY(all, UDP_DB, TCP_DB, MPTCP_DB, RAW_DB,
UNIX_ST_DB, UNIX_DG_DB, UNIX_SQ_DB,
PACKET_R_DB, PACKET_DG_DB, NETLINK_DB,
SCTP_DB, VSOCK_ST_DB, VSOCK_DG_DB, XDP_DB),
- ENTRY(inet, UDP_DB, DCCP_DB, TCP_DB, MPTCP_DB, SCTP_DB, RAW_DB),
+ ENTRY(inet, UDP_DB, TCP_DB, MPTCP_DB, SCTP_DB, RAW_DB),
ENTRY(udp, UDP_DB),
- ENTRY(dccp, DCCP_DB),
ENTRY(tcp, TCP_DB),
ENTRY(mptcp, MPTCP_DB),
ENTRY(sctp, SCTP_DB),
@@ -935,8 +929,6 @@ static const char *proto_name(int protocol)
return "mptcp";
case IPPROTO_SCTP:
return "sctp";
- case IPPROTO_DCCP:
- return "dccp";
case IPPROTO_ICMPV6:
return "icmp6";
}
@@ -3897,8 +3889,6 @@ static int tcpdiag_send(int fd, int protocol, struct filter *f)
if (protocol == IPPROTO_TCP)
req.nlh.nlmsg_type = TCPDIAG_GETSOCK;
- else if (protocol == IPPROTO_DCCP)
- req.nlh.nlmsg_type = DCCPDIAG_GETSOCK;
else
return -1;
@@ -4134,7 +4124,7 @@ static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
/* Suppress netlink errors. Older kernels do not support extended
* protocol requests using INET_DIAG_REQ_PROTOCOL, and some protocols
- * may not be available in the running kernel (e.g. SCTP, DCCP).
+ * may not be available in the running kernel (e.g. SCTP).
* In both cases the kernel returns EINVAL which would cause
* rtnl_dump_error() to print a confusing "RTNETLINK answers" error.
*/
@@ -4309,18 +4299,6 @@ static int mptcp_show(struct filter *f)
return 0;
}
-static int dccp_show(struct filter *f)
-{
- if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
- return 0;
-
- if (!getenv("PROC_NET_DCCP") && !getenv("PROC_ROOT")
- && inet_show_netlink(f, NULL, IPPROTO_DCCP) == 0)
- return 0;
-
- return 0;
-}
-
static int sctp_show(struct filter *f)
{
if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
@@ -5779,7 +5757,7 @@ static void _usage(FILE *dest)
" -M, --mptcp display only MPTCP sockets\n"
" -S, --sctp display only SCTP sockets\n"
" -u, --udp display only UDP sockets\n"
-" -d, --dccp display only DCCP sockets\n"
+" -d, --dccp DCCP is no longer supported, option ignored\n"
" -w, --raw display only RAW sockets\n"
" -x, --unix display only Unix domain sockets\n"
" --tipc display only TIPC sockets\n"
@@ -5795,7 +5773,7 @@ static void _usage(FILE *dest)
" --inet-sockopt show various inet socket options\n"
"\n"
" -A, --query=QUERY, --socket=QUERY\n"
-" QUERY := {all|inet|tcp|mptcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|packet_raw|packet_dgram|netlink|dccp|sctp|vsock_stream|vsock_dgram|tipc|xdp}[,QUERY]\n"
+" QUERY := {all|inet|tcp|mptcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|packet_raw|packet_dgram|netlink|sctp|vsock_stream|vsock_dgram|tipc|xdp}[,QUERY]\n"
"\n"
" -D, --diag=FILE Dump raw information about TCP sockets to FILE\n"
" -F, --filter=FILE read filter information from FILE\n"
@@ -5907,7 +5885,7 @@ static const struct option long_opts[] = {
{ "threads", 0, 0, 'T' },
{ "bpf", 0, 0, 'b' },
{ "events", 0, 0, 'E' },
- { "dccp", 0, 0, 'd' },
+ { "dccp", 0, 0, 'd' }, /* DCCP retired, kept for compatibility */
{ "tcp", 0, 0, 't' },
{ "sctp", 0, 0, 'S' },
{ "udp", 0, 0, 'u' },
@@ -5997,7 +5975,7 @@ int main(int argc, char *argv[])
follow_events = 1;
break;
case 'd':
- filter_db_set(&current_filter, DCCP_DB, true);
+ /* DCCP retired in kernel 6.16, kept for compatibility */
break;
case 't':
filter_db_set(&current_filter, TCP_DB, true);
@@ -6290,8 +6268,6 @@ int main(int argc, char *argv[])
udp_show(&current_filter);
if (current_filter.dbs & (1<<TCP_DB))
tcp_show(&current_filter);
- if (current_filter.dbs & (1<<DCCP_DB))
- dccp_show(&current_filter);
if (current_filter.dbs & (1<<SCTP_DB))
sctp_show(&current_filter);
if (current_filter.dbs & VSOCK_DBM)
--
2.50.1 (Apple Git-155)
^ permalink raw reply related
* Re: [PATCH net v3 1/1] net/sched: sch_teql: Introduce slaves_lock to avoid race condition and UAF
From: Jamal Hadi Salim @ 2026-06-30 11:49 UTC (permalink / raw)
To: Paolo Abeni
Cc: netdev, davem, edumazet, kuba, horms, victor, jiri, security,
zdi-disclosures, stable
In-Reply-To: <de40b1a5-663e-43ab-9fb7-5a49f029cc4b@redhat.com>
On Tue, Jun 30, 2026 at 7:15 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On 6/28/26 1:12 PM, Jamal Hadi Salim wrote:
> > The teql master->slaves singly linked list is not protected against
> > multiple writes. It can be modified concurrently from teql_master_xmit(),
> > teql_dequeue(), teql_init() and teql_destroy() without holding any list
> > lock or RCU protection.
> >
> > zdi-disclosures@trendmicro.com has demonstrated that the qdisc is freed
> > after an RCU grace period, but teql_master_xmit() running on another
> > CPU can still hold a stale pointer into the list, resulting in a
> > slab-use-after-free:
> >
> > BUG: KASAN: slab-use-after-free in teql_master_xmit+0xf0f/0x16b0
> > Read of size 8 at addr ffff888013fb0440 by task poc/332
> > Freed 512-byte region [ffff888013fb0400, ffff888013fb0600) (kmalloc-512)
> >
> > The fix?
> > Add a per-master slaves_lock spinlock that serializes all mutations of
> > master->slaves and the NEXT_SLAVE() links in teql_destroy() and
> > teql_qdisc_init(). teql_master_xmit() also takes the same slaves_lock
> > around those updates.
> > Annotate master->slaves and the per-slave ->next pointer with __rcu and
> > use the appropriate RCU accessors everywhere they are touched:
> > rcu_assign_pointer() on the writer side (under slaves_lock),
> > rcu_dereference_protected() for the writer-side loads (also under
> > slaves_lock), rcu_dereference_bh() for the loads in teql_master_xmit() and
> > rtnl_dereference() for the loads in teql_master_open()/teql_master_mtu(),
> > which run under RTNL.
> > Pair this with rcu_read_lock_bh()/rcu_read_unlock_bh() around the list
> > traversal in teql_master_xmit(), so that readers either observe a fully
> > linked list or are deferred until the in-flight mutation completes. The two
> > early-return paths in teql_master_xmit() are updated to release the RCU-bh
> > read-side critical section before returning, since leaving it held would
> > disable BH on that CPU for good.
> >
> > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > Reported-by: zdi-disclosures@trendmicro.com
> > Tested-by: Victor Nogueira <victor@mojatatu.com>
> > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>
> Looks good, thanks!
>
> Please note that sashiko/gemini found a pre-existing issue which may
> require a follow-up/separate fix:
>
> https://sashiko.dev/#/patchset/20260628111229.669751-1-jhs%40mojatatu.com
>
> (the 2nd one in the above link, IDK how to generate a direct link to a
> specific comment)
I just sent v4, which covers that, but I will send a follow-up instead
if you have already applied this one.
BTW: What is the ruling on when Sashiko finds a pre-existing issue?
Should we address that as a separate follow-up patch? It is unclear
what the policy is.
This teql patch was one of the hardest to deal with, both in terms of
reproducibility and because sashiko kept coming up with pre-existing
issues - including the one Simon and I were discussing. Note: None of
the pre-existing issues affected reproducibility at all, although I am
sure one of the AI-kiddies reading the sashiko reports will find a way
to create a PoC (this is why I entertain fixing them when they look
simple enough).
cheers,
jamal
^ permalink raw reply
* [PATCH net V4 0/3] net/mlx5e: Fix crashes in dynamic per-channel stats and HV VHCA agent
From: Tariq Toukan @ 2026-06-30 11:51 UTC (permalink / raw)
To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
netdev, Paolo Abeni
Cc: Cosmin Ratiu, Eran Ben Elisha, Feng Liu, Haiyang Zhang,
Lama Kayal, Leon Romanovsky, linux-kernel, linux-rdma, Mark Bloch,
Nimrod Oren, Saeed Mahameed, Tariq Toukan, Gal Pressman,
Alexei Lazar, Simon Horman, Carolina Jubran, Kees Cook,
Eran Ben Elisha, Saeed Mahameed
Hi,
Since per-channel stats were converted to be allocated and published
lazily at first channel open in commit fa691d0c9c08 ("net/mlx5e:
Allocate per-channel stats dynamically at first usage"),
priv->channel_stats[] and priv->stats_nch are filled in
incrementally during interface bring-up. This opened a window in
which the various stats readers - most of them reachable from
userspace via netlink/netdev stats queries - can race with
mlx5e_open_channel() on another CPU and observe partially
initialized state. The HV VHCA stats agent, which is created
before the channels are opened, hits related problems of its own.
This series by Feng fixes the resulting crashes.
Regards,
Tariq
V4:
- Patch 1/3: also clear priv->stats_agent.{agent,buf} to NULL in
mlx5e_hv_vhca_stats_destroy() after freeing them. Making the
allocation non-zero in V3 made the kvzalloc() failure path in
mlx5e_hv_vhca_stats_create() reachable for the first time; without
the NULL assignments a failed create followed by destroy would
double-free stale pointers from a previous cycle.
(Caught by Simon Horman.)
V3:
https://lore.kernel.org/all/20260622083646.593220-1-tariqt@nvidia.com/
V2:
https://lore.kernel.org/all/20260617140127.573117-1-tariqt@nvidia.com/
Feng Liu (3):
net/mlx5e: Fix HV VHCA stats zero-sized buffer allocation
net/mlx5e: Fix HV VHCA stats agent registration race
net/mlx5e: Fix publication race for priv->channel_stats[]
drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 ++++++
.../mellanox/mlx5/core/en/hv_vhca_stats.c | 37 +++++++++++++------
.../net/ethernet/mellanox/mlx5/core/en_main.c | 14 ++++---
.../ethernet/mellanox/mlx5/core/en_stats.c | 9 +++--
.../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 +-
.../ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 8 +++-
.../ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 6 ++-
7 files changed, 62 insertions(+), 27 deletions(-)
base-commit: dbf803bc4a8b0522c9a12560c20905a5952d1cb9
--
2.44.0
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox