* [PATCH net-next v3 6/9] net: dsa: lan9645x: add vlan support
From: Jens Emil Schulz Østergaard @ 2026-04-10 11:48 UTC (permalink / raw)
To: UNGLinuxDriver, Andrew Lunn, Vladimir Oltean, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
Rob Herring, Krzysztof Kozlowski, Conor Dooley, Woojung Huh,
Russell King, Steen Hegelund, Daniel Machon
Cc: linux-kernel, netdev, devicetree,
Jens Emil Schulz Østergaard
In-Reply-To: <20260410-dsa_lan9645x_switch_driver_base-v3-0-aadc8595306d@microchip.com>
Add support for vlanaware bridge. We reserve vid 4095 for standalone
mode, to implement fdb-isolation. A vlan-unaware bridge uses vid 0.
Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com>
Signed-off-by: Jens Emil Schulz Østergaard <jensemil.schulzostergaard@microchip.com>
---
Changes in v3:
- use SET register macros in vlan_hw_wr
- add vlan id bounds check to vlan_del
- return vlan_hw_wr timeout err on init
- move cpu vlan action after bounds check
Changes in v2:
- redesign based on selftests which rely on changing vlan_default_pvid.
Our HW limitations were too forward. Following Vladimirs changes to
ocelot VLAN implementation, we now dynamically change egress tag
configuration, allowing more states.
- selftests are passing, except an expected failure w.r.t ctag/stag
conformance, which is a hw limitation.
---
drivers/net/dsa/microchip/lan9645x/Makefile | 1 +
drivers/net/dsa/microchip/lan9645x/lan9645x_main.c | 49 +++
drivers/net/dsa/microchip/lan9645x/lan9645x_main.h | 28 ++
drivers/net/dsa/microchip/lan9645x/lan9645x_port.c | 3 +
drivers/net/dsa/microchip/lan9645x/lan9645x_vlan.c | 378 +++++++++++++++++++++
5 files changed, 459 insertions(+)
diff --git a/drivers/net/dsa/microchip/lan9645x/Makefile b/drivers/net/dsa/microchip/lan9645x/Makefile
index 7cc0ae0ada40..e049114b3563 100644
--- a/drivers/net/dsa/microchip/lan9645x/Makefile
+++ b/drivers/net/dsa/microchip/lan9645x/Makefile
@@ -6,3 +6,4 @@ mchp-lan9645x-objs := \
lan9645x_npi.o \
lan9645x_phylink.o \
lan9645x_port.o \
+ lan9645x_vlan.o \
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c
index e709396c2298..adbdf2007e9f 100644
--- a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c
@@ -156,6 +156,9 @@ static int lan9645x_setup(struct dsa_switch *ds)
}
mutex_init(&lan9645x->fwd_domain_lock);
+ err = lan9645x_vlan_init(lan9645x);
+ if (err)
+ return err;
/* Link Aggregation Mode: NETDEV_LAG_HASH_L2 */
lan_wr(ANA_AGGR_CFG_AC_SMAC_ENA |
@@ -542,11 +545,52 @@ static void lan9645x_port_bridge_leave(struct dsa_switch *ds, int port,
lan9645x->bridge = NULL;
__lan9645x_port_set_host_flood(lan9645x, port);
+ lan9645x_vlan_set_hostmode(p);
lan9645x_update_fwd_mask(lan9645x);
mutex_unlock(&lan9645x->fwd_domain_lock);
}
+static int lan9645x_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool enabled,
+ struct netlink_ext_ack *extack)
+{
+ struct lan9645x *lan9645x = ds->priv;
+ struct lan9645x_port *p;
+
+ p = lan9645x_to_port(lan9645x, port);
+ p->vlan_aware = enabled;
+ lan9645x_vlan_port_apply(p);
+
+ return 0;
+}
+
+static int lan9645x_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct lan9645x *lan9645x = ds->priv;
+ struct lan9645x_port *p;
+ bool pvid, untagged;
+
+ p = lan9645x_to_port(lan9645x, port);
+ pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID);
+ untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED);
+
+ return lan9645x_vlan_port_add_vlan(p, vlan->vid, pvid, untagged,
+ extack);
+}
+
+static int lan9645x_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct lan9645x *lan9645x = ds->priv;
+ struct lan9645x_port *p;
+
+ p = lan9645x_to_port(lan9645x, port);
+ return lan9645x_vlan_port_del_vlan(p, vlan->vid);
+}
+
static const struct dsa_switch_ops lan9645x_switch_ops = {
.get_tag_protocol = lan9645x_get_tag_protocol,
@@ -569,6 +613,11 @@ static const struct dsa_switch_ops lan9645x_switch_ops = {
.port_bridge_leave = lan9645x_port_bridge_leave,
.port_stp_state_set = lan9645x_port_bridge_stp_state_set,
.port_set_host_flood = lan9645x_port_set_host_flood,
+
+ /* VLAN integration */
+ .port_vlan_filtering = lan9645x_port_vlan_filtering,
+ .port_vlan_add = lan9645x_port_vlan_add,
+ .port_vlan_del = lan9645x_port_vlan_del,
};
static int lan9645x_request_target_regmaps(struct lan9645x *lan9645x)
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h
index 22576bb8dd52..3c6996e150e4 100644
--- a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h
@@ -7,6 +7,7 @@
#include <linux/dsa/lan9645x.h>
#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
#include <linux/regmap.h>
#include <net/dsa.h>
@@ -150,6 +151,17 @@ enum lan9645x_vlan_port_tag {
LAN9645X_TAG_ALL = 3,
};
+struct lan9645x_vlan {
+ u32 portmask: 10, /* ports 0-8 + CPU_PORT */
+ untagged: 9, /* ports 0-8 */
+ src_chk: 1,
+ mir: 1,
+ lrn_dis: 1,
+ prv_vlan: 1,
+ fld_dis: 1,
+ s_fwd_ena: 1;
+};
+
struct lan9645x {
struct device *dev;
struct dsa_switch *ds;
@@ -174,6 +186,9 @@ struct lan9645x {
u16 bridge_fwd_mask; /* Mask for forwarding bridged ports */
struct mutex fwd_domain_lock; /* lock forwarding configuration */
+ /* VLAN entries */
+ struct lan9645x_vlan vlans[VLAN_N_VID];
+
int num_port_dis;
bool dd_dis;
bool tsn_dis;
@@ -186,6 +201,9 @@ struct lan9645x_port {
u8 stp_state;
bool learn_ena;
+ bool vlan_aware;
+ u16 pvid;
+
bool rx_internal_delay;
bool tx_internal_delay;
};
@@ -349,4 +367,14 @@ void lan9645x_phylink_get_caps(struct lan9645x *lan9645x, int port,
struct phylink_config *c);
void lan9645x_phylink_port_down(struct lan9645x *lan9645x, int port);
+/* VLAN lan9645x_vlan.c */
+int lan9645x_vlan_init(struct lan9645x *lan9645x);
+u16 lan9645x_vlan_unaware_pvid(bool is_bridged);
+void lan9645x_vlan_port_apply(struct lan9645x_port *p);
+int lan9645x_vlan_port_add_vlan(struct lan9645x_port *p, u16 vid, bool pvid,
+ bool untagged,
+ struct netlink_ext_ack *extack);
+int lan9645x_vlan_port_del_vlan(struct lan9645x_port *p, u16 vid);
+void lan9645x_vlan_set_hostmode(struct lan9645x_port *p);
+
#endif /* __LAN9645X_MAIN_H__ */
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_port.c b/drivers/net/dsa/microchip/lan9645x/lan9645x_port.c
index 394a20ee678f..661cd00465e2 100644
--- a/drivers/net/dsa/microchip/lan9645x/lan9645x_port.c
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_port.c
@@ -189,5 +189,8 @@ int lan9645x_port_setup(struct dsa_switch *ds, int port)
ANA_PORT_CFG_PORTID_VAL,
lan9645x, ANA_PORT_CFG(p->chip_port));
+ if (p->chip_port != lan9645x->npi)
+ lan9645x_vlan_set_hostmode(p);
+
return 0;
}
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_vlan.c b/drivers/net/dsa/microchip/lan9645x/lan9645x_vlan.c
new file mode 100644
index 000000000000..c38e918a881d
--- /dev/null
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_vlan.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2026 Microchip Technology Inc.
+ */
+
+#include "lan9645x_main.h"
+
+#define VLANACCESS_CMD_IDLE 0
+#define VLANACCESS_CMD_READ 1
+#define VLANACCESS_CMD_WRITE 2
+#define VLANACCESS_CMD_INIT 3
+
+struct lan9645x_vlan_port_info {
+ int untagged;
+ int tagged;
+ u16 untagged_vid;
+};
+
+/* Calculate VLAN state of a port, across all VLANS. */
+static void lan9645x_vlan_port_get_info(struct lan9645x *lan9645x, int port,
+ struct lan9645x_vlan_port_info *info)
+{
+ u16 vid;
+
+ info->untagged = 0;
+ info->tagged = 0;
+ info->untagged_vid = 0;
+
+ for (vid = 1; vid <= VLAN_MAX; vid++) {
+ struct lan9645x_vlan *v = &lan9645x->vlans[vid];
+
+ if (!(v->portmask & BIT(port)))
+ continue;
+
+ if (v->untagged & BIT(port)) {
+ info->untagged++;
+ info->untagged_vid = vid;
+ } else {
+ info->tagged++;
+ }
+
+ /* VLAN composition is invalid, so break early. */
+ if (info->untagged > 1 && info->tagged)
+ break;
+ }
+}
+
+static int lan9645x_vlan_wait_for_completion(struct lan9645x *lan9645x)
+{
+ u32 val;
+
+ return lan9645x_rd_poll_timeout(lan9645x, ANA_VLANACCESS, val,
+ ANA_VLANACCESS_VLAN_TBL_CMD_GET(val) ==
+ VLANACCESS_CMD_IDLE);
+}
+
+static int lan9645x_vlan_hw_wr(struct lan9645x *lan9645x, u16 vid)
+{
+ struct lan9645x_vlan *v = &lan9645x->vlans[vid];
+ bool cpu_dis = !(v->portmask & BIT(CPU_PORT));
+ u32 val;
+ int err;
+
+ val = ANA_VLANTIDX_VLAN_PGID_CPU_DIS_SET(cpu_dis) |
+ ANA_VLANTIDX_V_INDEX_SET(vid) |
+ ANA_VLANTIDX_VLAN_SEC_FWD_ENA_SET(v->s_fwd_ena) |
+ ANA_VLANTIDX_VLAN_FLOOD_DIS_SET(v->fld_dis) |
+ ANA_VLANTIDX_VLAN_PRIV_VLAN_SET(v->prv_vlan) |
+ ANA_VLANTIDX_VLAN_LEARN_DISABLED_SET(v->lrn_dis) |
+ ANA_VLANTIDX_VLAN_MIRROR_SET(v->mir) |
+ ANA_VLANTIDX_VLAN_SRC_CHK_SET(v->src_chk);
+
+ lan_wr(val, lan9645x, ANA_VLANTIDX);
+ lan_wr(ANA_VLAN_PORT_MASK_VLAN_PORT_MASK_SET(v->portmask),
+ lan9645x, ANA_VLAN_PORT_MASK);
+ lan_wr(ANA_VLANACCESS_VLAN_TBL_CMD_SET(VLANACCESS_CMD_WRITE),
+ lan9645x, ANA_VLANACCESS);
+
+ err = lan9645x_vlan_wait_for_completion(lan9645x);
+ if (err)
+ dev_err(lan9645x->dev, "Vlan set mask failed\n");
+
+ return err;
+}
+
+u16 lan9645x_vlan_unaware_pvid(bool is_bridged)
+{
+ return is_bridged ? UNAWARE_PVID : HOST_PVID;
+}
+
+static u16 lan9645x_vlan_port_get_pvid(struct lan9645x_port *port)
+{
+ bool is_bridged = lan9645x_port_is_bridged(port);
+
+ if (is_bridged && port->vlan_aware)
+ return port->pvid;
+ else
+ return lan9645x_vlan_unaware_pvid(is_bridged);
+}
+
+/* Dynamically choose the egress tagging mode based on the port vlan state:
+ *
+ * Standalone:
+ * TAG_NO_PVID_NO_UNAWARE with PORT_VID=HOST_PVID. This avoids leaking the
+ * internal HOST_PVID tag on ingress mirrored frames while leaving normal
+ * egress frames untagged.
+ *
+ * Bridged, VLAN-aware:
+ * - N untagged, 0 tagged: TAG_DISABLED
+ * - 1 untagged, N tagged: TAG_NO_PVID_NO_UNAWARE
+ * - 0 untagged, N tagged: TAG_ALL
+ *
+ * Bridged, VLAN-unaware:
+ * TAG_DISABLED
+ */
+static void
+lan9645x_vlan_port_apply_egress(struct lan9645x_port *p,
+ struct lan9645x_vlan_port_info *info)
+{
+ struct lan9645x *lan9645x = p->lan9645x;
+ enum lan9645x_vlan_port_tag tag_cfg;
+ u16 port_vid = UNAWARE_PVID;
+
+ if (!lan9645x_port_is_bridged(p)) {
+ tag_cfg = LAN9645X_TAG_NO_PVID_NO_UNAWARE;
+ port_vid = HOST_PVID;
+ } else if (p->vlan_aware) {
+ struct lan9645x_vlan_port_info _info;
+
+ if (!info) {
+ lan9645x_vlan_port_get_info(lan9645x, p->chip_port,
+ &_info);
+ info = &_info;
+ }
+
+ if (info->untagged == 1 && info->tagged) {
+ tag_cfg = LAN9645X_TAG_NO_PVID_NO_UNAWARE;
+ port_vid = info->untagged_vid;
+ } else if (info->untagged) {
+ tag_cfg = LAN9645X_TAG_DISABLED;
+ } else {
+ tag_cfg = LAN9645X_TAG_ALL;
+ }
+ } else {
+ tag_cfg = LAN9645X_TAG_DISABLED;
+ }
+
+ /* TAG_TPID_CFG encoding:
+ *
+ * 0: Use 0x8100.
+ * 1: Use 0x88A8.
+ * 2: Use custom value from PORT_VLAN_CFG.PORT_TPID.
+ * 3: Use PORT_VLAN_CFG.PORT_TPID, unless ingress tag was a C-tag
+ * (EtherType = 0x8100)
+ *
+ * Use 3 and PORT_VLAN_CFG.PORT_TPID=0x88a8 to ensure stags are not
+ * rewritten to ctags on egress.
+ */
+ lan_rmw(REW_TAG_CFG_TAG_TPID_CFG_SET(3) |
+ REW_TAG_CFG_TAG_CFG_SET(tag_cfg),
+ REW_TAG_CFG_TAG_TPID_CFG |
+ REW_TAG_CFG_TAG_CFG,
+ lan9645x, REW_TAG_CFG(p->chip_port));
+
+ lan_rmw(REW_PORT_VLAN_CFG_PORT_TPID_SET(ETH_P_8021AD) |
+ REW_PORT_VLAN_CFG_PORT_VID_SET(port_vid),
+ REW_PORT_VLAN_CFG_PORT_TPID |
+ REW_PORT_VLAN_CFG_PORT_VID,
+ lan9645x, REW_PORT_VLAN_CFG(p->chip_port));
+}
+
+static void lan9645x_vlan_port_apply_ingress(struct lan9645x_port *p)
+{
+ struct lan9645x *lan9645x = p->lan9645x;
+ u16 pvid;
+ u32 val;
+
+ pvid = lan9645x_vlan_port_get_pvid(p);
+
+ /* Default vlan to classify for untagged frames (may be zero) */
+ val = ANA_VLAN_CFG_VLAN_VID_SET(pvid);
+ if (p->vlan_aware)
+ val |= ANA_VLAN_CFG_VLAN_AWARE_ENA_SET(1) |
+ ANA_VLAN_CFG_VLAN_POP_CNT_SET(1);
+
+ lan_rmw(val,
+ ANA_VLAN_CFG_VLAN_VID |
+ ANA_VLAN_CFG_VLAN_AWARE_ENA |
+ ANA_VLAN_CFG_VLAN_POP_CNT,
+ lan9645x, ANA_VLAN_CFG(p->chip_port));
+
+ /* Drop frames with multicast source address */
+ val = ANA_DROP_CFG_DROP_MC_SMAC_ENA_SET(1);
+ if (p->vlan_aware && !pvid)
+ /* If port is vlan-aware and tagged, drop untagged and priority
+ * tagged frames.
+ */
+ val |= ANA_DROP_CFG_DROP_UNTAGGED_ENA_SET(1) |
+ ANA_DROP_CFG_DROP_PRIO_S_TAGGED_ENA_SET(1) |
+ ANA_DROP_CFG_DROP_PRIO_C_TAGGED_ENA_SET(1);
+
+ lan_wr(val, lan9645x, ANA_DROP_CFG(p->chip_port));
+}
+
+void lan9645x_vlan_port_apply(struct lan9645x_port *p)
+{
+ lan9645x_vlan_port_apply_ingress(p);
+ lan9645x_vlan_port_apply_egress(p, NULL);
+}
+
+static struct lan9645x_vlan *lan9645x_vlan_port_modify(struct lan9645x_port *p,
+ u16 vid, bool pvid,
+ bool untagged)
+{
+ struct lan9645x_vlan *v = &p->lan9645x->vlans[vid];
+
+ if (untagged)
+ v->untagged |= BIT(p->chip_port);
+ else
+ v->untagged &= ~BIT(p->chip_port);
+
+ if (pvid)
+ p->pvid = vid;
+ else if (p->pvid == vid)
+ p->pvid = 0;
+
+ return v;
+}
+
+static int lan9645x_vlan_cpu_add(struct lan9645x_port *p, u16 vid, bool pvid,
+ bool untagged)
+{
+ struct lan9645x_vlan *v;
+
+ v = lan9645x_vlan_port_modify(p, vid, pvid, untagged);
+ v->portmask |= BIT(CPU_PORT) | BIT(p->chip_port);
+ lan9645x_vlan_hw_wr(p->lan9645x, vid);
+ lan9645x_vlan_port_apply_ingress(p);
+
+ return 0;
+}
+
+int lan9645x_vlan_port_add_vlan(struct lan9645x_port *p, u16 vid, bool pvid,
+ bool untagged, struct netlink_ext_ack *extack)
+{
+ struct lan9645x *lan9645x = p->lan9645x;
+ struct lan9645x_vlan_port_info info;
+ struct lan9645x_vlan old_vlan;
+ struct lan9645x_vlan *v;
+ u16 old_pvid;
+
+ /* Kernel VLAN core adds vid 0, which collides with our UNAWARE_PVID.
+ * We handle priority tagged frames by other means.
+ */
+ if (!vid)
+ return 0;
+
+ if (vid > VLAN_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN range 4094-4095 reserved.");
+ return -EBUSY;
+ }
+
+ if (p->chip_port == lan9645x->npi)
+ return lan9645x_vlan_cpu_add(p, vid, pvid, untagged);
+
+ old_vlan = lan9645x->vlans[vid];
+ old_pvid = p->pvid;
+
+ v = lan9645x_vlan_port_modify(p, vid, pvid, untagged);
+ v->portmask |= BIT(p->chip_port);
+
+ lan9645x_vlan_port_get_info(lan9645x, p->chip_port, &info);
+
+ if (info.untagged > 1 && info.tagged) {
+ *v = old_vlan;
+ p->pvid = old_pvid;
+ NL_SET_ERR_MSG_MOD(extack, "Only support 1 untagged port VLAN");
+ return -EBUSY;
+ }
+
+ lan9645x_vlan_hw_wr(lan9645x, vid);
+ lan9645x_vlan_port_apply_ingress(p);
+ lan9645x_vlan_port_apply_egress(p, &info);
+
+ return 0;
+}
+
+static int lan9645x_vlan_cpu_del(struct lan9645x_port *p, u16 vid)
+{
+ struct lan9645x_vlan *v;
+
+ v = lan9645x_vlan_port_modify(p, vid, false, false);
+ v->portmask &= ~BIT(CPU_PORT) & ~BIT(p->chip_port);
+ lan9645x_vlan_hw_wr(p->lan9645x, vid);
+ lan9645x_vlan_port_apply_ingress(p);
+
+ return 0;
+}
+
+int lan9645x_vlan_port_del_vlan(struct lan9645x_port *p, u16 vid)
+{
+ struct lan9645x *lan9645x = p->lan9645x;
+ struct lan9645x_vlan *v;
+
+ if (!vid)
+ return 0;
+
+ if (vid > VLAN_MAX)
+ return -EBUSY;
+
+ if (p->chip_port == lan9645x->npi)
+ return lan9645x_vlan_cpu_del(p, vid);
+
+ v = lan9645x_vlan_port_modify(p, vid, false, false);
+ v->portmask &= ~BIT(p->chip_port);
+ lan9645x_vlan_hw_wr(lan9645x, vid);
+ lan9645x_vlan_port_apply(p);
+
+ return 0;
+}
+
+void lan9645x_vlan_set_hostmode(struct lan9645x_port *p)
+{
+ p->vlan_aware = false;
+ lan9645x_vlan_port_apply(p);
+}
+
+int lan9645x_vlan_init(struct lan9645x *lan9645x)
+{
+ u32 all_phys_ports, all_ports;
+ u16 port, vid;
+ int err;
+
+ all_phys_ports = GENMASK(lan9645x->num_phys_ports - 1, 0);
+ all_ports = all_phys_ports | BIT(CPU_PORT);
+
+ /* Clear VLAN table, by default all ports are members of all VLANS */
+ lan_wr(ANA_VLANACCESS_VLAN_TBL_CMD_SET(VLANACCESS_CMD_INIT),
+ lan9645x, ANA_VLANACCESS);
+
+ err = lan9645x_vlan_wait_for_completion(lan9645x);
+ if (err) {
+ dev_err(lan9645x->dev, "Vlan clear table failed\n");
+ return err;
+ }
+
+ for (vid = 1; vid < VLAN_N_VID; vid++) {
+ err = lan9645x_vlan_hw_wr(lan9645x, vid);
+ if (err)
+ return err;
+ }
+
+ /* Set all the ports + cpu to be part of HOST_PVID and UNAWARE_PVID */
+ lan9645x->vlans[HOST_PVID].portmask = all_ports;
+ err = lan9645x_vlan_hw_wr(lan9645x, HOST_PVID);
+ if (err)
+ return err;
+
+ lan9645x->vlans[UNAWARE_PVID].portmask = all_ports;
+ err = lan9645x_vlan_hw_wr(lan9645x, UNAWARE_PVID);
+ if (err)
+ return err;
+
+ /* Configure the CPU port to be vlan aware */
+ lan_wr(ANA_VLAN_CFG_VLAN_VID_SET(UNAWARE_PVID) |
+ ANA_VLAN_CFG_VLAN_AWARE_ENA_SET(1) |
+ ANA_VLAN_CFG_VLAN_POP_CNT_SET(1),
+ lan9645x, ANA_VLAN_CFG(CPU_PORT));
+
+ /* Set vlan ingress filter mask to all ports */
+ lan_wr(all_ports, lan9645x, ANA_VLANMASK);
+
+ for (port = 0; port < lan9645x->num_phys_ports; port++) {
+ lan_wr(0, lan9645x, REW_PORT_VLAN_CFG(port));
+ lan_wr(0, lan9645x, REW_TAG_CFG(port));
+ }
+
+ return 0;
+}
--
2.52.0
^ permalink raw reply related
* [PATCH net-next v3 7/9] net: dsa: lan9645x: add mac table integration
From: Jens Emil Schulz Østergaard @ 2026-04-10 11:48 UTC (permalink / raw)
To: UNGLinuxDriver, Andrew Lunn, Vladimir Oltean, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
Rob Herring, Krzysztof Kozlowski, Conor Dooley, Woojung Huh,
Russell King, Steen Hegelund, Daniel Machon
Cc: linux-kernel, netdev, devicetree,
Jens Emil Schulz Østergaard
In-Reply-To: <20260410-dsa_lan9645x_switch_driver_base-v3-0-aadc8595306d@microchip.com>
Add MAC table support, and dsa fdb callback integration. The mactable is
keyed on (vid,mac) and each bucket has 4 slots. A mac table entry
typically points to a PGID index, the first 9 of which represent a front
port.
Mac table entries for L2 multicast will use a PGID containing a group
port mask. For IP multicast entries in the mac table a trick us used,
where the group port mask is packed into the MAC data, exploiting the
fact that the top bits are fixed, and that the number of switch ports is
small enough to fit in the redundant bits.
Therefore, we can avoid using sparse PGID resources for IP multicast
entries in the mac table.
Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com>
Signed-off-by: Jens Emil Schulz Østergaard <jensemil.schulzostergaard@microchip.com>
---
Changes in v3:
- avoid mac add/del dealloc when mac table writes fail
- add mact_lock to change ageing time
- dealloc all mac_entries on deinit
- dsa_dump returns mac table timeout error
Changes in v2:
- use a single lock for hw and sw
- remove unused row struct field and define
- remove list element INIT_LIST_HEAD
- consistent use of err vs ret
- remove mutex_lock in init
- use empty initializer { 0 } -> {}
- do not move fwd_domain_lock init to this unit
- add newline to dev_* log statements
---
drivers/net/dsa/microchip/lan9645x/Makefile | 1 +
drivers/net/dsa/microchip/lan9645x/lan9645x_mac.c | 416 +++++++++++++++++++++
drivers/net/dsa/microchip/lan9645x/lan9645x_main.c | 95 +++++
drivers/net/dsa/microchip/lan9645x/lan9645x_main.h | 46 +++
4 files changed, 558 insertions(+)
diff --git a/drivers/net/dsa/microchip/lan9645x/Makefile b/drivers/net/dsa/microchip/lan9645x/Makefile
index e049114b3563..70815edca5b9 100644
--- a/drivers/net/dsa/microchip/lan9645x/Makefile
+++ b/drivers/net/dsa/microchip/lan9645x/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_NET_DSA_MICROCHIP_LAN9645X) += mchp-lan9645x.o
mchp-lan9645x-objs := \
+ lan9645x_mac.o \
lan9645x_main.o \
lan9645x_npi.o \
lan9645x_phylink.o \
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_mac.c b/drivers/net/dsa/microchip/lan9645x/lan9645x_mac.c
new file mode 100644
index 000000000000..f516979225ae
--- /dev/null
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_mac.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2026 Microchip Technology Inc.
+ */
+
+#include "lan9645x_main.h"
+
+#define CMD_IDLE 0
+#define CMD_LEARN 1
+#define CMD_FORGET 2
+#define CMD_AGE 3
+#define CMD_GET_NEXT 4
+#define CMD_INIT 5
+#define CMD_READ 6
+#define CMD_WRITE 7
+#define CMD_SYNC_GET_NEXT 8
+
+static bool lan9645x_mact_entry_equal(struct lan9645x_mact_entry *entry,
+ const unsigned char *mac, u16 vid)
+{
+ /* The hardware table is keyed on (vid,mac) */
+ return entry->common.key.vid == vid &&
+ ether_addr_equal(mac, entry->common.key.mac);
+}
+
+static struct lan9645x_mact_entry *
+lan9645x_mact_entry_find(struct lan9645x *lan9645x, const unsigned char *mac,
+ u16 vid)
+{
+ struct lan9645x_mact_entry *entry;
+
+ lockdep_assert_held(&lan9645x->mact_lock);
+
+ list_for_each_entry(entry, &lan9645x->mac_entries, list)
+ if (lan9645x_mact_entry_equal(entry, mac, vid))
+ return entry;
+
+ return NULL;
+}
+
+static struct lan9645x_mact_entry *
+lan9645x_mact_entry_alloc(struct lan9645x *lan9645x, const unsigned char *mac,
+ u16 vid, u8 pgid, enum macaccess_entry_type type)
+{
+ struct lan9645x_mact_entry *entry;
+
+ entry = kzalloc_obj(*entry);
+ if (!entry)
+ return NULL;
+
+ ether_addr_copy(entry->common.key.mac, mac);
+ entry->common.key.vid = vid;
+ entry->common.pgid = pgid;
+ entry->common.type = type;
+
+ dev_dbg(lan9645x->dev,
+ "mac=%pM vid=%u pgid=%u type=%d\n",
+ entry->common.key.mac, entry->common.key.vid,
+ entry->common.pgid, entry->common.type);
+
+ return entry;
+}
+
+static void lan9645x_mact_entry_dealloc(struct lan9645x *lan9645x,
+ struct lan9645x_mact_entry *entry)
+{
+ if (!entry)
+ return;
+
+ dev_dbg(lan9645x->dev,
+ "mac=%pM vid=%u pgid=%u type=%d\n",
+ entry->common.key.mac, entry->common.key.vid,
+ entry->common.pgid, entry->common.type);
+
+ list_del(&entry->list);
+ kfree(entry);
+}
+
+static int lan9645x_mac_wait_for_completion(struct lan9645x *lan9645x,
+ u32 *maca)
+{
+ u32 val = 0;
+ int err;
+
+ lockdep_assert_held(&lan9645x->mact_lock);
+
+ err = lan9645x_rd_poll_timeout(lan9645x, ANA_MACACCESS, val,
+ ANA_MACACCESS_MAC_TABLE_CMD_GET(val) ==
+ CMD_IDLE);
+ if (err)
+ return err;
+
+ if (maca)
+ *maca = val;
+
+ return 0;
+}
+
+static void lan9645x_mact_parse(u32 machi, u32 maclo, u32 maca,
+ struct lan9645x_mact_common *rentry)
+{
+ u64 addr = ANA_MACHDATA_MACHDATA_GET(machi);
+
+ addr = addr << 32 | maclo;
+ u64_to_ether_addr(addr, rentry->key.mac);
+ rentry->key.vid = ANA_MACHDATA_VID_GET(machi);
+ rentry->pgid = ANA_MACACCESS_DEST_IDX_GET(maca);
+ rentry->type = ANA_MACACCESS_ENTRYTYPE_GET(maca);
+}
+
+static void lan9645x_mac_select(struct lan9645x *lan9645x,
+ const unsigned char *addr, u16 vid)
+{
+ u64 maddr = ether_addr_to_u64(addr);
+
+ lockdep_assert_held(&lan9645x->mact_lock);
+
+ lan_wr(ANA_MACHDATA_VID_SET(vid) |
+ ANA_MACHDATA_MACHDATA_SET(maddr >> 32),
+ lan9645x,
+ ANA_MACHDATA);
+
+ lan_wr(maddr & GENMASK(31, 0),
+ lan9645x,
+ ANA_MACLDATA);
+}
+
+static int __lan9645x_mact_forget(struct lan9645x *lan9645x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ lockdep_assert_held(&lan9645x->mact_lock);
+
+ lan9645x_mac_select(lan9645x, mac, vid);
+
+ lan_wr(ANA_MACACCESS_ENTRYTYPE_SET(type) |
+ ANA_MACACCESS_MAC_TABLE_CMD_SET(CMD_FORGET),
+ lan9645x,
+ ANA_MACACCESS);
+
+ return lan9645x_mac_wait_for_completion(lan9645x, NULL);
+}
+
+int lan9645x_mact_forget(struct lan9645x *lan9645x,
+ const unsigned char mac[ETH_ALEN], unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ int err;
+
+ mutex_lock(&lan9645x->mact_lock);
+ err = __lan9645x_mact_forget(lan9645x, mac, vid, type);
+ mutex_unlock(&lan9645x->mact_lock);
+
+ return err;
+}
+
+static bool lan9645x_mac_ports_use_cpu(const unsigned char *mac,
+ enum macaccess_entry_type type)
+{
+ u32 mc_ports;
+
+ switch (type) {
+ case ENTRYTYPE_MACV4:
+ mc_ports = (mac[1] << 8) | mac[2];
+ break;
+ case ENTRYTYPE_MACV6:
+ mc_ports = (mac[0] << 8) | mac[1];
+ break;
+ default:
+ return false;
+ }
+
+ return !!(mc_ports & BIT(CPU_PORT));
+}
+
+static int __lan9645x_mact_learn_cpu_copy(struct lan9645x *lan9645x, int port,
+ const unsigned char *addr, u16 vid,
+ enum macaccess_entry_type type,
+ bool cpu_copy)
+{
+ lockdep_assert_held(&lan9645x->mact_lock);
+
+ lan9645x_mac_select(lan9645x, addr, vid);
+
+ lan_wr(ANA_MACACCESS_VALID_SET(1) |
+ ANA_MACACCESS_DEST_IDX_SET(port) |
+ ANA_MACACCESS_MAC_CPU_COPY_SET(cpu_copy) |
+ ANA_MACACCESS_ENTRYTYPE_SET(type) |
+ ANA_MACACCESS_MAC_TABLE_CMD_SET(CMD_LEARN),
+ lan9645x, ANA_MACACCESS);
+
+ return lan9645x_mac_wait_for_completion(lan9645x, NULL);
+}
+
+static int __lan9645x_mact_learn(struct lan9645x *lan9645x, int port,
+ const unsigned char *addr, u16 vid,
+ enum macaccess_entry_type type)
+{
+ bool cpu_copy = lan9645x_mac_ports_use_cpu(addr, type);
+
+ return __lan9645x_mact_learn_cpu_copy(lan9645x, port, addr, vid, type,
+ cpu_copy);
+}
+
+int lan9645x_mact_learn(struct lan9645x *lan9645x, int port,
+ const unsigned char *addr, u16 vid,
+ enum macaccess_entry_type type)
+{
+ int err;
+
+ mutex_lock(&lan9645x->mact_lock);
+ err = __lan9645x_mact_learn(lan9645x, port, addr, vid, type);
+ mutex_unlock(&lan9645x->mact_lock);
+
+ return err;
+}
+
+int lan9645x_mact_flush(struct lan9645x *lan9645x, int port)
+{
+ int err;
+
+ mutex_lock(&lan9645x->mact_lock);
+ /* MAC table entries with dst index matching port are aged on scan. */
+ lan_wr(ANA_ANAGEFIL_PID_EN_SET(1) |
+ ANA_ANAGEFIL_PID_VAL_SET(port),
+ lan9645x, ANA_ANAGEFIL);
+
+ /* Flushing requires two scans. First sets AGE_FLAG=1, second removes
+ * entries with AGE_FLAG=1.
+ */
+ lan_wr(ANA_MACACCESS_MAC_TABLE_CMD_SET(CMD_AGE),
+ lan9645x,
+ ANA_MACACCESS);
+
+ err = lan9645x_mac_wait_for_completion(lan9645x, NULL);
+ if (err)
+ goto mact_unlock;
+
+ lan_wr(ANA_MACACCESS_MAC_TABLE_CMD_SET(CMD_AGE),
+ lan9645x,
+ ANA_MACACCESS);
+
+ err = lan9645x_mac_wait_for_completion(lan9645x, NULL);
+
+mact_unlock:
+ lan_wr(0, lan9645x, ANA_ANAGEFIL);
+ mutex_unlock(&lan9645x->mact_lock);
+ return err;
+}
+
+int lan9645x_mact_entry_add(struct lan9645x *lan9645x, int pgid,
+ const unsigned char *mac, u16 vid)
+{
+ struct lan9645x_mact_entry *entry;
+ int err;
+
+ mutex_lock(&lan9645x->mact_lock);
+
+ /* Users can not move (vid,mac) to a different port, without removing
+ * the original entry first. But we overwrite entry in HW, and update
+ * software pgid for good measure.
+ */
+ entry = lan9645x_mact_entry_find(lan9645x, mac, vid);
+ if (entry) {
+ err = __lan9645x_mact_learn(lan9645x, pgid, mac, vid,
+ ENTRYTYPE_LOCKED);
+ if (!err)
+ entry->common.pgid = pgid;
+ mutex_unlock(&lan9645x->mact_lock);
+ return err;
+ }
+
+ entry = lan9645x_mact_entry_alloc(lan9645x, mac, vid, pgid,
+ ENTRYTYPE_LOCKED);
+ if (!entry) {
+ mutex_unlock(&lan9645x->mact_lock);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&entry->list, &lan9645x->mac_entries);
+
+ err = __lan9645x_mact_learn(lan9645x, pgid, mac, vid, ENTRYTYPE_LOCKED);
+ if (err)
+ lan9645x_mact_entry_dealloc(lan9645x, entry);
+
+ mutex_unlock(&lan9645x->mact_lock);
+ return err;
+}
+
+int lan9645x_mact_entry_del(struct lan9645x *lan9645x, int pgid,
+ const unsigned char *mac, u16 vid)
+{
+ struct lan9645x_mact_entry *entry;
+ int err;
+
+ mutex_lock(&lan9645x->mact_lock);
+
+ entry = lan9645x_mact_entry_find(lan9645x, mac, vid);
+ if (!entry) {
+ mutex_unlock(&lan9645x->mact_lock);
+ return -ENOENT;
+ }
+
+ WARN_ON(entry->common.pgid != pgid);
+ err = __lan9645x_mact_forget(lan9645x, mac, vid, ENTRYTYPE_LOCKED);
+ if (!err)
+ lan9645x_mact_entry_dealloc(lan9645x, entry);
+
+ mutex_unlock(&lan9645x->mact_lock);
+ return err;
+}
+
+void lan9645x_mac_init(struct lan9645x *lan9645x)
+{
+ u32 val;
+
+ mutex_init(&lan9645x->mact_lock);
+ INIT_LIST_HEAD(&lan9645x->mac_entries);
+
+ /* Clear the MAC table */
+ lan_wr(ANA_MACACCESS_MAC_TABLE_CMD_SET(CMD_INIT),
+ lan9645x, ANA_MACACCESS);
+
+ if (lan9645x_rd_poll_timeout(lan9645x, ANA_MACACCESS, val,
+ ANA_MACACCESS_MAC_TABLE_CMD_GET(val) ==
+ CMD_IDLE))
+ dev_err(lan9645x->dev, "mac init timeout\n");
+}
+
+void lan9645x_mac_deinit(struct lan9645x *lan9645x)
+{
+ struct lan9645x_mact_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &lan9645x->mac_entries, list)
+ lan9645x_mact_entry_dealloc(lan9645x, entry);
+
+ mutex_destroy(&lan9645x->mact_lock);
+}
+
+int lan9645x_mact_dsa_dump(struct lan9645x *lan9645x, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct lan9645x_mact_entry entry = {};
+ u32 mach, macl, maca;
+ int err = 0;
+ u32 autoage;
+
+ entry.common.type = ENTRYTYPE_NORMAL;
+
+ mutex_lock(&lan9645x->mact_lock);
+
+ /* The aging filter works both for aging scans and GET_NEXT table scans.
+ * With it, the HW table iteration only stops at entries matching our
+ * filter. Since DSA calls us for each port on a table dump, this helps
+ * avoid unnecessary work.
+ *
+ * Disable automatic aging temporarily. First save current state.
+ */
+ autoage = lan_rd(lan9645x, ANA_AUTOAGE);
+
+ /* Disable aging */
+ lan_rmw(ANA_AUTOAGE_AGE_PERIOD_SET(0),
+ ANA_AUTOAGE_AGE_PERIOD,
+ lan9645x, ANA_AUTOAGE);
+
+ /* Setup filter on our port */
+ lan_wr(ANA_ANAGEFIL_PID_EN_SET(1) |
+ ANA_ANAGEFIL_PID_VAL_SET(port),
+ lan9645x, ANA_ANAGEFIL);
+
+ lan_wr(0, lan9645x, ANA_MACHDATA);
+ lan_wr(0, lan9645x, ANA_MACLDATA);
+
+ while (1) {
+ /* NOTE: we rely on mach, macl and type being set correctly in
+ * the registers from previous round, vis a vis the GET_NEXT
+ * semantics, so locking entire loop is important.
+ */
+ lan_wr(ANA_MACACCESS_MAC_TABLE_CMD_SET(CMD_GET_NEXT) |
+ ANA_MACACCESS_ENTRYTYPE_SET(entry.common.type),
+ lan9645x, ANA_MACACCESS);
+
+ err = lan9645x_mac_wait_for_completion(lan9645x, &maca);
+ if (err)
+ break;
+
+ if (ANA_MACACCESS_VALID_GET(maca) == 0)
+ break;
+
+ mach = lan_rd(lan9645x, ANA_MACHDATA);
+ macl = lan_rd(lan9645x, ANA_MACLDATA);
+
+ lan9645x_mact_parse(mach, macl, maca, &entry.common);
+
+ if (ANA_MACACCESS_DEST_IDX_GET(maca) == port &&
+ entry.common.type == ENTRYTYPE_NORMAL) {
+ if (entry.common.key.vid > VLAN_MAX)
+ entry.common.key.vid = 0;
+
+ err = cb(entry.common.key.mac, entry.common.key.vid,
+ false, data);
+ if (err)
+ break;
+ }
+ }
+
+ /* Remove aging filters and restore aging */
+ lan_wr(0, lan9645x, ANA_ANAGEFIL);
+ lan_rmw(ANA_AUTOAGE_AGE_PERIOD_SET(ANA_AUTOAGE_AGE_PERIOD_GET(autoage)),
+ ANA_AUTOAGE_AGE_PERIOD,
+ lan9645x, ANA_AUTOAGE);
+
+ mutex_unlock(&lan9645x->mact_lock);
+
+ return err;
+}
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c
index adbdf2007e9f..764f4d6c0571 100644
--- a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.c
@@ -71,6 +71,7 @@ static void lan9645x_teardown(struct dsa_switch *ds)
destroy_workqueue(lan9645x->owq);
lan9645x_npi_port_deinit(lan9645x, lan9645x->npi);
+ lan9645x_mac_deinit(lan9645x);
mutex_destroy(&lan9645x->fwd_domain_lock);
}
@@ -159,6 +160,7 @@ static int lan9645x_setup(struct dsa_switch *ds)
err = lan9645x_vlan_init(lan9645x);
if (err)
return err;
+ lan9645x_mac_init(lan9645x);
/* Link Aggregation Mode: NETDEV_LAG_HASH_L2 */
lan_wr(ANA_AGGR_CFG_AC_SMAC_ENA |
@@ -285,6 +287,8 @@ static int lan9645x_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
u32 age_secs = max(1, msecs / MSEC_PER_SEC / 2);
struct lan9645x *lan9645x = ds->priv;
+ mutex_lock(&lan9645x->mact_lock);
+
/* Entry is must suffer two aging scans before it is removed, so it is
* aged after 2*AGE_PERIOD, and the unit is in seconds.
* An age period of 0 disables automatic aging.
@@ -292,6 +296,8 @@ static int lan9645x_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
lan_rmw(ANA_AUTOAGE_AGE_PERIOD_SET(msecs ? age_secs : 0),
ANA_AUTOAGE_AGE_PERIOD,
lan9645x, ANA_AUTOAGE);
+
+ mutex_unlock(&lan9645x->mact_lock);
return 0;
}
@@ -591,6 +597,89 @@ static int lan9645x_port_vlan_del(struct dsa_switch *ds, int port,
return lan9645x_vlan_port_del_vlan(p, vlan->vid);
}
+static void lan9645x_port_fast_age(struct dsa_switch *ds, int port)
+{
+ lan9645x_mact_flush(ds->priv, port);
+}
+
+static int lan9645x_fdb_dump(struct dsa_switch *ds, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
+{
+ return lan9645x_mact_dsa_dump(ds->priv, port, cb, data);
+}
+
+static struct net_device *lan9645x_db2bridge(struct dsa_db db)
+{
+ switch (db.type) {
+ case DSA_DB_PORT:
+ case DSA_DB_LAG:
+ return NULL;
+ case DSA_DB_BRIDGE:
+ return db.bridge.dev;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static int lan9645x_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct net_device *br = lan9645x_db2bridge(db);
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct lan9645x *lan9645x = ds->priv;
+
+ if (IS_ERR(br))
+ return PTR_ERR(br);
+
+ if (dsa_port_is_cpu(dp) && !br &&
+ dsa_fdb_present_in_other_db(ds, port, addr, vid, db))
+ return 0;
+
+ if (!vid)
+ vid = lan9645x_vlan_unaware_pvid(!!br);
+
+ if (dsa_port_is_cpu(dp))
+ return lan9645x_mact_learn(lan9645x, PGID_CPU, addr, vid,
+ ENTRYTYPE_LOCKED);
+
+ return lan9645x_mact_entry_add(lan9645x, port, addr, vid);
+}
+
+static int lan9645x_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid,
+ struct dsa_db db)
+{
+ struct net_device *br = lan9645x_db2bridge(db);
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct lan9645x *lan9645x = ds->priv;
+ int err;
+
+ if (IS_ERR(br))
+ return PTR_ERR(br);
+
+ if (dsa_port_is_cpu(dp) && !br &&
+ dsa_fdb_present_in_other_db(ds, port, addr, vid, db))
+ return 0;
+
+ if (!vid)
+ vid = lan9645x_vlan_unaware_pvid(!!br);
+
+ if (dsa_port_is_cpu(dp))
+ return lan9645x_mact_forget(lan9645x, addr, vid,
+ ENTRYTYPE_LOCKED);
+
+ err = lan9645x_mact_entry_del(lan9645x, port, addr, vid);
+ if (err == -ENOENT) {
+ dev_dbg(lan9645x->dev,
+ "fdb not found port=%d addr=%pM vid=%u\n", port, addr,
+ vid);
+ return 0;
+ }
+
+ return err;
+}
+
static const struct dsa_switch_ops lan9645x_switch_ops = {
.get_tag_protocol = lan9645x_get_tag_protocol,
@@ -618,6 +707,12 @@ static const struct dsa_switch_ops lan9645x_switch_ops = {
.port_vlan_filtering = lan9645x_port_vlan_filtering,
.port_vlan_add = lan9645x_port_vlan_add,
.port_vlan_del = lan9645x_port_vlan_del,
+
+ /* MAC table integration */
+ .port_fast_age = lan9645x_port_fast_age,
+ .port_fdb_dump = lan9645x_fdb_dump,
+ .port_fdb_add = lan9645x_fdb_add,
+ .port_fdb_del = lan9645x_fdb_del,
};
static int lan9645x_request_target_regmaps(struct lan9645x *lan9645x)
diff --git a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h
index 3c6996e150e4..a5e64218d783 100644
--- a/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h
+++ b/drivers/net/dsa/microchip/lan9645x/lan9645x_main.h
@@ -162,6 +162,33 @@ struct lan9645x_vlan {
s_fwd_ena: 1;
};
+/* MAC table entry types.
+ * ENTRYTYPE_NORMAL is subject to aging.
+ * ENTRYTYPE_LOCKED is not subject to aging.
+ * ENTRYTYPE_MACv4 is not subject to aging. For IPv4 multicast.
+ * ENTRYTYPE_MACv6 is not subject to aging. For IPv6 multicast.
+ */
+enum macaccess_entry_type {
+ ENTRYTYPE_NORMAL = 0,
+ ENTRYTYPE_LOCKED,
+ ENTRYTYPE_MACV4,
+ ENTRYTYPE_MACV6,
+};
+
+struct lan9645x_mact_common {
+ struct lan9645x_mact_key {
+ u16 vid;
+ u8 mac[ETH_ALEN] __aligned(2);
+ } key;
+ u32 pgid: 6, /* 0-63 general purpose pgids. */
+ type: 2;
+};
+
+struct lan9645x_mact_entry {
+ struct lan9645x_mact_common common;
+ struct list_head list;
+};
+
struct lan9645x {
struct device *dev;
struct dsa_switch *ds;
@@ -185,6 +212,8 @@ struct lan9645x {
u16 bridge_mask; /* Mask for bridged ports */
u16 bridge_fwd_mask; /* Mask for forwarding bridged ports */
struct mutex fwd_domain_lock; /* lock forwarding configuration */
+ struct list_head mac_entries;
+ struct mutex mact_lock; /* lock mac table and mac_entries list */
/* VLAN entries */
struct lan9645x_vlan vlans[VLAN_N_VID];
@@ -377,4 +406,21 @@ int lan9645x_vlan_port_add_vlan(struct lan9645x_port *p, u16 vid, bool pvid,
int lan9645x_vlan_port_del_vlan(struct lan9645x_port *p, u16 vid);
void lan9645x_vlan_set_hostmode(struct lan9645x_port *p);
+/* MAC table: lan9645x_mac.c */
+int lan9645x_mact_flush(struct lan9645x *lan9645x, int port);
+int lan9645x_mact_learn(struct lan9645x *lan9645x, int port,
+ const unsigned char *addr, u16 vid,
+ enum macaccess_entry_type type);
+int lan9645x_mact_forget(struct lan9645x *lan9645x,
+ const unsigned char mac[ETH_ALEN], unsigned int vid,
+ enum macaccess_entry_type type);
+void lan9645x_mac_init(struct lan9645x *lan9645x);
+void lan9645x_mac_deinit(struct lan9645x *lan9645x);
+int lan9645x_mact_dsa_dump(struct lan9645x *lan9645x, int port,
+ dsa_fdb_dump_cb_t *cb, void *data);
+int lan9645x_mact_entry_del(struct lan9645x *lan9645x, int pgid,
+ const unsigned char *mac, u16 vid);
+int lan9645x_mact_entry_add(struct lan9645x *lan9645x, int pgid,
+ const unsigned char *mac, u16 vid);
+
#endif /* __LAN9645X_MAIN_H__ */
--
2.52.0
^ permalink raw reply related
* Re: [PATCH net-next v3 04/12] net: airoha: Rely on net_device pointer in HTB callbacks
From: Lorenzo Bianconi @ 2026-04-10 11:51 UTC (permalink / raw)
To: Jakub Kicinski
Cc: andrew+netdev, davem, edumazet, pabeni, robh, krzk+dt, conor+dt,
ansuelsmth, benjamin.larsson, linux-arm-kernel, linux-mediatek,
netdev, devicetree
In-Reply-To: <20260410025722.1809973-1-kuba@kernel.org>
[-- Attachment #1: Type: text/plain, Size: 2773 bytes --]
> This is an AI-generated review of your patch. The human sending this
> email says "For your consideration."
> ---
> net: airoha: Introduce WAN device flag
>
> This commit introduces a WAN flag to specify whether a device is used to
> transmit or receive WAN or LAN traffic. It enables specifying multiple LAN
> devices but limits the configuration to a single WAN device.
>
> > diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
> > index 5b0cd37b155e4..9988011dca53f 100644
> > --- a/drivers/net/ethernet/airoha/airoha_eth.c
> > +++ b/drivers/net/ethernet/airoha/airoha_eth.c
> > @@ -1796,36 +1796,77 @@ static int airoha_set_gdm2_loopback(struct airoha_gdm_dev *dev)
>
> [ ... ]
>
> > +static int airoha_dev_init(struct net_device *netdev)
> > +{
> > + struct airoha_gdm_dev *dev = netdev_priv(netdev);
> > + struct airoha_gdm_port *port = dev->port;
> >
> > switch (port->id) {
> > case AIROHA_GDM3_IDX:
> > - case AIROHA_GDM4_IDX:
> > - /* If GDM2 is active we can't enable loopback */
> > - if (!eth->ports[1]) {
> > - int err;
> > + case AIROHA_GDM4_IDX: {
> > + struct airoha_eth *eth = dev->eth;
> >
> > - err = airoha_set_gdm2_loopback(dev);
> > - if (err)
> > - return err;
> > - }
> > + if (eth->ports[1] || airoha_get_wan_gdm_dev(eth))
> > + break;
>
> If a board's Device Tree enables both GDM3 and GDM4, and GDM2 is not present,
> does this sequence permanently assign the WAN role to GDM3 since it is
> initialized first? Is there a Device Tree property or fallback configuration
> mechanism to allow users to specify GDM4 as the WAN port instead?
This issue will be fixed with following series that allows to dynamically
select LAN or WAN configuration at runtime.
>
> > + fallthrough;
> > + }
> > + case AIROHA_GDM2_IDX:
> > + /* GDM2 is always used as wan */
> > + dev->flags |= PRIV_FLAG_WAN;
> > break;
> > default:
> > break;
> > }
> >
> > - for (i = 0; i < eth->soc->num_ppe; i++)
> > - airoha_ppe_set_cpu_port(dev, i, airoha_get_fe_port(dev));
> > + airoha_dev_set_qdma(dev);
> > + airoha_set_macaddr(dev, netdev->dev_addr);
>
> Since the commit message notes it is possible to specify multiple LAN
> devices, does unconditionally calling airoha_set_macaddr() for each
> device overwrite the single shared hardware LAN MAC register?
>
> If so, could this cause only the last initialized LAN device to have its MAC
> correctly programmed into the hardware, leading to dropped unicast
> packets for the other LAN interfaces?
>
> > +
> > + if (!airoha_is_lan_gdm_dev(dev) &&
> > + (port->id == AIROHA_GDM3_IDX || port->id == AIROHA_GDM4_IDX)) {
> > + int err;
>
> [ ... ]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply
* Re: [PATCH net-next v3 04/12] net: airoha: Rely on net_device pointer in HTB callbacks
From: Lorenzo Bianconi @ 2026-04-10 11:52 UTC (permalink / raw)
To: Benjamin Larsson
Cc: Jakub Kicinski, andrew+netdev, davem, edumazet, pabeni, robh,
krzk+dt, conor+dt, ansuelsmth, linux-arm-kernel, linux-mediatek,
netdev, devicetree
In-Reply-To: <bd486b55-a8d7-4811-965a-aa1f3dc98e30@genexis.eu>
[-- Attachment #1: Type: text/plain, Size: 2378 bytes --]
> On 10/04/2026 04:57, Jakub Kicinski wrote:
> > This is an AI-generated review of your patch. The human sending this
> > email says "For your consideration."
> > ---
> > net: airoha: Introduce WAN device flag
> >
> > This commit introduces a WAN flag to specify whether a device is used to
> > transmit or receive WAN or LAN traffic. It enables specifying multiple LAN
> > devices but limits the configuration to a single WAN device.
> >
>
> [...]
>
> > > + fallthrough;
> > > + }
> > > + case AIROHA_GDM2_IDX:
> > > + /* GDM2 is always used as wan */
> > > + dev->flags |= PRIV_FLAG_WAN;
> > > break;
> > > default:
> > > break;
> > > }
> > > - for (i = 0; i < eth->soc->num_ppe; i++)
> > > - airoha_ppe_set_cpu_port(dev, i, airoha_get_fe_port(dev));
> > > + airoha_dev_set_qdma(dev);
> > > + airoha_set_macaddr(dev, netdev->dev_addr);
> >
> > Since the commit message notes it is possible to specify multiple LAN
> > devices, does unconditionally calling airoha_set_macaddr() for each
> > device overwrite the single shared hardware LAN MAC register?
> >
> > If so, could this cause only the last initialized LAN device to have its MAC
> > correctly programmed into the hardware, leading to dropped unicast
> > packets for the other LAN interfaces?
> >
> > > +
> > > + if (!airoha_is_lan_gdm_dev(dev) &&
> > > + (port->id == AIROHA_GDM3_IDX || port->id == AIROHA_GDM4_IDX)) {
> > > + int err;
> >
> > [ ... ]
>
> Hi, the frame engine can be configured with a range of wan mac addresses and
> a range of lan mac addresses via registers that set the top 24 bits and then
> 2 registers that form a range of the low 24 bits.
>
> Documentation says that packets that fall into this mac address range either
> on the lan side or the wan side are to be treated as layer 3 packages and if
> a packet is not then it will be handled as a layer 2 packet.
>
> The exact implication of this and if it actually matters is unknown. But
> traffic that comes in on an interface that is not matched by an acceleration
> flow is usually forwarded to the cpu for further processing.
The assumption here is the mac addresses are read from the SNAND or eMMC and
they all share the upper bits. I will fix the issue with a separated patch for
net tree.
Regards,
Lorenzo
>
> MvH
> Benjamin Larsson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply
* Re: [PATCH v2 net-next 00/15] net/sched: prepare RTNL removal from qdisc dumps
From: Eric Dumazet @ 2026-04-10 11:52 UTC (permalink / raw)
To: Jamal Hadi Salim
Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Jiri Pirko, Kuniyuki Iwashima, netdev, eric.dumazet
In-Reply-To: <CAM0EoMnPVcdHoy4BNH_sC9H65iu=sHWQcZ25r9_0qcfdjRaTKA@mail.gmail.com>
On Fri, Apr 10, 2026 at 4:49 AM Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>
> On Thu, Apr 9, 2026 at 5:49 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > We add annotations for data-races, so that most dump methods
> > can run in parallel with data path.
> >
> > Then change mq and mqprio to no longer acquire each children
> > qdisc spinlock.
> >
> > Next round of patches will wait for linux-7.2.
>
> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
>
> with a small caveat: Shouldnt patch #6 onwards go to net?
We are late in the cycle, I will handle stable backports later.
sashiko.dev spotted some issues, I will send a v3 soon addressing them.
https://sashiko.dev/#/patchset/20260409214914.3072827-1-edumazet%40google.com
Thanks!
pw-bot: cr
^ permalink raw reply
* Re: [PATCH net v2 1/1] af_unix: read UNIX_DIAG_VFS data under unix_state_lock
From: Christian Brauner @ 2026-04-10 12:11 UTC (permalink / raw)
To: Ren Wei
Cc: netdev, kuniyu, davem, edumazet, kuba, pabeni, horms, xemul,
yifanwucs, tomapufckgml, yuantan098, bird, enjou1224z,
wangjiexun2025
In-Reply-To: <20260407080015.1744197-1-n05ec@lzu.edu.cn>
On Tue, Apr 07, 2026 at 04:00:14PM +0800, Ren Wei wrote:
> From: Jiexun Wang <wangjiexun2025@gmail.com>
>
> Exact UNIX diag lookups hold a reference to the socket, but not to
> u->path. Meanwhile, unix_release_sock() clears u->path under
> unix_state_lock() and drops the path reference after unlocking.
>
> Read the inode and device numbers for UNIX_DIAG_VFS while holding
> unix_state_lock(), then emit the netlink attribute after dropping the
> lock.
>
> This keeps the VFS data stable while the reply is being built.
>
> Fixes: 5f7b0569460b ("unix_diag: Unix inode info NLA")
> Reported-by: Yifan Wu <yifanwucs@gmail.com>
> Reported-by: Juefei Pu <tomapufckgml@gmail.com>
> Co-developed-by: Yuan Tan <yuantan098@gmail.com>
> Signed-off-by: Yuan Tan <yuantan098@gmail.com>
> Suggested-by: Xin Liu <bird@lzu.edu.cn>
> Tested-by: Ren Wei <enjou1224z@gmail.com>
> Signed-off-by: Jiexun Wang <wangjiexun2025@gmail.com>
> Signed-off-by: Ren Wei <n05ec@lzu.edu.cn>
> ---
Seems good, thanks.
Reviewed-by: Christian Brauner <brauner@kernel.org>
^ permalink raw reply
* [patch 00/38] treewide: Cleanup LATCH, CLOCK_TICK_RATE and get_cycles() [ab]use
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
First of all sorry for the insanely big Cc list, but people can't make
their mind up whether they want to be Cc'ed on everything or not. So I'm
opting for the worst case to cater to the people who want to be Cc'ed on
everything and assume that the rest of you got used to it by now. I really
wanted this to be more confined but a treewide cleanup does not give a lot
of options.
That said, let me explain what this is about.
1) LATCH
The LATCH define goes back to Linux version 0.1 and has survived until
today for the very wrong reasons.
Initially it was based on the x86 PIT frequency and also steered the
timekeeping conversions.
With the arrival of non x86 architectures it got changed to be based
on CLOCK_TICK_RATE in order not to change core code which depended on
LATCH.
That all got meaningless when timers, timekeeping and scheduling
infrastructure got rewritten more than two decades ago.
But there is still a lonely survivor in arch/x86/kernel/apm_32.c
which dates back to Linux 1.3.46 (Dec. 1995)
Which causes the next historical gem
2) CLOCK_TICK_RATE
When Linux got expanded beyond i386 LATCH was made "flexible" by
basing it on CLOCK_TICK_RATE to adjust for other frequencies than the
i386 PIT frequency.
As LATCH this got meaningless long ago and for amusement value it got
copied into new architectures arriving way after it got obsolete for
no reason but with comments to the effect that it's meaningless
And of course it had a lonely survivor in arch/x86/kernel/setup.c
despite it being only an alias for PIT_TICK_RATE for a very long time.
3) get_cycles()
That was introduced in 2.1.133pre4 (Dec. 1998) to utilize the back
then brand new TSC. The introduction broke everything except i386 SMP
with a CPU having a TSC and got then fixed up within a couple of days
with empty stubs returning 0 and #ifdeffery for CONFIG_TSC before the
2.2.0 release.
It's amusing that the naming deliberately ignored that TSC is the
acronym for Time Stamp Counter and not Cycle Counter and rather went
for the underlying coincidence that the TSC was running at the same
fixed frequency as the CPU core.
That turned out to be interesting when CPUs started to have frequency
scaling as the TSC then turned into a variable frequency random number
generator.
A decade later CPU designers came to senses and made the TSC invariant
usually running at the nominal CPU frequency, which allowed to use it
for reliable timekeeping purposes.
Non x86 architectures implemented get_cycles() based on whatever
continuously running counter they had available in their CPUs. Some of
them actual CPU cycle counters, but many of them running at a fixed
frequency which was completely unrelated to CPU cycles.
Around 2004/5 the timekeeping subsystem was completely rewritten and
made generic along with the scheduling clock and other related
infrastructure. With that rewrite get_cycles() got mostly obsolete,
but despite it being on the todo list of quite some people it never
ceased to exist and it was just a conveniance vehicle to base other
things like the recent addition of random_get_entropy() on top with a
hideous #ifdef/#define macro maze.
The other remaining use cases are mostly debugging and testing
code. Especially the usage in performance test code is hillarious at
best. While the name get_cycles() suggests that it provides access to
CPU cycles the reality is that it provides a unspecified counter for
most systems, where a lot of architectures simply return 0 because
they either do not have such a counter or did not bother to implement
it at all.
So in fact get_cycles() should have been renamed to get_bogo_cycles()
long ago matching the BogoMIPS "impress your friends" printk which
still exists for historical amusement value.
But the real solution is to remove it all together instead of
proliferating the bogosity.
This is what this series does with the following steps:
1) Cleanup some header dependency hell which got unearthed by the
restructuring and went unnoticed so far. It's amazing how the kernel
build system magically "works". This affects not only x86, but the
main fallout was observed and fixed there. ARM64 and MIPS are at
least as bad as they silently rely on the accidental asm/timex.h
include through a variety of generic headers to make their
architecture code compile. See the changelog and patches specific to
those two.
2) Removal of LATCH
3) Removal of CLOCK_TICK_RATE
4) Consolidation of cycles_t which was a typedef in asm/timex.h
5) Cleanup of read_current_timer() which is only used for delay
calibration and has nothing to do with get_cycles()
6) Cleanup of get_cycles() usage in debug and test code
7) Decoupling of random_get_entropy() from get_cycles()
8) Removal of asm/timex.h includes except for architecture internal
usage where necessary.
At the end get_cycles() survives in a couple of architectures as a purely
architecture internal implementation detail.
This survives compile testing on all architectures except hexagon and nios2
because the current cross tools based on gcc15 do not offer a compiler for
them anymore. Boot tested on x86 and some qemu instances covering only a
few architectures.
The series applies on v7.0-rc7 and with very minor conflicts on -next. It
is also available from git:
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git getcycles-v1
Thanks,
tglx
---
arch/alpha/include/asm/timex.h | 33 --------
arch/arc/include/asm/timex.h | 15 ---
arch/arm/include/asm/timex.h | 16 ---
arch/arm64/include/asm/timex.h | 18 ----
arch/hexagon/include/asm/timex.h | 23 -----
arch/m68k/include/asm/timex.h | 42 ----------
arch/microblaze/include/asm/timex.h | 13 ---
arch/mips/include/asm/timex.h | 102 -------------------------
arch/sh/include/asm/timex.h | 24 -----
arch/sparc/include/asm/timex.h | 9 --
arch/sparc/include/asm/timex_32.h | 14 ---
arch/um/include/asm/timex.h | 9 --
b/Documentation/fb/udlfb.rst | 4
b/arch/Kconfig | 10 ++
b/arch/alpha/Kconfig | 1
b/arch/alpha/include/asm/random.h | 14 +++
b/arch/arm/Kconfig | 2
b/arch/arm/include/asm/delay.h | 1
b/arch/arm/include/asm/random.h | 14 +++
b/arch/arm/lib/delay.c | 14 +--
b/arch/arm/mach-omap1/Kconfig | 2
b/arch/arm64/Kconfig | 2
b/arch/arm64/include/asm/io.h | 5 -
b/arch/arm64/include/asm/ptp_vmclock.h | 12 ++
b/arch/arm64/include/asm/random.h | 11 ++
b/arch/arm64/include/asm/rqspinlock.h | 1
b/arch/arm64/kernel/time.c | 6 +
b/arch/arm64/kernel/topology.c | 1
b/arch/arm64/kernel/traps.c | 1
b/arch/arm64/kvm/emulate-nested.c | 1
b/arch/arm64/kvm/hyp/include/hyp/switch.h | 1
b/arch/arm64/lib/delay.c | 1
b/arch/hexagon/Kconfig | 1
b/arch/hexagon/kernel/time.c | 8 +
b/arch/loongarch/Kconfig | 1
b/arch/loongarch/include/asm/random.h | 15 +++
b/arch/loongarch/include/asm/timex.h | 2
b/arch/loongarch/kernel/relocate.c | 1
b/arch/loongarch/kernel/syscall.c | 1
b/arch/loongarch/lib/delay.c | 2
b/arch/m68k/Kconfig | 1
b/arch/m68k/amiga/config.c | 1
b/arch/m68k/include/asm/random.h | 14 +++
b/arch/m68k/kernel/time.c | 2
b/arch/mips/Kconfig | 1
b/arch/mips/generic/init.c | 1
b/arch/mips/include/asm/random.h | 7 +
b/arch/mips/kernel/pm-cps.c | 1
b/arch/mips/kernel/proc.c | 1
b/arch/mips/kernel/relocate.c | 2
b/arch/mips/kernel/time.c | 53 ++++++++++++
b/arch/mips/lib/dump_tlb.c | 1
b/arch/mips/mm/cache.c | 1
b/arch/nios2/Kconfig | 1
b/arch/nios2/include/asm/random.h | 14 +++
b/arch/nios2/include/asm/timex.h | 7 -
b/arch/nios2/kernel/time.c | 4
b/arch/openrisc/Kconfig | 2
b/arch/openrisc/include/asm/random.h | 12 ++
b/arch/openrisc/include/asm/timex.h | 10 --
b/arch/openrisc/lib/delay.c | 8 -
b/arch/parisc/Kconfig | 1
b/arch/parisc/include/asm/random.h | 12 ++
b/arch/parisc/include/asm/timex.h | 10 --
b/arch/parisc/kernel/processor.c | 1
b/arch/parisc/kernel/time.c | 1
b/arch/powerpc/Kconfig | 1
b/arch/powerpc/include/asm/random.h | 13 +++
b/arch/powerpc/include/asm/timex.h | 25 ------
b/arch/powerpc/platforms/cell/spufs/switch.c | 5 -
b/arch/riscv/Kconfig | 2
b/arch/riscv/include/asm/random.h | 25 ++++++
b/arch/riscv/include/asm/timex.h | 23 -----
b/arch/riscv/kernel/unaligned_access_speed.c | 1
b/arch/riscv/kvm/vcpu_timer.c | 1
b/arch/riscv/lib/delay.c | 8 +
b/arch/s390/Kconfig | 1
b/arch/s390/include/asm/random.h | 12 ++
b/arch/s390/include/asm/timex.h | 10 --
b/arch/s390/kernel/time.c | 1
b/arch/s390/kernel/vtime.c | 1
b/arch/sparc/Kconfig | 2
b/arch/sparc/include/asm/random.h | 15 +++
b/arch/sparc/include/asm/timex_64.h | 20 ----
b/arch/sparc/kernel/pcic.c | 1
b/arch/sparc/kernel/time_32.c | 1
b/arch/sparc/kernel/time_64.c | 4
b/arch/sparc/vdso/vclock_gettime.c | 1
b/arch/x86/Kconfig | 4
b/arch/x86/include/asm/iommu.h | 3
b/arch/x86/include/asm/msr.h | 5 -
b/arch/x86/include/asm/percpu.h | 5 -
b/arch/x86/include/asm/percpu_types.h | 17 ++++
b/arch/x86/include/asm/ptp_vmclock.h | 12 ++
b/arch/x86/include/asm/pvclock.h | 1
b/arch/x86/include/asm/random.h | 12 --
b/arch/x86/include/asm/smp.h | 2
b/arch/x86/include/asm/tsc.h | 11 --
b/arch/x86/include/asm/vdso/gettimeofday.h | 5 -
b/arch/x86/kernel/apm_32.c | 4
b/arch/x86/kernel/cpu/mce/core.c | 1
b/arch/x86/kernel/nmi.c | 1
b/arch/x86/kernel/setup.c | 2
b/arch/x86/kernel/smpboot.c | 1
b/arch/x86/kernel/tsc.c | 12 +-
b/arch/x86/lib/delay.c | 8 -
b/crypto/jitterentropy-kcapi.c | 1
b/crypto/tcrypt.c | 84 ++++++++++----------
b/drivers/iommu/intel/dmar.c | 4
b/drivers/iommu/intel/iommu.h | 8 +
b/drivers/irqchip/irq-apple-aic.c | 1
b/drivers/misc/sgi-gru/gruhandles.c | 20 +---
b/drivers/misc/sgi-gru/grukservices.c | 3
b/drivers/misc/sgi-gru/grutlbpurge.c | 5 -
b/drivers/net/arcnet/arc-rimi.c | 4
b/drivers/net/arcnet/arcdevice.h | 20 ----
b/drivers/net/arcnet/com20020.c | 6 -
b/drivers/net/arcnet/com90io.c | 6 -
b/drivers/net/arcnet/com90xx.c | 4
b/drivers/net/hamradio/baycom_epp.c | 51 ------------
b/drivers/net/wireless/ath/wil6210/debugfs.c | 2
b/drivers/net/wireless/ath/wil6210/txrx.c | 6 -
b/drivers/net/wireless/ath/wil6210/txrx_edma.c | 4
b/drivers/net/wireless/ath/wil6210/wil6210.h | 3
b/drivers/ptp/Kconfig | 6 -
b/drivers/ptp/ptp_vmclock.c | 6 -
b/drivers/video/fbdev/udlfb.c | 24 ++---
b/fs/ext4/mballoc.c | 4
b/include/asm-generic/Kbuild | 2
b/include/asm-generic/percpu_types.h | 20 ++++
b/include/linux/compiler_types.h | 1
b/include/linux/delay.h | 2
b/include/linux/jiffies.h | 3
b/include/linux/random.h | 18 ++++
b/include/linux/timex.h | 26 ------
b/include/linux/types.h | 6 +
b/init/calibrate.c | 19 ++--
b/kernel/kcsan/core.c | 3
b/kernel/kcsan/debugfs.c | 8 -
b/kernel/time/timer.c | 1
b/lib/interval_tree_test.c | 17 +---
b/lib/rbtree_test.c | 47 +++++------
b/lib/test_vmalloc.c | 10 +-
b/mm/kasan/sw_tags.c | 2
b/mm/slub.c | 37 +++++----
include/asm-generic/timex.h | 23 -----
146 files changed, 622 insertions(+), 796 deletions(-)
^ permalink raw reply
* [patch 01/38] percpu: Sanitize __percpu_qual include hell
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
Slapping __percpu_qual into the next available header is sloppy at best.
It's required by __percpu which is defined in compiler_types.h and that is
meant to be included without requiring a boatload of other headers so that
a struct or function declaration can contain a __percpu qualifier w/o
further prerequisites.
This implicit dependency on linux/percpu.h makes that impossible and causes
a major problem when trying to seperate headers.
Create asm/percpu_types.h and move it there. Include that from
compiler_types.h and the whole recursion problem goes away.
Signed-off-by: Thomas Gleixner <tglx@kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
---
arch/x86/include/asm/percpu.h | 5 -----
arch/x86/include/asm/percpu_types.h | 17 +++++++++++++++++
include/asm-generic/Kbuild | 1 +
include/asm-generic/percpu_types.h | 20 ++++++++++++++++++++
include/linux/compiler_types.h | 1 +
5 files changed, 39 insertions(+), 5 deletions(-)
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,12 +40,10 @@
#endif
#define __percpu_prefix
-#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg)
#else /* !CONFIG_CC_HAS_NAMED_AS: */
#define __percpu_prefix __force_percpu_prefix
-#define __percpu_seg_override
#endif /* CONFIG_CC_HAS_NAMED_AS */
@@ -82,7 +80,6 @@
#define __force_percpu_prefix
#define __percpu_prefix
-#define __percpu_seg_override
#define PER_CPU_VAR(var) (var)__percpu_rel
@@ -92,8 +89,6 @@
# define __my_cpu_type(var) typeof(var)
# define __my_cpu_ptr(ptr) (ptr)
# define __my_cpu_var(var) (var)
-
-# define __percpu_qual __percpu_seg_override
#else
# define __my_cpu_type(var) typeof(var) __percpu_seg_override
# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr)
--- /dev/null
+++ b/arch/x86/include/asm/percpu_types.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_PERCPU_TYPES_H
+#define _ASM_X86_PERCPU_TYPES_H
+
+#if defined(CONFIG_SMP) && defined(CONFIG_CC_HAS_NAMED_AS)
+#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg)
+#else /* !CONFIG_CC_HAS_NAMED_AS: */
+#define __percpu_seg_override
+#endif
+
+#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL)
+#define __percpu_qual __percpu_seg_override
+#endif
+
+#include <asm-generic/percpu_types.h>
+
+#endif
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -44,6 +44,7 @@ mandatory-y += module.lds.h
mandatory-y += msi.h
mandatory-y += pci.h
mandatory-y += percpu.h
+mandatory-y += percpu_types.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
mandatory-y += rqspinlock.h
--- /dev/null
+++ b/include/asm-generic/percpu_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_PERCPU_TYPES_H_
+#define _ASM_GENERIC_PERCPU_TYPES_H_
+
+#ifndef __ASSEMBLER__
+/*
+ * __percpu_qual is the qualifier for the percpu named address space.
+ *
+ * Most arches use generic named address space for percpu variables but
+ * some arches define percpu variables in different named address space
+ * (on the x86 arch, percpu variable may be declared as being relative
+ * to the %fs or %gs segments using __seg_fs or __seg_gs named address
+ * space qualifier).
+ */
+#ifndef __percpu_qual
+# define __percpu_qual
+#endif
+
+#endif /* __ASSEMBLER__ */
+#endif /* _ASM_GENERIC_PERCPU_TYPES_H_ */
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -41,6 +41,7 @@
# define BTF_TYPE_TAG(value) /* nothing */
#endif
+#include <asm/percpu_types.h>
#include <linux/compiler-context-analysis.h>
/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */
^ permalink raw reply
* [patch 02/38] x86: Cleanup include recursion hell
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
Including a random architecture specific header which requires global
headers just to avoid including that header at the two usage sites is
really beyond lazy and tasteless. Including global headers just to get the
__percpu macro from linux/compiler_types.h falls into the same category.
Remove the linux/percpu.h and asm/cpumask.h includes from msr.h and smp.h
and fix the resulting fallout by a simple forward struct declaration and by
including the x86 specific asm/cpumask.h header where it is actually
required.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/x86/include/asm/msr.h | 5 +++--
arch/x86/include/asm/pvclock.h | 1 +
arch/x86/include/asm/smp.h | 2 --
arch/x86/include/asm/vdso/gettimeofday.h | 5 ++---
arch/x86/kernel/cpu/mce/core.c | 1 +
arch/x86/kernel/nmi.c | 1 +
arch/x86/kernel/smpboot.c | 1 +
7 files changed, 9 insertions(+), 7 deletions(-)
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -8,12 +8,11 @@
#include <asm/asm.h>
#include <asm/errno.h>
-#include <asm/cpumask.h>
#include <uapi/asm/msr.h>
#include <asm/shared/msr.h>
+#include <linux/compiler_types.h>
#include <linux/types.h>
-#include <linux/percpu.h>
struct msr_info {
u32 msr_no;
@@ -256,6 +255,8 @@ int msr_set_bit(u32 msr, u8 bit);
int msr_clear_bit(u32 msr, u8 bit);
#ifdef CONFIG_SMP
+struct cpumask;
+
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
int rdmsrq_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H
+#include <asm/barrier.h>
#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -5,8 +5,6 @@
#include <linux/cpumask.h>
#include <linux/thread_info.h>
-#include <asm/cpumask.h>
-
DECLARE_PER_CPU_CACHE_HOT(int, cpu_number);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -11,13 +11,12 @@
#define __ASM_VDSO_GETTIMEOFDAY_H
#ifndef __ASSEMBLER__
-
+#include <clocksource/hyperv_timer.h>
#include <uapi/linux/time.h>
+
#include <asm/vgtod.h>
#include <asm/unistd.h>
-#include <asm/msr.h>
#include <asm/pvclock.h>
-#include <clocksource/hyperv_timer.h>
#include <asm/vdso/sys_call.h>
#define VDSO_HAS_TIME 1
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -48,6 +48,7 @@
#include <linux/vmcore_info.h>
#include <asm/fred.h>
+#include <asm/cpumask.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/traps.h>
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,6 +26,7 @@
#include <linux/sched/clock.h>
#include <linux/kvm_types.h>
+#include <asm/cpumask.h>
#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -70,6 +70,7 @@
#include <asm/irq.h>
#include <asm/realmode.h>
#include <asm/cpu.h>
+#include <asm/cpumask.h>
#include <asm/numa.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
^ permalink raw reply
* [patch 03/38] x86/apm: Remove last LATCH usage
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
LATCH is a historical leftover and has been replaced with PIT_LATCH in all
other places a decade ago. Replace the last holdout and remove the
definition from jiffies.h.
This allows to remove the otherwise unused CLOCK_TICK_RATE define in the
next step.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/x86/kernel/apm_32.c | 4 ++--
include/linux/jiffies.h | 3 ---
2 files changed, 2 insertions(+), 5 deletions(-)
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1196,9 +1196,9 @@ static void reinit_timer(void)
/* set the clock to HZ */
outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
udelay(10);
- outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+ outb_p(PIT_LATCH & 0xff, PIT_CH0); /* LSB */
udelay(10);
- outb_p(LATCH >> 8, PIT_CH0); /* MSB */
+ outb_p(PIT_LATCH >> 8, PIT_CH0); /* MSB */
udelay(10);
raw_spin_unlock_irqrestore(&i8253_lock, flags);
#endif
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -56,9 +56,6 @@
#define SH_DIV(NOM,DEN,LSH) ( (((NOM) / (DEN)) << (LSH)) \
+ ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN))
-/* LATCH is used in the interval timer and ftape setup. */
-#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
-
extern void register_refined_jiffies(long clock_tick_rate);
/* TICK_USEC is the time between ticks in usec */
^ permalink raw reply
* [patch 04/38] x86: Use PIT_TICK_RATE instead of CLOCK_TICK_RATE
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
CLOCK_TICK_RATE is only used in x86 but defined all over the tree for no
reason with comments that it's scheduled for removal for more than a
decade.
Use PIT_TICK_RATE for registering refined jiffies to get rid of the last
dependency.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/x86/kernel/setup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1268,7 +1268,7 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();
- register_refined_jiffies(CLOCK_TICK_RATE);
+ register_refined_jiffies(PIT_TICK_RATE);
#ifdef CONFIG_EFI
if (efi_enabled(EFI_BOOT))
^ permalink raw reply
* [patch 05/38] treewide: Remove CLOCK_TICK_RATE
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
This has been scheduled for removal more than a decade ago and the comments
related to it have been dutifully ignored. The last dependencies are gone.
Remove it along with various now empty asm/timex.h files.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/alpha/include/asm/timex.h | 4 ----
arch/arc/include/asm/timex.h | 15 ---------------
arch/arm/mach-omap1/Kconfig | 2 +-
arch/hexagon/include/asm/timex.h | 3 ---
arch/m68k/include/asm/timex.h | 15 ---------------
arch/microblaze/include/asm/timex.h | 13 -------------
arch/mips/include/asm/timex.h | 8 --------
arch/openrisc/include/asm/timex.h | 3 ---
arch/parisc/include/asm/timex.h | 2 --
arch/powerpc/include/asm/timex.h | 2 --
arch/s390/include/asm/timex.h | 2 --
arch/sh/include/asm/timex.h | 24 ------------------------
arch/sparc/include/asm/timex.h | 2 +-
arch/sparc/include/asm/timex_32.h | 14 --------------
arch/sparc/include/asm/timex_64.h | 2 --
arch/um/include/asm/timex.h | 9 ---------
arch/x86/include/asm/timex.h | 3 ---
17 files changed, 2 insertions(+), 121 deletions(-)
--- a/arch/alpha/include/asm/timex.h
+++ b/arch/alpha/include/asm/timex.h
@@ -7,10 +7,6 @@
#ifndef _ASMALPHA_TIMEX_H
#define _ASMALPHA_TIMEX_H
-/* With only one or two oddballs, we use the RTC as the ticker, selecting
- the 32.768kHz reference clock, which nicely divides down to our HZ. */
-#define CLOCK_TICK_RATE 32768
-
/*
* Standard way to access the cycle counter.
* Currently only used on SMP for scheduling.
--- a/arch/arc/include/asm/timex.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_ARC_TIMEX_H
-#define _ASM_ARC_TIMEX_H
-
-#define CLOCK_TICK_RATE 80000000 /* slated to be removed */
-
-#include <asm-generic/timex.h>
-
-/* XXX: get_cycles() to be implemented with RTSC insn */
-
-#endif /* _ASM_ARC_TIMEX_H */
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -74,7 +74,7 @@ config OMAP_32K_TIMER
currently only available for OMAP16XX, 24XX, 34XX, OMAP4/5 and DRA7XX.
On OMAP2PLUS this value is only used for CONFIG_HZ and
- CLOCK_TICK_RATE compile time calculation.
+ timer frequency compile time calculation.
The actual timer selection is done in the board file
through the (DT_)MACHINE_START structure.
--- a/arch/hexagon/include/asm/timex.h
+++ b/arch/hexagon/include/asm/timex.h
@@ -9,9 +9,6 @@
#include <asm-generic/timex.h>
#include <asm/hexagon_vm.h>
-/* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */
-#define CLOCK_TICK_RATE 19200
-
#define ARCH_HAS_READ_CURRENT_TIMER
static inline int read_current_timer(unsigned long *timer_val)
--- a/arch/m68k/include/asm/timex.h
+++ b/arch/m68k/include/asm/timex.h
@@ -7,21 +7,6 @@
#ifndef _ASMm68K_TIMEX_H
#define _ASMm68K_TIMEX_H
-#ifdef CONFIG_COLDFIRE
-/*
- * CLOCK_TICK_RATE should give the underlying frequency of the tick timer
- * to make ntp work best. For Coldfires, that's the main clock.
- */
-#include <asm/coldfire.h>
-#define CLOCK_TICK_RATE MCF_CLK
-#else
-/*
- * This default CLOCK_TICK_RATE is probably wrong for many 68k boards
- * Users of those boards will need to check and modify accordingly
- */
-#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-#endif
-
typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
--- a/arch/microblaze/include/asm/timex.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- */
-
-#ifndef _ASM_MICROBLAZE_TIMEX_H
-#define _ASM_MICROBLAZE_TIMEX_H
-
-#include <asm-generic/timex.h>
-
-#define CLOCK_TICK_RATE 1000 /* Timer input freq. */
-
-#endif /* _ASM_TIMEX_H */
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -19,14 +19,6 @@
#include <asm/cpu-type.h>
/*
- * This is the clock rate of the i8253 PIT. A MIPS system may not have
- * a PIT by the symbol is used all over the kernel including some APIs.
- * So keeping it defined to the number for the PIT is the only sane thing
- * for now.
- */
-#define CLOCK_TICK_RATE 1193182
-
-/*
* Standard way to access the cycle counter.
* Currently only used on SMP for scheduling.
*
--- a/arch/openrisc/include/asm/timex.h
+++ b/arch/openrisc/include/asm/timex.h
@@ -25,9 +25,6 @@ static inline cycles_t get_cycles(void)
}
#define get_cycles get_cycles
-/* This isn't really used any more */
-#define CLOCK_TICK_RATE 1000
-
#define ARCH_HAS_READ_CURRENT_TIMER
#endif
--- a/arch/parisc/include/asm/timex.h
+++ b/arch/parisc/include/asm/timex.h
@@ -9,8 +9,6 @@
#include <asm/special_insns.h>
-#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-
typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -11,8 +11,6 @@
#include <asm/cputable.h>
#include <asm/vdso/timebase.h>
-#define CLOCK_TICK_RATE 1024000 /* Underlying HZ */
-
typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -177,8 +177,6 @@ static inline void local_tick_enable(uns
set_clock_comparator(get_lowcore()->clock_comparator);
}
-#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-
typedef unsigned long cycles_t;
static __always_inline unsigned long get_tod_clock(void)
--- a/arch/sh/include/asm/timex.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/asm-sh/timex.h
- *
- * sh architecture timex specifications
- */
-#ifndef __ASM_SH_TIMEX_H
-#define __ASM_SH_TIMEX_H
-
-/*
- * Only parts using the legacy CPG code for their clock framework
- * implementation need to define their own Pclk value. If provided, this
- * can be used for accurately setting CLOCK_TICK_RATE, otherwise we
- * simply fall back on the i8253 PIT value.
- */
-#ifdef CONFIG_SH_PCLK_FREQ
-#define CLOCK_TICK_RATE (CONFIG_SH_PCLK_FREQ / 4) /* Underlying HZ */
-#else
-#define CLOCK_TICK_RATE 1193180
-#endif
-
-#include <asm-generic/timex.h>
-
-#endif /* __ASM_SH_TIMEX_H */
--- a/arch/sparc/include/asm/timex.h
+++ b/arch/sparc/include/asm/timex.h
@@ -4,6 +4,6 @@
#if defined(__sparc__) && defined(__arch64__)
#include <asm/timex_64.h>
#else
-#include <asm/timex_32.h>
+#include <asm-generic/timex.h>
#endif
#endif
--- a/arch/sparc/include/asm/timex_32.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/asm/timex.h
- *
- * sparc architecture timex specifications
- */
-#ifndef _ASMsparc_TIMEX_H
-#define _ASMsparc_TIMEX_H
-
-#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-
-#include <asm-generic/timex.h>
-
-#endif
--- a/arch/sparc/include/asm/timex_64.h
+++ b/arch/sparc/include/asm/timex_64.h
@@ -9,8 +9,6 @@
#include <asm/timer.h>
-#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
-
/* Getting on the cycle counter on sparc64. */
typedef unsigned long cycles_t;
#define get_cycles() tick_ops->get_tick()
--- a/arch/um/include/asm/timex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_TIMEX_H
-#define __UM_TIMEX_H
-
-#define CLOCK_TICK_RATE (HZ)
-
-#include <asm-generic/timex.h>
-
-#endif
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -14,9 +14,6 @@ static inline unsigned long random_get_e
}
#define random_get_entropy random_get_entropy
-/* Assume we use the PIT time source for the clock tick */
-#define CLOCK_TICK_RATE PIT_TICK_RATE
-
#define ARCH_HAS_READ_CURRENT_TIMER
#endif /* _ASM_X86_TIMEX_H */
^ permalink raw reply
* [patch 06/38] calibrate: Rework delay timer calibration
From: Thomas Gleixner @ 2026-04-10 12:18 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
The header define in asm/timex,h and the naming of the function to read the
delay timer are confusing at best.
Convert it to a config switch selected by the archictures, which provide
the functionality, and rename the function to delay_read_timer(), which
makes the purpose clear. Move the declaration to linux/delay.h where it
belongs.
Remove the resulting empty asm/timex.h files as well.
No functional change.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
---
arch/Kconfig | 3 +++
arch/arm/Kconfig | 1 +
arch/arm/include/asm/delay.h | 1 -
arch/arm/include/asm/timex.h | 5 ++++-
arch/arm/lib/delay.c | 10 ++++------
arch/hexagon/Kconfig | 1 +
arch/hexagon/include/asm/timex.h | 20 --------------------
arch/hexagon/kernel/time.c | 8 +++++++-
arch/openrisc/Kconfig | 1 +
arch/openrisc/include/asm/timex.h | 2 --
arch/openrisc/lib/delay.c | 9 ++++-----
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/timex.h | 8 --------
arch/riscv/lib/delay.c | 7 ++++++-
arch/sparc/Kconfig | 1 +
arch/sparc/include/asm/timex_64.h | 2 --
arch/sparc/kernel/time_64.c | 4 ++--
arch/x86/Kconfig | 1 +
arch/x86/include/asm/timex.h | 2 --
arch/x86/lib/delay.c | 8 +++-----
include/asm-generic/timex.h | 7 -------
include/linux/delay.h | 2 ++
include/linux/timex.h | 2 --
init/calibrate.c | 19 +++++++++----------
24 files changed, 50 insertions(+), 75 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -363,6 +363,9 @@ config ARCH_HAS_DMA_CLEAR_UNCACHED
config ARCH_HAS_CPU_FINALIZE_INIT
bool
+config ARCH_HAS_DELAY_TIMER
+ bool
+
# The architecture has a per-task state that includes the mm's PASID
config ARCH_HAS_CPU_PASID
bool
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -11,6 +11,7 @@ config ARM
select ARCH_HAS_CPU_FINALIZE_INIT if MMU
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
+ select ARCH_HAS_DELAY_TIMER
select ARCH_HAS_DMA_ALLOC if MMU
select ARCH_HAS_DMA_OPS
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -91,7 +91,6 @@ extern void __loop_udelay(unsigned long
extern void __loop_const_udelay(unsigned long);
/* Delay-loop timer registration. */
-#define ARCH_HAS_READ_CURRENT_TIMER
extern void register_current_timer_delay(const struct delay_timer *timer);
#endif /* __ASSEMBLY__ */
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -10,7 +10,10 @@
#define _ASMARM_TIMEX_H
typedef unsigned long cycles_t;
-#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+// Temporary workaround
+bool delay_read_timer(unsigned long *t);
+
+#define get_cycles() ({ cycles_t c; delay_read_timer(&c) ? 0 : c; })
#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback())
#endif
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/timex.h>
/*
* Default to the loop-based delay implementation.
@@ -27,15 +26,14 @@ static const struct delay_timer *delay_t
static bool delay_calibrated;
static u64 delay_res;
-int read_current_timer(unsigned long *timer_val)
+bool delay_read_timer(unsigned long *timer_val)
{
if (!delay_timer)
- return -ENXIO;
-
+ return false;
*timer_val = delay_timer->read_current_timer();
- return 0;
+ return true;
}
-EXPORT_SYMBOL_GPL(read_current_timer);
+EXPORT_SYMBOL_GPL(delay_read_timer);
static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -5,6 +5,7 @@ comment "Linux Kernel Configuration for
config HEXAGON
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_DELAY_TIMER
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_PREEMPT
select ARCH_WANT_FRAME_POINTERS
--- a/arch/hexagon/include/asm/timex.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#ifndef _ASM_TIMEX_H
-#define _ASM_TIMEX_H
-
-#include <asm-generic/timex.h>
-#include <asm/hexagon_vm.h>
-
-#define ARCH_HAS_READ_CURRENT_TIMER
-
-static inline int read_current_timer(unsigned long *timer_val)
-{
- *timer_val = __vmgettime();
- return 0;
-}
-
-#endif
--- a/arch/hexagon/kernel/time.c
+++ b/arch/hexagon/kernel/time.c
@@ -6,6 +6,7 @@
*/
#include <linux/init.h>
+#include <linux/delay.h>
#include <linux/clockchips.h>
#include <linux/clocksource.h>
#include <linux/interrupt.h>
@@ -17,7 +18,6 @@
#include <linux/of_irq.h>
#include <linux/module.h>
-#include <asm/delay.h>
#include <asm/hexagon_vm.h>
#include <asm/time.h>
@@ -231,3 +231,9 @@ void __udelay(unsigned long usecs)
cpu_relax(); /* not sure how this improves readability */
}
EXPORT_SYMBOL(__udelay);
+
+bool delay_read_timer(unsigned long *timer_val)
+{
+ *timer_val = __vmgettime();
+ return true;
+}
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -7,6 +7,7 @@
config OPENRISC
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_DELAY_TIMER
select ARCH_HAS_DMA_SET_UNCACHED
select ARCH_HAS_DMA_CLEAR_UNCACHED
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
--- a/arch/openrisc/include/asm/timex.h
+++ b/arch/openrisc/include/asm/timex.h
@@ -25,6 +25,4 @@ static inline cycles_t get_cycles(void)
}
#define get_cycles get_cycles
-#define ARCH_HAS_READ_CURRENT_TIMER
-
#endif
--- a/arch/openrisc/lib/delay.c
+++ b/arch/openrisc/lib/delay.c
@@ -13,18 +13,17 @@
*/
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
-#include <linux/timex.h>
+
#include <asm/param.h>
-#include <asm/delay.h>
-#include <asm/timex.h>
#include <asm/processor.h>
-int read_current_timer(unsigned long *timer_value)
+bool delay_read_timer(unsigned long *timer_value)
{
*timer_value = get_cycles();
- return 0;
+ return true;
}
void __delay(unsigned long cycles)
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -29,6 +29,7 @@ config RISCV
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
+ select ARCH_HAS_DELAY_TIMER
select ARCH_HAS_ELF_CORE_EFLAGS if BINFMT_ELF && ELF_CORE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -80,12 +80,4 @@ static inline u64 get_cycles64(void)
return ((u64)hi << 32) | lo;
}
#endif /* CONFIG_64BIT */
-
-#define ARCH_HAS_READ_CURRENT_TIMER
-static inline int read_current_timer(unsigned long *timer_val)
-{
- *timer_val = get_cycles();
- return 0;
-}
-
#endif /* _ASM_RISCV_TIMEX_H */
--- a/arch/riscv/lib/delay.c
+++ b/arch/riscv/lib/delay.c
@@ -6,7 +6,6 @@
#include <linux/delay.h>
#include <linux/math.h>
#include <linux/param.h>
-#include <linux/timex.h>
#include <linux/types.h>
#include <linux/export.h>
@@ -109,3 +108,9 @@ void ndelay(unsigned long nsecs)
__delay(ncycles >> NDELAY_SHIFT);
}
EXPORT_SYMBOL(ndelay);
+
+bool delay_read_timer(unsigned long *timer_val)
+{
+ *timer_val = get_cycles();
+ return true;
+}
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -70,6 +70,7 @@ config SPARC32
config SPARC64
def_bool 64BIT
select ALTERNATE_USER_ADDRESS_SPACE
+ select ARCH_HAS_DELAY_TIMER
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_KRETPROBES
--- a/arch/sparc/include/asm/timex_64.h
+++ b/arch/sparc/include/asm/timex_64.h
@@ -13,6 +13,4 @@
typedef unsigned long cycles_t;
#define get_cycles() tick_ops->get_tick()
-#define ARCH_HAS_READ_CURRENT_TIMER
-
#endif
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -894,8 +894,8 @@ unsigned long long sched_clock(void)
return ((get_tick() * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT) - offset;
}
-int read_current_timer(unsigned long *timer_val)
+bool delay_read_timer(unsigned long *timer_val)
{
*timer_val = get_tick();
- return 0;
+ return true;
}
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
+ select ARCH_HAS_DELAY_TIMER
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
select ARCH_HAS_EARLY_DEBUG if KGDB
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -14,6 +14,4 @@ static inline unsigned long random_get_e
}
#define random_get_entropy random_get_entropy
-#define ARCH_HAS_READ_CURRENT_TIMER
-
#endif /* _ASM_X86_TIMEX_H */
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -14,12 +14,10 @@
#include <linux/export.h>
#include <linux/sched.h>
-#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
#include <asm/processor.h>
-#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>
@@ -189,13 +187,13 @@ void use_mwaitx_delay(void)
delay_fn = delay_halt;
}
-int read_current_timer(unsigned long *timer_val)
+bool delay_read_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
*timer_val = rdtsc();
- return 0;
+ return true;
}
- return -1;
+ return false;
}
void __delay(unsigned long loops)
--- a/include/asm-generic/timex.h
+++ b/include/asm-generic/timex.h
@@ -13,11 +13,4 @@ static inline cycles_t get_cycles(void)
}
#endif
-/*
- * Architectures are encouraged to implement read_current_timer
- * and define this in order to avoid the expensive delay loop
- * calibration during boot.
- */
-#undef ARCH_HAS_READ_CURRENT_TIMER
-
#endif /* __ASM_GENERIC_TIMEX_H */
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -17,6 +17,8 @@ extern unsigned long loops_per_jiffy;
#include <asm/delay.h>
+bool delay_read_timer(unsigned long *t);
+
/*
* Using udelay() for intervals greater than a few milliseconds can
* risk overflow for high loops_per_jiffy (high bogomips) machines. The
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -156,8 +156,6 @@ extern int do_clock_adjtime(const clocki
extern void hardpps(const struct timespec64 *, const struct timespec64 *);
-int read_current_timer(unsigned long *timer_val);
-
/* The clock frequency of the i8253/i8254 PIT */
#define PIT_TICK_RATE 1193182ul
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -13,7 +13,6 @@
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/stddef.h>
-#include <linux/timex.h>
unsigned long lpj_fine;
unsigned long preset_lpj;
@@ -25,9 +24,9 @@ static int __init lpj_setup(char *str)
__setup("lpj=", lpj_setup);
-#ifdef ARCH_HAS_READ_CURRENT_TIMER
+#ifdef CONFIG_ARCH_HAS_DELAY_TIMER
-/* This routine uses the read_current_timer() routine and gets the
+/* This routine uses the delay_read_timer() routine and gets the
* loops per jiffy directly, instead of guessing it using delay().
* Also, this code tries to handle non-maskable asynchronous events
* (like SMIs)
@@ -48,13 +47,13 @@ static unsigned long calibrate_delay_dir
int min = -1;
int i;
- if (read_current_timer(&pre_start) < 0 )
+ if (!delay_read_timer(&pre_start))
return 0;
/*
* A simple loop like
* while ( jiffies < start_jiffies+1)
- * start = read_current_timer();
+ * start = delay_read_timer();
* will not do. As we don't really know whether jiffy switch
* happened first or timer_value was read first. And some asynchronous
* event can happen between these two events introducing errors in lpj.
@@ -72,22 +71,22 @@ static unsigned long calibrate_delay_dir
for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
pre_start = 0;
- read_current_timer(&start);
+ delay_read_timer(&start);
start_jiffies = jiffies;
while (time_before_eq(jiffies, start_jiffies + 1)) {
pre_start = start;
- read_current_timer(&start);
+ delay_read_timer(&start);
}
- read_current_timer(&post_start);
+ delay_read_timer(&post_start);
pre_end = 0;
end = post_start;
while (time_before_eq(jiffies, start_jiffies + 1 +
DELAY_CALIBRATION_TICKS)) {
pre_end = end;
- read_current_timer(&end);
+ delay_read_timer(&end);
}
- read_current_timer(&post_end);
+ delay_read_timer(&post_end);
timer_rate_max = (post_end - pre_start) /
DELAY_CALIBRATION_TICKS;
^ permalink raw reply
* [patch 07/38] treewide: Consolidate cycles_t
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
Most architectures define cycles_t as unsigned long execpt:
- x86 requires it to be 64-bit independent of the 32-bit/64-bit build.
- parisc and mips define it as unsigned int
parisc has no real reason to do so as there are only a few usage sites
which either expand it to a 64-bit value or utilize only the lower
32bits.
mips has no real requirement either.
Move the typedef to types.h and provide a config switch to enforce the
64-bit type for x86.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/Kconfig | 4 ++++
arch/alpha/include/asm/timex.h | 3 ---
arch/arm/include/asm/timex.h | 1 -
arch/loongarch/include/asm/timex.h | 2 --
arch/m68k/include/asm/timex.h | 2 --
arch/mips/include/asm/timex.h | 2 --
arch/nios2/include/asm/timex.h | 2 --
arch/parisc/include/asm/timex.h | 2 --
arch/powerpc/include/asm/timex.h | 4 +---
arch/riscv/include/asm/timex.h | 2 --
arch/s390/include/asm/timex.h | 2 --
arch/sparc/include/asm/timex_64.h | 1 -
arch/x86/Kconfig | 1 +
arch/x86/include/asm/tsc.h | 2 --
include/asm-generic/timex.h | 1 -
include/linux/types.h | 6 ++++++
16 files changed, 12 insertions(+), 25 deletions(-)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -360,6 +360,10 @@ config ARCH_HAS_DMA_SET_UNCACHED
config ARCH_HAS_DMA_CLEAR_UNCACHED
bool
+# cycles_t is always 64bit wide
+config ARCH_HAS_CYCLES_T_64
+ bool
+
config ARCH_HAS_CPU_FINALIZE_INIT
bool
--- a/arch/alpha/include/asm/timex.h
+++ b/arch/alpha/include/asm/timex.h
@@ -15,9 +15,6 @@
* But this only means we'll force a reschedule every 8 seconds or so,
* which isn't an evil thing.
*/
-
-typedef unsigned int cycles_t;
-
static inline cycles_t get_cycles (void)
{
cycles_t ret;
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -9,7 +9,6 @@
#ifndef _ASMARM_TIMEX_H
#define _ASMARM_TIMEX_H
-typedef unsigned long cycles_t;
// Temporary workaround
bool delay_read_timer(unsigned long *t);
--- a/arch/loongarch/include/asm/timex.h
+++ b/arch/loongarch/include/asm/timex.h
@@ -12,8 +12,6 @@
#include <asm/cpu.h>
#include <asm/cpu-features.h>
-typedef unsigned long cycles_t;
-
#define get_cycles get_cycles
static inline cycles_t get_cycles(void)
--- a/arch/m68k/include/asm/timex.h
+++ b/arch/m68k/include/asm/timex.h
@@ -7,8 +7,6 @@
#ifndef _ASMm68K_TIMEX_H
#define _ASMm68K_TIMEX_H
-typedef unsigned long cycles_t;
-
static inline cycles_t get_cycles(void)
{
return 0;
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -29,8 +29,6 @@
* We know that all SMP capable CPUs have cycle counters.
*/
-typedef unsigned int cycles_t;
-
/*
* On R4000/R4400 an erratum exists such that if the cycle counter is
* read in the exact moment that it is matching the compare register,
--- a/arch/nios2/include/asm/timex.h
+++ b/arch/nios2/include/asm/timex.h
@@ -5,8 +5,6 @@
#ifndef _ASM_NIOS2_TIMEX_H
#define _ASM_NIOS2_TIMEX_H
-typedef unsigned long cycles_t;
-
extern cycles_t get_cycles(void);
#define get_cycles get_cycles
--- a/arch/parisc/include/asm/timex.h
+++ b/arch/parisc/include/asm/timex.h
@@ -9,8 +9,6 @@
#include <asm/special_insns.h>
-typedef unsigned long cycles_t;
-
static inline cycles_t get_cycles(void)
{
return mfctl(16);
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -11,9 +11,7 @@
#include <asm/cputable.h>
#include <asm/vdso/timebase.h>
-typedef unsigned long cycles_t;
-
-static inline cycles_t get_cycles(void)
+ostatic inline cycles_t get_cycles(void)
{
return mftb();
}
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -8,8 +8,6 @@
#include <asm/csr.h>
-typedef unsigned long cycles_t;
-
#ifdef CONFIG_RISCV_M_MODE
#include <asm/clint.h>
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -177,8 +177,6 @@ static inline void local_tick_enable(uns
set_clock_comparator(get_lowcore()->clock_comparator);
}
-typedef unsigned long cycles_t;
-
static __always_inline unsigned long get_tod_clock(void)
{
union tod_clock clk;
--- a/arch/sparc/include/asm/timex_64.h
+++ b/arch/sparc/include/asm/timex_64.h
@@ -10,7 +10,6 @@
#include <asm/timer.h>
/* Getting on the cycle counter on sparc64. */
-typedef unsigned long cycles_t;
#define get_cycles() tick_ops->get_tick()
#endif
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -79,6 +79,7 @@ config X86
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CPU_PASID if IOMMU_SVA
select ARCH_HAS_CURRENT_STACK_POINTER
+ select ARCH_HAS_CYCLES_T_64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
select ARCH_HAS_DELAY_TIMER
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -67,8 +67,6 @@ static __always_inline u64 rdtsc_ordered
/*
* Standard way to access the cycle counter.
*/
-typedef unsigned long long cycles_t;
-
extern unsigned int cpu_khz;
extern unsigned int tsc_khz;
--- a/include/asm-generic/timex.h
+++ b/include/asm-generic/timex.h
@@ -5,7 +5,6 @@
/*
* If you have a cycle counter, return the value here.
*/
-typedef unsigned long cycles_t;
#ifndef get_cycles
static inline cycles_t get_cycles(void)
{
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -270,5 +270,11 @@ struct rcuwait {
struct task_struct __rcu *task;
};
+#ifdef CONFIG_ARCH_HAS_CYCLES_T_64
+typedef unsigned long long cycles_t;
+#else
+typedef unsigned long cycles_t;
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_TYPES_H */
^ permalink raw reply
* [patch 08/38] x86/tsc: Use rdtsc() instead of get_cycles()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
This code knows that TSC is available so there is no point to use the TSC
feature guarded get_cycles().
No functional change.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/x86/kernel/tsc.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -371,12 +371,12 @@ static u64 tsc_read_refs(u64 *p, int hpe
int i;
for (i = 0; i < MAX_RETRIES; i++) {
- t1 = get_cycles();
+ t1 = rdtsc();
if (hpet)
*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
else
*p = acpi_pm_read_early();
- t2 = get_cycles();
+ t2 = rdtsc();
if ((t2 - t1) < thresh)
return t2;
}
@@ -468,13 +468,13 @@ static unsigned long pit_calibrate_tsc(u
outb(latch & 0xff, 0x42);
outb(latch >> 8, 0x42);
- tsc = t1 = t2 = get_cycles();
+ tsc = t1 = t2 = rdtsc();
pitcnt = 0;
tscmax = 0;
tscmin = ULONG_MAX;
while ((inb(0x61) & 0x20) == 0) {
- t2 = get_cycles();
+ t2 = rdtsc();
delta = t2 - tsc;
tsc = t2;
if ((unsigned long) delta < tscmin)
@@ -553,9 +553,9 @@ static inline int pit_expect_msb(unsigne
if (!pit_verify_msb(val))
break;
prev_tsc = tsc;
- tsc = get_cycles();
+ tsc = rdtsc();
}
- *deltap = get_cycles() - prev_tsc;
+ *deltap = rdtsc() - prev_tsc;
*tscp = tsc;
/*
^ permalink raw reply
* [patch 09/38] iommu/vt-d: Use sched_clock() instead of get_cycles()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: x86, Lu Baolu, iommu, Arnd Bergmann, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
Calculating the timeout from get_cycles() is a historical leftover without
any functional requirement.
Use ktime_get() instead.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: x86@kernel.org
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: iommu@lists.linux.dev
---
arch/x86/include/asm/iommu.h | 3 ---
drivers/iommu/intel/dmar.c | 4 ++--
drivers/iommu/intel/iommu.h | 8 ++++++--
3 files changed, 8 insertions(+), 7 deletions(-)
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -18,9 +18,6 @@ extern bool x86_swiotlb_enable;
#define x86_swiotlb_enable false
#endif
-/* 10 seconds */
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
-
static inline int __init
arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
{
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1606,9 +1606,9 @@ void qi_flush_pasid_cache(struct intel_i
*/
void dmar_disable_qi(struct intel_iommu *iommu)
{
+ ktime_t start_time = ktime_get();
unsigned long flags;
u32 sts;
- cycles_t start_time = get_cycles();
if (!ecap_qis(iommu->ecap))
return;
@@ -1624,7 +1624,7 @@ void dmar_disable_qi(struct intel_iommu
*/
while ((readl(iommu->reg + DMAR_IQT_REG) !=
readl(iommu->reg + DMAR_IQH_REG)) &&
- (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
+ (DMAR_OPERATION_TIMEOUT > (ktime_get() - start_time)))
cpu_relax();
iommu->gcmd &= ~DMA_GCMD_QIE;
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -23,6 +23,7 @@
#include <linux/xarray.h>
#include <linux/perf_event.h>
#include <linux/pci.h>
+#include <linux/timekeeping.h>
#include <linux/generic_pt/iommu.h>
#include <asm/iommu.h>
@@ -360,14 +361,17 @@
/* PERFINTRSTS_REG */
#define DMA_PERFINTRSTS_PIS ((u32)1)
+#define DMAR_OPERATION_TIMEOUT (((ktime_t)10) * NSEC_PER_SEC)
+
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do { \
- cycles_t start_time = get_cycles(); \
+ ktime_t start_time = ktime_get(); \
+ \
while (1) { \
sts = op(iommu->reg + offset); \
if (cond) \
break; \
- if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+ if (DMAR_OPERATION_TIMEOUT < (ktime_get() - start_time))\
panic("DMAR hardware is malfunctioning\n"); \
cpu_relax(); \
} \
^ permalink raw reply
* [patch 10/38] arcnet: Remove function timing code
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Michael Grzeschik, netdev, Arnd Bergmann, x86, Lu Baolu, iommu,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
ARCNET is a museums piece and the function timing can be done with
ftrace. Remove the cruft.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Michael Grzeschik <m.grzeschik@pengutronix.de>
Cc: netdev@vger.kernel.org
---
drivers/net/arcnet/arc-rimi.c | 4 ++--
drivers/net/arcnet/arcdevice.h | 20 +-------------------
drivers/net/arcnet/com20020.c | 6 ++----
drivers/net/arcnet/com90io.c | 6 ++----
drivers/net/arcnet/com90xx.c | 4 ++--
5 files changed, 9 insertions(+), 31 deletions(-)
--- a/drivers/net/arcnet/arc-rimi.c
+++ b/drivers/net/arcnet/arc-rimi.c
@@ -291,7 +291,7 @@ static void arcrimi_copy_to_card(struct
struct arcnet_local *lp = netdev_priv(dev);
void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset;
- TIME(dev, "memcpy_toio", count, memcpy_toio(memaddr, buf, count));
+ memcpy_toio(memaddr, buf, count);
}
static void arcrimi_copy_from_card(struct net_device *dev, int bufnum,
@@ -300,7 +300,7 @@ static void arcrimi_copy_from_card(struc
struct arcnet_local *lp = netdev_priv(dev);
void __iomem *memaddr = lp->mem_start + 0x800 + bufnum * 512 + offset;
- TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count));
+ memcpy_fromio(buf, memaddr, count);
}
static int node;
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -11,7 +11,6 @@
#ifndef _LINUX_ARCDEVICE_H
#define _LINUX_ARCDEVICE_H
-#include <asm/timex.h>
#include <linux/if_arcnet.h>
#ifdef __KERNEL__
@@ -62,7 +61,7 @@
#define D_RX 512 /* show rx packets */
#define D_SKB 1024 /* show skb's */
#define D_SKB_SIZE 2048 /* show skb sizes */
-#define D_TIMING 4096 /* show time needed to copy buffers to card */
+#define D_TIMING 4096 /* Not longer supported. Use tracing instead */
#define D_DEBUG 8192 /* Very detailed debug line for line */
#ifndef ARCNET_DEBUG_MAX
@@ -95,23 +94,6 @@ do { \
pr_cont(fmt, ##__VA_ARGS__); \
} while (0)
-/* see how long a function call takes to run, expressed in CPU cycles */
-#define TIME(dev, name, bytes, call) \
-do { \
- if (BUGLVL(D_TIMING)) { \
- unsigned long _x, _y; \
- _x = get_cycles(); \
- call; \
- _y = get_cycles(); \
- arc_printk(D_TIMING, dev, \
- "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \
- name, bytes, _y - _x, \
- 100000000 / 1024 * bytes / (_y - _x + 1)); \
- } else { \
- call; \
- } \
-} while (0)
-
/*
* Time needed to reset the card - in ms (milliseconds). This works on my
* SMC PC100. I can't find a reference that tells me just how long I
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -70,8 +70,7 @@ static void com20020_copy_from_card(stru
arcnet_outb(ofs & 0xff, ioaddr, COM20020_REG_W_ADDR_LO);
/* copy the data */
- TIME(dev, "insb", count,
- arcnet_insb(ioaddr, COM20020_REG_RW_MEMDATA, buf, count));
+ arcnet_insb(ioaddr, COM20020_REG_RW_MEMDATA, buf, count);
}
static void com20020_copy_to_card(struct net_device *dev, int bufnum,
@@ -84,8 +83,7 @@ static void com20020_copy_to_card(struct
arcnet_outb(ofs & 0xff, ioaddr, COM20020_REG_W_ADDR_LO);
/* copy the data */
- TIME(dev, "outsb", count,
- arcnet_outsb(ioaddr, COM20020_REG_RW_MEMDATA, buf, count));
+ arcnet_outsb(ioaddr, COM20020_REG_RW_MEMDATA, buf, count);
}
/* Reset the card and check some basic stuff during the detection stage. */
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -332,15 +332,13 @@ static void com90io_setmask(struct net_d
static void com90io_copy_to_card(struct net_device *dev, int bufnum,
int offset, void *buf, int count)
{
- TIME(dev, "put_whole_buffer", count,
- put_whole_buffer(dev, bufnum * 512 + offset, count, buf));
+ put_whole_buffer(dev, bufnum * 512 + offset, count, buf);
}
static void com90io_copy_from_card(struct net_device *dev, int bufnum,
int offset, void *buf, int count)
{
- TIME(dev, "get_whole_buffer", count,
- get_whole_buffer(dev, bufnum * 512 + offset, count, buf));
+ get_whole_buffer(dev, bufnum * 512 + offset, count, buf);
}
static int io; /* use the insmod io= irq= shmem= options */
--- a/drivers/net/arcnet/com90xx.c
+++ b/drivers/net/arcnet/com90xx.c
@@ -633,7 +633,7 @@ static void com90xx_copy_to_card(struct
struct arcnet_local *lp = netdev_priv(dev);
void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset;
- TIME(dev, "memcpy_toio", count, memcpy_toio(memaddr, buf, count));
+ memcpy_toio(memaddr, buf, count);
}
static void com90xx_copy_from_card(struct net_device *dev, int bufnum,
@@ -642,7 +642,7 @@ static void com90xx_copy_from_card(struc
struct arcnet_local *lp = netdev_priv(dev);
void __iomem *memaddr = lp->mem_start + bufnum * 512 + offset;
- TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count));
+ memcpy_fromio(buf, memaddr, count);
}
MODULE_DESCRIPTION("ARCnet COM90xx normal chipset driver");
^ permalink raw reply
* [patch 11/38] misc: sgi-gru: Remove get_cycles() [ab]use
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Arnd Bergmann, x86, Lu Baolu, iommu, Michael Grzeschik, netdev,
linux-wireless, Herbert Xu, linux-crypto, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
Calculating a timeout from get_cycles() is a historical leftover without
any functional requirement.
Use ktime_get() instead.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
drivers/misc/sgi-gru/gruhandles.c | 20 ++++++++------------
drivers/misc/sgi-gru/grukservices.c | 3 ++-
drivers/misc/sgi-gru/grutlbpurge.c | 5 ++---
3 files changed, 12 insertions(+), 16 deletions(-)
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -6,26 +6,22 @@
*/
#include <linux/kernel.h>
+#include <linux/timekeeping.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
-/* 10 sec */
#include <linux/sync_core.h>
-#include <asm/tsc.h>
-#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
-#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
+
+#define GRU_OPERATION_TIMEOUT_NSEC (((ktime_t)10 * NSEC_PER_SEC))
/* Extract the status field from a kernel handle */
#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3)
struct mcs_op_statistic mcs_op_statistics[mcsop_last];
-static void update_mcs_stats(enum mcs_op op, unsigned long clks)
+static void update_mcs_stats(enum mcs_op op, unsigned long nsec)
{
- unsigned long nsec;
-
- nsec = CLKS2NSEC(clks);
atomic_long_inc(&mcs_op_statistics[op].count);
atomic_long_add(nsec, &mcs_op_statistics[op].total);
if (mcs_op_statistics[op].max < nsec)
@@ -58,21 +54,21 @@ static void report_instruction_timeout(v
static int wait_instruction_complete(void *h, enum mcs_op opc)
{
+ ktime_t start_time = ktime_get();
int status;
- unsigned long start_time = get_cycles();
while (1) {
cpu_relax();
status = GET_MSEG_HANDLE_STATUS(h);
if (status != CCHSTATUS_ACTIVE)
break;
- if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) {
+ if (GRU_OP_TIMEOUT_NSEC < (ktime_get() - start_time)) {
report_instruction_timeout(h);
- start_time = get_cycles();
+ start_time = ktime_get();
}
}
if (gru_options & OPT_STATS)
- update_mcs_stats(opc, get_cycles() - start_time);
+ update_mcs_stats(opc, (unsigned long)(ktime_get() - start_time));
return status;
}
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -20,6 +20,7 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/export.h>
+#include <linux/random.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
@@ -1106,7 +1107,7 @@ static int quicktest3(unsigned long arg)
int ret = 0;
memset(buf2, 0, sizeof(buf2));
- memset(buf1, get_cycles() & 255, sizeof(buf1));
+ memset(buf1, get_random_u32() & 255, sizeof(buf1));
gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
if (memcmp(buf1, buf2, BUFSIZE)) {
printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -22,13 +22,12 @@
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
+#include <linux/random.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>
-#define gru_random() get_cycles()
-
/* ---------------------------------- TLB Invalidation functions --------
* get_tgh_handle
*
@@ -49,7 +48,7 @@ static inline int get_off_blade_tgh(stru
int n;
n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
- n = gru_random() % n;
+ n = get_random_u32() % n;
n += gru->gs_tgh_first_remote;
return n;
}
^ permalink raw reply
* [patch 12/38] wifi: wil6210: Replace get_cyles() usage
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: linux-wireless, Arnd Bergmann, x86, Lu Baolu, iommu,
Michael Grzeschik, netdev, Herbert Xu, linux-crypto,
Vlastimil Babka, linux-mm, David Woodhouse, Bernie Thompson,
linux-fbdev, Theodore Tso, linux-ext4, Andrew Morton,
Uladzislau Rezki, Marco Elver, Dmitry Vyukov, kasan-dev,
Andrey Ryabinin, Thomas Sailer, linux-hams, Jason A. Donenfeld,
Richard Henderson, linux-alpha, Russell King, linux-arm-kernel,
Catalin Marinas, Huacai Chen, loongarch, Geert Uytterhoeven,
linux-m68k, Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
There is no reason why this debug code requires to use get_cycles() for
timing purposes.
Use ktime_get() instead.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: linux-wireless@vger.kernel.org
---
drivers/net/wireless/ath/wil6210/debugfs.c | 2 +-
drivers/net/wireless/ath/wil6210/txrx.c | 6 +++---
drivers/net/wireless/ath/wil6210/txrx_edma.c | 4 ++--
drivers/net/wireless/ath/wil6210/wil6210.h | 3 ++-
4 files changed, 8 insertions(+), 7 deletions(-)
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -151,7 +151,7 @@ static int ring_show(struct seq_file *s,
char name[10];
char sidle[10];
/* performance monitoring */
- cycles_t now = get_cycles();
+ ktime_t now = ktime_get();
uint64_t idle = txdata->idle * 100;
uint64_t total = now - txdata->begin;
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -1976,7 +1976,7 @@ static int __wil_tx_vring_tso(struct wil
used = wil_ring_used_tx(vring);
if (wil_val_in_range(wil->ring_idle_trsh,
used, used + descs_used)) {
- txdata->idle += get_cycles() - txdata->last_idle;
+ txdata->idle += ktime_get() - txdata->last_idle;
wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
vring_index, used, used + descs_used);
}
@@ -2129,7 +2129,7 @@ static int __wil_tx_ring(struct wil6210_
used = wil_ring_used_tx(ring);
if (wil_val_in_range(wil->ring_idle_trsh,
used, used + nr_frags + 1)) {
- txdata->idle += get_cycles() - txdata->last_idle;
+ txdata->idle += ktime_get() - txdata->last_idle;
wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
ring_index, used, used + nr_frags + 1);
}
@@ -2531,7 +2531,7 @@ int wil_tx_complete(struct wil6210_vif *
used_new, used_before_complete)) {
wil_dbg_txrx(wil, "Ring[%2d] idle %d -> %d\n",
ringid, used_before_complete, used_new);
- txdata->last_idle = get_cycles();
+ txdata->last_idle = ktime_get();
}
/* shall we wake net queues? */
--- a/drivers/net/wireless/ath/wil6210/txrx_edma.c
+++ b/drivers/net/wireless/ath/wil6210/txrx_edma.c
@@ -1286,7 +1286,7 @@ int wil_tx_sring_handler(struct wil6210_
used_new, used_before_complete)) {
wil_dbg_txrx(wil, "Ring[%2d] idle %d -> %d\n",
ring_id, used_before_complete, used_new);
- txdata->last_idle = get_cycles();
+ txdata->last_idle = ktime_get();
}
again:
@@ -1499,7 +1499,7 @@ static int __wil_tx_ring_tso_edma(struct
used = wil_ring_used_tx(ring);
if (wil_val_in_range(wil->ring_idle_trsh,
used, used + descs_used)) {
- txdata->idle += get_cycles() - txdata->last_idle;
+ txdata->idle += ktime_get() - txdata->last_idle;
wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
ring_index, used, used + descs_used);
}
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -14,6 +14,7 @@
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/irqreturn.h>
+#include <linux/timekeeping.h>
#include "wmi.h"
#include "wil_platform.h"
#include "fw.h"
@@ -632,7 +633,7 @@ struct wil_txrx_ops {
struct wil_ring_tx_data {
bool dot1x_open;
int enabled;
- cycles_t idle, last_idle, begin;
+ ktime_t idle, last_idle, begin;
u8 agg_wsize; /* agreed aggregation window, 0 - no agg */
u16 agg_timeout;
u8 agg_amsdu;
^ permalink raw reply
* [patch 13/38] crypto: tcrypt: Replace get_cycles() with ktime_get()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Herbert Xu, linux-crypto, Arnd Bergmann, x86, Lu Baolu, iommu,
Michael Grzeschik, netdev, linux-wireless, Vlastimil Babka,
linux-mm, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
get_cycles() is the historical access to a fine grained time source, but it
is a suboptimal choice for two reasons:
- get_cycles() is not guaranteed to be supported and functional on all
systems/platforms. If not supported or not functional it returns 0,
which makes benchmarking moot.
- get_cycles() returns the raw counter value of whatever the
architecture platform provides. The original x86 Time Stamp Counter
(TSC) was despite its name tied to the actual CPU core frequency.
That's not longer the case. So the counter value is only meaningful
when the CPU operates at the same frequency as the TSC or the value is
adjusted to the actual CPU frequency. Other architectures and
platforms provide similar disjunct counters via get_cycles(), so the
result is operations per BOGO-cycles, which is not really meaningful.
Use ktime_get() instead which provides nanosecond timestamps with the
granularity of the underlying hardware counter, which is not different to
the variety of get_cycles() implementations.
This provides at least understandable metrics, i.e. operations/nanoseconds,
and is available on all platforms. As with get_cycles() the result might
have to be put into relation with the CPU operating frequency, but that's
not any different.
This is part of a larger effort to remove get_cycles() usage from
non-architecture code.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: linux-crypto@vger.kernel.org
---
crypto/tcrypt.c | 84 ++++++++++++++++++++++++++++----------------------------
1 file changed, 42 insertions(+), 42 deletions(-)
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -202,7 +202,7 @@ static int test_mb_aead_jiffies(struct t
static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
int blen, u32 num_mb)
{
- unsigned long cycles = 0;
+ unsigned long nsecs = 0;
int ret = 0;
int i;
int *rc;
@@ -220,20 +220,20 @@ static int test_mb_aead_cycles(struct te
/* The real thing. */
for (i = 0; i < 8; i++) {
- cycles_t start, end;
+ ktime_t start, end;
- start = get_cycles();
+ start = ktime_get();
ret = do_mult_aead_op(data, enc, num_mb, rc);
- end = get_cycles();
+ end = ktime_get();
if (ret)
goto out;
- cycles += end - start;
+ nsecs += (unsigned long)(end - start);
}
- pr_cont("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / (8 * num_mb), blen);
+ pr_cont("1 operation in %lu nsecs (%d bytes)\n",
+ nsecs / (8 * num_mb), blen);
out:
kfree(rc);
@@ -475,7 +475,7 @@ static int test_aead_jiffies(struct aead
static int test_aead_cycles(struct aead_request *req, int enc, int blen)
{
- unsigned long cycles = 0;
+ unsigned long nsecs = 0;
int ret = 0;
int i;
@@ -492,25 +492,24 @@ static int test_aead_cycles(struct aead_
/* The real thing. */
for (i = 0; i < 8; i++) {
- cycles_t start, end;
+ ktime_t start, end;
- start = get_cycles();
+ start = ktime_get();
if (enc)
ret = do_one_aead_op(req, crypto_aead_encrypt(req));
else
ret = do_one_aead_op(req, crypto_aead_decrypt(req));
- end = get_cycles();
+ end = ktime_get();
if (ret)
goto out;
- cycles += end - start;
+ nsecs += (unsigned long)(end - start);
}
out:
if (ret == 0)
- pr_cont("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / 8, blen);
+ pr_cont("1 operation in %lu nsecs (%d bytes)\n", nsecs / 8, blen);
return ret;
}
@@ -771,7 +770,7 @@ static int test_ahash_jiffies(struct aha
static int test_ahash_cycles_digest(struct ahash_request *req, int blen,
char *out)
{
- unsigned long cycles = 0;
+ unsigned long nsecs = 0;
int ret, i;
/* Warm-up run. */
@@ -783,25 +782,25 @@ static int test_ahash_cycles_digest(stru
/* The real thing. */
for (i = 0; i < 8; i++) {
- cycles_t start, end;
+ ktime_t start, end;
- start = get_cycles();
+ start = ktime_get();
ret = do_one_ahash_op(req, crypto_ahash_digest(req));
if (ret)
goto out;
- end = get_cycles();
+ end = ktime_get();
- cycles += end - start;
+ nsecs += (unsigned long)(end - start);
}
out:
if (ret)
return ret;
- pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
- cycles / 8, cycles / (8 * blen));
+ pr_cont("%6lu nsecs/operation, %4lu nsecs/byte\n",
+ nsecs / 8, nsecs / (8 * blen));
return 0;
}
@@ -809,7 +808,7 @@ static int test_ahash_cycles_digest(stru
static int test_ahash_cycles(struct ahash_request *req, int blen,
int plen, char *out)
{
- unsigned long cycles = 0;
+ unsigned long nsecs = 0;
int i, pcount, ret;
if (plen == blen)
@@ -832,9 +831,9 @@ static int test_ahash_cycles(struct ahas
/* The real thing. */
for (i = 0; i < 8; i++) {
- cycles_t start, end;
+ ktime_t start, end;
- start = get_cycles();
+ start = ktime_get();
ret = do_one_ahash_op(req, crypto_ahash_init(req));
if (ret)
@@ -848,17 +847,17 @@ static int test_ahash_cycles(struct ahas
if (ret)
goto out;
- end = get_cycles();
+ end = ktime_get();
- cycles += end - start;
+ nsecs += (unsigned long)(end - start);
}
out:
if (ret)
return ret;
- pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
- cycles / 8, cycles / (8 * blen));
+ pr_cont("%6lu nsecs/operation, %4lu nsecs/byte\n",
+ nsecs / 8, nsecs / (8 * blen));
return 0;
}
@@ -1019,7 +1018,7 @@ static int test_mb_acipher_jiffies(struc
static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
int blen, u32 num_mb)
{
- unsigned long cycles = 0;
+ unsigned long nsecs = 0;
int ret = 0;
int i;
int *rc;
@@ -1037,20 +1036,20 @@ static int test_mb_acipher_cycles(struct
/* The real thing. */
for (i = 0; i < 8; i++) {
- cycles_t start, end;
+ ktime_t start, end;
- start = get_cycles();
+ start = ktime_get();
ret = do_mult_acipher_op(data, enc, num_mb, rc);
- end = get_cycles();
+ end = ktime_get();
if (ret)
goto out;
- cycles += end - start;
+ nsecs += (unsigned long)(end - start);
}
- pr_cont("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / (8 * num_mb), blen);
+ pr_cont("1 operation in %lu nsecs (%d bytes)\n",
+ nsecs / (8 * num_mb), blen);
out:
kfree(rc);
@@ -1246,7 +1245,7 @@ static int test_acipher_jiffies(struct s
static int test_acipher_cycles(struct skcipher_request *req, int enc,
int blen)
{
- unsigned long cycles = 0;
+ unsigned long nsecs = 0;
int ret = 0;
int i;
@@ -1265,27 +1264,28 @@ static int test_acipher_cycles(struct sk
/* The real thing. */
for (i = 0; i < 8; i++) {
- cycles_t start, end;
+ ktime_t start, end;
+
+ start = ktime_get();
- start = get_cycles();
if (enc)
ret = do_one_acipher_op(req,
crypto_skcipher_encrypt(req));
else
ret = do_one_acipher_op(req,
crypto_skcipher_decrypt(req));
- end = get_cycles();
+ end = ktime_get();
if (ret)
goto out;
- cycles += end - start;
+ nsecs += (unsigned long)(end - start);
}
out:
if (ret == 0)
- pr_cont("1 operation in %lu cycles (%d bytes)\n",
- (cycles + 4) / 8, blen);
+ pr_cont("1 operation in %lu nsecs (%d bytes)\n",
+ nsecs / 8, blen);
return ret;
}
^ permalink raw reply
* [patch 14/38] slub: Use prandom instead of get_cycles()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Vlastimil Babka, linux-mm, Arnd Bergmann, x86, Lu Baolu, iommu,
Michael Grzeschik, netdev, linux-wireless, Herbert Xu,
linux-crypto, David Woodhouse, Bernie Thompson, linux-fbdev,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
The decision whether to scan remote nodes is based on a 'random' number
retrieved via get_cycles(). get_cycles() is about to be removed.
There is already prandom state in the code, so use that instead.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Vlastimil Babka <vbabka@kernel.org>
Cc: linux-mm@kvack.org
---
mm/slub.c | 37 +++++++++++++++++++++++--------------
1 file changed, 23 insertions(+), 14 deletions(-)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3302,6 +3302,25 @@ static inline struct slab *alloc_slab_pa
return slab;
}
+#if defined(CONFIG_SLAB_FREELIST_RANDOM) || defined(CONFIG_NUMA)
+static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
+
+static unsigned int slab_get_prandom_state(unsigned int limit)
+{
+ struct rnd_state *state;
+ unsigned int res;
+
+ /*
+ * An interrupt or NMI handler might interrupt and change
+ * the state in the middle, but that's safe.
+ */
+ state = &get_cpu_var(slab_rnd_state);
+ res = prandom_u32_state(state) % limit;
+ put_cpu_var(slab_rnd_state);
+ return res;
+}
+#endif
+
#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Pre-initialize the random sequence cache */
static int init_cache_random_seq(struct kmem_cache *s)
@@ -3365,8 +3384,6 @@ static void *next_freelist_entry(struct
return (char *)start + idx;
}
-static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
-
/* Shuffle the single linked freelist based on a random pre-computed sequence */
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
bool allow_spin)
@@ -3383,15 +3400,7 @@ static bool shuffle_freelist(struct kmem
if (allow_spin) {
pos = get_random_u32_below(freelist_count);
} else {
- struct rnd_state *state;
-
- /*
- * An interrupt or NMI handler might interrupt and change
- * the state in the middle, but that's safe.
- */
- state = &get_cpu_var(slab_rnd_state);
- pos = prandom_u32_state(state) % freelist_count;
- put_cpu_var(slab_rnd_state);
+ pos = slab_get_prandom_state(freelist_count);
}
page_limit = slab->objects * s->size;
@@ -3882,7 +3891,7 @@ static void *get_from_any_partial(struct
* with available objects.
*/
if (!s->remote_node_defrag_ratio ||
- get_cycles() % 1024 > s->remote_node_defrag_ratio)
+ slab_get_prandom_state(1024) > s->remote_node_defrag_ratio)
return NULL;
do {
@@ -7102,7 +7111,7 @@ static unsigned int
/* see get_from_any_partial() for the defrag ratio description */
if (!s->remote_node_defrag_ratio ||
- get_cycles() % 1024 > s->remote_node_defrag_ratio)
+ slab_get_prandom_state(1024) > s->remote_node_defrag_ratio)
return 0;
do {
@@ -8421,7 +8430,7 @@ void __init kmem_cache_init_late(void)
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU,
0);
WARN_ON(!flushwq);
-#ifdef CONFIG_SLAB_FREELIST_RANDOM
+#if defined(CONFIG_SLAB_FREELIST_RANDOM) || defined(CONFIG_NUMA)
prandom_init_once(&slab_rnd_state);
#endif
}
^ permalink raw reply
* [patch 15/38] ptp: ptp_vmclock: Replace get_cycles() usage
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: David Woodhouse, Arnd Bergmann, x86, Lu Baolu, iommu,
Michael Grzeschik, netdev, linux-wireless, Herbert Xu,
linux-crypto, Vlastimil Babka, linux-mm, Bernie Thompson,
linux-fbdev, Theodore Tso, linux-ext4, Andrew Morton,
Uladzislau Rezki, Marco Elver, Dmitry Vyukov, kasan-dev,
Andrey Ryabinin, Thomas Sailer, linux-hams, Jason A. Donenfeld,
Richard Henderson, linux-alpha, Russell King, linux-arm-kernel,
Catalin Marinas, Huacai Chen, loongarch, Geert Uytterhoeven,
linux-m68k, Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
get_cycles() is not really well defined and similar to other usaage of the
underlying hardware CPU counters the PTP vmclock should use an explicit
interface as well.
Implement ptp_vmclock_read_cpu_counter() in arm64 and x86 and simplify the
Kconfig selection while at it.
No functional change.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: David Woodhouse <dwmw2@infradead.org>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/ptp_vmclock.h | 12 ++++++++++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/ptp_vmclock.h | 12 ++++++++++++
drivers/ptp/Kconfig | 6 ++++--
drivers/ptp/ptp_vmclock.c | 6 ++++--
6 files changed, 34 insertions(+), 4 deletions(-)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -112,6 +112,7 @@ config ARM64
select ARCH_SUPPORTS_SCHED_SMT
select ARCH_SUPPORTS_SCHED_CLUSTER
select ARCH_SUPPORTS_SCHED_MC
+ select ARCH_SUPPORTS_PTP_VMCLOCK if ARCH_SUPPORTS_INT128
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
--- /dev/null
+++ b/arch/arm64/include/asm/ptp_vmclock.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_PTP_VMCLOCK_H
+#define __ASM_PTP_VMCLOCK_H
+
+#include <asm/arch_timer.h>
+
+static inline u64 ptp_vmclock_read_cpu_counter(void)
+{
+ return arch_timer_read_counter();
+}
+
+#endif
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -135,6 +135,7 @@ config X86
select ARCH_SUPPORTS_RT
select ARCH_SUPPORTS_AUTOFDO_CLANG
select ARCH_SUPPORTS_PROPELLER_CLANG if X86_64
+ select ARCH_SUPPORTS_PTP_VMCLOCK if X86_TSC
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if X86_CX8
select ARCH_USE_MEMTEST
--- /dev/null
+++ b/arch/x86/include/asm/ptp_vmclock.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_PTP_VMCLOCK_H
+#define __ASM_PTP_VMCLOCK_H
+
+#include <asm/tsc.h>
+
+static inline u64 ptp_vmclock_read_cpu_counter(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_TSC) ? rdtsc() : 0;
+}
+
+#endif
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -131,10 +131,12 @@ config PTP_1588_CLOCK_KVM
To compile this driver as a module, choose M here: the module
will be called ptp_kvm.
+config ARCH_SUPPORTS_PTP_VMCLOCK
+ bool
+
config PTP_1588_CLOCK_VMCLOCK
tristate "Virtual machine PTP clock"
- depends on X86_TSC || ARM_ARCH_TIMER
- depends on PTP_1588_CLOCK && ARCH_SUPPORTS_INT128
+ depends on PTP_1588_CLOCK && ARCH_SUPPORTS_PTP_VMCLOCK
default PTP_1588_CLOCK_KVM
help
This driver adds support for using a virtual precision clock
--- a/drivers/ptp/ptp_vmclock.c
+++ b/drivers/ptp/ptp_vmclock.c
@@ -28,6 +28,8 @@
#include <linux/ptp_clock_kernel.h>
+#include <asm/ptp_vmclock.h>
+
#ifdef CONFIG_X86
#include <asm/pvclock.h>
#include <asm/kvmclock.h>
@@ -144,11 +146,11 @@ static int vmclock_get_crosststamp(struc
if (systime_snapshot.cs_id == st->cs_id) {
cycle = systime_snapshot.cycles;
} else {
- cycle = get_cycles();
+ cycle = ptp_vmclock_read_cpu_counter();
ptp_read_system_postts(sts);
}
} else {
- cycle = get_cycles();
+ cycle = ptp_vmclock_read_cpu_counter();
}
delta = cycle - le64_to_cpu(st->clk->counter_value);
^ permalink raw reply
* [patch 16/38] fbdev: udlfb: Replace get_cycles() with ktime_get()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Bernie Thompson, linux-fbdev, Arnd Bergmann, x86, Lu Baolu, iommu,
Michael Grzeschik, netdev, linux-wireless, Herbert Xu,
linux-crypto, Vlastimil Babka, linux-mm, David Woodhouse,
Theodore Tso, linux-ext4, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
These metrics are not really requiring CPU cycles and as those are
meanlingless when the CPU is running at a different frequency, this can use
ktime_get() and achieve the same result.
Part of a larger effort to confine get_cycles() access to low level
architecture code.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Bernie Thompson <bernie@plugable.com>
Cc: linux-fbdev@vger.kernel.org
---
Documentation/fb/udlfb.rst | 4 ++--
drivers/video/fbdev/udlfb.c | 24 ++++++++++++------------
2 files changed, 14 insertions(+), 14 deletions(-)
--- a/Documentation/fb/udlfb.rst
+++ b/Documentation/fb/udlfb.rst
@@ -156,8 +156,8 @@ metrics_bytes_sent 32-bit count of how
USB to communicate the resulting changed pixels to the
hardware. Includes compression and protocol overhead
-metrics_cpu_kcycles_used 32-bit count of CPU cycles used in processing the
- above pixels (in thousands of cycles).
+metrics_cpu_kcycles_used 32-bit count of microseconds used in processing the
+ above pixels
metrics_reset Write-only. Any write to this file resets all metrics
above to zero. Note that the 32-bit counters above
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/unaligned.h>
+#include <linux/timekeeping.h>
#include <video/udlfb.h>
#include "edid.h"
@@ -600,15 +601,15 @@ static int dlfb_render_hline(struct dlfb
static int dlfb_handle_damage(struct dlfb_data *dlfb, int x, int y, int width, int height)
{
+ ktime_t t_start, t_delta;
int i, ret;
char *cmd;
- cycles_t start_cycles, end_cycles;
int bytes_sent = 0;
int bytes_identical = 0;
struct urb *urb;
int aligned_x;
- start_cycles = get_cycles();
+ t_start = ktime_get();
mutex_lock(&dlfb->render_mutex);
@@ -661,10 +662,9 @@ static int dlfb_handle_damage(struct dlf
atomic_add(bytes_sent, &dlfb->bytes_sent);
atomic_add(bytes_identical, &dlfb->bytes_identical);
atomic_add(width*height*2, &dlfb->bytes_rendered);
- end_cycles = get_cycles();
- atomic_add(((unsigned int) ((end_cycles - start_cycles)
- >> 10)), /* Kcycles */
- &dlfb->cpu_kcycles_used);
+ t_delta = ktime_get() - t_start;
+ /* Avoid a division and approximate microseconds with shift right ten */
+ atomic_add(((int)(t_delta >> 10)), &dlfb->cpu_kcycles_used);
ret = 0;
@@ -727,9 +727,9 @@ static void dlfb_dpy_deferred_io(struct
{
struct fb_deferred_io_pageref *pageref;
struct dlfb_data *dlfb = info->par;
+ ktime_t t_start, t_delta;
struct urb *urb;
char *cmd;
- cycles_t start_cycles, end_cycles;
int bytes_sent = 0;
int bytes_identical = 0;
int bytes_rendered = 0;
@@ -742,7 +742,7 @@ static void dlfb_dpy_deferred_io(struct
if (!atomic_read(&dlfb->usb_active))
goto unlock_ret;
- start_cycles = get_cycles();
+ t_start = ktime_get();
urb = dlfb_get_urb(dlfb);
if (!urb)
@@ -774,10 +774,10 @@ static void dlfb_dpy_deferred_io(struct
atomic_add(bytes_sent, &dlfb->bytes_sent);
atomic_add(bytes_identical, &dlfb->bytes_identical);
atomic_add(bytes_rendered, &dlfb->bytes_rendered);
- end_cycles = get_cycles();
- atomic_add(((unsigned int) ((end_cycles - start_cycles)
- >> 10)), /* Kcycles */
- &dlfb->cpu_kcycles_used);
+ t_delta = ktime_get() - t_start;
+ /* Avoid a division and approximate microseconds with shift right ten */
+ atomic_add(((int)(t_delta >> 10)), &dlfb->cpu_kcycles_used);
+
unlock_ret:
mutex_unlock(&dlfb->render_mutex);
}
^ permalink raw reply
* [patch 17/38] ext4: Replace get_cycles() usage with ktime_get()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Theodore Tso, linux-ext4, Arnd Bergmann, x86, Lu Baolu, iommu,
Michael Grzeschik, netdev, linux-wireless, Herbert Xu,
linux-crypto, Vlastimil Babka, linux-mm, David Woodhouse,
Bernie Thompson, linux-fbdev, Andrew Morton, Uladzislau Rezki,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
get_cycles() is not guaranteed to be functional on all systems/platforms
and the values returned are unitless and not easy to map to something
useful.
Use ktime_get() instead, which provides nanosecond timestamps and is
functional everywhere.
This is part of a larger effort to limit get_cycles() usage to low level
architecture code.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: linux-ext4@vger.kernel.org
---
fs/ext4/mballoc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1289,7 +1289,7 @@ void ext4_mb_generate_buddy(struct super
ext4_grpblk_t len;
unsigned free = 0;
unsigned fragments = 0;
- unsigned long long period = get_cycles();
+ ktime_t period = ktime_get();
/* initialize buddy from bitmap which is aggregation
* of on-disk bitmap and preallocations */
@@ -1328,7 +1328,7 @@ void ext4_mb_generate_buddy(struct super
clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
- period = get_cycles() - period;
+ period = ktime_get() - period;
atomic_inc(&sbi->s_mb_buddies_generated);
atomic64_add(period, &sbi->s_mb_generation_time);
}
^ permalink raw reply
* [patch 18/38] lib/tests: Replace get_cycles() with ktime_get()
From: Thomas Gleixner @ 2026-04-10 12:19 UTC (permalink / raw)
To: LKML
Cc: Andrew Morton, Uladzislau Rezki, linux-mm, Arnd Bergmann, x86,
Lu Baolu, iommu, Michael Grzeschik, netdev, linux-wireless,
Herbert Xu, linux-crypto, Vlastimil Babka, David Woodhouse,
Bernie Thompson, linux-fbdev, Theodore Tso, linux-ext4,
Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
Huacai Chen, loongarch, Geert Uytterhoeven, linux-m68k,
Dinh Nguyen, Jonas Bonn, linux-openrisc, Helge Deller,
linux-parisc, Michael Ellerman, linuxppc-dev, Paul Walmsley,
linux-riscv, Heiko Carstens, linux-s390, David S. Miller,
sparclinux
In-Reply-To: <20260410120044.031381086@kernel.org>
get_cycles() is the historical access to a fine grained time source, but it
is a suboptimal choice for two reasons:
- get_cycles() is not guaranteed to be supported and functional on all
systems/platforms. If not supported or not functional it returns 0,
which makes benchmarking moot.
- get_cycles() returns the raw counter value of whatever the
architecture platform provides. The original x86 Time Stamp Counter
(TSC) was despite its name tied to the actual CPU core frequency.
That's not longer the case. So the counter value is only meaningful
when the CPU operates at the same frequency as the TSC or the value is
adjusted to the actual CPU frequency. Other architectures and
platforms provide similar disjunct counters via get_cycles(), so the
result is operations per BOGO-cycles, which is not really meaningful.
Use ktime_get() instead which provides nanosecond timestamps with the
granularity of the underlying hardware counter, which is not different to
the variety of get_cycles() implementations.
This provides at least understandable metrics, i.e. operations/nanoseconds,
and is available on all platforms. As with get_cycles() the result might
have to be put into relation with the CPU operating frequency, but that's
not any different.
This is part of a larger effort to remove get_cycles() usage from
non-architecture code.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: linux-mm@kvack.org
---
lib/interval_tree_test.c | 16 ++++++++--------
lib/rbtree_test.c | 46 +++++++++++++++++++++++-----------------------
lib/test_vmalloc.c | 10 +++++-----
3 files changed, 36 insertions(+), 36 deletions(-)
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -65,13 +65,13 @@ static void init(void)
static int basic_check(void)
{
int i, j;
- cycles_t time1, time2, time;
+ ktime_t time1, time2, time;
printk(KERN_ALERT "interval tree insert/remove");
init();
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++) {
for (j = 0; j < nnodes; j++)
@@ -80,11 +80,11 @@ static int basic_check(void)
interval_tree_remove(nodes + j, &root);
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" -> %llu cycles\n", (unsigned long long)time);
+ printk(" -> %llu nsecs\n", (unsigned long long)time);
return 0;
}
@@ -93,7 +93,7 @@ static int search_check(void)
{
int i, j;
unsigned long results;
- cycles_t time1, time2, time;
+ ktime_t time1, time2, time;
printk(KERN_ALERT "interval tree search");
@@ -102,7 +102,7 @@ static int search_check(void)
for (j = 0; j < nnodes; j++)
interval_tree_insert(nodes + j, &root);
- time1 = get_cycles();
+ time1 = ktime_get();
results = 0;
for (i = 0; i < search_loops; i++)
@@ -113,12 +113,12 @@ static int search_check(void)
results += search(&root, start, last);
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, search_loops);
results = div_u64(results, search_loops);
- printk(" -> %llu cycles (%lu results)\n",
+ printk(" -> %llu nsecs (%lu results)\n",
(unsigned long long)time, results);
for (j = 0; j < nnodes; j++)
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -243,14 +243,14 @@ static void check_augmented(int nr_nodes
static int basic_check(void)
{
int i, j;
- cycles_t time1, time2, time;
+ ktime_t time1, time2, time;
struct rb_node *node;
printk(KERN_ALERT "rbtree testing");
init();
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++) {
for (j = 0; j < nnodes; j++)
@@ -259,14 +259,14 @@ static int basic_check(void)
erase(nodes + j, &root);
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" -> test 1 (latency of nnodes insert+delete): %llu cycles\n",
+ printk(" -> test 1 (latency of nnodes insert+delete): %llu nsecs\n",
(unsigned long long)time);
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++) {
for (j = 0; j < nnodes; j++)
@@ -275,52 +275,52 @@ static int basic_check(void)
erase_cached(nodes + j, &root);
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" -> test 2 (latency of nnodes cached insert+delete): %llu cycles\n",
+ printk(" -> test 2 (latency of nnodes cached insert+delete): %llu nsecs\n",
(unsigned long long)time);
for (i = 0; i < nnodes; i++)
insert(nodes + i, &root);
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++) {
for (node = rb_first(&root.rb_root); node; node = rb_next(node))
;
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" -> test 3 (latency of inorder traversal): %llu cycles\n",
+ printk(" -> test 3 (latency of inorder traversal): %llu nsecs\n",
(unsigned long long)time);
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++)
node = rb_first(&root.rb_root);
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
printk(" -> test 4 (latency to fetch first node)\n");
- printk(" non-cached: %llu cycles\n", (unsigned long long)time);
+ printk(" non-cached: %llu nsecs\n", (unsigned long long)time);
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++)
node = rb_first_cached(&root);
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" cached: %llu cycles\n", (unsigned long long)time);
+ printk(" cached: %llu nsecs\n", (unsigned long long)time);
for (i = 0; i < nnodes; i++)
erase(nodes + i, &root);
@@ -345,13 +345,13 @@ static int basic_check(void)
static int augmented_check(void)
{
int i, j;
- cycles_t time1, time2, time;
+ ktime_t time1, time2, time;
printk(KERN_ALERT "augmented rbtree testing");
init();
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++) {
for (j = 0; j < nnodes; j++)
@@ -360,13 +360,13 @@ static int augmented_check(void)
erase_augmented(nodes + j, &root);
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" -> test 1 (latency of nnodes insert+delete): %llu cycles\n", (unsigned long long)time);
+ printk(" -> test 1 (latency of nnodes insert+delete): %llu nsecs\n", (unsigned long long)time);
- time1 = get_cycles();
+ time1 = ktime_get();
for (i = 0; i < perf_loops; i++) {
for (j = 0; j < nnodes; j++)
@@ -375,11 +375,11 @@ static int augmented_check(void)
erase_augmented_cached(nodes + j, &root);
}
- time2 = get_cycles();
+ time2 = ktime_get();
time = time2 - time1;
time = div_u64(time, perf_loops);
- printk(" -> test 2 (latency of nnodes cached insert+delete): %llu cycles\n", (unsigned long long)time);
+ printk(" -> test 2 (latency of nnodes cached insert+delete): %llu nsecs\n", (unsigned long long)time);
for (i = 0; i < check_loops; i++) {
init();
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -454,8 +454,8 @@ static struct test_driver {
struct task_struct *task;
struct test_case_data data[ARRAY_SIZE(test_case_array)];
- unsigned long start;
- unsigned long stop;
+ ktime_t start;
+ ktime_t stop;
} *tdriver;
static void shuffle_array(int *arr, int n)
@@ -490,7 +490,7 @@ static int test_func(void *private)
*/
synchronize_srcu(&prepare_for_test_srcu);
- t->start = get_cycles();
+ t->start = ktime_get();
for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
index = random_array[i];
@@ -519,7 +519,7 @@ static int test_func(void *private)
t->data[index].time = delta;
}
- t->stop = get_cycles();
+ t->stop = ktime_get();
test_report_one_done();
/*
@@ -619,7 +619,7 @@ static void do_concurrent_test(void)
t->data[j].time);
}
- pr_info("All test took worker%d=%lu cycles\n",
+ pr_info("All test took worker%d=%lld nsecs\n",
i, t->stop - t->start);
}
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox