* Re: [PATCH v3 3/7] net: wwan: t9xx: Add control DMA interface
From: Andrew Lunn @ 2026-06-24 16:15 UTC (permalink / raw)
To: jackbb_wu
Cc: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, linux-kernel, netdev, linux-arm-kernel,
linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-3-73ff03f60c48@compal.com>
> diff --git a/drivers/net/wwan/t9xx/pcie/mtk_cldma.c b/drivers/net/wwan/t9xx/pcie/mtk_cldma.c
> +static inline void mtk_cldma_clr_bd_dsc(struct cldma_drv_info *drv_info,
> + struct bd_dsc *bd_dsc_pool, int nr_bds)
No inline functions in C files. Please let the compiler decide.
> +static int mtk_cldma_reload_rx_skb(struct mtk_md_dev *mdev, struct rxq *rxq,
> + struct rx_req *req)
> +{
> + struct sk_buff *tail = NULL;
> + struct bd_dsc *bd_dsc;
> + int nr_bds;
> + int i, ret;
> +
> + nr_bds = rxq->nr_bds;
> +
> + for (i = 0; i < nr_bds; i++) {
> + bd_dsc = req->bd_dsc_pool + i;
> + bd_dsc->skb = __dev_alloc_skb(req->frag_size, GFP_KERNEL);
> + if (!bd_dsc->skb) {
> + dev_warn((mdev)->dev, "Failed to alloc SKB\n");
You might want to rate limit this, and the other similar messages in
the data path, otherwise it could be a DOS.
> +u32 mtk_cldma_stop_queue(struct cldma_drv_info *drv_info, enum mtk_tx_rx dir, u32 qno)
> +{
> + u32 val = (qno == ALLQ) ? qno : BIT(qno);
> + struct cldma_hw_regs *hw_regs;
> + unsigned int active;
> + int cnt = 0;
> + int base;
> + u32 addr;
> +
> + hw_regs = drv_info->hw_regs;
> + base = drv_info->base_addr;
> +
> + if (dir == DIR_TX)
> + addr = base + hw_regs->reg_cldma_ul_stop_cmd;
> + else
> + addr = base + hw_regs->reg_cldma_so_stop_cmd;
> +
> + mtk_pci_write32(drv_info->mdev, addr, val);
> +
> + do {
> + active = drv_info->drv_ops->cldma_queue_status(drv_info, dir, qno);
> + if (active == LINK_ERROR_VAL || !active)
> + break;
> + usleep_range(WAIT_QUEUE_STOP, 2 * WAIT_QUEUE_STOP);
> + } while (++cnt < 10);
Please use one of the helpers from iopoll.h. Any loops waiting for an
event to happen should use those macros, since the open code
implementation is often wrong.
> +static int mtk_ctrl_trb_srv_init(struct mtk_ctrl_trans *trans)
> +{
> + struct srv_que *srv_que;
> + struct trb_srv *srv;
> + int i, j;
> + int ret;
> +
> + for (i = 0; i < trans->trb_srv_num; i++) {
> + srv = devm_kzalloc(trans->mdev->dev, sizeof(*srv), GFP_KERNEL);
> + if (!srv) {
> + ret = -ENOMEM;
> + goto err_free_srv;
> + }
> +
> + srv->trans = trans;
> + srv->srv_id = i;
> + trans->trb_srv[i] = srv;
> +
> + init_waitqueue_head(&srv->trb_waitq);
> + for (j = 0; j < NR_CLDMA; j++)
> + INIT_LIST_HEAD(&srv->srv_q_list[j]);
> + }
> +
> + for (i = 0; i < NR_CLDMA; i++)
> + for (j = 0; j < HW_QUE_NUM; j++) {
> + if (trans->srv_cfg[i][j] < 0 ||
> + trans->srv_cfg[i][j] >= trans->trb_srv_num)
> + trans->srv_cfg[i][j] = 0;
> + srv_que = devm_kzalloc(trans->mdev->dev, sizeof(*srv_que), GFP_KERNEL);
> + if (!srv_que) {
> + ret = -ENOMEM;
> + goto err_free_srv_que;
> + }
> + srv_que->hif_id = i;
> + srv_que->qno = j;
> + list_add_tail(&srv_que->list,
> + &trans->trb_srv[trans->srv_cfg[i][j]]->srv_q_list[i]);
> + }
> +
> + for (i = 0; i < trans->trb_srv_num; i++) {
> + trans->trb_srv[i]->trb_thread = kthread_run(mtk_ctrl_trb_thread, trans->trb_srv[i],
> + "mtk_trb_srv%d_%s", i,
> + trans->mdev->dev_str);
> + if (IS_ERR(trans->trb_srv[i]->trb_thread)) {
> + ret = PTR_ERR(trans->trb_srv[i]->trb_thread);
> + trans->trb_srv[i]->trb_thread = NULL;
> + goto err_stop_kthread;
> + }
> + }
> +
> + return 0;
> +err_stop_kthread:
> + while (--i >= 0)
> + kthread_stop(trans->trb_srv[i]->trb_thread);
> +err_free_srv_que:
> + for (i = 0; i < trans->trb_srv_num; i++) {
> + for (j = 0; j < NR_CLDMA; j++) {
> + struct srv_que *next_srv_que;
> +
> + list_for_each_entry_safe(srv_que, next_srv_que,
> + &trans->trb_srv[i]->srv_q_list[j], list) {
> + list_del(&srv_que->list);
> + devm_kfree(trans->mdev->dev, srv_que);
It is unusual to see devm_kfree(). Why is it needed?
> +static unsigned int ctrl_port_chl_mtu;
Is this a global variable? Why is it not part of priv?
> +module_param(ctrl_port_chl_mtu, uint, 0644);
> +MODULE_PARM_DESC(ctrl_port_chl_mtu, "This is used to config the ctrl port mtu!\n");
Ah. No modules parameters please. If this is an MTU, why not use the
normal networking interfaces to set the MTU?
Andrew
^ permalink raw reply
* Re: [PATCH v3 2/7] net: wwan: t9xx: Add control plane transaction layer
From: Andrew Lunn @ 2026-06-24 16:00 UTC (permalink / raw)
To: jackbb_wu
Cc: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, linux-kernel, netdev, linux-arm-kernel,
linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-2-73ff03f60c48@compal.com>
> +static int __init mtk_common_drv_init(void)
> +{
> + return 0;
> +}
> +module_init(mtk_common_drv_init);
> +
> +static void __exit mtk_common_drv_exit(void)
> +{
> +}
> +module_exit(mtk_common_drv_exit);
Since these don't do anything, they should not be needed.
> @@ -467,6 +468,7 @@ static u32 mtk_pci_ext_h2d_evt_hw_bits(u32 chs)
>
> SET_HW_BITS(hw_bits, chs, MHCCIF_RC2EP_EVT_DEVICE_RESET,
> DEV_EVT_H2D_DEVICE_RESET);
> +
> return LE32_TO_U32(cpu_to_le32(hw_bits));
Please don't add white space like this. I assume a previous patch
added this code, so move this to that patch.
> @@ -908,13 +910,11 @@ static int mtk_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> struct mtk_md_dev *mdev;
> int ret;
>
> - mdev = devm_kzalloc(dev, sizeof(*mdev), GFP_KERNEL);
> + mdev = mtk_dev_alloc(dev, &pci_hw_ops);
> if (!mdev) {
> ret = -ENOMEM;
> goto log_err;
> }
> - mdev->dev_ops = &pci_hw_ops;
> - mdev->dev = dev;
>
> priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
> if (!priv) {
> @@ -991,7 +991,7 @@ static int mtk_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> free_priv_data:
> devm_kfree(dev, priv);
> free_cntx_data:
> - devm_kfree(dev, mdev);
> + mtk_dev_free(mdev);
Why are you removing devm_ calls?
Andrew
^ permalink raw reply
* Re: [PATCH v3 1/7] net: wwan: t9xx: Add PCIe core
From: Andrew Lunn @ 2026-06-24 15:56 UTC (permalink / raw)
To: jackbb_wu
Cc: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, linux-kernel, netdev, linux-arm-kernel,
linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-1-73ff03f60c48@compal.com>
> From: Jack Wu <jackbb_wu@compal.com>
>
> Registers the T900 device driver with the kernel. Set up all
> the fundamental configurations for the device: PCIe layer,
> Modem Host Cross Core Interface (MHCCIF), Reset Generation
> Unit (RGU), modem common control operations and build
> infrastructure.
>
> * PCIe layer code implements driver probe and removal, MSI-X
> interrupt initialization and de-initialization, and the way
> of resetting the device.
> * MHCCIF provides interrupt channels to communicate events
> such as handshake, PM and port enumeration.
> * RGU provides interrupt channels to generate notifications
> from the device so that the T900 driver could get the
> device reset.
> * Modem common control operations provide the basic read/write
> functions of the device's hardware registers,
> mask/unmask/get/clear functions of the device's interrupt
> registers and inquiry functions of the device's status.
>
> Signed-off-by: Jack Wu <jackbb_wu@compal.com>
> ---
> drivers/net/wwan/Kconfig | 12 +
> drivers/net/wwan/Makefile | 1 +
> drivers/net/wwan/t9xx/Makefile | 10 +
> drivers/net/wwan/t9xx/mtk_dev.h | 108 +++
> drivers/net/wwan/t9xx/pcie/mtk_pci.c | 1049 +++++++++++++++++++++++++
> drivers/net/wwan/t9xx/pcie/mtk_pci.h | 234 ++++++
> drivers/net/wwan/t9xx/pcie/mtk_pci_drv_m9xx.c | 69 ++
> drivers/net/wwan/t9xx/pcie/mtk_pci_reg.h | 70 ++
> 8 files changed, 1553 insertions(+)
>
> diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
> index 88df55d78d90..4cee537c739f 100644
> --- a/drivers/net/wwan/Kconfig
> +++ b/drivers/net/wwan/Kconfig
> @@ -121,6 +121,18 @@ config MTK_T7XX
>
> If unsure, say N.
>
> +config MTK_T9XX
> + tristate "MediaTek PCIe 5G WWAN modem T9xx device"
> + depends on PCI
> + select NET_DEVLINK
> + help
> + Enables MediaTek PCIe based 5G WWAN modem (T9xx series) device.
> +
> + To compile this driver as a module, choose M here: the module will be
> + called mtk_t9xx.
> +
> + If unsure, say N.
> +
> endif # WWAN
>
> endmenu
> diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
> index 3960c0ae2445..7361eef4c472 100644
> --- a/drivers/net/wwan/Makefile
> +++ b/drivers/net/wwan/Makefile
> @@ -14,3 +14,4 @@ obj-$(CONFIG_QCOM_BAM_DMUX) += qcom_bam_dmux.o
> obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
> obj-$(CONFIG_IOSM) += iosm/
> obj-$(CONFIG_MTK_T7XX) += t7xx/
> +obj-$(CONFIG_MTK_T9XX) += t9xx/
> diff --git a/drivers/net/wwan/t9xx/Makefile b/drivers/net/wwan/t9xx/Makefile
> new file mode 100644
> index 000000000000..6f2dd3f91454
> --- /dev/null
> +++ b/drivers/net/wwan/t9xx/Makefile
> @@ -0,0 +1,10 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +
> +ccflags-y += -I$(src)/pcie
> +ccflags-y += -I$(src)
> +
> +obj-$(CONFIG_MTK_T9XX) += mtk_t9xx.o
> +
> +mtk_t9xx-y := \
> + pcie/mtk_pci.o \
> + pcie/mtk_pci_drv_m9xx.o
> diff --git a/drivers/net/wwan/t9xx/mtk_dev.h b/drivers/net/wwan/t9xx/mtk_dev.h
> new file mode 100644
> index 000000000000..8278a0e2875e
> --- /dev/null
> +++ b/drivers/net/wwan/t9xx/mtk_dev.h
> @@ -0,0 +1,108 @@
> +/* SPDX-License-Identifier: GPL-2.0-only
> + *
> + * Copyright (c) 2022, MediaTek Inc.
> + */
> +
> +#ifndef __MTK_DEV_H__
> +#define __MTK_DEV_H__
> +
> +#include <linux/dma-mapping.h>
> +#include <linux/dmapool.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +#define MTK_DEV_STR_LEN 16
> +
> +enum mtk_user_id {
> + MTK_USER_MIN,
> + MTK_USER_CTRL,
> + MTK_USER_DATA,
> + MTK_USER_MAX
> +};
> +
> +enum mtk_dev_evt_h2d {
> + DEV_EVT_H2D_DEVICE_RESET = BIT(2),
> + DEV_EVT_H2D_MAX = BIT(5)
> +};
> +
> +enum mtk_dev_evt_d2h {
> + DEV_EVT_D2H_BOOT_FLOW_SYNC = BIT(4),
> + DEV_EVT_D2H_ASYNC_HS_NOTIFY_SAP = BIT(5),
> + DEV_EVT_D2H_ASYNC_HS_NOTIFY_MD = BIT(6),
> + DEV_EVT_D2H_MAX = BIT(11)
> +};
> +
> +struct mtk_md_dev;
> +
> +struct mtk_dev_ops {
> + u32 (*get_dev_state)(struct mtk_md_dev *mdev);
> + void (*ack_dev_state)(struct mtk_md_dev *mdev, u32 state);
> + u32 (*get_dev_cfg)(struct mtk_md_dev *mdev);
> + int (*register_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt,
> + int (*evt_cb)(u32 status, void *data), void *data);
> + void (*unregister_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
> + void (*mask_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
> + void (*unmask_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
> + void (*clear_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
> + int (*send_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
> +};
> +
> +/* mtk_md_dev defines the structure of MTK modem device */
> +struct mtk_md_dev {
> + struct device *dev;
> + const struct mtk_dev_ops *dev_ops;
> + void *hw_priv;
> + u32 hw_ver;
> + char dev_str[MTK_DEV_STR_LEN];
> +};
> +
> +static inline u32 mtk_dev_get_dev_state(struct mtk_md_dev *mdev)
> +{
> + return mdev->dev_ops->get_dev_state(mdev);
> +}
> +
> +static inline void mtk_dev_ack_dev_state(struct mtk_md_dev *mdev, u32 state)
> +{
> + return mdev->dev_ops->ack_dev_state(mdev, state);
> +}
> +
> +static inline u32 mtk_dev_get_dev_cfg(struct mtk_md_dev *mdev)
> +{
> + return mdev->dev_ops->get_dev_cfg(mdev);
> +}
> +
> +static inline int mtk_dev_register_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt,
> + int (*evt_cb)(u32 status, void *data), void *data)
> +{
> + return mdev->dev_ops->register_dev_evt(mdev, dev_evt, evt_cb, data);
> +}
> +
> +static inline void mtk_dev_unregister_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
> +{
> + mdev->dev_ops->unregister_dev_evt(mdev, dev_evt);
> +}
> +
> +static inline void mtk_dev_mask_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
> +{
> + mdev->dev_ops->mask_dev_evt(mdev, dev_evt);
> +}
> +
> +static inline void mtk_dev_unmask_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
> +{
> + mdev->dev_ops->unmask_dev_evt(mdev, dev_evt);
> +}
> +
> +static inline void mtk_dev_clear_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
> +{
> + mdev->dev_ops->clear_dev_evt(mdev, dev_evt);
> +}
> +
> +static inline int mtk_dev_send_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
> +{
> + return mdev->dev_ops->send_dev_evt(mdev, dev_evt);
> +}
> +
> +#endif /* __MTK_DEV_H__ */
> diff --git a/drivers/net/wwan/t9xx/pcie/mtk_pci.c b/drivers/net/wwan/t9xx/pcie/mtk_pci.c
> new file mode 100644
> index 000000000000..c6a7196fcdd6
> --- /dev/null
> +++ b/drivers/net/wwan/t9xx/pcie/mtk_pci.c
> @@ -0,0 +1,1049 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2022, MediaTek Inc.
> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/aer.h>
> +#include <linux/bitfield.h>
> +#include <linux/debugfs.h>
> +#include <linux/delay.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +
> +#include "mtk_dev.h"
> +#include "mtk_pci.h"
> +#include "mtk_pci_reg.h"
> +
> +#define MTK_PCI_BAR_NUM 6
> +#define MTK_PCI_TRANSPARENT_ATR_SIZE (0x3F)
> +#define MTK_PCI_MINIMUM_ATR_SIZE (0x1000)
> +#define ATR_SIZE_LO32_MASK GENMASK_ULL(31, 0)
> +#define ATR_SIZE_HI32_MASK GENMASK_ULL(63, 32)
> +#define ATR_SIZE_BIAS_FROM_LO32 2
> +#define ATR_ADDR_ALIGN_MASK 0xFFFFF000
> +#define ATR_EN BIT(0)
> +#define ATR_PARAM_OFFSET 16
> +/* Delay between ACPI PXP._OFF and _ON for modem power cycle stabilization */
> +#define MTK_PLDR_POWER_OFF_DELAY_MS 500
> +#define LE32_TO_U32(x) ((__force u32)(__le32)(x))
> +#define SET_HW_BITS(dest, chs, mhccif, dev) \
> + ({ \
> + if ((chs) & (dev)) \
> + (dest) |= FIELD_PREP(mhccif, 1); \
> + })
> +
> +struct mtk_mhccif_cb {
> + struct list_head entry;
> + int (*evt_cb)(u32 status, void *data);
> + void *data;
> + u32 chs;
> +};
> +
> +/**
> + * mtk_pci_setup_atr() - Configure a PCIe address translation rule
> + * @mdev: MTK MD device
> + * @cfg: ATR configuration parameters
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_setup_atr(struct mtk_md_dev *mdev, struct mtk_atr_cfg *cfg)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + u32 addr, val, size_h, size_l;
> + int atr_size, pos, offset;
> +
> + if (cfg->transparent) {
> + /* No address conversion is performed */
> + atr_size = MTK_PCI_TRANSPARENT_ATR_SIZE;
> + } else {
> + if (cfg->size < MTK_PCI_MINIMUM_ATR_SIZE)
> + cfg->size = MTK_PCI_MINIMUM_ATR_SIZE;
> +
> + if (cfg->src_addr & (cfg->size - 1)) {
> + dev_err((mdev)->dev, "Invalid atr src addr is not aligned to size\n");
> + return -EFAULT;
> + }
> +
> + if (cfg->trsl_addr & (cfg->size - 1)) {
> + dev_err((mdev)->dev,
> + "Invalid atr trsl addr is not aligned to size, %llx, %llx\n",
> + cfg->trsl_addr, cfg->size - 1);
> + return -EFAULT;
> + }
> +
> + size_l = FIELD_GET(ATR_SIZE_LO32_MASK, cfg->size);
> + size_h = FIELD_GET(ATR_SIZE_HI32_MASK, cfg->size);
> + pos = ffs(size_l);
> + if (pos) {
> + atr_size = pos - ATR_SIZE_BIAS_FROM_LO32;
> + } else {
> + pos = ffs(size_h);
> + atr_size = pos + 32 - ATR_SIZE_BIAS_FROM_LO32;
> + }
> + }
> +
> + /* Calculate table offset */
> + offset = ATR_PORT_OFFSET * cfg->port + ATR_TABLE_OFFSET * cfg->table;
> + addr = REG_ATR_PCIE_WIN0_T0_SRC_ADDR_MSB + offset;
> + val = (u32)(cfg->src_addr >> 32);
> + mtk_pci_mac_write32(priv, addr, val);
> +
> + addr = REG_ATR_PCIE_WIN0_T0_SRC_ADDR_LSB + offset;
> + val = (u32)(cfg->src_addr & ATR_ADDR_ALIGN_MASK) | (atr_size << 1) | ATR_EN;
> + mtk_pci_mac_write32(priv, addr, val);
> +
> + addr = REG_ATR_PCIE_WIN0_T0_TRSL_ADDR_MSB + offset;
> + val = (u32)(cfg->trsl_addr >> 32);
> + mtk_pci_mac_write32(priv, addr, val);
> +
> + addr = REG_ATR_PCIE_WIN0_T0_TRSL_ADDR_LSB + offset;
> + val = (u32)(cfg->trsl_addr & ATR_ADDR_ALIGN_MASK);
> + mtk_pci_mac_write32(priv, addr, val);
> +
> + /* TRSL_PARAM */
> + addr = REG_ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
> + val = (cfg->trsl_param << ATR_PARAM_OFFSET) | cfg->trsl_id;
> + mtk_pci_mac_write32(priv, addr, val);
> +
> + return 0;
> +}
> +
> +/**
> + * mtk_pci_atr_disable() - Disable all PCIe address translation rules
> + * @priv: MTK PCI private data
> + */
> +void mtk_pci_atr_disable(struct mtk_pci_priv *priv)
> +{
> + int port, tbl, offset;
> + u32 val;
> +
> + /* Disable all ATR table for all ports */
> + for (port = ATR_SRC_PCI_WIN0; port <= ATR_SRC_AXIS_3; port++)
> + for (tbl = 0; tbl < ATR_TABLE_NUM_PER_ATR; tbl++) {
> + /* Calculate table offset */
> + offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * tbl;
> + val = mtk_pci_mac_read32(priv, REG_ATR_PCIE_WIN0_T0_SRC_ADDR_LSB + offset);
> + val = val & (~BIT(0));
> + /* Disable table by SRC_ADDR_L */
> + mtk_pci_mac_write32(priv, REG_ATR_PCIE_WIN0_T0_SRC_ADDR_LSB + offset, val);
> + }
> +}
> +
> +static void mtk_pci_set_msix_merged(struct mtk_pci_priv *priv, int irq_cnt)
> +{
> + mtk_pci_mac_write32(priv, REG_PCIE_CFG_MSIX, ffs(irq_cnt) * 2 - 1);
> +}
> +
> +/**
> + * mtk_pci_get_dev_state() - Read the device state from the modem
> + * @mdev: MTK MD device
> + *
> + * Return: Device state value.
> + */
> +u32 mtk_pci_get_dev_state(struct mtk_md_dev *mdev)
> +{
> + return mtk_pci_mac_read32(mdev->hw_priv, REG_PCIE_DEBUG_DUMMY_7);
> +}
> +
> +/**
> + * mtk_pci_ack_dev_state() - Acknowledge the device state to the modem
> + * @mdev: MTK MD device
> + * @state: State value to acknowledge
> + */
> +void mtk_pci_ack_dev_state(struct mtk_md_dev *mdev, u32 state)
> +{
> + mtk_pci_mac_write32(mdev->hw_priv, REG_PCIE_DEBUG_DUMMY_7, state);
> +}
> +
> +/**
> + * mtk_pci_get_irq_id() - Map an IRQ source to its hardware IRQ ID
> + * @mdev: MTK MD device
> + * @irq_src: IRQ source enum
> + *
> + * Return: IRQ ID on success, -EINVAL on failure.
> + */
> +int mtk_pci_get_irq_id(struct mtk_md_dev *mdev, enum mtk_irq_src irq_src)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + const int *irq_tbl = priv->cfg->irq_tbl;
> + int irq_id = -EINVAL;
> +
> + if (irq_src > MTK_IRQ_SRC_MIN && irq_src < MTK_IRQ_SRC_MAX) {
> + irq_id = irq_tbl[irq_src];
> + if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX)
> + irq_id = -EINVAL;
> + }
> +
> + return irq_id;
> +}
> +
> +/**
> + * mtk_pci_get_virq_id() - Get the Linux virtual IRQ for a hardware IRQ ID
> + * @mdev: MTK MD device
> + * @irq_id: Hardware IRQ ID
> + *
> + * Return: Virtual IRQ number on success, negative error code on failure.
> + */
> +int mtk_pci_get_virq_id(struct mtk_md_dev *mdev, int irq_id)
> +{
> + struct pci_dev *pdev = to_pci_dev(mdev->dev);
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + if (!priv->irq_cnt || irq_id < 0)
> + return -EINVAL;
> +
> + return pci_irq_vector(pdev, irq_id % priv->irq_cnt);
> +}
> +
> +/**
> + * mtk_pci_register_irq() - Register a callback for a hardware IRQ
> + * @mdev: MTK MD device
> + * @irq_id: Hardware IRQ ID
> + * @irq_cb: Callback function
> + * @data: Private data passed to callback
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_register_irq(struct mtk_md_dev *mdev, int irq_id,
> + int (*irq_cb)(int irq_id, void *data), void *data)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + if ((irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX) || !irq_cb)
> + return -EINVAL;
> +
> + if (priv->irq_cb_list[irq_id]) {
> + dev_err((mdev)->dev,
> + "Unable to register irq, irq_id=%d, it's already been register by %ps.\n",
> + irq_id, priv->irq_cb_list[irq_id]);
> + return -EFAULT;
> + }
> + priv->irq_cb_list[irq_id] = irq_cb;
> + priv->irq_cb_data[irq_id] = data;
> +
> + return 0;
> +}
> +
> +/**
> + * mtk_pci_unregister_irq() - Unregister a hardware IRQ callback
> + * @mdev: MTK MD device
> + * @irq_id: Hardware IRQ ID
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_unregister_irq(struct mtk_md_dev *mdev, int irq_id)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX)
> + return -EINVAL;
> +
> + if (!priv->irq_cb_list[irq_id]) {
> + dev_err((mdev)->dev, "irq_id=%d has not been registered\n", irq_id);
> + return -EFAULT;
> + }
> + priv->irq_cb_list[irq_id] = NULL;
> + priv->irq_cb_data[irq_id] = NULL;
> +
> + return 0;
> +}
> +
> +/**
> + * mtk_pci_mask_irq() - Mask (disable) a hardware IRQ
> + * @mdev: MTK MD device
> + * @irq_id: Hardware IRQ ID
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_mask_irq(struct mtk_md_dev *mdev, int irq_id)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX ||
> + priv->irq_type != PCI_IRQ_MSIX) {
> + dev_err(mdev->dev, "Failed to mask irq: input irq_id=%d\n", irq_id);
> + return -EINVAL;
> + }
> +
> + mtk_pci_mac_write32(priv, REG_IMASK_HOST_MSIX_CLR_GRP0_0, BIT(irq_id));
> +
> + return 0;
> +}
> +
> +/**
> + * mtk_pci_unmask_irq() - Unmask (enable) a hardware IRQ
> + * @mdev: MTK MD device
> + * @irq_id: Hardware IRQ ID
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_unmask_irq(struct mtk_md_dev *mdev, int irq_id)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX ||
> + priv->irq_type != PCI_IRQ_MSIX) {
> + dev_err(mdev->dev, "Failed to unmask irq: input irq_id=%d\n", irq_id);
> + return -EINVAL;
> + }
> +
> + mtk_pci_mac_write32(priv, REG_IMASK_HOST_MSIX_SET_GRP0_0, BIT(irq_id));
> +
> + return 0;
> +}
> +
> +/**
> + * mtk_pci_clear_irq() - Clear (acknowledge) a hardware IRQ
> + * @mdev: MTK MD device
> + * @irq_id: Hardware IRQ ID
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_clear_irq(struct mtk_md_dev *mdev, int irq_id)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX ||
> + priv->irq_type != PCI_IRQ_MSIX) {
> + dev_err(mdev->dev, "Failed to clear irq: input irq_id=%d\n", irq_id);
> + return -EINVAL;
> + }
> +
> + mtk_pci_mac_write32(priv, REG_MSIX_ISTATUS_HOST_GRP0_0, BIT(irq_id));
> +
> + return 0;
> +}
> +
> +static u32 mtk_pci_ext_d2h_evt_hw_bits(u32 chs)
> +{
> + u32 hw_bits = 0;
> +
> + SET_HW_BITS(hw_bits, chs, MHCCIF_EP2RC_EVT_BOOT_FLOW_SYNC,
> + DEV_EVT_D2H_BOOT_FLOW_SYNC);
> + SET_HW_BITS(hw_bits, chs, MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_SAP,
> + DEV_EVT_D2H_ASYNC_HS_NOTIFY_SAP);
> + SET_HW_BITS(hw_bits, chs, MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_MD,
> + DEV_EVT_D2H_ASYNC_HS_NOTIFY_MD);
> +
> + return LE32_TO_U32(cpu_to_le32(hw_bits));
> +}
> +
> +static u32 mtk_pci_ext_d2h_evt_chs(u32 hw_bits)
> +{
> + u32 chs = 0;
> +
> + if (!hw_bits)
> + return chs;
> +
> + chs = FIELD_PREP(DEV_EVT_D2H_BOOT_FLOW_SYNC,
> + FIELD_GET(MHCCIF_EP2RC_EVT_BOOT_FLOW_SYNC, hw_bits)) |
> + FIELD_PREP(DEV_EVT_D2H_ASYNC_HS_NOTIFY_SAP,
> + FIELD_GET(MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_SAP, hw_bits)) |
> + FIELD_PREP(DEV_EVT_D2H_ASYNC_HS_NOTIFY_MD,
> + FIELD_GET(MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_MD, hw_bits));
> +
> + return chs;
> +}
> +
> +/**
> + * mtk_pci_register_ext_evt() - Register a callback for MHCCIF device events
> + * @mdev: MTK MD device
> + * @chs: Bitmask of event channels to register
> + * @evt_cb: Callback function
> + * @data: Private data passed to callback
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_register_ext_evt(struct mtk_md_dev *mdev, u32 chs,
> + int (*evt_cb)(u32 status, void *data), void *data)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + struct mtk_mhccif_cb *cb;
> + int ret = 0;
> +
> + if (!chs || !evt_cb)
> + return -EINVAL;
> +
> + spin_lock_bh(&priv->mhccif_lock);
> + list_for_each_entry(cb, &priv->mhccif_cb_list, entry) {
> + if (cb->chs & chs) {
> + ret = -EFAULT;
> + dev_err((mdev)->dev,
> + "Unable to register evt, intersection: chs=0x%08x&0x%08x cb=%ps\n",
> + chs, cb->chs, cb->evt_cb);
> + goto err_spin_unlock;
> + }
> + }
> + cb = devm_kzalloc(mdev->dev, sizeof(*cb), GFP_ATOMIC);
> + if (!cb) {
> + ret = -ENOMEM;
> + goto err_spin_unlock;
> + }
> + cb->evt_cb = evt_cb;
> + cb->data = data;
> + cb->chs = chs;
> + list_add_tail(&cb->entry, &priv->mhccif_cb_list);
> +err_spin_unlock:
> + spin_unlock_bh(&priv->mhccif_lock);
> +
> + return ret;
> +}
> +
> +/**
> + * mtk_pci_unregister_ext_evt() - Unregister an MHCCIF device event callback
> + * @mdev: MTK MD device
> + * @chs: Bitmask of event channels to unregister
> + */
> +void mtk_pci_unregister_ext_evt(struct mtk_md_dev *mdev, u32 chs)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + struct mtk_mhccif_cb *cb, *next;
> +
> + if (!chs)
> + return;
> +
> + spin_lock_bh(&priv->mhccif_lock);
> + list_for_each_entry_safe(cb, next, &priv->mhccif_cb_list, entry) {
> + if (cb->chs == chs) {
> + list_del(&cb->entry);
> + devm_kfree(mdev->dev, cb);
> + goto out;
> + }
> + }
> + dev_warn((mdev)->dev,
> + "Unable to unregister evt, no chs=0x%08x has been registered.\n", chs);
> +out:
> + spin_unlock_bh(&priv->mhccif_lock);
> +}
> +
> +/**
> + * mtk_pci_mask_ext_evt() - Mask (disable) MHCCIF device events
> + * @mdev: MTK MD device
> + * @chs: Bitmask of event channels to mask
> + */
> +void mtk_pci_mask_ext_evt(struct mtk_md_dev *mdev, u32 chs)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + u32 hw_bits = mtk_pci_ext_d2h_evt_hw_bits(chs);
> +
> + mtk_pci_write32(mdev, priv->cfg->mhccif_rc_base_addr +
> + MHCCIF_EP2RC_SW_INT_EAP_MASK_SET, hw_bits);
> +}
> +
> +/**
> + * mtk_pci_unmask_ext_evt() - Unmask (enable) MHCCIF device events
> + * @mdev: MTK MD device
> + * @chs: Bitmask of event channels to unmask
> + */
> +void mtk_pci_unmask_ext_evt(struct mtk_md_dev *mdev, u32 chs)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + u32 hw_bits = mtk_pci_ext_d2h_evt_hw_bits(chs);
> +
> + mtk_pci_write32(mdev, priv->cfg->mhccif_rc_base_addr +
> + MHCCIF_EP2RC_SW_INT_EAP_MASK_CLR, hw_bits);
> +}
> +
> +/**
> + * mtk_pci_clear_ext_evt() - Clear (acknowledge) MHCCIF device events
> + * @mdev: MTK MD device
> + * @chs: Bitmask of event channels to clear
> + */
> +void mtk_pci_clear_ext_evt(struct mtk_md_dev *mdev, u32 chs)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + u32 hw_bits = mtk_pci_ext_d2h_evt_hw_bits(chs);
> +
> + mtk_pci_write32(mdev, priv->cfg->mhccif_rc_base_addr +
> + MHCCIF_EP2RC_SW_INT_ACK, hw_bits);
> +}
> +
> +static u32 mtk_pci_ext_h2d_evt_hw_bits(u32 chs)
> +{
> + u32 hw_bits = 0;
> +
> + SET_HW_BITS(hw_bits, chs, MHCCIF_RC2EP_EVT_DEVICE_RESET,
> + DEV_EVT_H2D_DEVICE_RESET);
> + return LE32_TO_U32(cpu_to_le32(hw_bits));
> +}
> +
> +/**
> + * mtk_pci_send_ext_evt() - Send an MHCCIF event to the modem
> + * @mdev: MTK MD device
> + * @ch: Event channel to trigger (must be a single bit)
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_send_ext_evt(struct mtk_md_dev *mdev, u32 ch)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> + u32 rc_base, hw_bits;
> +
> + rc_base = priv->cfg->mhccif_rc_base_addr;
> +
> + /* Only allow one ch to be triggered at a time */
> + if (!is_power_of_2(ch)) {
> + dev_err((mdev)->dev, "Unsupported ext evt ch=0x%08x\n", ch);
> + return -EINVAL;
> + }
> +
> + hw_bits = mtk_pci_ext_h2d_evt_hw_bits(ch);
> + mtk_pci_write32(mdev, rc_base + MHCCIF_RC2EP_SW_BSY, hw_bits);
> + mtk_pci_write32(mdev, rc_base + MHCCIF_RC2EP_SW_TCHNUM, ffs(hw_bits) - 1);
> + return 0;
> +}
> +
> +static u32 mtk_pci_get_ext_evt_hw_status(struct mtk_md_dev *mdev)
> +{
> + struct mtk_pci_priv *priv = mdev->hw_priv;
> +
> + return mtk_pci_read32(mdev, priv->cfg->mhccif_rc_base_addr +
> + MHCCIF_EP2RC_SW_INT_STS);
> +}
> +
> +/**
> + * mtk_pci_fldr() - Perform a Function Level Device Reset via ACPI _RST
> + * @mdev: MTK MD device
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int mtk_pci_fldr(struct mtk_md_dev *mdev)
> +{
> +#ifdef CONFIG_ACPI
...
> +#else /* !CONFIG_ACPI */
> + dev_err((mdev)->dev, "Unsupported, CONFIG ACPI hasn't been set to 'y'\n");
Why not just have the Kconfig depend on ACPI?
> + if (ret) {
> + dev_err((mdev)->dev, "Failed to register mhccif_irq callback\n");
Why the () around mdev?
Andrew
---
pw-bot: cr
^ permalink raw reply
* Re: [PATCH v6 6/8] Documentation: bootconfig: document build-time cmdline rendering
From: Breno Leitao @ 2026-06-24 15:50 UTC (permalink / raw)
To: Masami Hiramatsu
Cc: Andrew Morton, Nathan Chancellor, paulmck, Nicolas Schier,
Nick Desaulniers, Bill Wendling, Justin Stitt, Jonathan Corbet,
Shuah Khan, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, x86, H. Peter Anvin, linux-kernel,
linux-trace-kernel, linux-kbuild, bpf, llvm, linux-doc,
kernel-team
In-Reply-To: <20260624174737.a4862dcd86f3d746b788d197@kernel.org>
On Wed, Jun 24, 2026 at 05:47:37PM +0900, Masami Hiramatsu wrote:
> On Tue, 23 Jun 2026 09:15:33 -0700
> >
> > +The option requires ``CONFIG_BOOT_CONFIG_EMBED=y``, a non-empty
> > +``CONFIG_BOOT_CONFIG_EMBED_FILE``, and an architecture that selects
> > +``CONFIG_ARCH_SUPPORTS_CMDLINE_FROM_BOOTCONFIG``. Currently only x86
> > +selects it; on other architectures the embedded bootconfig still works,
> > +but only through the late runtime parser.
>
> As commented by Sashiko, here we need to mention that this option requires
> CONFIG_CMDLINE to be empty. This means user can NOT set both option
> at once (This also means user doesn't have to worry about configuration
> conflicts.)
Ack! I will update.
--breno
^ permalink raw reply
* Re: [PATCH v3 1/2] dt-bindings: iio: dac: Add AD5529R
From: David Lechner @ 2026-06-24 15:13 UTC (permalink / raw)
To: Janani Sunil, Jonathan Cameron, Rodrigo Alencar
Cc: Nuno Sá, Conor Dooley, Janani Sunil, Lars-Peter Clausen,
Michael Hennerich, Nuno Sá, Andy Shevchenko, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Philipp Zabel, Jonathan Corbet,
Shuah Khan, linux-iio, devicetree, linux-kernel, linux-doc,
Mark Brown
In-Reply-To: <9abc53d0-432f-48fc-9e21-4d9a3c5e129f@gmail.com>
>>>>>
>>>>>> But yes, I do feel that the whole feature is for aggregation so seeing
>>>>>> one device with 32 channels is the expectation here? Rather than seeing
>>>>>> two devices with 16 channels.
>>>>> Yes, I think aggregation is the whole point there... so that the IIO driver
>>>>> is multi-device-aware.
>>>> Which makes me feel that different pins per device might be possible
>>>> from an HW point of view but does not make much sense. For example, for
>>>> the buffer example I would expect LDAC to be shared between all the
>>>> devices.
>>> That is why I would still suggest the multi-dac node in the middle...
>>> the parent node can hold shared resources, while the dac children can
>>> have their own, overriding or inheriting stuff.
>>>
>> Before going down that path I'd want confirmation this is something we
>> actually think anyone will build.
>>
>> Jonathan
>
> To directly answer your question- we currently do not have a platform that supports multi device topology with independent supplies or reset lines.
> Given that, I agree to start with the parallel wiring assumption and defer per chip resource variation under there is a solid use case. I will also drop the "adi,resolution" proposal and proceed with "adi,device-addrs" in the AD5529R binding.
> With all of the above, the proposed binding for the multi-device follow up series would look like:
>
>
> dac@0 {
> compatible = "adi,ad5529r-16";
> reg = <0>;
> adi,device-addrs = <0 1>;
> reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
> vdd-supply = <&vdd_reg>;
> hvdd-supply = <&hvdd_reg>;
>
> channel@0 { reg = <0>; adi,output-range-microvolt = <0 5000000>; };
> channel@16 { reg = <16>; adi,output-range-microvolt = <0 40000000>; };
> };
>
> Does this look reasonable to everyone?
>
> Regards,
> Janani Sunil
>
LGTM. Seems like the simplest way to handle it and should cover most
use cases.
^ permalink raw reply
* Re: [PATCH v8 04/46] KVM: Decouple kvm_has_arch_private_mem from CONFIG_KVM_VM_MEMORY_ATTRIBUTES
From: Sean Christopherson @ 2026-06-24 15:12 UTC (permalink / raw)
To: Ackerley Tng
Cc: Binbin Wu, aik, andrew.jones, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
Kairui Song, Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen,
Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt,
Kiryl Shutsemau, Baoquan He, Jason Gunthorpe, Vlastimil Babka,
kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco
In-Reply-To: <CAEvNRgGF+O7r-YHqcLp-ZgoXTCbqjuUhpOdD5eE5w2wu3YYYpw@mail.gmail.com>
On Tue, Jun 23, 2026, Ackerley Tng wrote:
> Binbin Wu <binbin.wu@linux.intel.com> writes:
>
> > On 6/19/2026 8:31 AM, Ackerley Tng via B4 Relay wrote:
> >> From: Sean Christopherson <seanjc@google.com>
> >>
> >> When memory attributes become trackable in guest_memfd, the concept of
> >> having private memory is no longer dependent on
> >> CONFIG_KVM_VM_MEMORY_ATTRIBUTES.
> >>
> >> With this, on x86, kvm_arch_has_private_mem() is defined if some CoCo
> >> platform support (or the testing CONFIG_KVM_SW_PROTECTED_VM) is compiled
> >> in.
> >>
> >> Signed-off-by: Sean Christopherson <seanjc@google.com>
> >> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> >> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> >
> > Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
> >
> > One nit below.
> >
> >> ---
> >> arch/x86/include/asm/kvm_host.h | 4 +++-
> >> include/linux/kvm_host.h | 2 +-
> >> 2 files changed, 4 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >> index 8e8eb8a5e8a6b..1bde67cf6eb0e 100644
> >> --- a/arch/x86/include/asm/kvm_host.h
> >> +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -2394,7 +2394,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
> >> int tdp_max_root_level, int tdp_huge_page_level);
> >>
> >>
> >> -#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
> >> +#if defined(CONFIG_KVM_SW_PROTECTED_VM) || \
> >> + defined(CONFIG_KVM_INTEL_TDX) || \
> >> + defined(CONFIG_KVM_AMD_SEV)
> >
> > Nit:
> > Vertically align the defined(XXX) statements for better readability?
> >
>
> Sean had this aligned with spaces, and checkpatch complained about
checkpatch is a tool, it is neither omniscient nor authoritative. And for things
like this, the *entire* purpose for rules/guildlines like "no tabs after spaces"
is to help ensure the code is easier to read, e.g. doesn't end up with wonky
formatting when viewed in certain editors or whatever. So, ignore checkpatch if
it complains about formatting that is visually superior to what makes checkpatch
happy.
> having no spaces before tabs, so I switched it to tabs instead since I
> don't think alignment like that is officially documented either way.
This exact case may not be "officially" documented, but the general gist is in
Documentation/process/maintainer-tip.rst:
When splitting function declarations or function calls, then please align
the first argument in the second line with the first argument in the first
line::
And there is lots and lots of prior art on-list (from me and others) that is more
or less as good as official documentation.
> Either way is fine :)
Please restore the alignment.
^ permalink raw reply
* Re: [PATCH v3 1/2] dt-bindings: iio: dac: Add AD5529R
From: Janani Sunil @ 2026-06-24 15:01 UTC (permalink / raw)
To: Jonathan Cameron, Rodrigo Alencar
Cc: Nuno Sá, Conor Dooley, Janani Sunil, Lars-Peter Clausen,
Michael Hennerich, David Lechner, Nuno Sá, Andy Shevchenko,
Rob Herring, Krzysztof Kozlowski, Conor Dooley, Philipp Zabel,
Jonathan Corbet, Shuah Khan, linux-iio, devicetree, linux-kernel,
linux-doc, Mark Brown
In-Reply-To: <20260623155732.318f34f2@jic23-huawei>
On 6/23/26 16:57, Jonathan Cameron wrote:
> On Tue, 23 Jun 2026 09:09:14 +0100
> Rodrigo Alencar <455.rodrigo.alencar@gmail.com> wrote:
>
>> On 22/06/26 13:20, Nuno Sá wrote:
>>> On Mon, Jun 22, 2026 at 12:51:20PM +0100, Rodrigo Alencar wrote:
>>>> On 22/06/26 11:29, Nuno Sá wrote:
>>>>> On Mon, Jun 22, 2026 at 10:24:05AM +0100, Rodrigo Alencar wrote:
>>>>>> On 21/06/26 15:33, Jonathan Cameron wrote:
>>>>>>> On Fri, 19 Jun 2026 16:54:11 +0100
>>>>>>> Nuno Sá <noname.nuno@gmail.com> wrote:
>>>>>>>
>>>>>>>> On Fri, Jun 19, 2026 at 03:12:07PM +0100, Conor Dooley wrote:
>>>>>>>>> On Fri, Jun 19, 2026 at 02:01:08PM +0100, Nuno Sá wrote:
>>>>>>>>>> On Fri, Jun 19, 2026 at 12:40:54PM +0100, Conor Dooley wrote:
>>>>>>>>>>> On Fri, Jun 19, 2026 at 12:36:55PM +0100, Conor Dooley wrote:
>>>>>>>>>>>> On Fri, Jun 19, 2026 at 12:33:11PM +0200, Janani Sunil wrote:
>>>>>>>>>>>>> On 6/14/26 21:44, Jonathan Cameron wrote:
>>>>>>>>>>>>>> On Tue, 9 Jun 2026 16:47:23 +0200
>>>>>>>>>>>>>> Janani Sunil <jan.sun97@gmail.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 5/26/26 15:11, Rodrigo Alencar wrote:
>>>>>>>>>>>>>>>> On 26/05/19 05:42PM, Janani Sunil wrote:
>>>>>>>>>>>>>>>>> Devicetree bindings for AD5529R 16 channel 12/16 bit high voltage,
>>>>>>>>>>>>>>>>> buffered voltage output digital-to-analog converter (DAC) with an
>>>>>>>>>>>>>>>>> integrated precision reference.
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> Probably others may comment on that, but...
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> This parent node may support device addressing for multi-device support through
>>>>>>>>>>>>>>>> those ID pins. I suppose that each device may have its own power supplies or
>>>>>>>>>>>>>>>> other resources like the toggle pins or reset and enable.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> That way I suppose that an example would look like...
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +patternProperties:
>>>>>>>>>>>>>>>>> + "^channel@([0-9]|1[0-5])$":
>>>>>>>>>>>>>>>>> + type: object
>>>>>>>>>>>>>>>>> + description: Child nodes for individual channel configuration
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + properties:
>>>>>>>>>>>>>>>>> + reg:
>>>>>>>>>>>>>>>>> + description: Channel number.
>>>>>>>>>>>>>>>>> + minimum: 0
>>>>>>>>>>>>>>>>> + maximum: 15
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + adi,output-range-microvolt:
>>>>>>>>>>>>>>>>> + description: |
>>>>>>>>>>>>>>>>> + Output voltage range for this channel as [min, max] in microvolts.
>>>>>>>>>>>>>>>>> + If not specified, defaults to 0V to 5V range.
>>>>>>>>>>>>>>>>> + oneOf:
>>>>>>>>>>>>>>>>> + - items:
>>>>>>>>>>>>>>>>> + - const: 0
>>>>>>>>>>>>>>>>> + - enum: [5000000, 10000000, 20000000, 40000000]
>>>>>>>>>>>>>>>>> + - items:
>>>>>>>>>>>>>>>>> + - const: -5000000
>>>>>>>>>>>>>>>>> + - const: 5000000
>>>>>>>>>>>>>>>>> + - items:
>>>>>>>>>>>>>>>>> + - const: -10000000
>>>>>>>>>>>>>>>>> + - const: 10000000
>>>>>>>>>>>>>>>>> + - items:
>>>>>>>>>>>>>>>>> + - const: -15000000
>>>>>>>>>>>>>>>>> + - const: 15000000
>>>>>>>>>>>>>>>>> + - items:
>>>>>>>>>>>>>>>>> + - const: -20000000
>>>>>>>>>>>>>>>>> + - const: 20000000
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + required:
>>>>>>>>>>>>>>>>> + - reg
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + additionalProperties: false
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +required:
>>>>>>>>>>>>>>>>> + - compatible
>>>>>>>>>>>>>>>>> + - reg
>>>>>>>>>>>>>>>>> + - vdd-supply
>>>>>>>>>>>>>>>>> + - avdd-supply
>>>>>>>>>>>>>>>>> + - hvdd-supply
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +dependencies:
>>>>>>>>>>>>>>>>> + spi-cpha: [ spi-cpol ]
>>>>>>>>>>>>>>>>> + spi-cpol: [ spi-cpha ]
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +allOf:
>>>>>>>>>>>>>>>>> + - $ref: /schemas/spi/spi-peripheral-props.yaml#
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +unevaluatedProperties: false
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +examples:
>>>>>>>>>>>>>>>>> + - |
>>>>>>>>>>>>>>>>> + #include <dt-bindings/gpio/gpio.h>
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + spi {
>>>>>>>>>>>>>>>>> + #address-cells = <1>;
>>>>>>>>>>>>>>>>> + #size-cells = <0>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + dac@0 {
>>>>>>>>>>>>>>>>> + compatible = "adi,ad5529r-16";
>>>>>>>>>>>>>>>>> + reg = <0>;
>>>>>>>>>>>>>>>>> + spi-max-frequency = <25000000>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + vdd-supply = <&vdd_regulator>;
>>>>>>>>>>>>>>>>> + avdd-supply = <&avdd_regulator>;
>>>>>>>>>>>>>>>>> + hvdd-supply = <&hvdd_regulator>;
>>>>>>>>>>>>>>>>> + hvss-supply = <&hvss_regulator>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + #address-cells = <1>;
>>>>>>>>>>>>>>>>> + #size-cells = <0>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + channel@0 {
>>>>>>>>>>>>>>>>> + reg = <0>;
>>>>>>>>>>>>>>>>> + adi,output-range-microvolt = <0 5000000>;
>>>>>>>>>>>>>>>>> + };
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + channel@1 {
>>>>>>>>>>>>>>>>> + reg = <1>;
>>>>>>>>>>>>>>>>> + adi,output-range-microvolt = <(-10000000) 10000000>;
>>>>>>>>>>>>>>>>> + };
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> + channel@2 {
>>>>>>>>>>>>>>>>> + reg = <2>;
>>>>>>>>>>>>>>>>> + adi,output-range-microvolt = <0 40000000>;
>>>>>>>>>>>>>>>>> + };
>>>>>>>>>>>>>>>>> + };
>>>>>>>>>>>>>>>>> + };
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> spi {
>>>>>>>>>>>>>>>> #address-cells = <1>;
>>>>>>>>>>>>>>>> #size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> multi-dac@0 {
>>>>>>>>>>>>>>>> compatible = "adi,ad5529r-16";
>>>>>>>>>>>>>>>> reg = <0>;
>>>>>>>>>>>>>>>> spi-max-frequency = <25000000>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #address-cells = <1>;
>>>>>>>>>>>>>>>> #size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> dac@0 {
>>>>>>>>>>>>>>>> reg = <0>;
>>>>>>>>>>>>>>>> vdd-supply = <&vdd_regulator>;
>>>>>>>>>>>>>>>> avdd-supply = <&avdd_regulator>;
>>>>>>>>>>>>>>>> hvdd-supply = <&hvdd_regulator>;
>>>>>>>>>>>>>>>> hvss-supply = <&hvss_regulator>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #address-cells = <1>;
>>>>>>>>>>>>>>>> #size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> channel@0 {
>>>>>>>>>>>>>>>> reg = <0>;
>>>>>>>>>>>>>>>> adi,output-range-microvolt = <0 5000000>;
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> channel@1 {
>>>>>>>>>>>>>>>> reg = <1>;
>>>>>>>>>>>>>>>> adi,output-range-microvolt = <(-10000000) 10000000>;
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> channel@2 {
>>>>>>>>>>>>>>>> reg = <2>;
>>>>>>>>>>>>>>>> adi,output-range-microvolt = <0 40000000>;
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> dac@1 {
>>>>>>>>>>>>>>>> reg = <1>;
>>>>>>>>>>>>>>>> vdd-supply = <&vdd_regulator>;
>>>>>>>>>>>>>>>> avdd-supply = <&avdd_regulator>;
>>>>>>>>>>>>>>>> hvdd-supply = <&hvdd_regulator>;
>>>>>>>>>>>>>>>> hvss-supply = <&hvss_regulator>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> reset-gpios = <&gpio0 88 GPIO_ACTIVE_LOW>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #address-cells = <1>;
>>>>>>>>>>>>>>>> #size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> channel@0 {
>>>>>>>>>>>>>>>> reg = <0>;
>>>>>>>>>>>>>>>> adi,output-range-microvolt = <0 5000000>;
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> channel@1 {
>>>>>>>>>>>>>>>> reg = <1>;
>>>>>>>>>>>>>>>> adi,output-range-microvolt = <(-10000000) 10000000>;
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>> };
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> then you might need something like:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> patternProperties:
>>>>>>>>>>>>>>>> "^dac@[0-3]$":
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> and put most of the things under this node pattern.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> So the main driver that you're putting together might need to handle up to four instances.
>>>>>>>>>>>>>>>> Even if your current driver cannot handle this, the dt-bindings might need cover that.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Need to double check if each dac node needs a separate compatible, so you would maybe populate
>>>>>>>>>>>>>>>> a platform data to be shared with the child nodes, which would be a separate driver.
>>>>>>>>>>>>>>>> (not sure if it would make sense to mix and match ad5529r-16 and ad5529r-12).
>>>>>>>>>>>>>>> Hi Rodrigo,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thank you for looking at this.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> For now, I would prefer to keep the binding scoped to a single AD5529R device instance. The current
>>>>>>>>>>>>>>> hardware/use case we have only needs one device node and the driver is written around that model as well.
>>>>>>>>>>>>>>> While the device addressing pins could allow multi-device topology, we do not have an actual platform using
>>>>>>>>>>>>>>> that configuration at the moment, so I would prefer not to introduce an extra parent/child binding structure
>>>>>>>>>>>>>>> speculatively without a validating use case.
>>>>>>>>>>>>>> Interesting feature - kind of similar to address control on a typical i2c bus device, or
>>>>>>>>>>>>>> looking at it another way a kind of distributed SPI mux.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Challenge of a binding is we need to anticipate the future. So I think we do need something
>>>>>>>>>>>>>> like Rodrigo is suggesting even if we only (for now) support a single instance in the driver.
>>>>>>>>>>>>>> That would leave the path open to supporting the addressing at a later date.
>>>>>>>>>>>>>> An alternative might be to look at it like a chained device setup. In those we pretend there
>>>>>>>>>>>>>> is just one device with a lot of channels etc. The snag is that here things are more loosely
>>>>>>>>>>>>>> coupled whereas for those devices it tends to be you have to read / write the same register
>>>>>>>>>>>>>> in all devices in the chain as one big SPI message.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> +CC Mark Brown as he may know of some precedence for this feature. For his reference..
>>>>>>>>>>>>>> - Each of these device has 2 ID pins. The SPI transfers have to contain the 2 bit
>>>>>>>>>>>>>> value that matches that or they are ignored. Thus a single bus + 1 chip select can
>>>>>>>>>>>>>> be used to talk to 4 devices. Question is what that looks like in device tree + I guess
>>>>>>>>>>>>>> longer term how to support it cleanly in SPI.
>>>>>>>>>>>> I'd swear I have seen this before, from some Microchip devices. Let me
>>>>>>>>>>>> see if I can find what I am thinking of...
>>>>>>>>>>>
>>>>>>>>>>> microchip,mcp3911 and microchip,mcp3564 both seem to do this with
>>>>>>>>>>> slightly different properties.
>>>>>>>>>>>
>>>>>>>>>>> microchip,device-addr:
>>>>>>>>>>> description: Device address when multiple MCP3911 chips are present on the same SPI bus.
>>>>>>>>>>> $ref: /schemas/types.yaml#/definitions/uint32
>>>>>>>>>>> enum: [0, 1, 2, 3]
>>>>>>>>>>> default: 0
>>>>>>>>>>>
>>>>>>>>>>> and
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> microchip,hw-device-address:
>>>>>>>>>>> $ref: /schemas/types.yaml#/definitions/uint32
>>>>>>>>>>> minimum: 0
>>>>>>>>>>> maximum: 3
>>>>>>>>>>> description:
>>>>>>>>>>> The address is set on a per-device basis by fuses in the factory,
>>>>>>>>>>> configured on request. If not requested, the fuses are set for 0x1.
>>>>>>>>>>> The device address is part of the device markings to avoid
>>>>>>>>>>> potential confusion. This address is coded on two bits, so four possible
>>>>>>>>>>> addresses are available when multiple devices are present on the same
>>>>>>>>>>> SPI bus with only one Chip Select line for all devices.
>>>>>>>>>>> Each device communication starts by a CS falling edge, followed by the
>>>>>>>>>>> clocking of the device address (BITS[7:6] - top two bits of COMMAND BYTE
>>>>>>>>>>> which is first one on the wire).
>>>>>>>>>>>
>>>>>>>>>>> This sounds exactly like the sort of feature that you're dealing with
>>>>>>>>>>> here?
>>>>>>>>>>>
>>>>>>>>>> The core idea yes but for this chip, things are a bit more annoying (but
>>>>>>>>>> Janani can correct me if I'm wrong). Here, each device can, in theory,
>>>>>>>>>> have it's own supplies, pins and at the very least, channels with maybe
>>>>>>>>>> different scales. That is why Janani is proposing dac nodes. Given I
>>>>>>>>>> honestly don't like much of that "adi,ad5529r-bus" compatible I wondered
>>>>>>>>>> about solving this at the spi level.
>>>>>>>>>>
>>>>>>>>>> Ah and to make it more annoying, we can also mix 12 and 16 bits variants
>>>>>>>>>> together in the same bus.
>>>>>>>>> I'm definitely missing something, because that property for the
>>>>>>>>> microchip devices is not impacted what else is on the bus. AFAICT, you
>>>>>>>>> could have an mcp3911 and an mcp3564 on the same bus even though both
>>>>>>>>> are completely different devices with different drivers. They have
>>>>>>>>> individual device nodes and their own supplies etc etc. These aren't
>>>>>>>>> per-channel properties on an adc or dac, they're per child device on a
>>>>>>>>> spi bus.
>>>>>>>> Maybe I'm the one missing something :). IIRC, spi would not allow two
>>>>>>>> devices on the same CS right? Because for this chip we would need
>>>>>>>> something like:
>>>>>>>>
>>>>>>>> spi {
>>>>>>>> dac@0 {
>>>>>>>> reg = <0>;
>>>>>>>> adi,pin-id = <0>;
>>>>>>>> };
>>>>>>>>
>>>>>>>> dac@1 {
>>>>>>>> reg = <0>; // which seems already problematic?
>>>>>>>> adi,pin-id <1>;
>>>>>>>> };
>>>>>>>>
>>>>>>>> ...
>>>>>>>>
>>>>>>>> //up to 4
>>>>>>>> };
>>>>>>> Yeah. It's not clear to me how that works for the microchip devices
>>>>>>> (I suspect it doesn't!)
>>>>>>>
>>>>>>> Just thinking as I type, but could we do something a bit nasty with
>>>>>>> a gpio mux that doesn't actually switch but represents the GPIO being
>>>>>>> shared? Given this is all tied to the spi bus that should all happen
>>>>>>> under serializing locks.
>>>>>>>
>>>>>>> Agreed though that this would be nicer as an SPI thing that let
>>>>>>> us specify that a single CS is share by multiple devices and their
>>>>>>> is some other signal acting to select which one we are talking to.
>>>>>>>
>>>>>> If the device-addressing on the same chip-select is to be handled
>>>>>> by the spi framework, wouldn't we lose device-specific features?
>>>>>>
>>>>>> I understand that this multi-device feature is there mostly to extend the
>>>>>> channel count from 16 to 32, 48 or 64. I suppose the command:
>>>>>>
>>>>>> "MULTI DEVICE SW LDAC MODE"
>>>>>>
>>>>>> exists so that software can update channel values accross multiple devices.
>>>>> Right! You do have a point! I agree the main driver for a feature like
>>>>> this is likely to extend the channel count and effectively "aggregate"
>>>>> devices.
>>>>>
>>>>> But I would say that even with the spi solution the MULTI DEVICE stuff
>>>>> should be doable (as we still need a sort of adi,pin-id property).
>>>> I don't think we can have something like an IIO buffer shared by multiple
>>>> devices. Synchronizing separate devices would be doable with proper hardware
>>>> support for this (probably involving an FGPA).
>>> True!
>>>
>>>>
>>>>> But yes, I do feel that the whole feature is for aggregation so seeing
>>>>> one device with 32 channels is the expectation here? Rather than seeing
>>>>> two devices with 16 channels.
>>>> Yes, I think aggregation is the whole point there... so that the IIO driver
>>>> is multi-device-aware.
>>> Which makes me feel that different pins per device might be possible
>>> from an HW point of view but does not make much sense. For example, for
>>> the buffer example I would expect LDAC to be shared between all the
>>> devices.
>> That is why I would still suggest the multi-dac node in the middle...
>> the parent node can hold shared resources, while the dac children can
>> have their own, overriding or inheriting stuff.
>>
> Before going down that path I'd want confirmation this is something we
> actually think anyone will build.
>
> Jonathan
To directly answer your question- we currently do not have a platform that supports multi device topology with independent supplies or reset lines.
Given that, I agree to start with the parallel wiring assumption and defer per chip resource variation under there is a solid use case. I will also drop the "adi,resolution" proposal and proceed with "adi,device-addrs" in the AD5529R binding.
With all of the above, the proposed binding for the multi-device follow up series would look like:
dac@0 {
compatible = "adi,ad5529r-16";
reg = <0>;
adi,device-addrs = <0 1>;
reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
vdd-supply = <&vdd_reg>;
hvdd-supply = <&hvdd_reg>;
channel@0 { reg = <0>; adi,output-range-microvolt = <0 5000000>; };
channel@16 { reg = <16>; adi,output-range-microvolt = <0 40000000>; };
};
Does this look reasonable to everyone?
Regards,
Janani Sunil
^ permalink raw reply
* Re: [PATCH v16 04/14] lib: kstrtox: add initial value to _parse_integer_limit()
From: Jonathan Cameron @ 2026-06-24 14:54 UTC (permalink / raw)
To: Rodrigo Alencar
Cc: rodrigo.alencar, linux-kernel, linux-iio, devicetree, linux-doc,
linux, David Lechner, Andy Shevchenko, Lars-Peter Clausen,
Michael Hennerich, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
Jonathan Corbet, Andrew Morton, Petr Mladek, Steven Rostedt,
Andy Shevchenko, Rasmus Villemoes, Sergey Senozhatsky, Shuah Khan
In-Reply-To: <20260614210044.19dfc8df@jic23-huawei>
On Sun, 14 Jun 2026 21:00:44 +0100
Jonathan Cameron <jic23@kernel.org> wrote:
> On Thu, 4 Jun 2026 11:09:33 +0100
> Rodrigo Alencar <455.rodrigo.alencar@gmail.com> wrote:
>
> > On 26/06/04 10:58AM, Rodrigo Alencar via B4 Relay wrote:
> > > From: Rodrigo Alencar <rodrigo.alencar@analog.com>
> > >
> > > Add init parameter to _parse_integer_limit() that defines an initial
> > > value for the accumulated result when parsing an 64-bit integer. The
> > > new function prototype is adjusted so that the _parse_integer() macros
> > > stay consistent allowing for one more argument, which defaults to 0.
> >
> > ...
> >
> > > noinline
> > > unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
> > > - size_t max_chars)
> > > + size_t max_chars, unsigned long long init)
> > > {
> > > unsigned long long res;
> > > unsigned int rv;
> > >
> > > - res = 0;
> > > + res = init;
> >
> > This might generate conflict, as the code around have changed in linux-next.
> > It is an easy fix though.
> >
> Thanks for the heads up. Hopefully that will all fall out when I rebase testing
> on rc1 once that is out.
I've done a mid merge cycle rebase as the char-misc branches have merged.
So this should be resolve on my testing branch now.
Thanks
Jonathan
>
> Jonathan
>
> > > rv = 0;
> > > while (max_chars--) {
> > > unsigned int c = *s;
> >
>
>
^ permalink raw reply
* [PATCH v8 10/10] tracing/probes: Add a new testcase for BTF typecasts
From: Masami Hiramatsu (Google) @ 2026-06-24 14:43 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
With the introduction of container_of-style BTF typecasting and
per-CPU variable access support in trace probes, we need a way to
verify their functionality and prevent regressions.
Add a new ftrace kselftest and update the trace event sample module
to test and validate these features.
Specifically, update the trace-events-sample module to set up a
periodic timer whose callback accesses a per-CPU counter. Introduce
a new sample trace event, foo_timer_fn, to trace this callback
and log the current counter value.
Then, add a new test case, btf_probe_event.tc, which defines a
dynamic probe on the timer callback. The probe uses BTF typecasting
to recover the parent structure from the timer argument and
this_cpu_read() to fetch the per-CPU counter. The test verifies
the integrity of the implementation by ensuring the values
recorded by the dynamic probe match those from the static tracepoint.
Assisted-by: Antigravity:gemini-3.5-flash
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v8:
- Add more test cases.
Changes in v6:
- Update testcase according to changes.
Changes in v5:
- Add more syntax test cases.
Changes in v4:
- Fix uprobe $current test.
Changes in v3:
- Add syntax test case.
- Update testcase to use this_cpu_read()
Changes in v2:
- Use timer_shutdown_sync() instead of timer_delete_sync() for teardown.
---
samples/trace_events/trace-events-sample.c | 40 +++++++++++++++-
samples/trace_events/trace-events-sample.h | 34 ++++++++++++-
.../ftrace/test.d/dynevent/btf_probe_event.tc | 51 ++++++++++++++++++++
.../test.d/dynevent/eprobes_syntax_errors.tc | 3 +
.../ftrace/test.d/dynevent/fprobe_syntax_errors.tc | 12 +++++
.../ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 12 +++++
.../ftrace/test.d/kprobe/uprobe_syntax_errors.tc | 5 ++
7 files changed, 152 insertions(+), 5 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 0b7a6efdb247..ca5d98c360cb 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -94,6 +94,20 @@ static int simple_thread_fn(void *arg)
static DEFINE_MUTEX(thread_mutex);
static int simple_thread_cnt;
+static struct foo_timer_data *foo_timer_data;
+
+static void sample_timer_cb(struct timer_list *t)
+{
+ struct foo_timer_data *data = container_of(t, struct foo_timer_data, timer);
+
+ get_cpu();
+ trace_foo_timer_fn(data);
+ (*this_cpu_ptr(data->counter))++;
+ put_cpu();
+
+ mod_timer(t, jiffies + HZ);
+}
+
int foo_bar_reg(void)
{
mutex_lock(&thread_mutex);
@@ -132,9 +146,27 @@ void foo_bar_unreg(void)
static int __init trace_event_init(void)
{
+ foo_timer_data = kzalloc_obj(*foo_timer_data, GFP_KERNEL);
+ if (!foo_timer_data)
+ return -ENOMEM;
+
+ foo_timer_data->name = "sample_timer_counter";
+ foo_timer_data->counter = alloc_percpu(int);
+ if (!foo_timer_data->counter) {
+ kfree(foo_timer_data);
+ return -ENOMEM;
+ }
+
+ timer_setup(&foo_timer_data->timer, sample_timer_cb, 0);
+ mod_timer(&foo_timer_data->timer, jiffies + HZ);
+
simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
- if (IS_ERR(simple_tsk))
- return -1;
+ if (IS_ERR(simple_tsk)) {
+ timer_shutdown_sync(&foo_timer_data->timer);
+ free_percpu(foo_timer_data->counter);
+ kfree(foo_timer_data);
+ return PTR_ERR(simple_tsk);
+ }
return 0;
}
@@ -147,6 +179,10 @@ static void __exit trace_event_exit(void)
kthread_stop(simple_tsk_fn);
simple_tsk_fn = NULL;
mutex_unlock(&thread_mutex);
+
+ timer_shutdown_sync(&foo_timer_data->timer);
+ free_percpu(foo_timer_data->counter);
+ kfree(foo_timer_data);
}
module_init(trace_event_init);
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 1a05fc153353..816848a456a2 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -247,12 +247,14 @@
*/
/*
- * It is OK to have helper functions in the file, but they need to be protected
- * from being defined more than once. Remember, this file gets included more
- * than once.
+ * It is OK to have helper functions and data structures in the file, but they
+ * need to be protected from being defined more than once. Remember, this file
+ * gets included more than once.
*/
#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
+#include <linux/timer.h>
+
static inline int __length_of(const int *list)
{
int i;
@@ -270,6 +272,13 @@ enum {
TRACE_SAMPLE_BAR = 4,
TRACE_SAMPLE_ZOO = 8,
};
+
+struct foo_timer_data {
+ const char *name;
+ struct timer_list timer;
+ int __percpu *counter;
+};
+
#endif
/*
@@ -595,6 +604,25 @@ TRACE_EVENT(foo_rel_loc,
__get_rel_bitmask(bitmask),
__get_rel_cpumask(cpumask))
);
+
+TRACE_EVENT(foo_timer_fn,
+
+ TP_PROTO(struct foo_timer_data *data),
+
+ TP_ARGS(data),
+
+ TP_STRUCT__entry(
+ __string( name, data->name )
+ __field( int, count )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ __entry->count = *this_cpu_ptr(data->counter);
+ ),
+
+ TP_printk("name=%s count=%d", __get_str(name), __entry->count)
+);
#endif
/***** NOTICE! The #if protection ends here. *****/
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
new file mode 100644
index 000000000000..96791e120b7d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: BTF event with typecast and percpu access
+# requires: dynamic_events "this_cpu_read(<fetcharg>)":README "[(structname[,field])]<argname>[->field[->field|.field...]]":README
+
+# Check if the sample module is loaded
+if ! lsmod | grep -q trace_events_sample; then
+ modprobe trace-events-sample || exit_unsupported
+fi
+
+echo 0 > events/enable
+echo > dynamic_events
+
+# The sample_timer_cb(struct timer_list *t) is called.
+# We want to check (STRUCT,FIELD)VAR typecast and this_cpu_read() access.
+# (foo_timer_data,timer)t converts t to struct foo_timer_data * using container_of.
+# data->counter is a per-cpu pointer to int.
+# this_cpu_read(data->counter) should give the value of the counter.
+
+echo 'f:mysample/myevent sample_timer_cb name=(foo_timer_data,timer)t->name:string count=this_cpu_read((foo_timer_data,timer)t->counter)' >> dynamic_events
+
+echo 1 > events/mysample/myevent/enable
+echo 1 > events/sample-trace/foo_timer_fn/enable
+
+sleep 2
+
+echo 0 > events/mysample/myevent/enable
+echo 0 > events/sample-trace/foo_timer_fn/enable
+
+# Compare the values.
+MATCH=0
+while read line; do
+ if echo $line | grep -q "foo_timer_fn:"; then
+ NAME=`echo $line | sed 's/.*name=\([^ ]*\) .*/\1/'`
+ COUNT=`echo $line | sed 's/.*count=\([^ ]*\).*/\1/'`
+ if grep -q "myevent:.*name=\"${NAME}\" count=$COUNT" trace; then
+ MATCH=$((MATCH+1))
+ fi
+ fi
+done < trace
+
+if [ $MATCH -eq 0 ]; then
+ echo "No matching events found"
+ exit_fail
+fi
+
+# Clean up
+echo 0 > events/mysample/myevent/enable
+echo 0 > events/sample-trace/foo_timer_fn/enable
+echo > dynamic_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
index 0e65e787e426..ae17eb344bf7 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
@@ -21,6 +21,9 @@ check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
check_error 'e:foo/bar syscalls/sys_enter_openat arg=^$foo' # BAD_ATTACH_ARG
+check_error 'e:foo/bar syscalls/sys_enter_openat arg=^COMM' # NO_EVENT_FIELD
+check_error 'e:foo/bar syscalls/sys_enter_openat arg=^current' # NO_EVENT_FIELD
+
if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then
check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER
fi
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index fee479295e2f..e9d7e6919c7f 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -112,6 +112,18 @@ check_error 'f vfs_read%return $retval->^foo' # NO_PTR_STRCT
check_error 'f vfs_read file->^foo' # NO_BTF_FIELD
check_error 'f vfs_read file^-.foo' # BAD_HYPHEN
check_error 'f vfs_read ^file:string' # BAD_TYPE4STR
+if grep -qF "[(structname" README ; then
+check_error 'f vfs_read arg1=(task_struct)file^' # TYPECAST_REQ_FIELD
+check_error 'f vfs_read arg1=(a)((b)((c)(^(d)file->d)->c)->b)->a' # TOO_MANY_NESTED
+check_error 'f vfs_read arg1=(task_struct,^in_execve)file->comm' # TYPECAST_NOT_ALIGNED
+check_error 'f vfs_read arg1=(task_struct,^foo_bar)file->pid' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(^task_struct1234)file->pid' # NO_PTR_STRCT
+check_error 'f vfs_read arg1=(task_struct,se^->group_node)file->comm' # TYPECAST_BAD_ARROW
+check_error 'f vfs_read arg1=(task_struct,^->pid)file->comm' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(task_struct,^.pid)file->comm' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(task_struct,^.)file->comm' # NO_BTF_FIELD
+check_error 'f vfs_read arg1=(task_struct)^@symbol+10->comm' # TYPECAST_SYM_OFFSET
+fi
fi
else
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index 8f1c58f0c239..21ce8414459f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -115,6 +115,18 @@ check_error 'p vfs_read+20 ^$arg*' # NOFENTRY_ARGS
check_error 'p vfs_read ^hoge' # NO_BTFARG
check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
check_error 'r kfree ^$retval' # NO_RETVAL
+if grep -qF "[(structname" README ; then
+check_error 'p vfs_read arg1=(task_struct)file^' # TYPECAST_REQ_FIELD
+check_error 'p vfs_read arg1=(a)((b)((c)(^(d)file->d)->c)->b)->a' # TOO_MANY_NESTED
+check_error 'p vfs_read arg1=(task_struct,^in_execve)file->comm' # TYPECAST_NOT_ALIGNED
+check_error 'p vfs_read arg1=(task_struct,^foo_bar)file->pid' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(^task_struct1234)file->pid' # NO_PTR_STRCT
+check_error 'p vfs_read arg1=(task_struct,se^->group_node)file->comm' # TYPECAST_BAD_ARROW
+check_error 'p vfs_read arg1=(task_struct,^->pid)file->comm' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(task_struct,^.pid)file->comm' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(task_struct,^.)file->comm' # NO_BTF_FIELD
+check_error 'p vfs_read arg1=(task_struct)^@symbol+10->comm' # TYPECAST_SYM_OFFSET
+fi
else
check_error 'p vfs_read ^$arg*' # NOSUP_BTFARG
fi
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index c817158b99db..e12dc967ec76 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -28,4 +28,9 @@ if grep -q ".*symstr.*" README; then
check_error 'p /bin/sh:10 $stack0:^symstr' # BAD_TYPE
fi
+# $current is not supported by uprobe
+if grep -q "\$current.*" README; then
+check_error 'p /bin/sh:10 ^$current:u8' # BAD_VAR
+fi
+
exit 0
^ permalink raw reply related
* [PATCH v8 09/10] tracing/probes: Add this_cpu_read() and this_cpu_ptr() dereference method to fetcharg
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
When tracing the kernel local variables, sometimes we need to get the
CPU local variables. To access it, current simple dereference is not
enough.
Thus, introduce a special this_cpu_read() dereference to access per-cpu
variable for the current CPU (accessing other CPU variable may race with
updates on other CPUs). Also this_cpu_ptr() is for accessing per-cpu
pointer.
Those are working as same as the kernel percpu macro.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v6:
- Rebased on dump fetcharg patch.
- Fix to fetch static percpu variable with @SYM correctly.
Changes in v5:
- Simplify this_cpu_read() into +0(this_cpu_ptr()).
Changes in v3:
- Remove NULL check for percpu var because it is just an offset, could be 0.
- Simplify process_fetch_insn_bottom() code.
- If the last operation is this_cpu_read(), read only memory of the specific
size (of type).
Changes in v2:
- Drop +CPU/+PCPU and introduce this_cpu_read() and this_cpu_ptr().
- Support these method with BTF typecast.
- Just check the base address is NOT NULL instead of is_kernel_percpu_address().
---
Documentation/trace/eprobetrace.rst | 2
Documentation/trace/fprobetrace.rst | 2
Documentation/trace/kprobetrace.rst | 2
kernel/trace/trace.c | 1
kernel/trace/trace_probe.c | 143 ++++++++++++++++++++++++++---------
kernel/trace/trace_probe.h | 3 -
kernel/trace/trace_probe_tmpl.h | 22 ++++-
7 files changed, 130 insertions(+), 45 deletions(-)
diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst
index 680e0af43d5d..279396951b34 100644
--- a/Documentation/trace/eprobetrace.rst
+++ b/Documentation/trace/eprobetrace.rst
@@ -39,6 +39,8 @@ Synopsis of eprobe_events
@SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
$comm : Fetch current task comm.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
+ this_cpu_read(FETCHARG) : Read the value of the per-CPU variable FETCHARG on the current CPU.
+ this_cpu_ptr(FETCHARG) : Get the address of the per-CPU variable FETCHARG on the current CPU.
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index 3392cab016b3..3439bc9bd351 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -52,6 +52,8 @@ Synopsis of fprobe-events
$comm : Fetch current task comm.
$current : Fetch the address of the current task_struct.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*4)(\*5)
+ this_cpu_read(FETCHARG) : Read the value of the per-CPU variable FETCHARG on the current CPU.
+ this_cpu_ptr(FETCHARG) : Get the address of the per-CPU variable FETCHARG on the current CPU.
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 81e4fe38791d..9ae330eb0a52 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -55,6 +55,8 @@ Synopsis of kprobe_events
$comm : Fetch current task comm.
$current : Fetch the address of the current task_struct.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
+ this_cpu_read(FETCHARG) : Read the value of the per-CPU variable FETCHARG on the current CPU.
+ this_cpu_ptr(FETCHARG) : Get the address of the per-CPU variable FETCHARG on the current CPU.
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7a5676524f1a..d4121acc2938 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4332,6 +4332,7 @@ static const char readme_msg[] =
"\t $stack<index>, $stack, $retval, $comm, $current\n"
#endif
"\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
+ "\t this_cpu_read(<fetcharg>), this_cpu_ptr(<fetcharg>)\n"
"\t kernel return probes support: $retval, $arg<N>, $comm\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
"\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index eb58b70ae082..f84a4d7d2e02 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -345,6 +345,100 @@ static int parse_trace_event(char *arg, struct fetch_insn *code,
return -EINVAL;
}
+/* this_cpu_* parser */
+#define THIS_CPU_PTR_PREFIX "this_cpu_ptr("
+#define THIS_CPU_READ_PREFIX "this_cpu_read("
+#define THIS_CPU_PTR_LEN (sizeof(THIS_CPU_PTR_PREFIX) - 1)
+#define THIS_CPU_READ_LEN (sizeof(THIS_CPU_READ_PREFIX) - 1)
+
+static int
+parse_probe_arg(char *arg, const struct fetch_type *type,
+ struct fetch_insn **pcode, struct fetch_insn *end,
+ struct traceprobe_parse_context *ctx);
+
+/* handle dereference nested call */
+static inline int handle_dereference(char *arg, struct fetch_insn **pcode,
+ struct fetch_insn *end, struct traceprobe_parse_context *ctx,
+ int deref, long offset)
+{
+ const struct fetch_type *type = find_fetch_type(NULL, ctx->flags);
+ struct fetch_insn *code = *pcode;
+ int cur_offs = ctx->offset;
+ char *tmp;
+ int ret;
+
+ tmp = strrchr(arg, ')');
+ if (!tmp) {
+ trace_probe_log_err(ctx->offset + strlen(arg),
+ DEREF_OPEN_BRACE);
+ return -EINVAL;
+ }
+
+ *tmp = '\0';
+ ret = parse_probe_arg(arg, type, &code, end, ctx);
+ if (ret)
+ return ret;
+ ctx->offset = cur_offs;
+ if (code->op == FETCH_OP_COMM || code->op == FETCH_OP_IMMSTR) {
+ trace_probe_log_err(ctx->offset, COMM_CANT_DEREF);
+ return -EINVAL;
+ }
+
+ /*
+ * this_cpu_ptr(@SYM) does not use SYM value, but use SYM address.
+ * So we overwrite the last FETCH_OP_DEREF with FETCH_OP_CPU_PTR.
+ */
+ if (!(deref == FETCH_OP_CPU_PTR && *arg == '@')) {
+ code++;
+ if (code == end) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ }
+ *pcode = code;
+
+ code->op = deref;
+ code->offset = offset;
+ /* Reset the last type if used */
+ ctx->last_type = NULL;
+ return 0;
+}
+
+static int parse_this_cpu(char *arg, struct fetch_insn **pcode,
+ struct fetch_insn *end,
+ struct traceprobe_parse_context *ctx)
+{
+ struct fetch_insn *code;
+ bool is_ptr = false;
+ int ret;
+
+ if (str_has_prefix(arg, THIS_CPU_PTR_PREFIX)) {
+ arg += THIS_CPU_PTR_LEN;
+ ctx->offset += THIS_CPU_PTR_LEN;
+ is_ptr = true;
+ } else if (str_has_prefix(arg, THIS_CPU_READ_PREFIX)) {
+ arg += THIS_CPU_READ_LEN;
+ ctx->offset += THIS_CPU_READ_LEN;
+ } else
+ return -EINVAL;
+
+ ret = handle_dereference(arg, pcode, end, ctx, FETCH_OP_CPU_PTR, 0);
+ if (ret || is_ptr)
+ return ret;
+
+ /* this_cpu_read(VAR) -> +0(this_cpu_ptr(VAR)) */
+ code = *pcode;
+ code++;
+ if (code == end) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ code->op = FETCH_OP_DEREF;
+ code->offset = 0;
+ *pcode = code;
+ return 0;
+}
+
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
static u32 btf_type_int(const struct btf_type *t)
@@ -904,11 +998,6 @@ static char *find_matched_close_paren(char *s)
return NULL;
}
-static int
-parse_probe_arg(char *arg, const struct fetch_type *type,
- struct fetch_insn **pcode, struct fetch_insn *end,
- struct traceprobe_parse_context *ctx);
-
static int handle_typecast(char *arg, struct fetch_insn **pcode,
struct fetch_insn *end,
struct traceprobe_parse_context *ctx)
@@ -961,7 +1050,9 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
/* Skip '(' */
ctx->offset += 1;
tmp++;
- } else if (*tmp == '+' || *tmp == '-') {
+ } else if (*tmp == '+' || *tmp == '-' ||
+ str_has_prefix(tmp, THIS_CPU_PTR_PREFIX) ||
+ str_has_prefix(tmp, THIS_CPU_READ_PREFIX)) {
/* Dereference can have another field access inside it. */
char *open = strchr(tmp + 1, '(');
@@ -1481,36 +1572,9 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
}
ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0);
arg = tmp + 1;
- tmp = strrchr(arg, ')');
- if (!tmp) {
- trace_probe_log_err(ctx->offset + strlen(arg),
- DEREF_OPEN_BRACE);
- return -EINVAL;
- } else {
- const struct fetch_type *t2 = find_fetch_type(NULL, ctx->flags);
- int cur_offs = ctx->offset;
-
- *tmp = '\0';
- ret = parse_probe_arg(arg, t2, &code, end, ctx);
- if (ret)
- break;
- ctx->offset = cur_offs;
- if (code->op == FETCH_OP_COMM ||
- code->op == FETCH_OP_IMMSTR) {
- trace_probe_log_err(ctx->offset, COMM_CANT_DEREF);
- return -EINVAL;
- }
- if (++code == end) {
- trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
- return -EINVAL;
- }
- *pcode = code;
-
- code->op = deref;
- code->offset = offset;
- /* Reset the last type if used */
- ctx->last_type = NULL;
- }
+ ret = handle_dereference(arg, pcode, end, ctx, deref, offset);
+ if (ret < 0)
+ return ret;
break;
case '\\': /* Immediate value */
if (arg[1] == '"') { /* Immediate string */
@@ -1531,7 +1595,10 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
ret = handle_typecast(arg, pcode, end, ctx);
break;
default:
- if (isalpha(arg[0]) || arg[0] == '_') {
+ if (str_has_prefix(arg, THIS_CPU_PTR_PREFIX) ||
+ str_has_prefix(arg, THIS_CPU_READ_PREFIX)) {
+ ret = parse_this_cpu(arg, pcode, end, ctx);
+ } else if (isalpha(arg[0]) || arg[0] == '_') {
/* BTF variable or event field*/
if (ctx->flags & TPARG_FL_TEVENT) {
ret = parse_trace_event(arg, *pcode, ctx);
@@ -1548,8 +1615,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
return -EINVAL;
}
ret = parse_btf_arg(arg, pcode, end, ctx);
- break;
}
+ break;
}
if (!ret && code->op == FETCH_OP_NOP) {
/* Parsed, but do not find fetch method */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 053f72fdaece..9955a36acbb1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -101,6 +101,7 @@ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
/* Stage 2 (dereference) ops */ \
FETCH_OP(DEREF, offset), /* Dereference: .offset */ \
FETCH_OP(UDEREF, offset), /* User-space dereference: .offset */\
+ FETCH_OP(CPU_PTR, none), /* Per-CPU pointer: .offset */ \
/* Stage 3 (store) ops */ \
FETCH_OP(ST_RAW, store), /* Raw value: .size */ \
FETCH_OP(ST_MEM, store), /* Memory: .offset, .size */ \
@@ -596,7 +597,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(TYPECAST_NOT_EVENT, "Typecasts are only for eprobe fields"), \
C(TYPECAST_REQ_FIELD, "Typecast requires a field access"), \
C(TOO_MANY_NESTED, "Too many nested typecasts/dereferences"), \
- C(TYPECAST_SYM_OFFSET, "@SYM+/-OFFSET with typecast needs parentheses") \
+ C(TYPECAST_SYM_OFFSET, "@SYM+/-OFFSET with typecast needs parentheses"), \
C(TYPECAST_NOT_ALIGNED, "Typecast field option is not byte-aligned"), \
C(TYPECAST_BAD_ARROW, "Typecast field option does not support -> operator"),
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index d0e9662cde00..8db12f758fda 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -129,25 +129,35 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
struct fetch_insn *s3 = NULL;
int total = 0, ret = 0, i = 0;
u32 loc = 0;
- unsigned long lval = val;
+ unsigned long lval, llval = val;
stage2:
/* 2nd stage: dereference memory if needed */
do {
- if (code->op == FETCH_OP_DEREF) {
- lval = val;
+ lval = val;
+ switch (code->op) {
+ case FETCH_OP_DEREF:
ret = probe_mem_read(&val, (void *)val + code->offset,
sizeof(val));
- } else if (code->op == FETCH_OP_UDEREF) {
- lval = val;
+ break;
+ case FETCH_OP_UDEREF:
ret = probe_mem_read_user(&val,
(void *)val + code->offset, sizeof(val));
- } else
break;
+ case FETCH_OP_CPU_PTR:
+ val = (unsigned long)this_cpu_ptr((void __percpu *)val);
+ ret = 0;
+ break;
+ default:
+ lval = llval;
+ goto out;
+ }
if (ret)
return ret;
+ llval = lval;
code++;
} while (1);
+out:
s3 = code;
stage3:
^ permalink raw reply related
* [PATCH v8 08/10] tracing/probes: Add $current variable support
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Since we can use the BTF to cast value to a structure pointer type,
it is useful to introduce "$current" special variable support to
fetcharg.
User can define a fetcharg to access current task_struct properties
using BTF info. e.g.
$current->cpus_ptr
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v8:
- Avoid uninitialized ctx->btf issue on $current without typecast.
Changes in v7:
- Fix to use force-typecast for task_struct implicitly.
Changes in v6:
- Rebased on dump fetcharg patch.
- Remove function name/eprobe requirement for $current.
Changes in v5:
- Use s32 for bof_find_btf_id().
Changes in v4:
- Add $current in README when CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y case.
- Fix to prohibit using $current in eprobes and address based kprobes.
Changes in v3:
- Remove $current support from eprobes (because eprobes is only for event)
- Prohibit uprobes to use $current.
Changes in v2:
- Support to parse $current in parse_btf_arg().
- If no typecast on $current, it automatically casted to task_struct.
- Check error case if $current follows something except for "-".
---
Documentation/trace/fprobetrace.rst | 1 +
Documentation/trace/kprobetrace.rst | 1 +
kernel/trace/trace.c | 4 ++--
kernel/trace/trace_probe.c | 37 ++++++++++++++++++++++++++++++++++-
kernel/trace/trace_probe.h | 1 +
kernel/trace/trace_probe_tmpl.h | 3 +++
6 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index 290a9e6f7491..3392cab016b3 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -50,6 +50,7 @@ Synopsis of fprobe-events
$argN : Fetch the Nth function argument. (N >= 1) (\*2)
$retval : Fetch return value.(\*3)
$comm : Fetch current task comm.
+ $current : Fetch the address of the current task_struct.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*4)(\*5)
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index a62707e6a9f2..81e4fe38791d 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -53,6 +53,7 @@ Synopsis of kprobe_events
$argN : Fetch the Nth function argument. (N >= 1) (\*1)
$retval : Fetch return value.(\*2)
$comm : Fetch current task comm.
+ $current : Fetch the address of the current task_struct.
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
\IMM : Store an immediate value to the argument.
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0e36af853199..7a5676524f1a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4323,13 +4323,13 @@ static const char readme_msg[] =
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
- "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>, $current\n"
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
"\t [(structname[,field])]<argname>[->field[->field|.field...]],\n"
"\t [(structname[,field])](fetcharg)->field[->field|.field...],\n"
#endif
#else
- "\t $stack<index>, $stack, $retval, $comm,\n"
+ "\t $stack<index>, $stack, $retval, $comm, $current\n"
#endif
"\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
"\t kernel return probes support: $retval, $arg<N>, $comm\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2d5b2686cc15..eb58b70ae082 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -692,7 +692,9 @@ static int parse_btf_arg(char *varname,
int i, is_ptr, ret;
u32 tid;
- if (!ctx->funcname && !(ctx->flags & TPARG_FL_TEVENT))
+ /* Note: field is not separated at this point, so check prefix. */
+ if (!str_has_prefix(varname, "$current") &&
+ !ctx->funcname && !(ctx->flags & TPARG_FL_TEVENT))
return -EINVAL;
is_ptr = split_next_field(varname, &field, ctx);
@@ -705,6 +707,20 @@ static int parse_btf_arg(char *varname,
return -EOPNOTSUPP;
}
+ if (!strcmp(varname, "$current")) {
+ code->op = FETCH_OP_CURRENT;
+ /* If no typecast is specified for $current, use task_struct by default */
+ ret = bpf_find_btf_id("task_struct", BTF_KIND_STRUCT, &ctx->struct_btf);
+ if (ret < 0) {
+ trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
+ return -ENOENT;
+ }
+ tid = (u32)ret;
+ type = ctx->last_struct =
+ btf_type_skip_modifiers(ctx->struct_btf, tid, NULL);
+ goto found_type;
+ }
+
if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) {
code->op = FETCH_OP_RETVAL;
/* Check whether the function return type is not void, even with typecast. */
@@ -761,6 +777,7 @@ static int parse_btf_arg(char *varname,
found:
type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
+found_type:
if (!type) {
trace_probe_log_err(ctx->offset, BAD_BTF_TID);
return -EINVAL;
@@ -1270,6 +1287,24 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
return 0;
}
+ /* $current returns the address of the current task_struct. */
+ if (str_has_prefix(arg, "current")) {
+ /* $current is only supported by kernel probe. */
+ if (!(ctx->flags & TPARG_FL_KERNEL)) {
+ err = TP_ERR_BAD_VAR;
+ goto inval;
+ }
+ arg += strlen("current");
+ if (*arg == '-' && IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS))
+ return parse_btf_arg(orig_arg, pcode, end, ctx);
+
+ if (*arg != '\0')
+ goto inval;
+
+ code->op = FETCH_OP_CURRENT;
+ return 0;
+ }
+
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
len = str_has_prefix(arg, "arg");
if (len) {
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index e7fcc77f51fc..053f72fdaece 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -92,6 +92,7 @@ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
FETCH_OP(RETVAL, none), /* Return value */ \
FETCH_OP(IMM, imm), /* Immediate: .immediate */ \
FETCH_OP(COMM, none), /* Current comm */ \
+ FETCH_OP(CURRENT, none), /* Current task_struct address */\
FETCH_OP(ARG, param), /* Argument: .param = index */ \
FETCH_OP(FOFFS, imm), /* File offset: .immediate */ \
FETCH_OP(IMMSTR, string), /* Allocated string: .data */ \
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 51436f19083b..d0e9662cde00 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -112,6 +112,9 @@ process_common_fetch_insn(struct fetch_insn *code, unsigned long *val)
case FETCH_OP_IMMSTR:
*val = (unsigned long)code->data;
break;
+ case FETCH_OP_CURRENT:
+ *val = (unsigned long)current;
+ break;
default:
return -EILSEQ;
}
^ permalink raw reply related
* [PATCH v8 07/10] tracing/probes: Support field specifier option for typecast
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Add a field specifier option for the typecast. This works like
container_of() macro.
(STRUCT[,FIELD[.FIELD2...]])VAR
This is equivalent to :
container_of(VAR, struct STRUCT, FIELD[.FIELD2...])
For example:
echo "f tick_nohz_handler next_tick=(tick_sched,sched_timer)timer->next_tick" >> dynamic_events
This will trace tick_nohz_handler() with its tick_sched::next_tick which
is converted from @timer by contianer_of(tick, struct tick_sched, sched_timer).
So, if you enabkle both fprobes:tick_nohz_handler__entry and
timer:hrtimer_expire_entry events, we will see something like:
<idle>-0 [002] d.h1. 3778.087272: hrtimer_expire_entry: hrtimer=00000000d63db328 f
unction=tick_nohz_handler now=3777450051040
<idle>-0 [002] d.h1. 3778.087281: tick_nohz_handler__entry: (tick_nohz_handler+0x4
/0x140) next_tick=3777450000000
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v6:
- Update according to the allways nested patch.
Changes in v3:
- Fix error caret position.
Changes in v2:
- Use byteoffset for typecast field offset instead of bitoffset. This fixes negative modulo calculation.
- Check whether a field is specified after typecast.
- Reject if typecast field option has arrow operator.
---
Documentation/trace/eprobetrace.rst | 5 +
Documentation/trace/fprobetrace.rst | 8 +-
Documentation/trace/kprobetrace.rst | 8 +-
kernel/trace/trace.c | 4 -
kernel/trace/trace_probe.c | 169 ++++++++++++++++++++++++-----------
kernel/trace/trace_probe.h | 5 +
6 files changed, 135 insertions(+), 64 deletions(-)
diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst
index cd0b4aa7f896..680e0af43d5d 100644
--- a/Documentation/trace/eprobetrace.rst
+++ b/Documentation/trace/eprobetrace.rst
@@ -49,7 +49,10 @@ Synopsis of eprobe_events
(STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
a pointer to STRUCT and then derference the pointer defined by
->MEMBER. Note that when this is used, the FIELD name does not
- need to be prefixed with a '$'.
+ need to be prefixed with a '$'. ASGN can be specified optionally.
+ If ASGN is specified, FIELD will be cast to the same offset
+ position as the ASGN member, rather than to the beginning of
+ the STRUCT.
(STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
also be used with another FETCHARG instead of FIELD.
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index 6b8bb27bb62d..290a9e6f7491 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -57,10 +57,12 @@ Synopsis of fprobe-events
(u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
(x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr"
and bitfield are supported.
- (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
+ (STRUCT[,ASGN])FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
a pointer to STRUCT and then derference the pointer defined by
- ->MEMBER.
- (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+ ->MEMBER. ASGN can be specified optionally. If ASGN is specified,
+ FIELD will be cast to the same offset position as the ASGN member,
+ rather than to the beginning of the STRUCT.
+ (STRUCT[,ASGN])(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
also be used with another FETCHARG instead of FIELD.
(\*1) This is available only when BTF is enabled.
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index c4382765d5b2..a62707e6a9f2 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -61,11 +61,13 @@ Synopsis of kprobe_events
(x8/x16/x32/x64), VFS layer common type(%pd/%pD), "char",
"string", "ustring", "symbol", "symstr" and bitfield are
supported.
- (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
+ (STRUCT[,ASGN])FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
a pointer to STRUCT and then derference the pointer defined by
->MEMBER. Note that this is available only when the probe is
- on function entry.
- (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+ on function entry. ASGN can be specified optionally. If ASGN
+ is specified, FIELD will be cast to the same offset position
+ as the ASGN member, rather than to the beginning of the STRUCT.
+ (STRUCT[,ASGN])(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
also be used with another FETCHARG instead of FIELD.
(\*1) only for the probe on function entry (offs == 0). Note, this argument access
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4f70318918c2..0e36af853199 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4325,8 +4325,8 @@ static const char readme_msg[] =
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
- "\t [(structname)]<argname>[->field[->field|.field...]],\n"
- "\t [(structname)](fetcharg)->field[->field|.field...],\n"
+ "\t [(structname[,field])]<argname>[->field[->field|.field...]],\n"
+ "\t [(structname[,field])](fetcharg)->field[->field|.field...],\n"
#endif
#else
"\t $stack<index>, $stack, $retval, $comm,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 87a2bb1cd950..2d5b2686cc15 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -568,6 +568,64 @@ static int split_next_field(char *varname, char **next_field,
return ret;
}
+/* Inner loop for solving dot operator ('.'). Return bit-offset of the given field */
+static int get_bitoffset_of_field(char **pfieldname, const struct btf_type **ptype,
+ struct traceprobe_parse_context *ctx)
+{
+ const struct btf_type *type = *ptype;
+ const struct btf_member *field;
+ struct btf *btf = ctx_btf(ctx);
+ char *fieldname = *pfieldname;
+ int bitoffs = 0;
+ u32 anon_offs;
+ char *next;
+ int is_ptr;
+
+ do {
+ next = NULL;
+ is_ptr = split_next_field(fieldname, &next, ctx);
+ if (is_ptr < 0)
+ return is_ptr;
+
+ anon_offs = 0;
+ field = btf_find_struct_member(btf, type, fieldname,
+ &anon_offs);
+ if (IS_ERR(field)) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return PTR_ERR(field);
+ }
+ if (!field) {
+ trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
+ return -ENOENT;
+ }
+ /* Add anonymous structure/union offset */
+ bitoffs += anon_offs;
+
+ /* Accumulate the bit-offsets of the dot-connected fields */
+ if (btf_type_kflag(type)) {
+ bitoffs += BTF_MEMBER_BIT_OFFSET(field->offset);
+ ctx->last_bitsize = BTF_MEMBER_BITFIELD_SIZE(field->offset);
+ } else {
+ bitoffs += field->offset;
+ ctx->last_bitsize = 0;
+ }
+
+ type = btf_type_skip_modifiers(btf, field->type, NULL);
+ if (!type) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return -EINVAL;
+ }
+
+ if (next)
+ ctx->offset += next - fieldname;
+ fieldname = next;
+ } while (!is_ptr && fieldname);
+
+ *pfieldname = fieldname;
+ *ptype = type;
+
+ return bitoffs;
+}
/*
* Parse the field of data structure. The @type must be a pointer type
* pointing the target data structure type.
@@ -577,15 +635,13 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
struct traceprobe_parse_context *ctx)
{
struct fetch_insn *code = *pcode;
- const struct btf_member *field;
- u32 bitoffs, anon_offs;
- bool is_struct = ctx->struct_btf != NULL;
struct btf *btf = ctx_btf(ctx);
- char *next;
- int is_ptr;
+ bool is_first_field = true;
+ int bitoffs;
do {
- if (!is_struct) {
+ /* For the first field of typecast, @type will be the target structure type. */
+ if (!(is_first_field && ctx->struct_btf)) {
/* Outer loop for solving arrow operator ('->') */
if (BTF_INFO_KIND(type->info) != BTF_KIND_PTR) {
trace_probe_log_err(ctx->offset, NO_PTR_STRCT);
@@ -599,60 +655,25 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
return -EINVAL;
}
}
- /* Only the first type can skip being a pointer */
- is_struct = false;
-
- bitoffs = 0;
- do {
- /* Inner loop for solving dot operator ('.') */
- next = NULL;
- is_ptr = split_next_field(fieldname, &next, ctx);
- if (is_ptr < 0)
- return is_ptr;
-
- anon_offs = 0;
- field = btf_find_struct_member(btf, type, fieldname,
- &anon_offs);
- if (IS_ERR(field)) {
- trace_probe_log_err(ctx->offset, BAD_BTF_TID);
- return PTR_ERR(field);
- }
- if (!field) {
- trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
- return -ENOENT;
- }
- /* Add anonymous structure/union offset */
- bitoffs += anon_offs;
-
- /* Accumulate the bit-offsets of the dot-connected fields */
- if (btf_type_kflag(type)) {
- bitoffs += BTF_MEMBER_BIT_OFFSET(field->offset);
- ctx->last_bitsize = BTF_MEMBER_BITFIELD_SIZE(field->offset);
- } else {
- bitoffs += field->offset;
- ctx->last_bitsize = 0;
- }
-
- type = btf_type_skip_modifiers(btf, field->type, NULL);
- if (!type) {
- trace_probe_log_err(ctx->offset, BAD_BTF_TID);
- return -EINVAL;
- }
-
- ctx->offset += next - fieldname;
- fieldname = next;
- } while (!is_ptr && fieldname);
+ bitoffs = get_bitoffset_of_field(&fieldname, &type, ctx);
+ if (bitoffs < 0)
+ return bitoffs;
if (++code == end) {
trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
return -EINVAL;
}
code->op = FETCH_OP_DEREF; /* TODO: user deref support */
code->offset = bitoffs / 8;
+ if (is_first_field && ctx->struct_btf) {
+ /* The first field can be typecasted with field option. */
+ code->offset -= ctx->prefix_byteoffs;
+ }
*pcode = code;
ctx->last_bitoffs = bitoffs % 8;
ctx->last_type = type;
+ is_first_field = false;
} while (fieldname);
return 0;
@@ -808,6 +829,46 @@ static int query_btf_struct(const char *sname, struct traceprobe_parse_context *
return 0;
}
+static int parse_btf_casttype(char *casttype, struct traceprobe_parse_context *ctx)
+{
+ char *field;
+ int ret;
+
+ /* Field option - evaluated later. */
+ field = strchr(casttype, ',');
+ if (field)
+ *field++ = '\0';
+
+ ret = query_btf_struct(casttype, ctx);
+ if (ret < 0) {
+ trace_probe_log_err(ctx->offset, NO_PTR_STRCT);
+ return -EINVAL;
+ }
+
+ if (field) {
+ struct btf_type *type = (struct btf_type *)ctx->last_struct;
+
+ ctx->offset += field - casttype;
+ ret = get_bitoffset_of_field(&field, &ctx->last_struct, ctx);
+ if (ret < 0)
+ return ret;
+ if (ret % 8) {
+ trace_probe_log_err(ctx->offset, TYPECAST_NOT_ALIGNED);
+ return -EINVAL;
+ }
+ if (field != NULL) {
+ /* this means @field skips an arrow operator ("->"). */
+ trace_probe_log_err(ctx->offset - 2, TYPECAST_BAD_ARROW);
+ return -EINVAL;
+ }
+ ctx->prefix_byteoffs = ret / 8;
+ /* Restore the original struct type (overwritten by get_bitoffset_of_field) */
+ ctx->last_struct = type;
+ }
+
+ return ret;
+}
+
/* Find the matching closing parenthesis for a given opening parenthesis. */
static char *find_matched_close_paren(char *s)
{
@@ -940,14 +1001,14 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
tmp = close + 2; /* Skip ">" after inner variable name */
/* resolve the typecast struct name */
- ret = query_btf_struct(arg + 1, ctx);
- if (ret < 0) {
- trace_probe_log_err(orig_offset + 1, NO_PTR_STRCT);
- return -EINVAL;
- }
+ ctx->offset = orig_offset + 1; /* for the '(' */
+ ret = parse_btf_casttype(arg + 1, ctx);
+ if (ret < 0)
+ return ret;
ctx->offset = orig_offset + tmp - arg;
ret = parse_btf_field(tmp, ctx->last_struct, pcode, end, ctx);
+ ctx->prefix_byteoffs = 0;
return ret;
}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f4fbe3010978..e7fcc77f51fc 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -451,6 +451,7 @@ struct traceprobe_parse_context {
unsigned int flags;
int offset;
int nested_level;
+ int prefix_byteoffs; /* The byte offset of the prefix field of typecast */
};
/* Each typecast consumes nested level. So the max number of typecast is 3. */
@@ -594,7 +595,9 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(TYPECAST_NOT_EVENT, "Typecasts are only for eprobe fields"), \
C(TYPECAST_REQ_FIELD, "Typecast requires a field access"), \
C(TOO_MANY_NESTED, "Too many nested typecasts/dereferences"), \
- C(TYPECAST_SYM_OFFSET, "@SYM+/-OFFSET with typecast needs parentheses")
+ C(TYPECAST_SYM_OFFSET, "@SYM+/-OFFSET with typecast needs parentheses") \
+ C(TYPECAST_NOT_ALIGNED, "Typecast field option is not byte-aligned"), \
+ C(TYPECAST_BAD_ARROW, "Typecast field option does not support -> operator"),
#undef C
#define C(a, b) TP_ERR_##a
^ permalink raw reply related
* [PATCH v8 06/10] tracing/probes: Type casting always involves nested calls
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
This allows type casting to various fetchargs without parentheses
by recursively calling parse_probe_arg on the target when type
casting is used.
For example, this allows the following expressions:
- (STRUCT)%REG->FIELD
- (STRUCT)$stackN->FIELD
- (STRUCT)@SYM->FIELD
Note that @SYM+/-OFFSET with typecast needs parentheses like:
- (STRUCT)(@SYM-8)->FIELD
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v8:
- Fix caret position in error case.
- Add a comment about @SYM+/-OFFSET without parentheses.
Changes in v7:
- Prohibit using @SYM+/-OFFSET without parentheses.
- Cleanup parse_btf_arg() since ctx->struct_btf is always NULL now.
Changes in v6:
- Newly added.
---
kernel/trace/trace_probe.c | 123 ++++++++++++++++++++++++++------------------
kernel/trace/trace_probe.h | 4 +
2 files changed, 75 insertions(+), 52 deletions(-)
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1d6afda39462..87a2bb1cd950 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -684,19 +684,6 @@ static int parse_btf_arg(char *varname,
return -EOPNOTSUPP;
}
- if (ctx->flags & TPARG_FL_TEVENT) {
- ret = parse_trace_event(varname, code, ctx);
- if (ret < 0) {
- trace_probe_log_err(ctx->offset, BAD_ATTACH_ARG);
- return ret;
- }
- /* TEVENT is only here via a typecast */
- if (WARN_ON_ONCE(ctx->struct_btf == NULL))
- return -EINVAL;
- type = ctx->last_struct;
- goto found_type;
- }
-
if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) {
code->op = FETCH_OP_RETVAL;
/* Check whether the function return type is not void, even with typecast. */
@@ -708,13 +695,6 @@ static int parse_btf_arg(char *varname,
tid = ctx->proto->type;
goto found;
}
- /*
- * Even if we can not find appropriate BTF info, we can still access
- * the field via typecast.
- */
- if (ctx->struct_btf)
- goto found;
-
if (field) {
trace_probe_log_err(ctx->offset + field - varname,
NO_BTF_ENTRY);
@@ -759,11 +739,7 @@ static int parse_btf_arg(char *varname,
return -ENOENT;
found:
- if (ctx->struct_btf)
- type = ctx->last_struct;
- else
- type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
-found_type:
+ type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
if (!type) {
trace_probe_log_err(ctx->offset, BAD_BTF_TID);
return -EINVAL;
@@ -860,7 +836,7 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
struct traceprobe_parse_context *ctx)
{
int orig_offset = ctx->offset;
- bool nested = false;
+ char *close;
char *tmp;
int ret;
@@ -871,6 +847,17 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
return -EOPNOTSUPP;
}
+ /*
+ * Always consider the token after typecast as a nested call
+ * For example: (STRUCT)VAR->FIELD and (STRUCT)(VAR)->FIELD are same.
+ * VAR is solved in the nested call.
+ */
+ ctx->nested_level++;
+ if (ctx->nested_level > TRACEPROBE_MAX_NESTED_LEVEL) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_NESTED);
+ return -E2BIG;
+ }
+
tmp = strchr(arg, ')');
if (!tmp) {
trace_probe_log_err(ctx->offset + strlen(arg),
@@ -879,11 +866,10 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
}
*tmp++ = '\0';
- /* Handle the nested structure like (STRUCT)(VAR->FIELD)->... */
+ ctx->offset += tmp - arg;
if (*tmp == '(') {
- char *close = find_matched_close_paren(tmp);
+ close = find_matched_close_paren(tmp);
- ctx->offset += tmp - arg;
if (!close) {
trace_probe_log_err(ctx->offset, DEREF_OPEN_BRACE);
return -EINVAL;
@@ -894,27 +880,66 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
TYPECAST_REQ_FIELD);
return -EINVAL;
}
-
- ctx->nested_level++;
- if (ctx->nested_level > TRACEPROBE_MAX_NESTED_LEVEL) {
- trace_probe_log_err(ctx->offset, TOO_MANY_NESTED);
- return -E2BIG;
+ /* Skip '(' */
+ ctx->offset += 1;
+ tmp++;
+ } else if (*tmp == '+' || *tmp == '-') {
+ /* Dereference can have another field access inside it. */
+ char *open = strchr(tmp + 1, '(');
+
+ if (!open) {
+ trace_probe_log_err(ctx->offset,
+ DEREF_NEED_BRACE);
+ return -EINVAL;
+ }
+ close = find_matched_close_paren(open);
+ if (!close) {
+ trace_probe_log_err(ctx->offset + strlen(tmp),
+ DEREF_OPEN_BRACE);
+ return -EINVAL;
+ }
+ close++;
+ /* We expect a field access for typecast */
+ if (close[0] != '-' || close[1] != '>') {
+ trace_probe_log_err(ctx->offset + close - tmp,
+ TYPECAST_REQ_FIELD);
+ return -EINVAL;
+ }
+ } else {
+ if (tmp[0] == '@') {
+ /* @sym+offset is not allowed without parenthesized */
+ close = strpbrk(tmp, "+-");
+ if (close && isdigit(close[1])) {
+ trace_probe_log_err(ctx->offset,
+ TYPECAST_SYM_OFFSET);
+ return -EINVAL;
+ }
}
- *close = '\0';
+ /* Inner variable name */
+ close = strchr(tmp, '-');
+ if (!close || close[1] != '>') {
+ trace_probe_log_err(ctx->offset + strlen(tmp),
+ TYPECAST_REQ_FIELD);
+ return -EINVAL;
+ }
+ }
+ *close = '\0';
- ctx->offset += 1; /* for the '(' */
- /* We need to parse the nested one */
- ret = parse_probe_arg(tmp + 1, find_fetch_type(NULL, ctx->flags),
- pcode, end, ctx);
- if (ret < 0)
- return ret;
- ctx->nested_level--;
- clear_struct_btf(ctx);
+ /* We need to parse the nested one */
+ ret = parse_probe_arg(tmp, find_fetch_type(NULL, ctx->flags),
+ pcode, end, ctx);
+ if (ret < 0)
+ return ret;
+ ctx->nested_level--;
+ clear_struct_btf(ctx);
- tmp = close + 3;/* Skip "->" after closing parenthesis */
- nested = true;
- }
+ /* Let tmp point the field name. */
+ if (close[1] == '-')
+ tmp = close + 3; /* Skip "->" after closing parenthesis */
+ else
+ tmp = close + 2; /* Skip ">" after inner variable name */
+ /* resolve the typecast struct name */
ret = query_btf_struct(arg + 1, ctx);
if (ret < 0) {
trace_probe_log_err(orig_offset + 1, NO_PTR_STRCT);
@@ -922,11 +947,7 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
}
ctx->offset = orig_offset + tmp - arg;
- /* If it is nested, tmp points to the field name. */
- if (nested)
- ret = parse_btf_field(tmp, ctx->last_struct, pcode, end, ctx);
- else
- ret = parse_btf_arg(tmp, pcode, end, ctx);
+ ret = parse_btf_field(tmp, ctx->last_struct, pcode, end, ctx);
return ret;
}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 7d71925244e8..f4fbe3010978 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -453,6 +453,7 @@ struct traceprobe_parse_context {
int nested_level;
};
+/* Each typecast consumes nested level. So the max number of typecast is 3. */
#define TRACEPROBE_MAX_NESTED_LEVEL 3
extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
@@ -592,7 +593,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(EVENT_TOO_BIG, "Event too big (too many fields?)"), \
C(TYPECAST_NOT_EVENT, "Typecasts are only for eprobe fields"), \
C(TYPECAST_REQ_FIELD, "Typecast requires a field access"), \
- C(TOO_MANY_NESTED, "Too many nested typecasts/dereferences"),
+ C(TOO_MANY_NESTED, "Too many nested typecasts/dereferences"), \
+ C(TYPECAST_SYM_OFFSET, "@SYM+/-OFFSET with typecast needs parentheses")
#undef C
#define C(a, b) TP_ERR_##a
^ permalink raw reply related
* [PATCH v8 05/10] tracing/probes: Support nested typecast
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
When we hit an open parenthesis right after typecast closing
parenthesis, it means we have nested typecast. This allows us to
typecast a generic data member in a structure to a pointer to
another structure.
For example, to cast a DATA_MEMBER of VAR structure to STRUCT pointer
and get MEMBER value.
(STRUCT)(VAR->DATA_MEMBER)->MEMBER
Also, we can nest typecast.
(STRUCT1)((STRUCT2)$ARG->FIELD2)->FIELD1
Currently the max nest level is limited to 3.
This also allows user to use typecasting for registers or stacks on
kprobe events. e.g.
(STRUCT)(%ax)->MEMBER
(STRUCT)($stack0)->MEMBER
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v6:
- Add a WARN_ON_ONCE check for leaking nested_level (it must not happen.)
Changes in v4:
- Use orig_offset for reporting NO_PTR_STRCT error.
Changes in v2:
- Fix to skip "->" after closing parenthetsis.
---
Documentation/trace/eprobetrace.rst | 2 +
Documentation/trace/fprobetrace.rst | 2 +
Documentation/trace/kprobetrace.rst | 2 +
kernel/trace/trace.c | 1
kernel/trace/trace_probe.c | 81 ++++++++++++++++++++++++++++++++---
kernel/trace/trace_probe.h | 7 +++
6 files changed, 86 insertions(+), 9 deletions(-)
diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst
index fe3602540569..cd0b4aa7f896 100644
--- a/Documentation/trace/eprobetrace.rst
+++ b/Documentation/trace/eprobetrace.rst
@@ -50,6 +50,8 @@ Synopsis of eprobe_events
a pointer to STRUCT and then derference the pointer defined by
->MEMBER. Note that when this is used, the FIELD name does not
need to be prefixed with a '$'.
+ (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+ also be used with another FETCHARG instead of FIELD.
Types
-----
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index 7435ded2d66d..6b8bb27bb62d 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -60,6 +60,8 @@ Synopsis of fprobe-events
(STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
a pointer to STRUCT and then derference the pointer defined by
->MEMBER.
+ (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+ also be used with another FETCHARG instead of FIELD.
(\*1) This is available only when BTF is enabled.
(\*2) only for the probe on function entry (offs == 0). Note, this argument access
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index f73614997d52..c4382765d5b2 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -65,6 +65,8 @@ Synopsis of kprobe_events
a pointer to STRUCT and then derference the pointer defined by
->MEMBER. Note that this is available only when the probe is
on function entry.
+ (STRUCT)(FETCHARG)->MEMBER[->MEMBER] : typecast can nest, so the above can
+ also be used with another FETCHARG instead of FIELD.
(\*1) only for the probe on function entry (offs == 0). Note, this argument access
is best effort, because depending on the argument type, it may be passed on
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index aa93e7b01146..4f70318918c2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4326,6 +4326,7 @@ static const char readme_msg[] =
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
"\t [(structname)]<argname>[->field[->field|.field...]],\n"
+ "\t [(structname)](fetcharg)->field[->field|.field...],\n"
#endif
#else
"\t $stack<index>, $stack, $retval, $comm,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index e6cc9f3d6c8b..1d6afda39462 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -832,10 +832,35 @@ static int query_btf_struct(const char *sname, struct traceprobe_parse_context *
return 0;
}
+/* Find the matching closing parenthesis for a given opening parenthesis. */
+static char *find_matched_close_paren(char *s)
+{
+ char *p = s;
+ int count = 0;
+
+ while (*p) {
+ if (*p == '(')
+ count++;
+ else if (*p == ')') {
+ if (--count == 0)
+ return p;
+ }
+ p++;
+ }
+ return NULL;
+}
+
+static int
+parse_probe_arg(char *arg, const struct fetch_type *type,
+ struct fetch_insn **pcode, struct fetch_insn *end,
+ struct traceprobe_parse_context *ctx);
+
static int handle_typecast(char *arg, struct fetch_insn **pcode,
struct fetch_insn *end,
struct traceprobe_parse_context *ctx)
{
+ int orig_offset = ctx->offset;
+ bool nested = false;
char *tmp;
int ret;
@@ -852,19 +877,56 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
DEREF_OPEN_BRACE);
return -EINVAL;
}
- *tmp = '\0';
- ret = query_btf_struct(arg + 1, ctx);
- *tmp = ')';
+ *tmp++ = '\0';
+
+ /* Handle the nested structure like (STRUCT)(VAR->FIELD)->... */
+ if (*tmp == '(') {
+ char *close = find_matched_close_paren(tmp);
+ ctx->offset += tmp - arg;
+ if (!close) {
+ trace_probe_log_err(ctx->offset, DEREF_OPEN_BRACE);
+ return -EINVAL;
+ }
+ /* We expect a field access for typecast */
+ if (close[1] != '-' || close[2] != '>') {
+ trace_probe_log_err(ctx->offset + close - tmp + 1,
+ TYPECAST_REQ_FIELD);
+ return -EINVAL;
+ }
+
+ ctx->nested_level++;
+ if (ctx->nested_level > TRACEPROBE_MAX_NESTED_LEVEL) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_NESTED);
+ return -E2BIG;
+ }
+ *close = '\0';
+
+ ctx->offset += 1; /* for the '(' */
+ /* We need to parse the nested one */
+ ret = parse_probe_arg(tmp + 1, find_fetch_type(NULL, ctx->flags),
+ pcode, end, ctx);
+ if (ret < 0)
+ return ret;
+ ctx->nested_level--;
+ clear_struct_btf(ctx);
+
+ tmp = close + 3;/* Skip "->" after closing parenthesis */
+ nested = true;
+ }
+
+ ret = query_btf_struct(arg + 1, ctx);
if (ret < 0) {
- trace_probe_log_err(ctx->offset + 1, NO_PTR_STRCT);
+ trace_probe_log_err(orig_offset + 1, NO_PTR_STRCT);
return -EINVAL;
}
- tmp++;
-
- ctx->offset += tmp - arg;
- ret = parse_btf_arg(tmp, pcode, end, ctx);
+ ctx->offset = orig_offset + tmp - arg;
+ /* If it is nested, tmp points to the field name. */
+ if (nested)
+ ret = parse_btf_field(tmp, ctx->last_struct, pcode, end, ctx);
+ else
+ ret = parse_btf_arg(tmp, pcode, end, ctx);
return ret;
}
@@ -1638,6 +1700,9 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
ctx);
if (ret < 0)
goto fail;
+ /* nested_level must be 0 here, otherwise there is a bug. */
+ if (WARN_ON_ONCE(ctx->nested_level))
+ goto fail;
/* Update storing type if BTF is available */
if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) &&
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index aa72e2ffdd93..7d71925244e8 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -450,8 +450,11 @@ struct traceprobe_parse_context {
struct trace_probe *tp;
unsigned int flags;
int offset;
+ int nested_level;
};
+#define TRACEPROBE_MAX_NESTED_LEVEL 3
+
extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
const char *argv,
struct traceprobe_parse_context *ctx);
@@ -587,7 +590,9 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(TOO_MANY_ARGS, "Too many arguments are specified"), \
C(TOO_MANY_EARGS, "Too many entry arguments specified"), \
C(EVENT_TOO_BIG, "Event too big (too many fields?)"), \
- C(TYPECAST_NOT_EVENT, "Typecasts are only for eprobe fields"),
+ C(TYPECAST_NOT_EVENT, "Typecasts are only for eprobe fields"), \
+ C(TYPECAST_REQ_FIELD, "Typecast requires a field access"), \
+ C(TOO_MANY_NESTED, "Too many nested typecasts/dereferences"),
#undef C
#define C(a, b) TP_ERR_##a
^ permalink raw reply related
* [PATCH v8 04/10] tracing/probes: Support typecast for various probe events
From: Masami Hiramatsu (Google) @ 2026-06-24 14:42 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Support BTF typecast feature on other probe events, but only if it is
kernel function entry or return, and must use function parameter name
or $retval. This means you can do:
(STRUCT)PARAM->MEMBER
Note: you can not use other variables like $stackN, %reg etc. That
needs nesting support.
To support other probe events, we just need to use last_struct type
when we find a function parameter in parse_btf_arg().
This also updates <tracefs>/README file to show struct typecast.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v5:
- Add comments about $retval with typecast.
- Even if the type of retvalue is not known, if user specifies typecast,
use it for its type.
Changes in v3:
- Clarify the limitation.
Changes in v2:
- Fix to re-enable typecast on eprobe.
---
Documentation/trace/fprobetrace.rst | 3 +++
Documentation/trace/kprobetrace.rst | 4 ++++
kernel/trace/trace.c | 2 +-
kernel/trace/trace_probe.c | 23 +++++++++++++++++------
kernel/trace/trace_probe.h | 5 +++++
5 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst
index b4c2ca3d02c1..7435ded2d66d 100644
--- a/Documentation/trace/fprobetrace.rst
+++ b/Documentation/trace/fprobetrace.rst
@@ -57,6 +57,9 @@ Synopsis of fprobe-events
(u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
(x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr"
and bitfield are supported.
+ (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
+ a pointer to STRUCT and then derference the pointer defined by
+ ->MEMBER.
(\*1) This is available only when BTF is enabled.
(\*2) only for the probe on function entry (offs == 0). Note, this argument access
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 3b6791c17e9b..f73614997d52 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -61,6 +61,10 @@ Synopsis of kprobe_events
(x8/x16/x32/x64), VFS layer common type(%pd/%pD), "char",
"string", "ustring", "symbol", "symstr" and bitfield are
supported.
+ (STRUCT)FIELD->MEMBER[->MEMBER] : If BTF is supported, typecast FIELD to
+ a pointer to STRUCT and then derference the pointer defined by
+ ->MEMBER. Note that this is available only when the probe is
+ on function entry.
(\*1) only for the probe on function entry (offs == 0). Note, this argument access
is best effort, because depending on the argument type, it may be passed on
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6eb4d3097a4d..aa93e7b01146 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4325,7 +4325,7 @@ static const char readme_msg[] =
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
- "\t <argname>[->field[->field|.field...]],\n"
+ "\t [(structname)]<argname>[->field[->field|.field...]],\n"
#endif
#else
"\t $stack<index>, $stack, $retval, $comm,\n"
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 0908019aea12..e6cc9f3d6c8b 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -699,7 +699,7 @@ static int parse_btf_arg(char *varname,
if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) {
code->op = FETCH_OP_RETVAL;
- /* Check whether the function return type is not void */
+ /* Check whether the function return type is not void, even with typecast. */
if (query_btf_context(ctx) == 0) {
if (ctx->proto->type == 0) {
trace_probe_log_err(ctx->offset, NO_RETVAL);
@@ -708,6 +708,13 @@ static int parse_btf_arg(char *varname,
tid = ctx->proto->type;
goto found;
}
+ /*
+ * Even if we can not find appropriate BTF info, we can still access
+ * the field via typecast.
+ */
+ if (ctx->struct_btf)
+ goto found;
+
if (field) {
trace_probe_log_err(ctx->offset + field - varname,
NO_BTF_ENTRY);
@@ -752,7 +759,10 @@ static int parse_btf_arg(char *varname,
return -ENOENT;
found:
- type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
+ if (ctx->struct_btf)
+ type = ctx->last_struct;
+ else
+ type = btf_type_skip_modifiers(ctx->btf, tid, NULL);
found_type:
if (!type) {
trace_probe_log_err(ctx->offset, BAD_BTF_TID);
@@ -829,10 +839,11 @@ static int handle_typecast(char *arg, struct fetch_insn **pcode,
char *tmp;
int ret;
- /* Currently this only works for eprobes */
- if (!(ctx->flags & TPARG_FL_TEVENT)) {
- trace_probe_log_err(ctx->offset, TYPECAST_NOT_EVENT);
- return -EINVAL;
+ if (!(tparg_is_event_probe(ctx->flags) ||
+ tparg_is_function_entry(ctx->flags) ||
+ tparg_is_function_return(ctx->flags))) {
+ trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+ return -EOPNOTSUPP;
}
tmp = strchr(arg, ')');
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index e36cfe39e9a8..aa72e2ffdd93 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -429,6 +429,11 @@ static inline bool tparg_is_function_return(unsigned int flags)
return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_RETURN);
}
+static inline bool tparg_is_event_probe(unsigned int flags)
+{
+ return !!(flags & TPARG_FL_TEVENT);
+}
+
struct traceprobe_parse_context {
struct trace_event_call *event;
/* BTF related parameters */
^ permalink raw reply related
* [PATCH v8 03/10] tracing/probes: Support dumping fetcharg program for debugging dynamic events
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
For debugging probe events, it is helpful to verify the compiled
fetch instructions for each probe argument. This introduces a new
kernel config CONFIG_PROBE_EVENTS_DUMP_FETCHARG to decode the
instruction sequence of each argument and display it under a
commented line starting with '#' immediately following the dynamic
event definition (such as in dynamic_events, kprobe_events,
uprobe_events, etc.).
For example:
/sys/kernel/tracing # cat dynamic_events
p:kprobes/p_vfs_read_0 vfs_read arg1=+0(file):ustring arg2=%ax:x16
# arg1: ARG(0) -> ST_USTRING(offset=0,size=4) -> END
# arg2: REG(80) -> ST_RAW(size=2) -> END
Assisted-by: Antigravity:gemini-3.5-flash
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v8:
- State this feature is only for debugging probe events.
- Fix dependency list after description in Kconfig.
Changes in v7:
- Show trace event field name for FETCH_OP_TP_ARG.
- Show immediate string value for FETCH_OP_IMMSTR.
- Fix style issues warned by checkpatch.pl.
Changes in v6:
- Newly added.
---
kernel/trace/Kconfig | 12 +++++
kernel/trace/trace_eprobe.c | 2 +
kernel/trace/trace_fprobe.c | 2 +
kernel/trace/trace_kprobe.c | 2 +
kernel/trace/trace_probe.c | 96 +++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_probe.h | 79 +++++++++++++++++++++--------------
kernel/trace/trace_uprobe.c | 3 +
7 files changed, 164 insertions(+), 32 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e130da35808f..ca78727ad121 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -779,6 +779,18 @@ config PROBE_EVENTS_BTF_ARGS
kernel function entry or a tracepoint.
This is available only if BTF (BPF Type Format) support is enabled.
+config PROBE_EVENTS_DUMP_FETCHARG
+ bool "Dump of dynamic probe event fetch-arguments"
+ depends on PROBE_EVENTS
+ default n
+ help
+ This shows the dump of fetch-arguments of dynamic probe events
+ alongside their event definitions in the dynamic_events file
+ as comment lines. This is useful to debug the probe events.
+ Since this exposes the raw values in the dynamic_events file,
+ it might be a security risk. Only enable it if you need to debug
+ probe events themselves.
+
config KPROBE_EVENTS
depends on KPROBES
depends on HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 50518b071414..462c31145733 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -87,6 +87,8 @@ static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
seq_putc(m, '\n');
+ trace_probe_dump_args(m, &ep->tp);
+
return 0;
}
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 4d1abbf66229..536781cd4c47 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -1449,6 +1449,8 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm);
seq_putc(m, '\n');
+ trace_probe_dump_args(m, &tf->tp);
+
return 0;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a8420e6abb56..cfa807d8e760 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1320,6 +1320,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
seq_putc(m, '\n');
+ trace_probe_dump_args(m, &tk->tp);
+
return 0;
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2ce7d62471cb..0908019aea12 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -2403,3 +2403,99 @@ int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_a
}
return 0;
}
+
+#ifdef CONFIG_PROBE_EVENTS_DUMP_FETCHARG
+
+struct fetch_op_decode {
+ const char *name;
+ void (*decode)(struct seq_file *m, struct fetch_insn *insn);
+};
+
+static const struct fetch_op_decode fetch_op_decode[];
+
+static void fetcharg_decode_none(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_puts(m, fetch_op_decode[insn->op].name);
+}
+
+static void fetcharg_decode_param(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_printf(m, "%s(%u)", fetch_op_decode[insn->op].name, insn->param);
+}
+
+static void fetcharg_decode_imm(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_printf(m, "%s(0x%lx)", fetch_op_decode[insn->op].name, insn->immediate);
+}
+
+static void fetcharg_decode_string(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, (char *)insn->data);
+}
+
+static void fetcharg_decode_symbol(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, (char *)insn->data);
+}
+
+static void fetcharg_decode_offset(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_printf(m, "%s(offset=%d)", fetch_op_decode[insn->op].name, insn->offset);
+}
+
+static void fetcharg_decode_store(struct seq_file *m, struct fetch_insn *insn)
+{
+ if (insn->op == FETCH_OP_ST_RAW)
+ seq_printf(m, "%s(size=%u)", fetch_op_decode[insn->op].name, insn->size);
+ else
+ seq_printf(m, "%s(offset=%d,size=%u)", fetch_op_decode[insn->op].name,
+ insn->offset, insn->size);
+}
+
+static void fetcharg_decode_bf(struct seq_file *m, struct fetch_insn *insn)
+{
+ seq_printf(m, "%s(basesize=%u,lshift=%u,rshift=%u)",
+ fetch_op_decode[insn->op].name, insn->basesize, insn->lshift, insn->rshift);
+}
+
+static void fetcharg_decode_tp_arg(struct seq_file *m, struct fetch_insn *insn)
+{
+ struct ftrace_event_field *field = insn->data;
+
+ seq_printf(m, "%s(%s)", fetch_op_decode[insn->op].name, field->name);
+}
+
+#define FETCH_OP(opname, decode_fn) \
+ [FETCH_OP_##opname] = { .name = #opname, .decode = fetcharg_decode_##decode_fn }
+
+static const struct fetch_op_decode fetch_op_decode[] = FETCH_OP_LIST;
+#undef FETCH_OP
+
+static void trace_probe_dump_arg(struct seq_file *m, struct probe_arg *parg)
+{
+ int i;
+
+ seq_printf(m, "# %s: ", parg->name);
+ for (i = 0; i < FETCH_INSN_MAX; i++) {
+ struct fetch_insn *insn = parg->code + i;
+
+ if (insn->op >= ARRAY_SIZE(fetch_op_decode) || !fetch_op_decode[insn->op].decode)
+ seq_printf(m, "unknown(%d)", insn->op);
+ else
+ fetch_op_decode[insn->op].decode(m, insn);
+
+ if (insn->op == FETCH_OP_END)
+ break;
+ seq_puts(m, " -> ");
+ }
+ seq_putc(m, '\n');
+}
+
+void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
+{
+ int i;
+
+ for (i = 0; i < tp->nr_args; i++)
+ trace_probe_dump_arg(m, &tp->args[i]);
+}
+#endif /* CONFIG_PROBE_EVENTS_DUMP_FETCHARG */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 2e0d8384ee5c..e36cfe39e9a8 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -83,38 +83,46 @@ static nokprobe_inline u32 update_data_loc(u32 loc, int consumed)
/* Printing function type */
typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
-enum fetch_op {
- FETCH_OP_NOP = 0,
- // Stage 1 (load) ops
- FETCH_OP_REG, /* Register : .param = offset */
- FETCH_OP_STACK, /* Stack : .param = index */
- FETCH_OP_STACKP, /* Stack pointer */
- FETCH_OP_RETVAL, /* Return value */
- FETCH_OP_IMM, /* Immediate : .immediate */
- FETCH_OP_COMM, /* Current comm */
- FETCH_OP_ARG, /* Function argument : .param */
- FETCH_OP_FOFFS, /* File offset: .immediate */
- FETCH_OP_IMMSTR, /* Allocated string: .data */
- FETCH_OP_EDATA, /* Entry data: .offset */
- // Stage 2 (dereference) op
- FETCH_OP_DEREF, /* Dereference: .offset */
- FETCH_OP_UDEREF, /* User-space Dereference: .offset */
- // Stage 3 (store) ops
- FETCH_OP_ST_RAW, /* Raw: .size */
- FETCH_OP_ST_MEM, /* Mem: .offset, .size */
- FETCH_OP_ST_UMEM, /* Mem: .offset, .size */
- FETCH_OP_ST_STRING, /* String: .offset, .size */
- FETCH_OP_ST_USTRING, /* User String: .offset, .size */
- FETCH_OP_ST_SYMSTR, /* Kernel Symbol String: .offset, .size */
- FETCH_OP_ST_EDATA, /* Store Entry Data: .offset */
- // Stage 4 (modify) op
- FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
- // Stage 5 (loop) op
- FETCH_OP_LP_ARRAY, /* Array: .param = loop count */
- FETCH_OP_TP_ARG, /* Trace Point argument */
- FETCH_OP_END,
- FETCH_NOP_SYMBOL, /* Unresolved Symbol holder */
-};
+#define FETCH_OP_LIST { \
+ /* Stage 1 (load) ops */ \
+ FETCH_OP(NOP, none), /* NOP */ \
+ FETCH_OP(REG, param), /* Register: .param = offset */ \
+ FETCH_OP(STACK, param), /* Stack: .param = index */ \
+ FETCH_OP(STACKP, none), /* Stack pointer */ \
+ FETCH_OP(RETVAL, none), /* Return value */ \
+ FETCH_OP(IMM, imm), /* Immediate: .immediate */ \
+ FETCH_OP(COMM, none), /* Current comm */ \
+ FETCH_OP(ARG, param), /* Argument: .param = index */ \
+ FETCH_OP(FOFFS, imm), /* File offset: .immediate */ \
+ FETCH_OP(IMMSTR, string), /* Allocated string: .data */ \
+ FETCH_OP(EDATA, offset), /* Entry data: .offset */ \
+ FETCH_OP(TP_ARG, tp_arg), /* Tracepoint argument: .data */\
+ /* Stage 2 (dereference) ops */ \
+ FETCH_OP(DEREF, offset), /* Dereference: .offset */ \
+ FETCH_OP(UDEREF, offset), /* User-space dereference: .offset */\
+ /* Stage 3 (store) ops */ \
+ FETCH_OP(ST_RAW, store), /* Raw value: .size */ \
+ FETCH_OP(ST_MEM, store), /* Memory: .offset, .size */ \
+ FETCH_OP(ST_UMEM, store), /* User memory: .offset, .size */\
+ FETCH_OP(ST_STRING, store), /* String: .offset, .size */ \
+ FETCH_OP(ST_USTRING, store), /* User string: .offset, .size */\
+ FETCH_OP(ST_SYMSTR, store), /* Symbol name: .offset, .size */\
+ FETCH_OP(ST_EDATA, offset), /* Entry data: .offset */ \
+ /* Stage 4 (modify) op */ \
+ FETCH_OP(MOD_BF, bf), /* Bitfield: .basesize, .lshift, .rshift*/\
+ /* Stage 5 (loop) op */ \
+ FETCH_OP(LP_ARRAY, param), /* Loop array: .param = count */\
+ /* End */ \
+ FETCH_OP(END, none), \
+ /* Unresolved Symbol holder */ \
+ FETCH_OP(NOP_SYMBOL, symbol), /* Non loaded symbol: .data = symbol name */\
+}
+
+#define FETCH_OP(opname, decode_fn) FETCH_OP_##opname
+enum fetch_op FETCH_OP_LIST;
+#undef FETCH_OP
+
+#define FETCH_NOP_SYMBOL FETCH_OP_NOP_SYMBOL
struct fetch_insn {
enum fetch_op op;
@@ -370,6 +378,13 @@ bool trace_probe_match_command_args(struct trace_probe *tp,
int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
u8 *data, void *field);
+#ifdef CONFIG_PROBE_EVENTS_DUMP_FETCHARG
+void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp);
+#else
+static inline void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
+{
+}
+#endif
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
int traceprobe_get_entry_data_size(struct trace_probe *tp);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c274346853d1..b2e264a4b96c 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -765,6 +765,9 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
seq_putc(m, '\n');
+
+ trace_probe_dump_args(m, &tu->tp);
+
return 0;
}
^ permalink raw reply related
* [PATCH v8 02/10] tracing/probes: Allow eprobe to use variable without $ prefix
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
The commit 69efd863a785 ("tracing/eprobes: Allow use of BTF names
to dereference pointers") allows eprobe to use event field without
"$" prefix when it is used with typecast, it is natual to allow it
without typecast.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v8:
- Newly added.
---
kernel/trace/trace_probe.c | 12 +++++++++++-
kernel/trace/trace_probe.h | 1 +
.../test.d/dynevent/eprobes_syntax_errors.tc | 3 +--
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 0da7c0b53ba7..2ce7d62471cb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1341,7 +1341,17 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
ret = handle_typecast(arg, pcode, end, ctx);
break;
default:
- if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */
+ if (isalpha(arg[0]) || arg[0] == '_') {
+ /* BTF variable or event field*/
+ if (ctx->flags & TPARG_FL_TEVENT) {
+ ret = parse_trace_event(arg, *pcode, ctx);
+ if (ret < 0) {
+ trace_probe_log_err(ctx->offset,
+ NO_EVENT_FIELD);
+ return -EINVAL;
+ }
+ break;
+ }
if (!tparg_is_function_entry(ctx->flags) &&
!tparg_is_function_return(ctx->flags)) {
trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 40b53b5b58a9..2e0d8384ee5c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -559,6 +559,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(NO_PTR_STRCT, "This is not a pointer to union/structure."), \
C(NOSUP_DAT_ARG, "Non pointer structure/union argument is not supported."),\
C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \
+ C(NO_EVENT_FIELD, "This event field is not found."), \
C(NO_BTF_FIELD, "This field is not found."), \
C(BAD_BTF_TID, "Failed to get BTF type info."),\
C(BAD_TYPE4STR, "This type does not fit for string."),\
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
index 2a680c086047..0e65e787e426 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
@@ -10,7 +10,7 @@ check_error() { # command-with-error-pos-by-^
check_error 'e ^a.' # NO_EVENT_INFO
check_error 'e ^.b' # NO_EVENT_INFO
check_error 'e ^a.b' # BAD_ATTACH_EVENT
-check_error 'e syscalls/sys_enter_openat ^foo' # BAD_ATTACH_ARG
+check_error 'e syscalls/sys_enter_openat ^foo' # NO_EVENT_FIELD
check_error 'e:^/bar syscalls/sys_enter_openat' # NO_GROUP_NAME
check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat' # GROUP_TOO_LONG
@@ -19,7 +19,6 @@ check_error 'e:^ syscalls/sys_enter_openat' # NO_EVENT_NAME
check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat' # EVENT_TOO_LONG
check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
-check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG
check_error 'e:foo/bar syscalls/sys_enter_openat arg=^$foo' # BAD_ATTACH_ARG
if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then
^ permalink raw reply related
* [PATCH v8 01/10] tracing/probes: Make the $ prefix mandatory for comm access
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
In-Reply-To: <178231208703.732967.1160700962651040729.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Since $comm or $COMM are not event field but special fetcharg
variables to access current->comm, It should not be accessed
without '$' prefix even with typecast.
Fixes: 69efd863a785 ("tracing/eprobes: Allow use of BTF names to dereference pointers")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
Changes in v8:
- Newly added.
---
kernel/trace/trace_probe.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index c10bbb0df7b9..0da7c0b53ba7 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -342,10 +342,6 @@ static int parse_trace_event(char *arg, struct fetch_insn *code,
ret = parse_trace_event_arg(arg, code, ctx);
if (!ret)
return 0;
- if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
- code->op = FETCH_OP_COMM;
- return 0;
- }
return -EINVAL;
}
@@ -1065,8 +1061,14 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
int len;
if (ctx->flags & TPARG_FL_TEVENT) {
- if (parse_trace_event(arg, code, ctx) < 0)
+ if (parse_trace_event(arg, code, ctx) < 0) {
+ /* 'comm' should be checked after field parsing. */
+ if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
+ code->op = FETCH_OP_COMM;
+ return 0;
+ }
goto inval;
+ }
return 0;
}
^ permalink raw reply related
* [PATCH v8 00/10] tracing/probes: Add more typecast features
From: Masami Hiramatsu (Google) @ 2026-06-24 14:41 UTC (permalink / raw)
To: Steven Rostedt, Mathieu Desnoyers
Cc: Jonathan Corbet, Shuah Khan, Masami Hiramatsu, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest
Hi,
Here is the 8th version of series to introduce more typecast features
to probe events. The previous version is here:
https://lore.kernel.org/all/178217904992.643090.15726197350652241270.stgit@devnote2/
In this version, I removed already picked 2 patches and add 2 new
fix and feature patches. The previous BTF typecast patch allows
`(STRUCT)FIELD->MEMBER` without $ prefix for eprobes, but it also
allows user to use COMM/comm instead of FIELD. $COMM/$comm are special
variables, so it should not skip $ prefix[1/10]. However, accessing
event fields without $ prefix itself is acceptable, it is generically
allowed without typecast[2/10].
Other patches have small fixes according to Julian and Sashiko's
comments and are rebased on top of probes/core branch.
This series extends BTF typecast feature and add more options:
1. Expanding BTF typecast to kprobe and fprobe.
(currently only function entry/exit)
2. Introduce container_of like typecast. This adds a "assigned
member" option to the typecast.
(STRUCT,MEMBER)VAR->ANOTHER_MEMBER
This casts VAR to STRUCT type but the VAR is as the address
of STRUCT.MEMBER. In C, it is:
container_of(VAR, STRUCT, MEMBER)->ANOTHER_MEMBER
3. Support nested typecast, e.g.
(STRUCT)((STRUCT2)VAR->MEMBER2)->MEMBER
the nest level must be smaller than 3.
4. Add $current variable to point "current" task_struct.
This is useful with typecast, e.g.
(task_struct)$current->pid
5. per-cpu dereference support.
Intrdouce this_cpu_read(VAR) and this_cpu_ptr(VAR) to
access per-cpu data on the current CPU (accessing other CPU
data is not stable, because it can be changed.)
You can access the member of per-cpu data structure using
typecast like:
(STRUCT)this_cpu_ptr(VAR)->MEMBER
6. Support event fields without $ prefix on eprobes.
Now eprobe events can access its event fields.
And added fetcharg dump feature (for debug) and updated test scripts
to test part of them.
Thanks,
---
base-commit: 18dfb4703cd6af27deb30d628dac2e7db2b24e6a
Masami Hiramatsu (Google) (10):
tracing/probes: Make the $ prefix mandatory for comm access
tracing/probes: Allow eprobe to use variable without $ prefix
tracing/probes: Support dumping fetcharg program for debugging dynamic events
tracing/probes: Support typecast for various probe events
tracing/probes: Support nested typecast
tracing/probes: Type casting always involves nested calls
tracing/probes: Support field specifier option for typecast
tracing/probes: Add $current variable support
tracing/probes: Add this_cpu_read() and this_cpu_ptr() dereference method to fetcharg
tracing/probes: Add a new testcase for BTF typecasts
Documentation/trace/eprobetrace.rst | 9
Documentation/trace/fprobetrace.rst | 10
Documentation/trace/kprobetrace.rst | 11
kernel/trace/Kconfig | 12
kernel/trace/trace.c | 8
kernel/trace/trace_eprobe.c | 2
kernel/trace/trace_fprobe.c | 2
kernel/trace/trace_kprobe.c | 2
kernel/trace/trace_probe.c | 586 ++++++++++++++++----
kernel/trace/trace_probe.h | 99 ++-
kernel/trace/trace_probe_tmpl.h | 25 +
kernel/trace/trace_uprobe.c | 3
samples/trace_events/trace-events-sample.c | 40 +
samples/trace_events/trace-events-sample.h | 34 +
.../ftrace/test.d/dynevent/btf_probe_event.tc | 51 ++
.../test.d/dynevent/eprobes_syntax_errors.tc | 6
.../ftrace/test.d/dynevent/fprobe_syntax_errors.tc | 12
.../ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 12
.../ftrace/test.d/kprobe/uprobe_syntax_errors.tc | 5
19 files changed, 770 insertions(+), 159 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/btf_probe_event.tc
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply
* Re: [PATCH v8 09/46] KVM: guest_memfd: Introduce function to check GFN private/shared status
From: Ackerley Tng @ 2026-06-24 14:38 UTC (permalink / raw)
To: Binbin Wu
Cc: aik, andrew.jones, brauner, chao.p.peng, david, jmattson,
jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <1b59fec2-a464-4429-8532-880394912af5@linux.intel.com>
Binbin Wu <binbin.wu@linux.intel.com> writes:
>
> [...snip...]
>
>> +bool kvm_gmem_is_private(struct kvm *kvm, gfn_t gfn)
>> +{
>> + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
>> + struct inode *inode;
>> +
>> + /*
>> + * If this gfn has no associated memslot, there's no chance of the gfn
>> + * being backed by private memory, since guest_memfd must be used for
>> + * private memory,
>
> "guest_memfd must be used for private memory" is a bit confusing to me.
>
Hmm good point. Is the source of confusion that guest_memfd can be used
for both shared and private memory?
Perhaps this can be rephrased as:
guest_memfd is the only provider of private memory and guest_memfd must
be used with a memslot, hence if there's no associated memslot, there's
no chance of this gfn being private.
>> and guest_memfd must be associated with some memslot.
>> + */
>> + if (!slot)
>> + return 0;
>> +
>>
>> [...snip...]
>>
^ permalink raw reply
* [RFC PATCH 01/11] Docs/mm/damon/design: update for DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP
From: SeongJae Park @ 2026-06-24 14:19 UTC (permalink / raw)
Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, David Hildenbrand,
Jonathan Corbet, Lorenzo Stoakes, Michal Hocko, Mike Rapoport,
Shuah Khan, Suren Baghdasaryan, Vlastimil Babka, damon, linux-doc,
linux-kernel, linux-mm
In-Reply-To: <20260624142008.87180-1-sj@kernel.org>
Commit 9138e27a3bc3 ("mm/damon: add node_eligible_mem_bp goal metric")
introduced DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP but forgot updating the
DAMON design document for that. Update.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
Documentation/mm/damon/design.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index c16a3bb288d07..06a8894eb06f9 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -686,6 +686,8 @@ mechanism tries to make ``current_value`` of ``target_metric`` be same to
(1/10,000).
- ``inactive_mem_bp``: Inactive to active + inactive (LRU) memory size ratio in
bp (1/10,000).
+- ``node_eligible_mem_bp``: Scheme target access pattern-eligible memory ratio
+ of a node in bp (1/10,000).
``nid`` is optionally required for only ``node_mem_used_bp``,
``node_mem_free_bp``, ``node_memcg_used_bp`` and ``node_memcg_free_bp`` to
--
2.47.3
^ permalink raw reply related
* [RFC PATCH 00/11] mm/damon: update, optimize, and clean up doc, tests, and code
From: SeongJae Park @ 2026-06-24 14:19 UTC (permalink / raw)
Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, Brendan Higgins,
David Gow, David Hildenbrand, Jonathan Corbet, Lorenzo Stoakes,
Michal Hocko, Mike Rapoport, Shuah Khan, Shuah Khan,
Suren Baghdasaryan, Vlastimil Babka, damon, kunit-dev, linux-doc,
linux-kernel, linux-kselftest, linux-mm
Patches 1 and 2 update the design and ABI documents for recently added
DAMON features. Patches 3-7 add or update more unit and self tests for
DAMON to cover recently changed or added functions and sysfs files.
Patch 8 optimizes damon_commit_target_regions() to skip unnecessary
adjacent ranges setup. Patches 9-11 clean and fix up recently added
DAMON sysfs interface code for readability.
SeongJae Park (11):
Docs/mm/damon/design: update for DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP
Docs/ABI/damon: document probe files
mm/damon/tests/core-kunit: test damon_rand()
selftests/damon/sysfs.sh: test multiple probe dirs creation
selftests/damon/sysfs.sh: test {core,ops}_filters/ directories
selftests/damon/sysfs.sh: test dests dir
selftests/damon/sysfs.sh: test all files in quota goal dir
mm/damon/core: reduce range setup in damon_commit_target_regions()
mm/damon/sysfs: split probe setup function out
mm/damon/sysfs: split out filters setup function
mm/damon/sysfs: fix typos in probe_{add,rm}_dirs: s/attr/probe/
.../ABI/testing/sysfs-kernel-mm-damon | 40 +++++++
Documentation/mm/damon/design.rst | 2 +
mm/damon/core.c | 22 +++-
mm/damon/sysfs.c | 102 ++++++++++--------
mm/damon/tests/core-kunit.h | 21 ++++
tools/testing/selftests/damon/sysfs.sh | 70 +++++++++++-
6 files changed, 206 insertions(+), 51 deletions(-)
base-commit: 197a7eb91f786e5deeb1dfea35076c01ebd37ce0
--
2.47.3
^ permalink raw reply
* Re: [PATCH v8 08/46] KVM: Provide generic interface for checking memory private/shared status
From: Ackerley Tng @ 2026-06-24 14:18 UTC (permalink / raw)
To: Suzuki K Poulose, Fuad Tabba
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, aneesh.kumar, liam, Paolo Bonzini,
Sean Christopherson, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
Kairui Song, Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen,
Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt,
Kiryl Shutsemau, Baoquan He, Jason Gunthorpe, Vlastimil Babka,
kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
linux-mm, linux-coco
In-Reply-To: <3ec15992-2a29-434b-8c99-8b86bfcf007e@arm.com>
Suzuki K Poulose <suzuki.poulose@arm.com> writes:
>
> [...snip...]
>
>>>> @@ -2546,7 +2546,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
>>>> bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
>>>> struct kvm_gfn_range *range);
>>>>
>>>> -static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>>> +static inline bool kvm_vm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>
>> Should have read the Sashiko review first, but where is this used?
>> It's not used at all in this series...
>
> See below:
>
>>
>> /fuad
>>
>>>> {
>>>> return kvm_get_vm_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
>>>> }
>>>> @@ -2557,6 +2557,16 @@ static inline bool kvm_mem_range_is_private(struct kvm *kvm, gfn_t start,
>>>> KVM_MEMORY_ATTRIBUTE_PRIVATE,
>>>> KVM_MEMORY_ATTRIBUTE_PRIVATE);
>>>> }
>>>> +#endif /* CONFIG_KVM_VM_MEMORY_ATTRIBUTES */
>>>> +
>>>> +#ifdef kvm_arch_has_private_mem
>>>> +typedef bool (kvm_mem_is_private_t)(struct kvm *kvm, gfn_t gfn);
>>>> +DECLARE_STATIC_CALL(__kvm_mem_is_private, kvm_mem_is_private_t);
>>>> +
>>>> +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>>> +{
>>>> + return static_call(__kvm_mem_is_private)(kvm, gfn);
>>>> +}
>>>> #else
>>>> static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>>>> {
>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>> index 6669f1477013c..8b238e461b854 100644
>>>> --- a/virt/kvm/kvm_main.c
>>>> +++ b/virt/kvm/kvm_main.c
>>>> @@ -2627,6 +2627,20 @@ static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
>>>> }
>>>> #endif /* CONFIG_KVM_VM_MEMORY_ATTRIBUTES */
>>>>
>>>> +#ifdef kvm_arch_has_private_mem
>>>> +DEFINE_STATIC_CALL_RET0(__kvm_mem_is_private, kvm_mem_is_private_t);
>>>> +EXPORT_STATIC_CALL_GPL(__kvm_mem_is_private);
>>>> +
>>>> +static void kvm_init_memory_attributes(void)
>>>> +{
>>>> +#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
>>>> + static_call_update(__kvm_mem_is_private, kvm_vm_mem_is_private);
>>>> +#endif
>>>> +}
>
>
> Here ^^ as the static call update ?
>
>
> Suzuki
Thanks Suzuki, it is used here. kvm_mem_is_private() was and still is
the function used to check if some gfn is private or shared. Hence, in
this patch, the usages of kvm_mem_is_private() were not
updated. Instead, kvm_mem_is_private() is now set up as a static call,
and the static call is hard-wired to kvm_vm_mem_is_private() in this
patch.
In the later wiring patch, all the places where attributes are looked up
are updated all at once: if conversion enabled, take gmem route, else
take VM route.
kvm_mem_is_private() is special in that the if-else is done at KVM load
time rather than runtime, and I believe that's for performance reasons
since this is checked quite often from the KVM fault handling code.
Buut I think perhaps Fuad was referring to kvm_mem_range_is_private(),
which is indeed not used anywhere. Binbin also asked about this, I think
we should drop kvm_mem_range_is_private(). My reply to Binbin is at [1].
[1] https://lore.kernel.org/all/CAEvNRgGbBcrX5Fw3vNTsTOBNC=Ypi=9-S07674yPxLU9i4akjA@mail.gmail.com/
^ permalink raw reply
* Re: [PATCH v2] usbcore: Add quirk for 255-bytes initial config read
From: Alan Stern @ 2026-06-24 14:00 UTC (permalink / raw)
To: Nikhil Solanke
Cc: linux-usb, gregkh, linux-kernel, michal.pecio, stable, corbet,
skhan, linux-doc
In-Reply-To: <CAFgddhJ2HeJ=oTBX_axMJcgJq7GXH9abe+LH+x9NGekGO4BMyw@mail.gmail.com>
On Wed, Jun 24, 2026 at 01:36:28PM +0530, Nikhil Solanke wrote:
> > Actually, the best approach here would be to put this single change into
> > a separate patch that comes before the current one. That removes issues
> > of making more than one functional change in one patch and improves
> > bisectability.
>
> Before? Shouldn't it be after my changes? That would make it easier to
> justify the changes. And just to be sure, you did mention it does
> align with what the intention of USB_QUIRK_DELAY_INIT, but it does
> change its behavior when the quirk is not set. Atleast from what I
> understood from the documentation and an LLM's summary, the device
> needs time to prepare the full configuration set. So, does delaying
> before the first header read really work? I can't test this since I
> don't have a device that requires the quirk to be set.
>
> I personally think adding a condition to check if the quirk is set and
> then delaying before sending the first request would be appropriate.
> What are your opinions on this.
Well, put it this way: If you change the existing behavior, that change
belongs in a separate patch. If you want to redo this patch so that it
doesn't change anything when the quirk flag isn't set, that's fine.
> Also is it fine if the string lines exceed 100 columns?
In lines containing long strings, it's okay for the string to extend
well beyond 80 columns. But then you should break the line at some
point closely following the end of the string. I'm sure you can find
examples of this if you look through some of the other source files.
> Also, is there a need to check for krealloc()'s return value? Since we
> are only shrinking the buffer, there won't be any moves or completely
> new blocks (at least as per my understanding). Do I still need to
> check its return value for completeness' sake?
It's a little tricky to track this down, but if you look in
include/linux/slab.h you'll see that krealloc() is defined as
krealloc_node(), which is defined as krealloc_node_align(), which is
defined as krealloc_node_align_noprof(), which is declared with
__must_check. So yes, you need to check the return value from
krealloc().
Of course, you could simply try not checking the return value and seeing
if that provokes a warning or error from the compiler.
Alan Stern
^ permalink raw reply
* Re: [PATCH v8 07/46] KVM: Rename memory attribute APIs to prepare for in-place gmem conversion
From: Ackerley Tng @ 2026-06-24 13:44 UTC (permalink / raw)
To: Binbin Wu
Cc: aik, andrew.jones, brauner, chao.p.peng, david, jmattson,
jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
aneesh.kumar, liam, Paolo Bonzini, Sean Christopherson,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
Baoquan He, Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <96fb369d-dbff-4ed6-b1f9-0ce63d7d4ed0@linux.intel.com>
Binbin Wu <binbin.wu@linux.intel.com> writes:
>
> [...snip...]
>
>> +static inline bool kvm_mem_range_is_private(struct kvm *kvm, gfn_t start,
>> + gfn_t end)
>> +{
>> + return kvm_range_has_vm_memory_attributes(kvm, start, end,
>> + KVM_MEMORY_ATTRIBUTE_PRIVATE,
>> + KVM_MEMORY_ATTRIBUTE_PRIVATE);
>> }
>
> This function is added, but never used in this patch series.
> Is it intended to be called only when CONFIG_KVM_VM_MEMORY_ATTRIBUTES is
> enabled?
>
Thank you for catching this! I think in some earlier revision this was
meant to be used from the guest_memfd populate flow.
I think the version of kvm_gmem_range_is_private in this revision is
good because it is symmetric. If conversion is enabled, call the gmem
range-has-attributes function, and if conversion is disabled, use the VM
range-has-attributes function.
Sean, if no new revision is needed would you be able to drop
kvm_mem_range_is_private() while you're pulling it in?
>>
>> [...snip...]
>>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox