linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
@ 2025-07-14  6:25 aspeedyh
  2025-07-14  8:54 ` Jeremy Kerr
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: aspeedyh @ 2025-07-14  6:25 UTC (permalink / raw)
  To: jk, matt, andrew+netdev, davem, edumazet, kuba, pabeni,
	linux-kernel, netdev, bmc-sw

Add an implementation of the DMTF DSP0238 MCTP PCIe VDM transport spec.

Introduce struct mctp_pcie_vdm_dev to represent each PCIe VDM
interface and its send/receive operations.  Register a
net_device with the MCTP core so packets traverse the standard
networking socket API.

Because there is no generic PCIe VDM bus framework in-tree, this
driver provides a transport interface for lower layers to
implement vendor-specific read/write callbacks.

The TX path uses a dedicated kernel thread and a ptr_ring: skbs queued by
the MCTP stack are placed on the ring and processed in thread context.

The RX path employs a workqueue; the lower layer notifies the driver of
packet availability, and the work handler drains packets until the receive
callback returns an ERR_PTR, avoiding per-packet notifications.

Signed-off-by: aspeedyh <yh_chung@aspeedtech.com>
---
 drivers/net/mctp/Kconfig         |  10 +
 drivers/net/mctp/Makefile        |   1 +
 drivers/net/mctp/mctp-pcie-vdm.c | 673 +++++++++++++++++++++++++++++++
 include/linux/mctp-pcie-vdm.h    |  42 ++
 4 files changed, 726 insertions(+)
 create mode 100644 drivers/net/mctp/mctp-pcie-vdm.c
 create mode 100644 include/linux/mctp-pcie-vdm.h

diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
index cf325ab0b1ef..3355613bb04b 100644
--- a/drivers/net/mctp/Kconfig
+++ b/drivers/net/mctp/Kconfig
@@ -57,6 +57,16 @@ config MCTP_TRANSPORT_USB
 	  MCTP-over-USB interfaces are peer-to-peer, so each interface
 	  represents a physical connection to one remote MCTP endpoint.
 
+config MCTP_TRANSPORT_PCIE_VDM
+	tristate "MCTP PCIe VDM transport"
+	depends on PCI
+	select MCTP_FLOWS
+	help
+	  Provides a driver to access MCTP devices over PCIe VDM transport,
+	  from DMTF specification DSP0238.
+	  An MCTP protocol network device is created for each PCIe VDM
+	  device that registers with this driver.
+
 endmenu
 
 endif
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
index c36006849a1e..4acd04d7eb32 100644
--- a/drivers/net/mctp/Makefile
+++ b/drivers/net/mctp/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MCTP_SERIAL) += mctp-serial.o
 obj-$(CONFIG_MCTP_TRANSPORT_I2C) += mctp-i2c.o
 obj-$(CONFIG_MCTP_TRANSPORT_I3C) += mctp-i3c.o
 obj-$(CONFIG_MCTP_TRANSPORT_USB) += mctp-usb.o
+obj-$(CONFIG_MCTP_TRANSPORT_PCIE_VDM) += mctp-pcie-vdm.o
diff --git a/drivers/net/mctp/mctp-pcie-vdm.c b/drivers/net/mctp/mctp-pcie-vdm.c
new file mode 100644
index 000000000000..7ac4d3f55992
--- /dev/null
+++ b/drivers/net/mctp/mctp-pcie-vdm.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mctp-pcie-vdm.c - MCTP-over-PCIe-VDM (DMTF DSP0238) transport binding driver.
+ *
+ * DSP0238 is available at:
+ * https://www.dmtf.org/sites/default/files/standards/documents/DSP0238_1.2.0.pdf
+ *
+ */
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/byteorder/generic.h>
+#include <linux/dynamic_debug.h>
+#include <linux/fs.h>
+#include <linux/hashtable.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/mctp-pcie-vdm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/ptr_ring.h>
+#include <linux/skbuff.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+
+#define MCTP_PCIE_VDM_MIN_MTU 64
+#define MCTP_PCIE_VDM_MAX_MTU 512
+/* 16 bytes */
+#define MCTP_PCIE_VDM_HDR_SIZE 16
+#define MCTP_PAYLOAD_IC_TYPE_SIZE 1
+#define MCTP_RECEIVE_PKT_TIMEOUT_MS 5
+
+#define MCTP_PCIE_VDM_NET_DEV_TX_QUEUE_LEN 1100
+#define MCTP_PCIE_VDM_DEV_TX_QUEUE_SIZE 64
+
+#define MCTP_PCIE_VDM_FMT_4DW 0x3
+#define MCTP_PCIE_VDM_TYPE_MSG 0x10
+#define MCTP_PCIE_VDM_CODE 0x0
+/* PCIe VDM message code */
+#define MCTP_PCIE_VDM_MSG_CODE 0x7F
+#define MCTP_PCIE_VDM_VENDOR_ID 0x1AB4
+/* MCTP message type */
+#define MCTP_MSG_TYPE_MASK GENMASK(6, 0)
+#define MCTP_PCIE_VDM_MSG_TYPE 0x7E
+#define MCTP_CONTROL_MSG_TYPE 0x00
+
+#define MCTP_CTRL_MSG_RQDI_REQ 0x80
+#define MCTP_CTRL_MSG_RQDI_RSP 0x00
+
+#define MCTP_CTRL_MSG_RQDI_REQ_DATA_OFFSET 3
+#define MCTP_CTRL_MSG_RQDI_RSP_DATA_OFFSET 4
+
+#define MCTP_PCIE_SWAP_NET_ENDIAN(arr, len)       \
+	do {                                      \
+		u32 *p = (u32 *)(arr);            \
+		for (int i = 0; i < (len); i++) { \
+			p[i] = htonl(p[i]);       \
+		}                                 \
+	} while (0)
+
+#define MCTP_PCIE_SWAP_LITTLE_ENDIAN(arr, len)      \
+	do {                                      \
+		u32 *p = (u32 *)(arr);            \
+		for (int i = 0; i < (len); i++) { \
+			p[i] = ntohl(p[i]);       \
+			p[i] = cpu_to_le32(p[i]); \
+		}                                 \
+	} while (0)
+
+enum mctp_pcie_vdm_route_type {
+	MCTP_PCIE_VDM_ROUTE_TO_RC = 0,
+	MCTP_PCIE_VDM_ROUTE_BY_ID = 2,
+	MCTP_PCIE_VDM_BROADCAST_FROM_RC = 3,
+};
+
+enum mctp_ctrl_command_code {
+	MCTP_CTRL_CMD_SET_ENDPOINT_ID = 0x01,
+	MCTP_CTRL_CMD_GET_ENDPOINT_ID = 0x02,
+	MCTP_CTRL_CMD_PREPARE_ENDPOINT_DISCOVERY = 0x0B,
+	MCTP_CTRL_CMD_ENDPOINT_DISCOVERY = 0x0C,
+	MCTP_CTRL_CMD_DISCOVERY_NOTIFY = 0x0D
+};
+
+struct mctp_ctrl_msg_hdr {
+	u8 ctrl_msg_class;
+	u8 command_code;
+};
+
+struct mctp_pcie_vdm_hdr {
+	u32 length : 10, rsvd0 : 2, attr : 2, ep : 1, td : 1, rsvd1 : 4, tc : 3,
+		rsvd2 : 1, route_type : 5, fmt : 2, rsvd3 : 1;
+	u8 msg_code;
+	u8 tag_vdm_code : 4, tag_pad_len : 2, tag_rsvd : 2;
+	u16 pci_req_id;
+	u16 pci_vendor_id;
+	u16 pci_target_id;
+};
+
+struct mctp_pcie_vdm_route_info {
+	u8 eid;
+	u8 dirty;
+	u16 bdf_addr;
+	struct hlist_node hnode;
+};
+
+struct mctp_pcie_vdm_dev {
+	struct device *dev;
+	struct net_device *ndev;
+	struct task_struct *tx_thread;
+	wait_queue_head_t tx_wait;
+	struct work_struct rx_work;
+	struct ptr_ring tx_queue;
+	struct list_head list;
+	/* each network may have at most 256 EIDs */
+	DECLARE_HASHTABLE(route_table, 8);
+	const struct mctp_pcie_vdm_ops *callback_ops;
+};
+
+/* mutex for vdm_devs add/delete */
+DEFINE_MUTEX(mctp_pcie_vdm_dev_mutex);
+LIST_HEAD(mctp_pcie_vdm_devs);
+struct workqueue_struct *mctp_pcie_vdm_wq;
+
+static const struct mctp_pcie_vdm_hdr mctp_pcie_vdm_hdr_template = {
+	.fmt = MCTP_PCIE_VDM_FMT_4DW,
+	.route_type = MCTP_PCIE_VDM_TYPE_MSG | MCTP_PCIE_VDM_ROUTE_BY_ID,
+	.tag_vdm_code = MCTP_PCIE_VDM_CODE,
+	.msg_code = MCTP_PCIE_VDM_MSG_CODE,
+	.pci_vendor_id = MCTP_PCIE_VDM_VENDOR_ID,
+	.attr = 0,
+};
+
+static void mctp_pcie_vdm_display_skb_buff_data(struct sk_buff *skb)
+{
+	int i = 0;
+
+	while ((i + 4) < skb->len) {
+		pr_debug("%02x %02x %02x %02x\n", skb->data[i],
+			 skb->data[i + 1], skb->data[i + 2], skb->data[i + 3]);
+		i += 4;
+	}
+
+	char buf[16] = { 0 };
+	char *p = buf;
+
+	while (i < skb->len) {
+		p += snprintf(p, sizeof(buf) - (p - buf), "%02x ",
+			      skb->data[i]);
+		i++;
+	}
+	pr_debug("%s\n", buf);
+}
+
+static void mctp_pcie_vdm_update_route_table(struct mctp_pcie_vdm_dev *vdm_dev,
+					     u8 eid, u16 bdf)
+{
+	if (eid == 0x00 || eid == 0xFF)
+		return;
+
+	bool exist = false;
+	struct mctp_pcie_vdm_route_info *route;
+
+	hash_for_each_possible(vdm_dev->route_table, route, hnode, eid) {
+		pr_debug("%s: route table eid %d maps to %d", __func__,
+			 route->eid, route->bdf_addr);
+		if (route->eid == eid) {
+			exist = true;
+			route->bdf_addr = bdf;
+			break;
+		}
+	}
+
+	if (!exist) {
+		route = kmalloc(sizeof(*route), GFP_KERNEL);
+		route->bdf_addr = bdf;
+		route->eid = eid;
+		route->dirty = 0;
+
+		hash_add(vdm_dev->route_table, &route->hnode, route->eid);
+		pr_debug("%s: not found, add map eid %d to bdf 0x%x", __func__,
+			 eid, bdf);
+	}
+}
+
+static void mctp_pcie_vdm_ctrl_msg_handler(struct mctp_pcie_vdm_dev *vdm_dev,
+					   u8 *packet)
+{
+	u8 message_type =
+		FIELD_GET(MCTP_MSG_TYPE_MASK, packet[MCTP_PCIE_VDM_HDR_SIZE]);
+
+	if (message_type != MCTP_CONTROL_MSG_TYPE)
+		return;
+
+	struct mctp_ctrl_msg_hdr *ctrl_hdr =
+		(struct mctp_ctrl_msg_hdr *)(&packet[MCTP_PCIE_VDM_HDR_SIZE + 1]);
+
+	/* host endian expected */
+	struct mctp_pcie_vdm_hdr *hdr = (struct mctp_pcie_vdm_hdr *)packet;
+
+	switch (ctrl_hdr->command_code) {
+	case MCTP_CTRL_CMD_SET_ENDPOINT_ID:
+		if (ctrl_hdr->ctrl_msg_class == MCTP_CTRL_MSG_RQDI_REQ) {
+			/* EID placed at byte2 of SET EID REQ DATA */
+			u8 dst_eid =
+				packet[MCTP_PCIE_VDM_HDR_SIZE +
+				       MCTP_CTRL_MSG_RQDI_REQ_DATA_OFFSET + 1];
+			u16 dst_bdf = hdr->pci_target_id;
+
+			mctp_pcie_vdm_update_route_table(vdm_dev, dst_eid,
+							 dst_bdf);
+		}
+		break;
+	case MCTP_CTRL_CMD_GET_ENDPOINT_ID:
+		if (ctrl_hdr->ctrl_msg_class == MCTP_CTRL_MSG_RQDI_RSP) {
+			/* EID placed at byte2 of GET EID RSP DATA */
+			u8 target_eid =
+				packet[MCTP_PCIE_VDM_HDR_SIZE +
+				       MCTP_CTRL_MSG_RQDI_RSP_DATA_OFFSET + 1];
+			u16 src_bdf = hdr->pci_req_id;
+
+			mctp_pcie_vdm_update_route_table(vdm_dev, target_eid,
+							 src_bdf);
+		}
+		break;
+	case MCTP_CTRL_CMD_DISCOVERY_NOTIFY:
+		hdr->pci_target_id = 0x0000;
+		/* default use MCTP_PCIE_VDM_ROUTE_BY_ID, so no need to handle RSP class */
+		if (ctrl_hdr->ctrl_msg_class == MCTP_CTRL_MSG_RQDI_REQ) {
+			hdr->route_type = MCTP_PCIE_VDM_TYPE_MSG |
+					  MCTP_PCIE_VDM_ROUTE_TO_RC;
+		}
+		break;
+	case MCTP_CTRL_CMD_PREPARE_ENDPOINT_DISCOVERY:
+	case MCTP_CTRL_CMD_ENDPOINT_DISCOVERY:
+		if (ctrl_hdr->ctrl_msg_class == MCTP_CTRL_MSG_RQDI_REQ) {
+			hdr->route_type = MCTP_PCIE_VDM_TYPE_MSG |
+					  MCTP_PCIE_VDM_BROADCAST_FROM_RC;
+			hdr->pci_target_id = 0xFFFF;
+		} else if (ctrl_hdr->ctrl_msg_class == MCTP_CTRL_MSG_RQDI_RSP) {
+			hdr->route_type = MCTP_PCIE_VDM_TYPE_MSG |
+					  MCTP_PCIE_VDM_ROUTE_TO_RC;
+		}
+		break;
+	default:
+		/* Unknown command code or not supported currently */
+		break;
+	}
+}
+
+static netdev_tx_t mctp_pcie_vdm_start_xmit(struct sk_buff *skb,
+					    struct net_device *ndev)
+{
+	struct mctp_pcie_vdm_dev *vdm_dev = netdev_priv(ndev);
+
+	pr_debug("%s: skb len %u\n", __func__, skb->len);
+
+	netdev_tx_t ret;
+
+	if (ptr_ring_full(&vdm_dev->tx_queue)) {
+		pr_debug("%s: failed to send packet, buffer full\n", __func__);
+		netif_stop_queue(ndev);
+
+		ret = NETDEV_TX_BUSY;
+	} else {
+		int reason = ptr_ring_produce_bh(&vdm_dev->tx_queue, skb);
+
+		if (reason) {
+			pr_err("%s: failed to produce skb to tx queue, reason %d\n",
+			       __func__, reason);
+			ret = NETDEV_TX_BUSY;
+		} else {
+			ret = NETDEV_TX_OK;
+		}
+	}
+
+	wake_up(&vdm_dev->tx_wait);
+
+	return ret;
+}
+
+static void mctp_pcie_vdm_xmit(struct mctp_pcie_vdm_dev *vdm_dev,
+			       struct sk_buff *skb)
+{
+	struct net_device_stats *stats;
+	struct mctp_pcie_vdm_hdr *hdr;
+	u8 *hdr_byte;
+	u8 message_type;
+	u16 payload_len_dw;
+	u16 payload_len_byte;
+	int rc;
+
+	stats = &vdm_dev->ndev->stats;
+	hdr = (struct mctp_pcie_vdm_hdr *)skb->data;
+	hdr_byte = skb->data;
+	payload_len_dw = (ALIGN(skb->len, sizeof(u32)) - MCTP_PCIE_VDM_HDR_SIZE) / sizeof(u32);
+	payload_len_byte = skb->len - MCTP_PCIE_VDM_HDR_SIZE;
+	message_type = FIELD_GET(MCTP_MSG_TYPE_MASK, hdr_byte[MCTP_PCIE_VDM_HDR_SIZE]);
+
+	if (message_type == MCTP_CONTROL_MSG_TYPE) {
+		mctp_pcie_vdm_ctrl_msg_handler(vdm_dev, hdr_byte);
+	} else {
+		if (hdr->route_type ==
+		    (MCTP_PCIE_VDM_TYPE_MSG | MCTP_PCIE_VDM_ROUTE_BY_ID)) {
+			struct mctp_pcie_vdm_route_info *route;
+			bool exist = false;
+			u16 bdf = 0x00;
+			u8 dst_eid =
+				FIELD_GET(GENMASK(15, 8),
+					  *((u32 *)skb->data +
+					    sizeof(struct mctp_pcie_vdm_hdr) /
+						    sizeof(u32)));
+
+			hash_for_each_possible(vdm_dev->route_table, route,
+					       hnode, dst_eid) {
+				if (route->eid == dst_eid) {
+					exist = true;
+					bdf = route->bdf_addr;
+					hdr->pci_target_id = bdf;
+					break;
+				}
+			}
+			if (exist)
+				pr_debug("%s fill bdf 0x%x to eid %d", __func__,
+					 bdf, dst_eid);
+		}
+	}
+
+	hdr->length = payload_len_dw;
+	hdr->tag_pad_len =
+		ALIGN(payload_len_byte, sizeof(u32)) - payload_len_byte;
+	pr_debug("%s: skb len %d pad len %d\n", __func__, skb->len,
+		 hdr->tag_pad_len);
+	MCTP_PCIE_SWAP_NET_ENDIAN((u32 *)hdr,
+				  sizeof(struct mctp_pcie_vdm_hdr) / sizeof(u32));
+
+	mctp_pcie_vdm_display_skb_buff_data(skb);
+	rc = vdm_dev->callback_ops->send_packet(vdm_dev->dev, skb->data,
+								payload_len_dw * sizeof(u32));
+
+	if (rc) {
+		pr_err("%s: failed to send packet, rc %d\n", __func__, rc);
+		stats->tx_errors++;
+		return;
+	}
+	stats->tx_packets++;
+	stats->tx_bytes += (skb->len - sizeof(struct mctp_pcie_vdm_hdr));
+}
+
+static int mctp_pcie_vdm_tx_thread(void *data)
+{
+	struct mctp_pcie_vdm_dev *vdm_dev = data;
+
+	for (;;) {
+		wait_event_idle(vdm_dev->tx_wait,
+				__ptr_ring_peek(&vdm_dev->tx_queue) ||
+					kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		while (__ptr_ring_peek(&vdm_dev->tx_queue)) {
+			struct sk_buff *skb;
+
+			skb = ptr_ring_consume(&vdm_dev->tx_queue);
+
+			if (skb) {
+				mctp_pcie_vdm_xmit(vdm_dev, skb);
+				kfree_skb(skb);
+			}
+		}
+		if (netif_queue_stopped(vdm_dev->ndev))
+			netif_wake_queue(vdm_dev->ndev);
+	}
+
+	pr_debug("%s stopping\n", __func__);
+	return 0;
+}
+
+static void mctp_pcie_vdm_rx_work_handler(struct work_struct *work)
+{
+	struct mctp_pcie_vdm_dev *vdm_dev;
+	u8 *packet;
+
+	vdm_dev = container_of(work, struct mctp_pcie_vdm_dev, rx_work);
+	packet = vdm_dev->callback_ops->recv_packet(vdm_dev->dev);
+
+	while (!IS_ERR(packet)) {
+		MCTP_PCIE_SWAP_LITTLE_ENDIAN((u32 *)packet,
+					     sizeof(struct mctp_pcie_vdm_hdr) / sizeof(u32));
+		struct mctp_pcie_vdm_hdr *vdm_hdr = (struct mctp_pcie_vdm_hdr *)packet;
+		struct mctp_skb_cb *cb;
+		struct net_device_stats *stats;
+		struct sk_buff *skb;
+		u16 len;
+		int net_status;
+
+		stats = &vdm_dev->ndev->stats;
+		len = vdm_hdr->length * sizeof(u32) -
+				vdm_hdr->tag_pad_len;
+		len += MCTP_PCIE_VDM_HDR_SIZE;
+		skb = netdev_alloc_skb(vdm_dev->ndev, len);
+		pr_debug("%s: received packet size: %d\n", __func__,
+			 len);
+
+		if (!skb) {
+			stats->rx_errors++;
+			pr_err("%s: failed to alloc skb\n", __func__);
+			continue;
+		}
+
+		skb->protocol = htons(ETH_P_MCTP);
+		/* put data into tail sk buff */
+		skb_put_data(skb, packet, len);
+		/* remove the 12-byte PCIe VDM header */
+		skb_pull(skb, sizeof(struct mctp_pcie_vdm_hdr));
+		mctp_pcie_vdm_display_skb_buff_data(skb);
+
+		cb = __mctp_cb(skb);
+		cb->halen = 2; // BDF size is 2 bytes
+		memcpy(cb->haddr, &vdm_hdr->pci_req_id, cb->halen);
+
+		net_status = netif_rx(skb);
+		if (net_status == NET_RX_SUCCESS) {
+			stats->rx_packets++;
+			stats->rx_bytes += skb->len;
+		} else {
+			stats->rx_dropped++;
+		}
+
+		mctp_pcie_vdm_ctrl_msg_handler(vdm_dev, packet);
+
+		if (vdm_hdr->route_type ==
+			(MCTP_PCIE_VDM_TYPE_MSG |
+				MCTP_PCIE_VDM_ROUTE_BY_ID)) {
+			u32 mctp_hdr;
+			u16 bdf;
+			u8 src_eid;
+
+			bdf = vdm_hdr->pci_req_id;
+			mctp_hdr = *((u32 *)packet +
+				     sizeof(struct mctp_pcie_vdm_hdr) /
+					     sizeof(u32));
+			MCTP_PCIE_SWAP_LITTLE_ENDIAN(&mctp_hdr, 1);
+			src_eid = FIELD_GET(GENMASK(15, 8), mctp_hdr);
+
+			mctp_pcie_vdm_update_route_table(vdm_dev,
+							 src_eid, bdf);
+		}
+
+		vdm_dev->callback_ops->free_packet(packet);
+		packet = vdm_dev->callback_ops->recv_packet(vdm_dev->dev);
+	}
+}
+
+static int mctp_pcie_vdm_add_mctp_dev(struct mctp_pcie_vdm_dev *vdm_dev)
+{
+	hash_init(vdm_dev->route_table);
+	INIT_LIST_HEAD(&vdm_dev->list);
+	init_waitqueue_head(&vdm_dev->tx_wait);
+	ptr_ring_init(&vdm_dev->tx_queue, MCTP_PCIE_VDM_DEV_TX_QUEUE_SIZE,
+		      GFP_KERNEL);
+	vdm_dev->tx_thread = kthread_run(mctp_pcie_vdm_tx_thread, vdm_dev,
+					 "mctp_pcie_vdm_tx_thread");
+	INIT_WORK(&vdm_dev->rx_work, mctp_pcie_vdm_rx_work_handler);
+
+	mutex_lock(&mctp_pcie_vdm_dev_mutex);
+	list_add_tail(&vdm_dev->list, &mctp_pcie_vdm_devs);
+	mutex_unlock(&mctp_pcie_vdm_dev_mutex);
+	return 0;
+}
+
+static void mctp_pcie_vdm_uninit(struct net_device *ndev)
+{
+	struct mctp_pcie_vdm_dev *vdm_dev;
+
+	struct mctp_pcie_vdm_route_info *route;
+	struct hlist_node *tmp;
+	int bkt;
+
+	pr_debug("%s: uninitializing vdm_dev %s\n", __func__,
+		 vdm_dev->ndev->name);
+
+	vdm_dev = netdev_priv(ndev);
+	vdm_dev->callback_ops->uninit(vdm_dev->dev);
+
+	hash_for_each_safe(vdm_dev->route_table, bkt, tmp, route, hnode) {
+		hash_del(&route->hnode);
+		kfree(route);
+	}
+
+	if (vdm_dev->tx_thread) {
+		kthread_stop(vdm_dev->tx_thread);
+		vdm_dev->tx_thread = NULL;
+	}
+
+	if (mctp_pcie_vdm_wq) {
+		cancel_work_sync(&vdm_dev->rx_work);
+		flush_workqueue(mctp_pcie_vdm_wq);
+		destroy_workqueue(mctp_pcie_vdm_wq);
+		mctp_pcie_vdm_wq = NULL;
+	}
+
+	while (__ptr_ring_peek(&vdm_dev->tx_queue)) {
+		struct sk_buff *skb;
+
+		skb = ptr_ring_consume(&vdm_dev->tx_queue);
+
+		if (skb)
+			kfree_skb(skb);
+	}
+
+	mutex_lock(&mctp_pcie_vdm_dev_mutex);
+	list_del(&vdm_dev->list);
+	mutex_unlock(&mctp_pcie_vdm_dev_mutex);
+}
+
+static int mctp_pcie_vdm_hdr_create(struct sk_buff *skb,
+				    struct net_device *ndev,
+				    unsigned short type, const void *daddr,
+				    const void *saddr, unsigned int len)
+{
+	struct mctp_pcie_vdm_hdr *hdr =
+		(struct mctp_pcie_vdm_hdr *)skb_push(skb, sizeof(*hdr));
+
+	pr_debug("%s type %d len %d\n", __func__, type, len);
+	memcpy(hdr, &mctp_pcie_vdm_hdr_template, sizeof(*hdr));
+	if (daddr) {
+		pr_debug("%s dst addr %d\n", __func__, *(u16 *)daddr);
+		hdr->pci_target_id = *(u16 *)daddr;
+	}
+
+	if (saddr) {
+		pr_debug("%s src addr %d\n", __func__, *(u16 *)saddr);
+		hdr->pci_req_id = *(u16 *)saddr;
+	}
+
+	return 0;
+}
+
+static const struct net_device_ops mctp_pcie_vdm_net_ops = {
+	.ndo_start_xmit = mctp_pcie_vdm_start_xmit,
+	.ndo_uninit = mctp_pcie_vdm_uninit,
+};
+
+static const struct header_ops mctp_pcie_vdm_net_hdr_ops = {
+	.create = mctp_pcie_vdm_hdr_create,
+};
+
+static void mctp_pcie_vdm_net_setup(struct net_device *ndev)
+{
+	ndev->type = ARPHRD_MCTP;
+
+	ndev->mtu = MCTP_PCIE_VDM_MIN_MTU;
+	ndev->min_mtu = MCTP_PCIE_VDM_MIN_MTU;
+	ndev->max_mtu = MCTP_PCIE_VDM_MAX_MTU;
+	ndev->tx_queue_len = MCTP_PCIE_VDM_NET_DEV_TX_QUEUE_LEN;
+	ndev->addr_len = 2; //PCIe bdf is 2 bytes
+	ndev->hard_header_len = sizeof(struct mctp_pcie_vdm_hdr);
+
+	ndev->netdev_ops = &mctp_pcie_vdm_net_ops;
+	ndev->header_ops = &mctp_pcie_vdm_net_hdr_ops;
+}
+
+static int mctp_pcie_vdm_add_net_dev(struct net_device **dev)
+{
+	struct net_device *ndev = alloc_netdev(sizeof(struct mctp_pcie_vdm_dev),
+					       "mctppci%d", NET_NAME_UNKNOWN,
+					       mctp_pcie_vdm_net_setup);
+
+	if (!ndev) {
+		pr_err("%s: failed to allocate net device\n", __func__);
+		return -ENOMEM;
+	}
+	dev_net_set(ndev, current->nsproxy->net_ns);
+
+	*dev = ndev;
+	int rc;
+
+	rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_PCIE_VDM);
+	if (rc) {
+		pr_err("%s: failed to register net device\n", __func__);
+		free_netdev(ndev);
+		return rc;
+	}
+	return rc;
+}
+
+struct mctp_pcie_vdm_dev *mctp_pcie_vdm_add_dev(struct device *dev,
+						const struct mctp_pcie_vdm_ops *ops)
+{
+	struct net_device *ndev;
+	int rc;
+
+	rc = mctp_pcie_vdm_add_net_dev(&ndev);
+	if (rc) {
+		pr_err("%s: failed to add net device\n", __func__);
+		return ERR_PTR(rc);
+	}
+
+	struct mctp_pcie_vdm_dev *vdm_dev;
+
+	vdm_dev = netdev_priv(ndev);
+	vdm_dev->ndev = ndev;
+	vdm_dev->dev = dev;
+	vdm_dev->callback_ops = ops;
+
+	rc = mctp_pcie_vdm_add_mctp_dev(vdm_dev);
+	if (rc) {
+		pr_err("%s: failed to add mctp device\n", __func__);
+		unregister_netdev(ndev);
+		free_netdev(ndev);
+		return ERR_PTR(rc);
+	}
+	return vdm_dev;
+}
+EXPORT_SYMBOL_GPL(mctp_pcie_vdm_add_dev);
+
+void mctp_pcie_vdm_remove_dev(struct mctp_pcie_vdm_dev *vdm_dev)
+{
+	pr_debug("%s: removing vdm_dev %s\n", __func__, vdm_dev->ndev->name);
+	struct net_device *ndev = vdm_dev->ndev;
+
+	if (ndev) {
+		// objects in vdm_dev will be released in net_dev uninit operator
+		mctp_unregister_netdev(ndev);
+		free_netdev(ndev);
+	}
+}
+EXPORT_SYMBOL_GPL(mctp_pcie_vdm_remove_dev);
+
+void mctp_pcie_vdm_notify_rx(struct mctp_pcie_vdm_dev *vdm_dev)
+{
+	queue_work(mctp_pcie_vdm_wq, &vdm_dev->rx_work);
+}
+EXPORT_SYMBOL_GPL(mctp_pcie_vdm_notify_rx);
+
+static __init int mctp_pcie_vdm_mod_init(void)
+{
+	mctp_pcie_vdm_wq = alloc_workqueue("mctp_pcie_vdm_wq", WQ_UNBOUND, 1);
+	if (!mctp_pcie_vdm_wq) {
+		pr_err("Failed to create mctp pcie vdm workqueue\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static __exit void mctp_pcie_vdm_mod_exit(void)
+{
+	struct mctp_pcie_vdm_dev *vdm_dev;
+	struct mctp_pcie_vdm_dev *tmp;
+
+	list_for_each_entry_safe(vdm_dev, tmp, &mctp_pcie_vdm_devs, list) {
+		mctp_pcie_vdm_remove_dev(vdm_dev);
+	}
+}
+
+module_init(mctp_pcie_vdm_mod_init);
+module_exit(mctp_pcie_vdm_mod_exit);
+
+MODULE_DESCRIPTION("MCTP PCIe VDM transport");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("YH <yh_chung@aspeedtech.com>");
diff --git a/include/linux/mctp-pcie-vdm.h b/include/linux/mctp-pcie-vdm.h
new file mode 100644
index 000000000000..da3fad7929b2
--- /dev/null
+++ b/include/linux/mctp-pcie-vdm.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mctp-pcie-vdm.h - MCTP-over-PCIe-VDM (DMTF DSP0238) transport binding Interface
+ * for PCIe VDM devices to register and implement.
+ *
+ */
+
+#ifndef __LINUX_MCTP_PCIE_VDM_H
+#define __LINUX_MCTP_PCIE_VDM_H
+
+#include <linux/device.h>
+#include <linux/notifier.h>
+
+struct mctp_pcie_vdm_dev;
+
+/**
+ * @send_packet: called to transmit a packet with the PCIe VDM header packed.
+ * @recv_packet: called repeatedly until no RX packet remains to be handled;
+ *               the returned pointer shall start at the PCIe VDM header.
+ * @free_packet: called once a received packet is processed and may be freed.
+ * @uninit: uninitialize the device.
+ */
+struct mctp_pcie_vdm_ops {
+	int (*send_packet)(struct device *dev, u8 *data, size_t len);
+	u8 *(*recv_packet)(struct device *dev);
+	void (*free_packet)(void *packet);
+	void (*uninit)(struct device *dev);
+};
+
+struct mctp_pcie_vdm_dev *mctp_pcie_vdm_add_dev(struct device *dev,
+						const struct mctp_pcie_vdm_ops *ops);
+void mctp_pcie_vdm_remove_dev(struct mctp_pcie_vdm_dev *vdm_dev);
+
+/**
+ * Notify mctp-pcie-vdm that packets have been received and are ready to be
+ * processed. Once notified, mctp-pcie-vdm calls recv_packet() repeatedly
+ * until no more packets are pending. The lower-layer driver can keep
+ * receiving packets while the upper layer processes those already received.
+ */
+void mctp_pcie_vdm_notify_rx(struct mctp_pcie_vdm_dev *vdm_dev);
+
+#endif	 /* __LINUX_MCTP_PCIE_VDM_H */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-14  6:25 [PATCH] net: mctp: Add MCTP PCIe VDM transport driver aspeedyh
@ 2025-07-14  8:54 ` Jeremy Kerr
  2025-07-14 12:37   ` YH Chung
  2025-07-15  2:26 ` kernel test robot
  2025-07-17 10:39 ` kernel test robot
  2 siblings, 1 reply; 17+ messages in thread
From: Jeremy Kerr @ 2025-07-14  8:54 UTC (permalink / raw)
  To: aspeedyh, matt, andrew+netdev, davem, edumazet, kuba, pabeni,
	linux-kernel, netdev, bmc-sw
  Cc: Khang D Nguyen

Hi YH,

[+CC Khang]

I have some overall questions before we get into a full review of the
series, inline below.

> Add an implementation for DMTF DSP0238 MCTP PCIe VDM transport spec.
> 
> Introduce struct mctp_pcie_vdm_dev to represent each PCIe VDM
> interface and its send/receive operations.  Register a
> net_device with the MCTP core so packets traverse the standard
> networking socket API.
> 
> Because there is no generic PCIe VDM bus framework in-tree, this
> driver provides a transport interface for lower layers to
> implement vendor-specific read/write callbacks.

Do we really need an abstraction for MCTP VDM drivers? How many are you
expecting? Can you point us to a client of the VDM abstraction?

There is some value in keeping consistency for the MCTP lladdr formats
across PCIe transports, but I'm not convinced we need a whole
abstraction layer for this.

> TX path uses a dedicated kernel thread and ptr_ring: skbs queued by the
> MCTP stack are enqueued on the ring and processed in-thread context.

Is this somehow more suitable than the existing netdev queues?

> +struct mctp_pcie_vdm_route_info {
> +       u8 eid;
> +       u8 dirty;
> +       u16 bdf_addr;
> +       struct hlist_node hnode;
> +};

Why are you keeping your own routing table in the transport driver? We
already have the route and neighbour tables in the MCTP core code.

Your assumption that you can intercept MCTP control messages to keep a
separate routing table will not work.

> +static void mctp_pcie_vdm_net_setup(struct net_device *ndev)
> +{
> +       ndev->type = ARPHRD_MCTP;
> +
> +       ndev->mtu = MCTP_PCIE_VDM_MIN_MTU;
> +       ndev->min_mtu = MCTP_PCIE_VDM_MIN_MTU;
> +       ndev->max_mtu = MCTP_PCIE_VDM_MAX_MTU;
> +       ndev->tx_queue_len = MCTP_PCIE_VDM_NET_DEV_TX_QUEUE_LEN;
> +       ndev->addr_len = 2; //PCIe bdf is 2 bytes

One of the critical things to get right is the lladdr format for PCIe
VDM devices, as it will be visible to userspace. While the PCIe b/d/fn
data is indeed two bytes, the MCTP addressing format for VDM data is not
entirely representable in two bytes: we also have the routing type to
encode. DSP0238 requires us to use Route to Root Complex and Broadcast
from Root Complex for certain messages, so we need some way to represent
that in your proposed lladdr format.

For this reason, you may want to encode this as [type, bdfn] data.

Also, in flit mode, we also have the segment byte to encode.

Cheers,


Jeremy

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-14  8:54 ` Jeremy Kerr
@ 2025-07-14 12:37   ` YH Chung
  2025-07-15  1:08     ` Jeremy Kerr
  0 siblings, 1 reply; 17+ messages in thread
From: YH Chung @ 2025-07-14 12:37 UTC (permalink / raw)
  To: Jeremy Kerr, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi Jeremy,
Appreciate for the feedback! Please check inline below.

>Hi YH,
>
>[+CC Khang]
>
>I have some overall questions before we get into a full review of the series,
>inline below.
>
>> Add an implementation for DMTF DSP0238 MCTP PCIe VDM transport spec.
>>
>> Introduce struct mctp_pcie_vdm_dev to represent each PCIe VDM
>> interface and its send/receive operations.  Register a net_device with
>> the MCTP core so packets traverse the standard networking socket API.
>>
>> Because there is no generic PCIe VDM bus framework in-tree, this
>> driver provides a transport interface for lower layers to implement
>> vendor-specific read/write callbacks.
>
>Do we really need an abstraction for MCTP VDM drivers? How many are you
>expecting? Can you point us to a client of the VDM abstraction?
>
>There is some value in keeping consistency for the MCTP lladdr formats across
>PCIe transports, but I'm not convinced we need a whole abstraction layer for
>this.
>
We plan to follow existing upstream MCTP transports—such as I²C, I³C, and USB—by abstracting the hardware-specific details into a common interface and focus on the transport binding protocol in this patch. This driver has been tested by our AST2600 and AST2700 MCTP driver.

>> TX path uses a dedicated kernel thread and ptr_ring: skbs queued by
>> the MCTP stack are enqueued on the ring and processed in-thread context.
>
>Is this somehow more suitable than the existing netdev queues?
>
Our current implementation has two operations that take time:
1) Configure the PCIe VDM routing type as DSP0238 requested if we are sending certain ctrl message command codes like Discovery Notify request or Endpoint Discovery response.
2) Update the BDF/EID routing table.

Since the netdev TX queue call-stack should be executed quickly, we offload these items to a dedicated kernel thread and use ptr_ring to process packets in batches.

>> +struct mctp_pcie_vdm_route_info {
>> +       u8 eid;
>> +       u8 dirty;
>> +       u16 bdf_addr;
>> +       struct hlist_node hnode;
>> +};
>
>Why are you keeping your own routing table in the transport driver? We already
>have the route and neighbour tables in the MCTP core code.
>
>Your assumption that you can intercept MCTP control messages to keep a
>separate routing table will not work.
>
We maintain a routing table in the transport driver to record the mapping between BDFs and EIDs, as the BDF is only present in the PCIe VDM header of received Endpoint Discovery Responses. This information is not forwarded to the MCTP core in the MCTP payload. We update the table with this mapping before forwarding the MCTP message to the core.

Additionally, if the MCTP Bus Owner operates in Endpoint (EP) role on the PCIe bus, it cannot obtain the physical addresses of other devices from the PCIe bus. Therefore, recording the physical address in the transport driver may be, in our view, a practical solution to this limitation.

>> +static void mctp_pcie_vdm_net_setup(struct net_device *ndev) {
>> +       ndev->type = ARPHRD_MCTP;
>> +
>> +       ndev->mtu = MCTP_PCIE_VDM_MIN_MTU;
>> +       ndev->min_mtu = MCTP_PCIE_VDM_MIN_MTU;
>> +       ndev->max_mtu = MCTP_PCIE_VDM_MAX_MTU;
>> +       ndev->tx_queue_len =
>MCTP_PCIE_VDM_NET_DEV_TX_QUEUE_LEN;
>> +       ndev->addr_len = 2; //PCIe bdf is 2 bytes
>
>One of the critical things to get right is the lladdr format for PCIe VDM devices,
>as it will be visible to userspace. While the PCIe b/d/fn data is indeed two bytes,
>the MCTP addressing format for VDM data is not entirely representable in two
>bytes: we also have the routing type to encode. DSP0238 requires us to use
>Route to Root Complex and Broadcast from Root Complex for certain messages,
>so we need some way to represent that in your proposed lladdr format.
>
>For this reason, you may want to encode this as [type, bdfn] data.
>
>Also, in flit mode, we also have the segment byte to encode.
>
Agreed. In our implementation, we always fill in the "Route By ID" type when the core asks us to create the header, since we don't know the correct type to fill in at that time. Later we update the route type based on the ctrl message code when doing TX. I think it would be nice to have a unified address format so we can get the actual route type from the passed-in lladdr when creating the header.

>Cheers,
>
>
>Jeremy

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-14 12:37   ` YH Chung
@ 2025-07-15  1:08     ` Jeremy Kerr
  2025-07-17  3:07       ` YH Chung
  0 siblings, 1 reply; 17+ messages in thread
From: Jeremy Kerr @ 2025-07-15  1:08 UTC (permalink / raw)
  To: YH Chung, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi YH,

> > Do we really need an abstraction for MCTP VDM drivers? How many are you
> > expecting? Can you point us to a client of the VDM abstraction?
> > 
> > There is some value in keeping consistency for the MCTP lladdr formats across
> > PCIe transports, but I'm not convinced we need a whole abstraction layer for
> > this.
> > 
> We plan to follow existing upstream MCTP transports—such as I²C, I³C,
> and USB—by abstracting the hardware-specific details into a common
> interface and focus on the transport binding protocol in this patch.
> This driver has been tested by our AST2600 and AST2700 MCTP driver.

Is that one driver (for both 2600 and 2700) or two?

I'm still not convinced you need an abstraction layer specifically for
VDM transports, especially as you're forcing a specific driver model
with the deferral of TX to a separate thread.

Even if this abstraction layer is a valid approach, it would not be
merged until you also have an in-kernel user of it.

> > > TX path uses a dedicated kernel thread and ptr_ring: skbs queued by
> > > the MCTP stack are enqueued on the ring and processed in-thread context.
> > 
> > Is this somehow more suitable than the existing netdev queues?
> > 
> Our current implementation has two operations that take time: 1)
> Configure the PCIe VDM routing type as DSP0238 requested if we are
> sending certain ctrl message command codes like Discovery Notify
> request or Endpoint Discovery response. 2) Update the BDF/EID routing
> table.

More on this below, but: you don't need to handle either of those in
a transport driver.

> > > +struct mctp_pcie_vdm_route_info {
> > > +       u8 eid;
> > > +       u8 dirty;
> > > +       u16 bdf_addr;
> > > +       struct hlist_node hnode;
> > > +};
> > 
> > Why are you keeping your own routing table in the transport driver? We already
> > have the route and neighbour tables in the MCTP core code.
> > 
> > Your assumption that you can intercept MCTP control messages to keep a
> > separate routing table will not work.
> > 
> We maintain a routing table in the transport driver to record the
> mapping between BDFs and EIDs, as the BDF is only present in the PCIe
> VDM header of received Endpoint Discovery Responses. This information
> is not forwarded to the MCTP core in the MCTP payload. We update the
> table with this mapping before forwarding the MCTP message to the
> core.

There is already support for this in the MCTP core - the neighbour table
maintains mappings between EID and link-layer addresses. In the case of
a PCIe VDM transport, those link-layer addresses contain the bdf data.

The transport driver only needs to be involved in packet transmit and
receive. Your bdf data is provided to the driver through the
header_ops->create() op.

Any management of the neighbour table is performed by userspace, which
has visibility of the link-layer addresses of incoming skbs - assuming
your drivers are properly setting the cb->haddr data on receive.

This has already been established through the existing transports that
consume lladdr data (i2c and i3c). You should not be handling the
lladdr-to-EID mapping *at all* in the transport driver.
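
To make that concrete, the whole interaction reduces to something like the
following (a sketch only, reusing the field names from your patch):

	/* RX: report the sender's b/d/fn as the link-layer source address */
	cb = __mctp_cb(skb);
	cb->halen = 2;
	memcpy(cb->haddr, &vdm_hdr->pci_req_id, cb->halen);
	netif_rx(skb);

	/* TX: the core passes the destination b/d/fn from its neighbour
	 * table into header_ops->create() as daddr
	 */
	if (daddr)
		hdr->pci_target_id = *(const u16 *)daddr;

No EID-to-bdf lookup is needed in the driver for either direction.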

> Additionally, if the MCTP Bus Owner operates in Endpoint (EP) role on
> the PCIe bus, it cannot obtain the physical addresses of other devices
> from the PCIe bus.

Sure it can, there are mechanisms for discovery. However, that's
entirely handled by userspace, which can update the existing neighbour
table.

> Agreed. In our implementation, we always fill in the "Route By ID"
> type when the core asks us to create the header, since we don't know
> the correct type to fill in at that time. Later we update the route
> type based on the ctrl message code when doing TX. I think it would
> be nice to have a unified address format so we can get the actual
> route type from the passed-in lladdr when creating the header.

OK, so we'd include the routing type in the lladdr data then.

Cheers,


Jeremy

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-14  6:25 [PATCH] net: mctp: Add MCTP PCIe VDM transport driver aspeedyh
  2025-07-14  8:54 ` Jeremy Kerr
@ 2025-07-15  2:26 ` kernel test robot
  2025-07-17 10:39 ` kernel test robot
  2 siblings, 0 replies; 17+ messages in thread
From: kernel test robot @ 2025-07-15  2:26 UTC (permalink / raw)
  To: aspeedyh, jk, matt, andrew+netdev, davem, edumazet, kuba, pabeni,
	linux-kernel, netdev, bmc-sw
  Cc: llvm, oe-kbuild-all

Hi aspeedyh,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]
[also build test WARNING on net/main linus/master v6.16-rc6 next-20250714]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/aspeedyh/net-mctp-Add-MCTP-PCIe-VDM-transport-driver/20250714-142656
base:   net-next/main
patch link:    https://lore.kernel.org/r/20250714062544.2612693-1-yh_chung%40aspeedtech.com
patch subject: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20250715/202507151015.GPkeA21H-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250715/202507151015.GPkeA21H-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507151015.GPkeA21H-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/net/mctp/mctp-pcie-vdm.c:491:4: warning: variable 'vdm_dev' is uninitialized when used here [-Wuninitialized]
     491 |                  vdm_dev->ndev->name);
         |                  ^~~~~~~
   include/linux/printk.h:631:26: note: expanded from macro 'pr_debug'
     631 |         dynamic_pr_debug(fmt, ##__VA_ARGS__)
         |                                 ^~~~~~~~~~~
   include/linux/dynamic_debug.h:270:22: note: expanded from macro 'dynamic_pr_debug'
     270 |                            pr_fmt(fmt), ##__VA_ARGS__)
         |                                           ^~~~~~~~~~~
   include/linux/dynamic_debug.h:250:59: note: expanded from macro '_dynamic_func_call'
     250 |         _dynamic_func_call_cls(_DPRINTK_CLASS_DFLT, fmt, func, ##__VA_ARGS__)
         |                                                                  ^~~~~~~~~~~
   include/linux/dynamic_debug.h:248:65: note: expanded from macro '_dynamic_func_call_cls'
     248 |         __dynamic_func_call_cls(__UNIQUE_ID(ddebug), cls, fmt, func, ##__VA_ARGS__)
         |                                                                        ^~~~~~~~~~~
   include/linux/dynamic_debug.h:224:15: note: expanded from macro '__dynamic_func_call_cls'
     224 |                 func(&id, ##__VA_ARGS__);                       \
         |                             ^~~~~~~~~~~
   drivers/net/mctp/mctp-pcie-vdm.c:484:35: note: initialize the variable 'vdm_dev' to silence this warning
     484 |         struct mctp_pcie_vdm_dev *vdm_dev;
         |                                          ^
         |                                           = NULL
   1 warning generated.


vim +/vdm_dev +491 drivers/net/mctp/mctp-pcie-vdm.c

   481	
   482	static void mctp_pcie_vdm_uninit(struct net_device *ndev)
   483	{
   484		struct mctp_pcie_vdm_dev *vdm_dev;
   485	
   486		struct mctp_pcie_vdm_route_info *route;
   487		struct hlist_node *tmp;
   488		int bkt;
   489	
   490		pr_debug("%s: uninitializing vdm_dev %s\n", __func__,
 > 491			 vdm_dev->ndev->name);
   492	
   493		vdm_dev = netdev_priv(ndev);
   494		vdm_dev->callback_ops->uninit(vdm_dev->dev);
   495	
   496		hash_for_each_safe(vdm_dev->route_table, bkt, tmp, route, hnode) {
   497			hash_del(&route->hnode);
   498			kfree(route);
   499		}
   500	
   501		if (vdm_dev->tx_thread) {
   502			kthread_stop(vdm_dev->tx_thread);
   503			vdm_dev->tx_thread = NULL;
   504		}
   505	
   506		if (mctp_pcie_vdm_wq) {
   507			cancel_work_sync(&vdm_dev->rx_work);
   508			flush_workqueue(mctp_pcie_vdm_wq);
   509			destroy_workqueue(mctp_pcie_vdm_wq);
   510			mctp_pcie_vdm_wq = NULL;
   511		}
   512	
   513		while (__ptr_ring_peek(&vdm_dev->tx_queue)) {
   514			struct sk_buff *skb;
   515	
   516			skb = ptr_ring_consume(&vdm_dev->tx_queue);
   517	
   518			if (skb)
   519				kfree_skb(skb);
   520		}
   521	
   522		mutex_lock(&mctp_pcie_vdm_dev_mutex);
   523		list_del(&vdm_dev->list);
   524		mutex_unlock(&mctp_pcie_vdm_dev_mutex);
   525	}
   526	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-15  1:08     ` Jeremy Kerr
@ 2025-07-17  3:07       ` YH Chung
  2025-07-17  5:37         ` Jeremy Kerr
  0 siblings, 1 reply; 17+ messages in thread
From: YH Chung @ 2025-07-17  3:07 UTC (permalink / raw)
  To: Jeremy Kerr, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi Jeremy,

>> > Do we really need an abstraction for MCTP VDM drivers? How many are
>> > you expecting? Can you point us to a client of the VDM abstraction?
>> >
>> > There is some value in keeping consistency for the MCTP lladdr
>> > formats across PCIe transports, but I'm not convinced we need a
>> > whole abstraction layer for this.
>> >
>> We plan to follow existing upstream MCTP transports-such as I²C, I³C,
>> and USB-by abstracting the hardware-specific details into a common
>> interface and focus on the transport binding protocol in this patch.
>> This driver has been tested by our AST2600 and AST2700 MCTP driver.
>
>Is that one driver (for both 2600 and 2700) or two?
>
It's written in one file to reuse common functions, but the behavior is slightly different depending on the hardware.

>I'm still not convinced you need an abstraction layer specifically for VDM
>transports, especially as you're forcing a specific driver model with the deferral
>of TX to a separate thread.
>
We followed the same implementation pattern as mctp-i2c and mctp-i3c, both of which also abstract the hardware layer via the existing i2c/i3c device interface and use a kernel thread for TX data. 
That said, I believe it's reasonable to remove the kernel thread and instead send the packet directly downward after we remove the route table part.
Could you kindly help to share your thoughts on which approach might be preferable?

>Even if this abstraction layer is a valid approach, it would not be merged until
>you also have an in-kernel user of it.
>
We have an MCTP controller driver (mentioned above; it is what we used to test this driver) that uses this abstraction for transmission, and we plan to upstream it in the future.
REF Link: https://github.com/AspeedTech-BMC/linux/blob/aspeed-master-v6.6/drivers/soc/aspeed/aspeed-mctp.c




Since the PCIe VDM driver is implemented as an abstraction layer, our current plan is to submit it separately as we believe the review process for each driver can proceed independently.
Would you recommend submitting both drivers together in the same patch series for review, or is it preferable to keep them separate? 

>> > > TX path uses a dedicated kernel thread and ptr_ring: skbs queued
>> > > by the MCTP stack are enqueued on the ring and processed in-thread
>context.
>> >
>> > Is this somehow more suitable than the existing netdev queues?
>> >
>> Our current implementation has two operations that take time: 1)
>> Configure the PCIe VDM routing type as DSP0238 requested if we are
>> sending certain ctrl message command codes like Discovery Notify
>> request or Endpoint Discovery response. 2) Update the BDF/EID routing
>> table.
>
>More on this below, but: you don't need to handle either of those in a transport
>driver.
>
>> > > +struct mctp_pcie_vdm_route_info {
>> > > +       u8 eid;
>> > > +       u8 dirty;
>> > > +       u16 bdf_addr;
>> > > +       struct hlist_node hnode;
>> > > +};
>> >
>> > Why are you keeping your own routing table in the transport driver?
>> > We already have the route and neighbour tables in the MCTP core code.
>> >
>> > Your assumption that you can intercept MCTP control messages to keep
>> > a separate routing table will not work.
>> >
>> We maintain a routing table in the transport driver to record the
>> mapping between BDFs and EIDs, as the BDF is only present in the PCIe
>> VDM header of received Endpoint Discovery Responses. This information
>> is not forwarded to the MCTP core in the MCTP payload. We update the
>> table with this mapping before forwarding the MCTP message to the
>> core.
>
>There is already support for this in the MCTP core - the neighbour table
>maintains mappings between EID and link-layer addresses. In the case of a PCIe
>VDM transport, those link-layer addresses contain the bdf data.
>
>The transport driver only needs to be involved in packet transmit and receive.
>Your bdf data is provided to the driver through the
>header_ops->create() op.
>
>Any management of the neighbour table is performed by userspace, which has
>visibility of the link-layer addresses of incoming skbs - assuming your drivers are
>properly setting the cb->haddr data on receive.
>
Agreed, we'll remove the table.

>This has already been established through the existing transports that consume
>lladdr data (i2c and i3c). You should not be handling the lladdr-to-EID mapping
>*at all* in the transport driver.
>
>> Additionally, if the MCTP Bus Owner operates in Endpoint (EP) role on
>> the PCIe bus, it cannot obtain the physical addresses of other devices
>> from the PCIe bus.
>
>Sure it can, there are mechanisms for discovery. However, that's entirely
>handled by userspace, which can update the existing neighbour table.
>
>> Agreed. In our implementation, we always fill in the "Route By ID"
>> type when the core asks us to create the header, since we don't know
>> the correct type to fill in at that time. Later we update the route
>> type based on the ctrl message code when doing TX. I think it would
>> be nice to have a unified address format so we can get the actual
>> route type from the passed-in lladdr when creating the header.
>
>OK, so we'd include the routing type in the lladdr data then.
>
Could you share if there's any preliminary prototype or idea for the format of the lladdr that core plans to implement, particularly regarding how the route type should be encoded or parsed?

For example, should we expect the first byte of the daddr parameter in the create_hdr op to indicate the route type?

>Cheers,
>
>
>Jeremy

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  3:07       ` YH Chung
@ 2025-07-17  5:37         ` Jeremy Kerr
  2025-07-17  7:17           ` YH Chung
  2025-07-18  5:48           ` Khang D Nguyen
  0 siblings, 2 replies; 17+ messages in thread
From: Jeremy Kerr @ 2025-07-17  5:37 UTC (permalink / raw)
  To: YH Chung, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi YH,

> > Is that one driver (for both 2600 and 2700) or two?
> > 
> It's written in one file to reuse common functions, but the behavior
> is slightly different depending on the hardware.

OK, but it's still the one driver (one struct platform_driver handling
multiple of_device_id matches), and there's only one instance where
you're interacting with the mctp_pcie_vdm API. So that would be a single
user of the proposed abstraction.

> > I'm still not convinced you need an abstraction layer specifically for VDM
> > transports, especially as you're forcing a specific driver model with the deferral
> > of TX to a separate thread.
> > 
> We followed the same implementation pattern as mctp-i2c and mctp-i3c,
> both of which also abstract the hardware layer via the existing
> i2c/i3c device interface and use a kernel thread for TX data. 

You're combining two concepts there: the use of a workqueue for transmit,
and the use of a driver abstraction layer between the hardware and the
MCTP netdev.

Some existing MCTP transport drivers have the deferred TX via workqueue,
but none have an abstraction layer like you are proposing here.

In the case of I2C and I3C, we cannot transmit directly from the
->ndo_start_xmit() op, because i2c/i3c operations are done in a
sleepable context. Hence the requirement for the defer for those.

However, I would be surprised if transmitting via your platform PCIe VDM
interface would require blocking operations. From what I can see, it
can all be atomic for your driver. As you say:

> That said, I believe it's reasonable to remove the kernel thread and
> instead send the packet directly downward after we remove the route
> table part.
>
> Could you kindly help to share your thoughts on which approach might
> be preferable?

The direct approach would definitely be preferable, if possible.

Now, given:

 1) you don't need the routing table
 2) you probably don't need a workqueue
 3) there is only one driver using the proposed abstraction

- then it sounds like you don't need an abstraction layer at all. Just
implement your hardware driver to use the netdev operations directly, as
a self-contained drivers/net/mctp/mctp-pcie-aspeed.c.
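
To illustrate, the transmit side of such a driver can be as simple as the
sketch below (the aspeed_* names are just placeholders, and the hardware
write is assumed to be non-sleeping register/FIFO access):

	static netdev_tx_t aspeed_mctp_start_xmit(struct sk_buff *skb,
						  struct net_device *ndev)
	{
		struct aspeed_mctp *priv = netdev_priv(ndev);

		/* header was already prepended by header_ops->create() */
		if (aspeed_mctp_hw_write(priv, skb->data, skb->len)) {
			ndev->stats.tx_dropped++;
		} else {
			ndev->stats.tx_packets++;
			ndev->stats.tx_bytes += skb->len;
		}

		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

- no ring, no thread, no extra state.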

> Would you recommend submitting both drivers together in the same patch
> series for review, or is it preferable to keep them separate? 

I would recommend removing the abstraction altogether.

If, for some reason, we do end up needing it, I would prefer that they
be submitted together. This allows us to review against an actual
use-case.

> > OK, so we'd include the routing type in the lladdr data then.
> > 
> Could you share if there's any preliminary prototype or idea for the
> format of the lladdr that core plans to implement, particularly
> regarding how the route type should be encoded or parsed?

Excellent question! I suspect we would want a four-byte representation,
being:

 [0]: routing type (bits 0:2, others reserved)
 [1]: segment (or 0 for non-flit mode)
 [2]: bus
 [3]: device / function

which assumes there is some value in combining formats between flit- and
non-flit modes. I am happy to adjust if there are better ideas.
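
In struct terms, that would be something like (purely illustrative, not
committed to yet):

	/* tentative 4-byte PCIe VDM lladdr layout */
	struct mctp_pcie_vdm_lladdr {
		u8 routing;	/* bits 0:2 = routing type, rest reserved */
		u8 segment;	/* 0 in non-flit mode */
		u8 bus;
		u8 devfn;
	} __packed;

RX would then fill those four bytes into cb->haddr, and header_ops->create()
would take the routing type and b/d/fn directly from daddr.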

Khang: any inputs from your side there?

Cheers,


Jeremy

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  5:37         ` Jeremy Kerr
@ 2025-07-17  7:17           ` YH Chung
  2025-07-17  7:31             ` Jeremy Kerr
  2025-07-17 21:43             ` Adam Young
  2025-07-18  5:48           ` Khang D Nguyen
  1 sibling, 2 replies; 17+ messages in thread
From: YH Chung @ 2025-07-17  7:17 UTC (permalink / raw)
  To: Jeremy Kerr, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi Jeremy,

>> > Is that one driver (for both 2600 and 2700) or two?
>> >
>> It's written in one file to reuse common functions, but the behavior
>> is slightly different depending on the hardware.
>
>OK, but it's still the one driver (one struct platform_driver handling multiple
>of_device_id matches), and there's only one instance where you're interacting
>with the mctp_pcie_vdm API. So that would be a single user of the proposed
>abstraction.
>> > I'm still not convinced you need an abstraction layer specifically
>> > for VDM transports, especially as you're forcing a specific driver
>> > model with the deferral of TX to a separate thread.
>> >
>> We followed the same implementation pattern as mctp-i2c and mctp-i3c,
>> both of which also abstract the hardware layer via the existing
>> i2c/i3c device interface and use a kernel thread for TX data.
>
>You're combining two concepts there: the use of a workqueue for transmit, and
>the use of a driver abstraction layer between the hardware and the MCTP
>netdev.
>
>Some existing MCTP transport drivers have the deferred TX via workqueue, but
>none have an abstraction layer like you are proposing here.
>
From my perspective, the other MCTP transport drivers do make use of abstraction layers that already exist in the kernel tree. For example, mctp-i3c uses i3c_device_do_priv_xfers(), which ultimately invokes operations registered by the underlying I3C driver. This is effectively an abstraction layer handling the hardware-specific details of TX packet transmission.

In our case, there is no standard interface—like those for I2C/I3C—that serves PCIe VDM. So the proposed driver introduces a unified interface, defined in mctp-pcie-vdm.h, that lets developers focus on the hardware-specific implementation without having to duplicate or rework the transport binding logic each time.

Could you share your thoughts on how the abstraction model used in our PCIe VDM driver compares to the existing abstractions used in the I2C/I3C transport implementations?
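
For reference, a lower-layer client of the proposed interface would look
roughly like this (a sketch only; the aspeed_* names are placeholders from
our hardware driver):

	static const struct mctp_pcie_vdm_ops aspeed_vdm_ops = {
		.send_packet = aspeed_vdm_send_packet,
		.recv_packet = aspeed_vdm_recv_packet,
		.free_packet = aspeed_vdm_free_packet,
		.uninit      = aspeed_vdm_uninit,
	};

	/* in probe(): register with the binding layer */
	priv->vdm_dev = mctp_pcie_vdm_add_dev(dev, &aspeed_vdm_ops);
	if (IS_ERR(priv->vdm_dev))
		return PTR_ERR(priv->vdm_dev);

	/* in the RX interrupt handler: let the binding layer drain packets */
	mctp_pcie_vdm_notify_rx(priv->vdm_dev);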

>In the case of I2C and I3C, we cannot transmit directly from the
>->ndo_start_xmit() op, because i2c/i3c operations are done in a
>sleepable context. Hence the requirement for the defer for those.
>
Understood, thanks for the clarification!

>However, I would be surprised if transmitting via your platform PCIe VDM
>interface would require blocking operations. From what I can see, it can all be
>atomic for your driver. As you say:
>
>> That said, I believe it's reasonable to remove the kernel thread and
>> instead send the packet directly downward after we remove the route
>> table part.
>>
>> Could you kindly help to share your thoughts on which approach might
>> be preferable?
>
>The direct approach would definitely be preferable, if possible.
>
Got it. Then we'll remove the kernel thread and do TX directly.

>Now, given:
>
> 1) you don't need the routing table
> 2) you probably don't need a workqueue
> 3) there is only one driver using the proposed abstraction
>
>- then it sounds like you don't need an abstraction layer at all. Just implement
>your hardware driver to use the netdev operations directly, as a self-contained
>drivers/net/mctp/mctp-pcie-aspeed.c.
>
>> Would you recommend submitting both drivers together in the same patch
>> series for review, or is it preferable to keep them separate?
>
>I would recommend removing the abstraction altogether.
>
>If, for some reason, we do end up needing it, I would prefer that they be
>submitted together. This allows us to review against an actual use-case.
>
Okay, we'll include it in the next patch series once we've reached a conclusion on the abstraction.

>> > OK, so we'd include the routing type in the lladdr data then.
>> >
>> Could you share if there's any preliminary prototype or idea for the
>> format of the lladdr that core plans to implement, particularly
>> regarding how the route type should be encoded or parsed?
>
>Excellent question! I suspect we would want a four-byte representation,
>being:
>
> [0]: routing type (bits 0:2, others reserved)
> [1]: segment (or 0 for non-flit mode)
> [2]: bus
> [3]: device / function
>
>which assumes there is some value in combining formats between flit- and
>non-flit modes. I am happy to adjust if there are better ideas.
>
This looks good to me—thanks for sharing!
>Khang: any inputs from your side there?
>
>Cheers,
>
>
>Jeremy


* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  7:17           ` YH Chung
@ 2025-07-17  7:31             ` Jeremy Kerr
  2025-07-17  8:21               ` YH Chung
  2025-07-17 21:43             ` Adam Young
  1 sibling, 1 reply; 17+ messages in thread
From: Jeremy Kerr @ 2025-07-17  7:31 UTC (permalink / raw)
  To: YH Chung, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi YH,

> From my perspective, the other MCTP transport drivers do make use of
> abstraction layers that already exist in the kernel tree. For example,
> mctp-i3c uses i3c_device_do_priv_xfers(), which ultimately invokes
> operations registered by the underlying I3C driver. This is
> effectively an abstraction layer handling the hardware-specific
> details of TX packet transmission.
> 
> In our case, there is no standard interface—like those for
> I2C/I3C—that serves PCIe VDM.

But that's not what you're proposing here - your abstraction layer
serves one type of PCIe VDM messaging (MCTP), for only one PCIe VDM MCTP
driver.

If you were proposing adding a *generic* PCIe VDM interface, that is
suitable for all messaging types (not just MCTP), and all PCIe VDM
hardware (not just ASPEED's) that would make more sense. But I think
that would be a much larger task than what you're intending here.

Start small. If we have other use-cases for an abstraction layer, we can
introduce it at that point - where we have real-world design inputs for
it.

Regardless, we have worked out that there is nothing to actually abstract
*anyway*.

> > The direct approach would definitely be preferable, if possible.
> > 
> Got it. Then we'll remove the kernel thread and do TX directly.

Super!

> > Excellent question! I suspect we would want a four-byte representation,
> > being:
> > 
> > [0]: routing type (bits 0:2, others reserved)
> > [1]: segment (or 0 for non-flit mode)
> > [2]: bus
> > [3]: device / function
> > 
> > which assumes there is some value in combining formats between flit- and
> > non-flit modes. I am happy to adjust if there are better ideas.
> > 
> This looks good to me—thanks for sharing!

No problem! We'll still want a bit of wider consensus on this, because
we cannot change it once upstreamed.

Cheers,


Jeremy


* RE: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  7:31             ` Jeremy Kerr
@ 2025-07-17  8:21               ` YH Chung
  2025-07-17 21:40                 ` Adam Young
  2025-07-18  3:24                 ` Jeremy Kerr
  0 siblings, 2 replies; 17+ messages in thread
From: YH Chung @ 2025-07-17  8:21 UTC (permalink / raw)
  To: Jeremy Kerr, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi Jeremy,

>> From my perspective, the other MCTP transport drivers do make use of
>> abstraction layers that already exist in the kernel tree. For example,
>> mctp-i3c uses i3c_device_do_priv_xfers(), which ultimately invokes
>> operations registered by the underlying I3C driver. This is
>> effectively an abstraction layer handling the hardware-specific
>> details of TX packet transmission.
>>
>> In our case, there is no standard interface-like those for
>> I2C/I3C-that serves PCIe VDM.
>
>But that's not what you're proposing here - your abstraction layer serves one
>type of PCIe VDM messaging (MCTP), for only one PCIe VDM MCTP driver.
>
>If you were proposing adding a *generic* PCIe VDM interface, that is suitable
>for all messaging types (not just MCTP), and all PCIe VDM hardware (not just
>ASPEED's) that would make more sense. But I think that would be a much larger
>task than what you're intending here.
>
Agreed. Our proposed interface is intended only for MCTP, and thus not generic for all VDM messages.

>Start small. If we have other use-cases for an abstraction layer, we can
>introduce it at that point - where we have real-world design inputs for it.
>
We're planning to split the MCTP controller driver into two separate drivers for the AST2600 and AST2700, removing the AST2600-specific workarounds in the process to improve long-term maintainability. This is part of the reason we want to decouple the binding protocol logic into its own layer.

Would it be preferable to create a directory such as net/mctp/aspeed/ to host the abstraction layer alongside the hardware-specific drivers?
We're considering this structure to help encapsulate the shared logic and keep the MCTP PCIe VDM-related components organized.
Appreciate any guidance on whether this aligns with the expected upstream organization.

>Regardless, we have worked out that there is nothing to actually abstract
>*anyway*.
>
>> > The direct approach would definitely be preferable, if possible.
>> >
>> Got it. Then we'll remove the kernel thread and do TX directly.
>
>Super!
>
>> > Excellent question! I suspect we would want a four-byte
>> > representation,
>> > being:
>> >
>> > [0]: routing type (bits 0:2, others reserved)
>> > [1]: segment (or 0 for non-flit mode)
>> > [2]: bus
>> > [3]: device / function
>> >
>> > which assumes there is some value in combining formats between flit-
>> > and non-flit modes. I am happy to adjust if there are better ideas.
>> >
>> This looks good to me-thanks for sharing!
>
>No problem! We'll still want a bit of wider consensus on this, because we
>cannot change it once upstreamed.
>
>Cheers,
>
>
>Jeremy


* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-14  6:25 [PATCH] net: mctp: Add MCTP PCIe VDM transport driver aspeedyh
  2025-07-14  8:54 ` Jeremy Kerr
  2025-07-15  2:26 ` kernel test robot
@ 2025-07-17 10:39 ` kernel test robot
  2 siblings, 0 replies; 17+ messages in thread
From: kernel test robot @ 2025-07-17 10:39 UTC (permalink / raw)
  To: aspeedyh, jk, matt, andrew+netdev, davem, edumazet, kuba, pabeni,
	linux-kernel, netdev, bmc-sw
  Cc: oe-kbuild-all

Hi aspeedyh,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]
[also build test WARNING on net/main linus/master v6.16-rc6 next-20250716]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/aspeedyh/net-mctp-Add-MCTP-PCIe-VDM-transport-driver/20250714-142656
base:   net-next/main
patch link:    https://lore.kernel.org/r/20250714062544.2612693-1-yh_chung%40aspeedtech.com
patch subject: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
config: powerpc64-randconfig-r122-20250717 (https://download.01.org/0day-ci/archive/20250717/202507171855.3WJVpX2l-lkp@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project 16534d19bf50bde879a83f0ae62875e2c5120e64)
reproduce: (https://download.01.org/0day-ci/archive/20250717/202507171855.3WJVpX2l-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507171855.3WJVpX2l-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/net/mctp/mctp-pcie-vdm.c:130:1: sparse: sparse: symbol 'mctp_pcie_vdm_dev_mutex' was not declared. Should it be static?
>> drivers/net/mctp/mctp-pcie-vdm.c:131:1: sparse: sparse: symbol 'mctp_pcie_vdm_devs' was not declared. Should it be static?
>> drivers/net/mctp/mctp-pcie-vdm.c:132:25: sparse: sparse: symbol 'mctp_pcie_vdm_wq' was not declared. Should it be static?
>> drivers/net/mctp/mctp-pcie-vdm.c:343:9: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned int [usertype] @@     got restricted __be32 [usertype] @@
   drivers/net/mctp/mctp-pcie-vdm.c:343:9: sparse:     expected unsigned int [usertype]
   drivers/net/mctp/mctp-pcie-vdm.c:343:9: sparse:     got restricted __be32 [usertype]
>> drivers/net/mctp/mctp-pcie-vdm.c:398:17: sparse: sparse: cast to restricted __be32
>> drivers/net/mctp/mctp-pcie-vdm.c:398:17: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned int [usertype] @@     got restricted __le32 [usertype] @@
   drivers/net/mctp/mctp-pcie-vdm.c:398:17: sparse:     expected unsigned int [usertype]
   drivers/net/mctp/mctp-pcie-vdm.c:398:17: sparse:     got restricted __le32 [usertype]
   drivers/net/mctp/mctp-pcie-vdm.c:453:25: sparse: sparse: cast to restricted __be32
   drivers/net/mctp/mctp-pcie-vdm.c:453:25: sparse: sparse: incorrect type in assignment (different base types) @@     expected unsigned int [usertype] @@     got restricted __le32 [usertype] @@
   drivers/net/mctp/mctp-pcie-vdm.c:453:25: sparse:     expected unsigned int [usertype]
   drivers/net/mctp/mctp-pcie-vdm.c:453:25: sparse:     got restricted __le32 [usertype]

vim +/mctp_pcie_vdm_dev_mutex +130 drivers/net/mctp/mctp-pcie-vdm.c

   128	
   129	/* mutex for vdm_devs add/delete */
 > 130	DEFINE_MUTEX(mctp_pcie_vdm_dev_mutex);
 > 131	LIST_HEAD(mctp_pcie_vdm_devs);
 > 132	struct workqueue_struct *mctp_pcie_vdm_wq;
   133	
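
For reference, the symbol warnings above point at a fix along these lines (a sketch only; the endianness warnings would similarly be addressed by keeping cpu_to_be32()/cpu_to_le32() results in __be32/__le32-typed storage rather than plain u32 fields):

/* sketch: give the file-local definitions static linkage */
static DEFINE_MUTEX(mctp_pcie_vdm_dev_mutex);
static LIST_HEAD(mctp_pcie_vdm_devs);
static struct workqueue_struct *mctp_pcie_vdm_wq;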

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  8:21               ` YH Chung
@ 2025-07-17 21:40                 ` Adam Young
  2025-07-18  3:24                 ` Jeremy Kerr
  1 sibling, 0 replies; 17+ messages in thread
From: Adam Young @ 2025-07-17 21:40 UTC (permalink / raw)
  To: YH Chung, Jeremy Kerr, matt@codeconstruct.com.au,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen


On 7/17/25 04:21, YH Chung wrote:
> Would it be preferable to create a directory such as net/mctp/aspeed/ to host the abstraction layer alongside the hardware-specific drivers?
> We're considering this structure to help encapsulate the shared logic and keep the MCTP PCIe VDM-related components organized.

My suggestion is: keep it simple. There are only a handful of MCTP
device drivers thus far, and there seems to be little justification for
a deeper hierarchy.

Just focus on the cleanest implementation. Having two drivers plus a
single common source file shared between them in drivers/net/mctp seems
easier to manage for now.





* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  7:17           ` YH Chung
  2025-07-17  7:31             ` Jeremy Kerr
@ 2025-07-17 21:43             ` Adam Young
  1 sibling, 0 replies; 17+ messages in thread
From: Adam Young @ 2025-07-17 21:43 UTC (permalink / raw)
  To: YH Chung, Jeremy Kerr, matt@codeconstruct.com.au,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen


On 7/17/25 03:17, YH Chung wrote:
>  From my perspective, the other MCTP transport drivers do make use of abstraction layers that already exist in the kernel tree. For example, mctp-i3c uses i3c_device_do_priv_xfers(), which ultimately invokes operations registered by the underlying I3C driver. This is effectively an abstraction layer handling the hardware-specific details of TX packet transmission.
>
> In our case, there is no standard interface—like those for I2C/I3C—that serves PCIe VDM. So, the proposed driver tries to introduce a unified interface, defined in mctp-pcie-vdm.h, to provide a reusable interface that allows developers to focus on hardware-specific implementation without needing to duplicate or rework the transport binding logic each time.
>
> Could you kindly share your thoughts or guidance on how the abstraction model used in our PCIe VDM driver compares to the existing abstractions used in I2C/I3C transport implementations?


Would the mailbox abstraction work for this? It is what I am using in
the MCTP-over-PCC code. Perhaps a PCIe VDM mailbox implementation would
have wider use than just MCTP.
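
As a rough sketch (only the mailbox_client API below is the real in-tree interface; the PCIe VDM mailbox controller it would talk to, and every name here, is hypothetical), the consumer side stays small:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/mailbox_client.h>

/* Sketch only: an MCTP netdev acting as a mailbox client. */
struct mctp_vdm_mbox {
        struct mbox_client cl;
        struct mbox_chan *chan;
};

static void mctp_vdm_mbox_rx(struct mbox_client *cl, void *msg)
{
        /* hand the received VDM payload up to the MCTP netdev here */
}

static int mctp_vdm_mbox_setup(struct device *dev, struct mctp_vdm_mbox *m)
{
        m->cl.dev = dev;
        m->cl.rx_callback = mctp_vdm_mbox_rx;
        m->cl.tx_block = false;

        /* TX would then be mbox_send_message(m->chan, vdm_packet) */
        m->chan = mbox_request_channel(&m->cl, 0);
        return PTR_ERR_OR_ZERO(m->chan);
}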



* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  8:21               ` YH Chung
  2025-07-17 21:40                 ` Adam Young
@ 2025-07-18  3:24                 ` Jeremy Kerr
  1 sibling, 0 replies; 17+ messages in thread
From: Jeremy Kerr @ 2025-07-18  3:24 UTC (permalink / raw)
  To: YH Chung, matt@codeconstruct.com.au, andrew+netdev@lunn.ch,
	davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
	pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Khang D Nguyen

Hi YH,

> We're planning to split the MCTP controller driver into two separate
> drivers for AST2600 and AST2700, removing the AST2600-specific
> workarounds in the process for improving long-term maintainability.
> And it's part of the reason we want to decouple the binding protocol
> logic into its own layer.

The split is entirely up to you (and sounds reasonable), but the
"binding protocol logic" is really minimal. I would think a couple of
shared helper functions should be sufficient?

> Would it be preferable to create a directory such as net/mctp/aspeed/
> to host the abstraction layer alongside the hardware-specific
> drivers?

Just put them in the top-level at drivers/net/mctp/ for now. There's not
much in there at present, so will be simple to keep organised. If you
end up with two drivers and a common set of utils between the two,
that's a total of four files, which I think we can manage.

Cheers,


Jeremy


* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-17  5:37         ` Jeremy Kerr
  2025-07-17  7:17           ` YH Chung
@ 2025-07-18  5:48           ` Khang D Nguyen
  2025-07-18  6:03             ` Jeremy Kerr
  1 sibling, 1 reply; 17+ messages in thread
From: Khang D Nguyen @ 2025-07-18  5:48 UTC (permalink / raw)
  To: Jeremy Kerr, YH Chung, matt@codeconstruct.com.au,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Hieu Le

Hi YH and Jeremy,

[+CC Hieu]

>> Could you share if there's any preliminary prototype or idea for the
>> format of the lladdr that core plans to implement, particularly
>> regarding how the route type should be encoded or parsed?
> 
> Excellent question! I suspect we would want a four-byte representation,
> being:
> 
>   [0]: routing type (bits 0:2, others reserved)
>   [1]: segment (or 0 for non-flit mode)
>   [2]: bus
>   [3]: device / function
> 
> which assumes there is some value in combining formats between flit- and
> non-flit modes. I am happy to adjust if there are better ideas.
> 
> Khang: any inputs from your side there?

I believe segment 0 is a common valid segment and is not reserved. If we 
want to combine, we might need another bit in the first byte to indicate 
whether it is flit mode or not. But I am not sure that is worth the 
effort, rather than just keeping the two formats separate.

As far as I understand, we have been discussing the address format for 
each "Transport Binding" (e.g., I2C vs PCIe-VDM vs ...).

According to DSP0239, I suggest we decide the lladdr format based on the 
more precise "Physical Medium Identifier" (e.g., PCIe 5.0, PCIe 6.0 
non-Flit, or PCIe 6.0 Flit Mode), rather than the more general 
"Physical Transport Binding Identifier". The specification seems to 
suggest this with the wording "primarily".

     The identifier is primarily used to identify which physical
     addressing format is used for MCTP packets on the bus.

The Transport Binding Identifier used to be enough, until they separated 
the address formats for PCIe-VDM non-Flit and Flit Mode. I am afraid that 
if they add something new again, we might not be able to retrofit.

It should be safer and easier to get the format right for each Physical 
Medium Identifier, rather than for each Physical Transport Binding.

So my opinion:

- 3-byte for non-Flit (0x08-0x0E medium type)
   4-byte for Flit Mode (0x40 medium type)
- Drivers should be able to advertise their Physical Medium Identifier
   alongside the existing Physical Transport Binding Identifier.
- We can document a stable lladdr / Linux kernel physical format used
   for sockets for each Physical Medium Identifier, not Physical
   Transport Binding.
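
As a concrete sketch of the two layouts (struct names are made up; the byte order follows the proposal above):

#include <linux/types.h>

/* Non-flit mode PCIe VDM (medium types 0x08-0x0E): 3 bytes */
struct mctp_pcie_vdm_lladdr {
        u8 routing;     /* bits 0:2 routing type, others reserved */
        u8 bus;
        u8 devfn;       /* device / function */
} __packed;

/* Flit mode PCIe VDM (medium type 0x40): 4 bytes, adds the segment */
struct mctp_pcie_flit_lladdr {
        u8 routing;     /* bits 0:2 routing type, others reserved */
        u8 segment;
        u8 bus;
        u8 devfn;
} __packed;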

Sincerely,
Khang


* Re: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-18  5:48           ` Khang D Nguyen
@ 2025-07-18  6:03             ` Jeremy Kerr
  2025-07-18 11:05               ` YH Chung
  0 siblings, 1 reply; 17+ messages in thread
From: Jeremy Kerr @ 2025-07-18  6:03 UTC (permalink / raw)
  To: Khang D Nguyen, YH Chung, matt@codeconstruct.com.au,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Hieu Le

Hi Khang,

Thanks for the input, I appreciate it.

> > Khang: any inputs from your side there?
> 
> I believe segment 0 is a common valid segment and is not reserved.

0 would not have a special meaning in flit mode (ie, when we're using
the segment number); this was more a reference to being optional in
non-flit mode.

> If we want to combine, we might need another bit in the first byte to
> represent if it is flit-mode or not. But I am not sure if it is worth
> the effort, rather than just separate them.

Yep, had the same line of thinking here too.

I agree that it would make sense to have the address format reflect the
actual hardware transport. Having variation across the binding
identifier should not be a problem.

> It should be safer and easier to get the format right for each Physical 
> Medium Identifier, rather than for each Physical Transport Binding.
>
> 
> So my opinion:
> 
> - 3-byte for non-Flit (0x08-0x0E medium type)
>    4-byte for Flit Mode (0x40 medium type)
> - Drivers should be able to advertise their Physical Medium Identifier
>    alongside the existing Physical Transport Binding Identifier.
> - We can document a stable lladdr / Linux kernel physical format used
>    for sockets for each Physical Medium Identifier, not Physical
>    Transport Binding.

Agreed, sounds like a good plan. I'll look at options for the physical
medium identifier.

YH: so we would just have the three-byte format for your proposed
driver:

   [0]: routing type (bits 0:2, others reserved)
   [1]: bus
   [2]: device / function

- assuming you're only handling non-flit mode for now.

Cheers,


Jeremy


* RE: [PATCH] net: mctp: Add MCTP PCIe VDM transport driver
  2025-07-18  6:03             ` Jeremy Kerr
@ 2025-07-18 11:05               ` YH Chung
  0 siblings, 0 replies; 17+ messages in thread
From: YH Chung @ 2025-07-18 11:05 UTC (permalink / raw)
  To: Jeremy Kerr, Khang D Nguyen, matt@codeconstruct.com.au,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, BMC-SW
  Cc: Hieu Le

Hi Jeremy,

>Just put them in the top-level at drivers/net/mctp/ for now. There's not much in
>there at present, so will be simple to keep organised. If you end up with two
>drivers and a common set of utils between the two, that's a total of four files,
>which I think we can manage.

Understood, thanks for the guidance!
We plan to start with the AST2700 driver in the next submission, along with the shared utility code.

>YH: so we would just have the three-byte format for your proposed
>driver:
>
>   [0]: routing type (bits 0:2, others reserved)
>   [1]: bus
>   [2]: device / function
>
>- assuming you're only handling non-flit mode for now.
>
Sounds good. We'll apply this format in the next patch.
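
For clarity, we would fill it roughly like this (a sketch only; the helper name is a placeholder):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Sketch: pack the proposed 3-byte non-flit lladdr */
static void mctp_pcie_vdm_fill_lladdr(u8 lladdr[3], u8 routing_type,
                                      u8 bus, u8 devfn)
{
        lladdr[0] = FIELD_PREP(GENMASK(2, 0), routing_type);
        lladdr[1] = bus;
        lladdr[2] = devfn;
}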


Hi Adam,

> Would the mailbox abstraction work for this?  It is what I am using in the MCTP over PCC code.  Perhaps a PCIe VDM mailbox implementation will have a wider use  than just MCTP.
>
Thanks a lot for the suggestion! For now, we're planning to remove the current abstraction and use a set of common utility functions shared between the AST2600 and AST2700 drivers.
As Jeremy mentioned, we'd prefer to hold off on introducing a PCIe VDM abstraction layer until there are more concrete use cases that would benefit from it.

>My suggestiong  is: Keep it simple.  There are only a handful of mctp device
>drivers thus far, and there seems to be little justification for a deeper hierarchy.
>
>Just focus on the cleanest implementation.  Having two drivers plus a single
>common source code for each in drivers/net/mctp seems to be easier to
>manage for now.

Appreciate the guidance!
We'll proceed as you and Jeremy suggested and keep everything under drivers/net/mctp for now.



Thread overview: 17+ messages
2025-07-14  6:25 [PATCH] net: mctp: Add MCTP PCIe VDM transport driver aspeedyh
2025-07-14  8:54 ` Jeremy Kerr
2025-07-14 12:37   ` YH Chung
2025-07-15  1:08     ` Jeremy Kerr
2025-07-17  3:07       ` YH Chung
2025-07-17  5:37         ` Jeremy Kerr
2025-07-17  7:17           ` YH Chung
2025-07-17  7:31             ` Jeremy Kerr
2025-07-17  8:21               ` YH Chung
2025-07-17 21:40                 ` Adam Young
2025-07-18  3:24                 ` Jeremy Kerr
2025-07-17 21:43             ` Adam Young
2025-07-18  5:48           ` Khang D Nguyen
2025-07-18  6:03             ` Jeremy Kerr
2025-07-18 11:05               ` YH Chung
2025-07-15  2:26 ` kernel test robot
2025-07-17 10:39 ` kernel test robot
