public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>
Cc: Donald Hunter <donald.hunter@gmail.com>,
	Jiri Pirko <jiri@resnulli.us>, Jonathan Corbet <corbet@lwn.net>,
	Saeed Mahameed <saeedm@nvidia.com>,
	"Leon Romanovsky" <leon@kernel.org>,
	Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
	<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-doc@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
	Gal Pressman <gal@nvidia.com>, Moshe Shemesh <moshe@nvidia.com>,
	Carolina Jubran <cjubran@nvidia.com>,
	Cosmin Ratiu <cratiu@nvidia.com>, Jiri Pirko <jiri@nvidia.com>,
	Randy Dunlap <rdunlap@infradead.org>,
	Simon Horman <horms@kernel.org>,
	Krzysztof Kozlowski <krzk@kernel.org>
Subject: [PATCH net-next V7 02/14] devlink: introduce shared devlink instance for PFs on same chip
Date: Wed, 28 Jan 2026 13:25:32 +0200	[thread overview]
Message-ID: <20260128112544.1661250-3-tariqt@nvidia.com> (raw)
In-Reply-To: <20260128112544.1661250-1-tariqt@nvidia.com>

From: Jiri Pirko <jiri@nvidia.com>

Multiple PFs may reside on the same physical chip, running a single
firmware. Some of the resources and configurations may be shared among
these PFs. Currently, there is no good object to pin the configuration
knobs on.

Introduce a shared devlink instance, instantiated upon probe of the
first PF and removed during remove of the last PF. The shared devlink
instance is backed by a faux device, as there is no PCI device related
to it. The implementation uses reference counting to manage the
lifecycle: each PF that probes calls devlink_shd_get() to get or create
the shared instance, and calls devlink_shd_put() when it removes. The
shared instance is automatically destroyed when the last PF removes.

Example:

$ devlink dev
pci/0000:08:00.0:
  nested_devlink:
    auxiliary/mlx5_core.eth.0
faux/mlx5_core_83013c12b77faa1a30000c82a1045c91:
  nested_devlink:
    pci/0000:08:00.0
    pci/0000:08:00.1
auxiliary/mlx5_core.eth.0
pci/0000:08:00.1:
  nested_devlink:
    auxiliary/mlx5_core.eth.1
auxiliary/mlx5_core.eth.1

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 include/net/devlink.h |   6 ++
 net/devlink/Makefile  |   2 +-
 net/devlink/sh_dev.c  | 163 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 net/devlink/sh_dev.c

diff --git a/include/net/devlink.h b/include/net/devlink.h
index cb839e0435a1..c453faec8ebf 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1644,6 +1644,12 @@ void devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_free(struct devlink *devlink);
 
+struct devlink *devlink_shd_get(const char *id,
+				const struct devlink_ops *ops,
+				size_t priv_size);
+void devlink_shd_put(struct devlink *devlink);
+void *devlink_shd_get_priv(struct devlink *devlink);
+
 /**
  * struct devlink_port_ops - Port operations
  * @port_split: Callback used to split the port into multiple ones.
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 000da622116a..8f2adb5e5836 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 resource.o param.o region.o health.o trap.o rate.o linecard.o
+	 resource.o param.o region.o health.o trap.o rate.o linecard.o sh_dev.o
diff --git a/net/devlink/sh_dev.c b/net/devlink/sh_dev.c
new file mode 100644
index 000000000000..acc8d549aaae
--- /dev/null
+++ b/net/devlink/sh_dev.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/device/faux.h>
+#include <net/devlink.h>
+
+#include "devl_internal.h"
+
+static LIST_HEAD(shd_list);
+static DEFINE_MUTEX(shd_mutex); /* Protects shd_list and shd->list */
+
+/* This structure represents a shared devlink instance,
+ * there is one created per identifier (e.g., serial number).
+ */
+struct devlink_shd {
+	struct list_head list; /* Node in shd list */
+	const char *id; /* Identifier string (e.g., serial number) */
+	struct faux_device *faux_dev; /* Related faux device */
+	refcount_t refcount; /* Reference count */
+	char priv[] __aligned(NETDEV_ALIGN); /* Driver private data */
+};
+
+static struct devlink_shd *devlink_shd_lookup(const char *id)
+{
+	struct devlink_shd *shd;
+
+	list_for_each_entry(shd, &shd_list, list) {
+		if (!strcmp(shd->id, id))
+			return shd;
+	}
+
+	return NULL;
+}
+
+static struct devlink_shd *devlink_shd_create(const char *id,
+					      const struct devlink_ops *ops,
+					      size_t priv_size)
+{
+	struct faux_device *faux_dev;
+	struct devlink_shd *shd;
+	struct devlink *devlink;
+
+	/* Create faux device - probe will be called synchronously */
+	faux_dev = faux_device_create(id, NULL, NULL);
+	if (!faux_dev)
+		return NULL;
+
+	devlink = devlink_alloc(ops, sizeof(struct devlink_shd) + priv_size,
+				&faux_dev->dev);
+	if (!devlink)
+		goto err_devlink_alloc;
+	shd = devlink_priv(devlink);
+
+	shd->id = kstrdup(id, GFP_KERNEL);
+	if (!shd->id)
+		goto err_kstrdup_id;
+	shd->faux_dev = faux_dev;
+	refcount_set(&shd->refcount, 1);
+
+	devl_lock(devlink);
+	devl_register(devlink);
+	devl_unlock(devlink);
+
+	list_add_tail(&shd->list, &shd_list);
+
+	return shd;
+
+err_kstrdup_id:
+	devlink_free(devlink);
+
+err_devlink_alloc:
+	faux_device_destroy(faux_dev);
+	return NULL;
+}
+
+static void devlink_shd_destroy(struct devlink_shd *shd)
+{
+	struct devlink *devlink = priv_to_devlink(shd);
+	struct faux_device *faux_dev = shd->faux_dev;
+
+	list_del(&shd->list);
+	devl_lock(devlink);
+	devl_unregister(devlink);
+	devl_unlock(devlink);
+	kfree(shd->id);
+	devlink_free(devlink);
+	faux_device_destroy(faux_dev);
+}
+
+/**
+ * devlink_shd_get - Get or create a shared devlink instance
+ * @id: Identifier string (e.g., serial number) for the shared instance
+ * @ops: Devlink operations structure
+ * @priv_size: Size of private data structure
+ *
+ * Get an existing shared devlink instance identified by @id, or create
+ * a new one if it doesn't exist. The device is automatically added to
+ * the shared instance's device list. Return the devlink instance
+ * with a reference held. The caller must call devlink_shd_put() when done.
+ *
+ * Return: Pointer to the shared devlink instance on success,
+ *         NULL on failure
+ */
+struct devlink *devlink_shd_get(const char *id,
+				const struct devlink_ops *ops,
+				size_t priv_size)
+{
+	struct devlink_shd *shd;
+
+	if (WARN_ON(!id || !ops))
+		return NULL;
+
+	mutex_lock(&shd_mutex);
+
+	shd = devlink_shd_lookup(id);
+	if (!shd)
+		shd = devlink_shd_create(id, ops, priv_size);
+	else
+		refcount_inc(&shd->refcount);
+
+	mutex_unlock(&shd_mutex);
+	return shd ? priv_to_devlink(shd) : NULL;
+}
+EXPORT_SYMBOL_GPL(devlink_shd_get);
+
+/**
+ * devlink_shd_put - Release a reference on a shared devlink instance
+ * @devlink: Shared devlink instance
+ *
+ * Release a reference on a shared devlink instance obtained via
+ * devlink_shd_get().
+ */
+void devlink_shd_put(struct devlink *devlink)
+{
+	struct devlink_shd *shd;
+
+	if (WARN_ON(!devlink))
+		return;
+
+	mutex_lock(&shd_mutex);
+	shd = devlink_priv(devlink);
+	if (refcount_dec_and_test(&shd->refcount))
+		devlink_shd_destroy(shd);
+	mutex_unlock(&shd_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_shd_put);
+
+/**
+ * devlink_shd_get_priv - Get private data from shared devlink instance
+ * @devlink: Devlink instance
+ *
+ * Returns a pointer to the driver's private data structure within
+ * the shared devlink instance.
+ *
+ * Return: Pointer to private data
+ */
+void *devlink_shd_get_priv(struct devlink *devlink)
+{
+	struct devlink_shd *shd = devlink_priv(devlink);
+
+	return shd->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_shd_get_priv);
-- 
2.44.0


  parent reply	other threads:[~2026-01-28 11:27 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-28 11:25 [PATCH net-next V7 00/14] devlink and mlx5: Support cross-function rate scheduling Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 01/14] documentation: networking: add shared devlink documentation Tariq Toukan
2026-02-03  3:40   ` Jakub Kicinski
2026-02-03  9:18     ` Jiri Pirko
2026-02-04  3:01       ` Jakub Kicinski
2026-02-04  7:12         ` Jiri Pirko
2026-02-05  2:02           ` Jakub Kicinski
2026-02-06 10:52             ` Jiri Pirko
2026-02-07  1:50               ` Jakub Kicinski
2026-01-28 11:25 ` Tariq Toukan [this message]
2026-02-03  3:49   ` [PATCH net-next V7 02/14] devlink: introduce shared devlink instance for PFs on same chip Jakub Kicinski
2026-02-03  9:44     ` Jiri Pirko
2026-02-04  2:42       ` Jakub Kicinski
2026-02-04  7:15         ` Jiri Pirko
2026-02-05  2:06           ` Jakub Kicinski
2026-01-28 11:25 ` [PATCH net-next V7 03/14] devlink: Reverse locking order for nested instances Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 04/14] devlink: Add helpers to lock nested-in instances Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 05/14] devlink: Refactor devlink_rate_nodes_check Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 06/14] devlink: Decouple rate storage from associated devlink object Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 07/14] devlink: Add parent dev to devlink API Tariq Toukan
2026-02-03  4:00   ` Jakub Kicinski
2026-02-11 16:28     ` Cosmin Ratiu
2026-02-11 16:57       ` Jakub Kicinski
2026-01-28 11:25 ` [PATCH net-next V7 08/14] devlink: Allow parent dev for rate-set and rate-new Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 09/14] devlink: Allow rate node parents from other devlinks Tariq Toukan
2026-02-03  4:04   ` Jakub Kicinski
2026-01-28 11:25 ` [PATCH net-next V7 10/14] net/mlx5: Add a shared devlink instance for PFs on same chip Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 11/14] net/mlx5: Expose a function to clear a vport's parent Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 12/14] net/mlx5: Store QoS sched nodes in the sh_devlink Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 13/14] net/mlx5: qos: Support cross-device tx scheduling Tariq Toukan
2026-01-28 11:25 ` [PATCH net-next V7 14/14] net/mlx5: Document devlink rates Tariq Toukan
2026-02-03  4:09 ` [PATCH net-next V7 00/14] devlink and mlx5: Support cross-function rate scheduling Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260128112544.1661250-3-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=cjubran@nvidia.com \
    --cc=corbet@lwn.net \
    --cc=cratiu@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=donald.hunter@gmail.com \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=horms@kernel.org \
    --cc=jiri@nvidia.com \
    --cc=jiri@resnulli.us \
    --cc=krzk@kernel.org \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=rdunlap@infradead.org \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox