netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] ice: lighter locking for PTP time reading
@ 2024-02-26 15:11 Michal Schmidt
  2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Michal Schmidt @ 2024-02-26 15:11 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, Jacob Keller, netdev

This series removes the use of the heavy-weight PTP hardware semaphore
in the gettimex64 path. Instead, serialization of access to the time
register is done using a host-side spinlock. The timer hardware is
shared between PFs on the PCI adapter, so the spinlock must be shared
between ice_pf instances too.

Michal Schmidt (3):
  ice: add ice_adapter for shared data across PFs on the same NIC
  ice: avoid the PTP hardware semaphore in gettimex64 path
  ice: fold ice_ptp_read_time into ice_ptp_gettimex64

 drivers/net/ethernet/intel/ice/Makefile      |  3 +-
 drivers/net/ethernet/intel/ice/ice.h         |  2 +
 drivers/net/ethernet/intel/ice/ice_adapter.c | 69 ++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_adapter.h | 28 ++++++++
 drivers/net/ethernet/intel/ice/ice_main.c    |  8 +++
 drivers/net/ethernet/intel/ice/ice_ptp.c     | 33 ++--------
 drivers/net/ethernet/intel/ice/ice_ptp_hw.c  |  3 +
 7 files changed, 116 insertions(+), 30 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h

-- 
2.43.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC
  2024-02-26 15:11 [PATCH net-next 0/3] ice: lighter locking for PTP time reading Michal Schmidt
@ 2024-02-26 15:11 ` Michal Schmidt
  2024-02-26 19:18   ` Jacob Keller
                     ` (2 more replies)
  2024-02-26 15:11 ` [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path Michal Schmidt
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 15+ messages in thread
From: Michal Schmidt @ 2024-02-26 15:11 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, Jacob Keller, netdev

There is a need for synchronization between ice PFs on the same physical
adapter.

Add a "struct ice_adapter" for holding data shared between PFs of the
same multifunction PCI device. The struct is refcounted - each ice_pf
holds a reference to it.

Its first use will be for PTP. I expect it will be useful also to
improve the ugliness that is ice_prot_id_tbl.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
---
 drivers/net/ethernet/intel/ice/Makefile      |  3 +-
 drivers/net/ethernet/intel/ice/ice.h         |  2 +
 drivers/net/ethernet/intel/ice/ice_adapter.c | 67 ++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_adapter.h | 22 +++++++
 drivers/net/ethernet/intel/ice/ice_main.c    |  8 +++
 5 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index cddd82d4ca0f..4fa09c321440 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -36,7 +36,8 @@ ice-y := ice_main.o	\
 	 ice_repr.o	\
 	 ice_tc_lib.o	\
 	 ice_fwlog.o	\
-	 ice_debugfs.o
+	 ice_debugfs.o  \
+	 ice_adapter.o
 ice-$(CONFIG_PCI_IOV) +=	\
 	ice_sriov.o		\
 	ice_virtchnl.o		\
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 365c03d1c462..1ffecbdd361a 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -77,6 +77,7 @@
 #include "ice_gnss.h"
 #include "ice_irq.h"
 #include "ice_dpll.h"
+#include "ice_adapter.h"
 
 #define ICE_BAR0		0
 #define ICE_REQ_DESC_MULTIPLE	32
@@ -544,6 +545,7 @@ struct ice_agg_node {
 
 struct ice_pf {
 	struct pci_dev *pdev;
+	struct ice_adapter *adapter;
 
 	struct devlink_region *nvm_region;
 	struct devlink_region *sram_region;
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
new file mode 100644
index 000000000000..deb063401238
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright Red Hat
+
+#include <linux/cleanup.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include "ice_adapter.h"
+
+static DEFINE_MUTEX(ice_adapters_lock);
+static DEFINE_XARRAY(ice_adapters);
+
+static unsigned long ice_adapter_index(const struct pci_dev *pdev)
+{
+	unsigned int domain = pci_domain_nr(pdev->bus);
+
+	WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
+	return ((unsigned long)domain << 13) |
+	       ((unsigned long)pdev->bus->number << 5) |
+	       PCI_SLOT(pdev->devfn);
+}
+
+struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
+{
+	unsigned long index = ice_adapter_index(pdev);
+	struct ice_adapter *a;
+
+	guard(mutex)(&ice_adapters_lock);
+
+	a = xa_load(&ice_adapters, index);
+	if (a) {
+		refcount_inc(&a->refcount);
+		return a;
+	}
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return NULL;
+
+	refcount_set(&a->refcount, 1);
+
+	if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
+		kfree(a);
+		return NULL;
+	}
+
+	return a;
+}
+
+void ice_adapter_put(const struct pci_dev *pdev)
+{
+	unsigned long index = ice_adapter_index(pdev);
+	struct ice_adapter *a;
+
+	guard(mutex)(&ice_adapters_lock);
+
+	a = xa_load(&ice_adapters, index);
+	if (WARN_ON(!a))
+		return;
+
+	if (!refcount_dec_and_test(&a->refcount))
+		return;
+
+	WARN_ON(xa_erase(&ice_adapters, index) != a);
+	kfree(a);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
new file mode 100644
index 000000000000..cb5a02eb24c1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-FileCopyrightText: Copyright Red Hat */
+
+#ifndef _ICE_ADAPTER_H_
+#define _ICE_ADAPTER_H_
+
+#include <linux/refcount_types.h>
+
+struct pci_dev;
+
+/**
+ * struct ice_adapter - PCI adapter resources shared across PFs
+ * @refcount: Reference count. struct ice_pf objects hold the references.
+ */
+struct ice_adapter {
+	refcount_t refcount;
+};
+
+struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
+void ice_adapter_put(const struct pci_dev *pdev);
+
+#endif /* _ICE_ADAPTER_H */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9c2c8637b4a7..4a60957221fc 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5093,6 +5093,7 @@ static int
 ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 {
 	struct device *dev = &pdev->dev;
+	struct ice_adapter *adapter;
 	struct ice_pf *pf;
 	struct ice_hw *hw;
 	int err;
@@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	pci_set_master(pdev);
 
+	adapter = ice_adapter_get(pdev);
+	if (!adapter)
+		return -ENOMEM;
+
 	pf->pdev = pdev;
+	pf->adapter = adapter;
 	pci_set_drvdata(pdev, pf);
 	set_bit(ICE_DOWN, pf->state);
 	/* Disable service task until DOWN bit is cleared */
@@ -5196,6 +5202,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 err_load:
 	ice_deinit(pf);
 err_init:
+	ice_adapter_put(pdev);
 	pci_disable_device(pdev);
 	return err;
 }
@@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev)
 	ice_setup_mc_magic_wake(pf);
 	ice_set_wake(pf);
 
+	ice_adapter_put(pdev);
 	pci_disable_device(pdev);
 }
 
-- 
2.43.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path
  2024-02-26 15:11 [PATCH net-next 0/3] ice: lighter locking for PTP time reading Michal Schmidt
  2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
@ 2024-02-26 15:11 ` Michal Schmidt
  2024-02-26 19:36   ` Jacob Keller
  2024-02-26 15:11 ` [PATCH net-next 3/3] ice: fold ice_ptp_read_time into ice_ptp_gettimex64 Michal Schmidt
  2024-02-26 19:16 ` [PATCH net-next 0/3] ice: lighter locking for PTP time reading Jacob Keller
  3 siblings, 1 reply; 15+ messages in thread
From: Michal Schmidt @ 2024-02-26 15:11 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, Jacob Keller, netdev

The PTP hardware semaphore (PFTSYN_SEM) is used to synchronize
operations that program the PTP timers. The operations involve issuing
commands to the sideband queue. The E810 does not have a hardware
sideband queue, so the admin queue is used. The admin queue is slow.
I have observed delays in hundreds of milliseconds waiting for
ice_sq_done.

When phc2sys reads the time from the ice PTP clock and PFTSYN_SEM is
held by a task performing one of the slow operations, ice_ptp_lock can
easily time out. phc2sys gets -EBUSY and the kernel prints:
  ice 0000:XX:YY.0: PTP failed to get time
These messages appear once every few seconds, causing log spam.

The E810 datasheet recommends an algorithm for reading the upper 64 bits
of the GLTSYN_TIME register. It matches what's implemented in
ice_ptp_read_src_clk_reg. It is robust against wrap-around, but not
necessarily against the concurrent setting of the register (with
GLTSYN_CMD_{INIT,ADJ}_TIME commands). Perhaps that's why
ice_ptp_gettimex64 also takes PFTSYN_SEM.

The race with time setters can be prevented without relying on the PTP
hardware semaphore. Using the "ice_adapter" from the previous patch,
we can have a common spinlock for the PFs that share the clock hardware.
It will protect the reading and writing to the GLTSYN_TIME register.
The writing is performed indirectly, by the hardware, as a result of
the driver writing GLTSYN_CMD_SYNC in ice_ptp_exec_tmr_cmd. I wasn't
sure if the ice_flush there is enough to make sure GLTSYN_TIME has been
updated, but it works well in my testing.

My test code can be seen here:
https://gitlab.com/mschmidt2/linux/-/commits/ice-ptp-host-side-lock
It consists of:
 - kernel threads reading the time in a busy loop and looking at the
   deltas between consecutive values, reporting new maxima.
   in the consecutive values;
 - a shell script that sets the time repeatedly;
 - a bpftrace probe to produce a histogram of the measured deltas.
Without the spinlock ptp_gltsyn_time_lock, it is easy to see tearing.
Deltas in the [2G, 4G) range appear in the histograms.
With the spinlock added, there is no tearing and the biggest delta I saw
was in the range [1M, 2M), that is under 2 ms.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
---
 drivers/net/ethernet/intel/ice/ice_adapter.c | 2 ++
 drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++++++
 drivers/net/ethernet/intel/ice/ice_ptp.c     | 8 +-------
 drivers/net/ethernet/intel/ice/ice_ptp_hw.c  | 3 +++
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
index deb063401238..4b9f5d29811c 100644
--- a/drivers/net/ethernet/intel/ice/ice_adapter.c
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
@@ -5,6 +5,7 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/xarray.h>
 #include "ice_adapter.h"
 
@@ -38,6 +39,7 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
 	if (!a)
 		return NULL;
 
+	spin_lock_init(&a->ptp_gltsyn_time_lock);
 	refcount_set(&a->refcount, 1);
 
 	if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
index cb5a02eb24c1..9d11014ec02f 100644
--- a/drivers/net/ethernet/intel/ice/ice_adapter.h
+++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
@@ -4,15 +4,21 @@
 #ifndef _ICE_ADAPTER_H_
 #define _ICE_ADAPTER_H_
 
+#include <linux/spinlock_types.h>
 #include <linux/refcount_types.h>
 
 struct pci_dev;
 
 /**
  * struct ice_adapter - PCI adapter resources shared across PFs
+ * @ptp_gltsyn_time_lock: Spinlock protecting access to the GLTSYN_TIME
+ *                        register of the PTP clock.
  * @refcount: Reference count. struct ice_pf objects hold the references.
  */
 struct ice_adapter {
+	/* For access to the GLTSYN_TIME register */
+	spinlock_t ptp_gltsyn_time_lock;
+
 	refcount_t refcount;
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index c11eba07283c..b6c7246245c6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -374,6 +374,7 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
 	u8 tmr_idx;
 
 	tmr_idx = ice_get_ptp_src_clock_index(hw);
+	guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
 	/* Read the system timestamp pre PHC read */
 	ptp_read_system_prets(sts);
 
@@ -1925,15 +1926,8 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
 		   struct ptp_system_timestamp *sts)
 {
 	struct ice_pf *pf = ptp_info_to_pf(info);
-	struct ice_hw *hw = &pf->hw;
-
-	if (!ice_ptp_lock(hw)) {
-		dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
-		return -EBUSY;
-	}
 
 	ice_ptp_read_time(pf, ts, sts);
-	ice_ptp_unlock(hw);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 187ce9b54e1a..a47dbbfadb74 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -274,6 +274,9 @@ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
  */
 static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
 {
+	struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
+
+	guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
 	wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
 	ice_flush(hw);
 }
-- 
2.43.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next 3/3] ice: fold ice_ptp_read_time into ice_ptp_gettimex64
  2024-02-26 15:11 [PATCH net-next 0/3] ice: lighter locking for PTP time reading Michal Schmidt
  2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
  2024-02-26 15:11 ` [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path Michal Schmidt
@ 2024-02-26 15:11 ` Michal Schmidt
  2024-02-26 19:36   ` Jacob Keller
  2024-02-26 19:16 ` [PATCH net-next 0/3] ice: lighter locking for PTP time reading Jacob Keller
  3 siblings, 1 reply; 15+ messages in thread
From: Michal Schmidt @ 2024-02-26 15:11 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, Jacob Keller, netdev

This is a cleanup. It is unnecessary to have this function just to call
another function.

Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
---
 drivers/net/ethernet/intel/ice/ice_ptp.c | 25 +++---------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index b6c7246245c6..cb061966cc9e 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1166,26 +1166,6 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
 	ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx);
 }
 
-/**
- * ice_ptp_read_time - Read the time from the device
- * @pf: Board private structure
- * @ts: timespec structure to hold the current time value
- * @sts: Optional parameter for holding a pair of system timestamps from
- *       the system clock. Will be ignored if NULL is given.
- *
- * This function reads the source clock registers and stores them in a timespec.
- * However, since the registers are 64 bits of nanoseconds, we must convert the
- * result to a timespec before we can return.
- */
-static void
-ice_ptp_read_time(struct ice_pf *pf, struct timespec64 *ts,
-		  struct ptp_system_timestamp *sts)
-{
-	u64 time_ns = ice_ptp_read_src_clk_reg(pf, sts);
-
-	*ts = ns_to_timespec64(time_ns);
-}
-
 /**
  * ice_ptp_write_init - Set PHC time to provided value
  * @pf: Board private structure
@@ -1926,9 +1906,10 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
 		   struct ptp_system_timestamp *sts)
 {
 	struct ice_pf *pf = ptp_info_to_pf(info);
+	u64 time_ns;
 
-	ice_ptp_read_time(pf, ts, sts);
-
+	time_ns = ice_ptp_read_src_clk_reg(pf, sts);
+	*ts = ns_to_timespec64(time_ns);
 	return 0;
 }
 
-- 
2.43.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 0/3] ice: lighter locking for PTP time reading
  2024-02-26 15:11 [PATCH net-next 0/3] ice: lighter locking for PTP time reading Michal Schmidt
                   ` (2 preceding siblings ...)
  2024-02-26 15:11 ` [PATCH net-next 3/3] ice: fold ice_ptp_read_time into ice_ptp_gettimex64 Michal Schmidt
@ 2024-02-26 19:16 ` Jacob Keller
  2024-02-26 20:01   ` Michal Schmidt
  3 siblings, 1 reply; 15+ messages in thread
From: Jacob Keller @ 2024-02-26 19:16 UTC (permalink / raw)
  To: Michal Schmidt, intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, netdev



On 2/26/2024 7:11 AM, Michal Schmidt wrote:
> This series removes the use of the heavy-weight PTP hardware semaphore
> in the gettimex64 path. Instead, serialization of access to the time
> register is done using a host-side spinlock. The timer hardware is
> shared between PFs on the PCI adapter, so the spinlock must be shared
> between ice_pf instances too.
> 
> Michal Schmidt (3):
>   ice: add ice_adapter for shared data across PFs on the same NIC
>   ice: avoid the PTP hardware semaphore in gettimex64 path
>   ice: fold ice_ptp_read_time into ice_ptp_gettimex64
> 

Glad to see some fix and improvement in this place. I had been
considering switching the hardware semaphore entirely to be a shared
mutex instead, but this direction also seems reasonable and fixes most
of the issues. We could actually extend this to replace the semaphore
with a mutex in order to avoid the PCIe transactions required to handle
the hardware semaphore register.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC
  2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
@ 2024-02-26 19:18   ` Jacob Keller
  2024-02-27  7:05   ` Jiri Pirko
  2024-02-28  2:35   ` Jakub Kicinski
  2 siblings, 0 replies; 15+ messages in thread
From: Jacob Keller @ 2024-02-26 19:18 UTC (permalink / raw)
  To: Michal Schmidt, intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, netdev



On 2/26/2024 7:11 AM, Michal Schmidt wrote:
> There is a need for synchronization between ice PFs on the same physical
> adapter.
> 
> Add a "struct ice_adapter" for holding data shared between PFs of the
> same multifunction PCI device. The struct is refcounted - each ice_pf
> holds a reference to it.
> 
> Its first use will be for PTP. I expect it will be useful also to
> improve the ugliness that is ice_prot_id_tbl.
> 

We could alternatively have this be part of the ice PTP auxiliary bus
interface we added. However, I think a cross-adapter structure has uses
beyond just PTP and this implementation seems a bit more simple than the
auxiliary interface.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path
  2024-02-26 15:11 ` [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path Michal Schmidt
@ 2024-02-26 19:36   ` Jacob Keller
  2024-02-26 20:11     ` Michal Schmidt
  0 siblings, 1 reply; 15+ messages in thread
From: Jacob Keller @ 2024-02-26 19:36 UTC (permalink / raw)
  To: Michal Schmidt, intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, netdev



On 2/26/2024 7:11 AM, Michal Schmidt wrote:
> The PTP hardware semaphore (PFTSYN_SEM) is used to synchronize
> operations that program the PTP timers. The operations involve issuing
> commands to the sideband queue. The E810 does not have a hardware
> sideband queue, so the admin queue is used. The admin queue is slow.
> I have observed delays in hundreds of milliseconds waiting for
> ice_sq_done.

Yep, this can happen. We've worked to try and improve this, but
fundamentally the firmware was never originally intended to be involved
in these flows.

> 
> When phc2sys reads the time from the ice PTP clock and PFTSYN_SEM is
> held by a task performing one of the slow operations, ice_ptp_lock can
> easily time out. phc2sys gets -EBUSY and the kernel prints:
>   ice 0000:XX:YY.0: PTP failed to get time
> These messages appear once every few seconds, causing log spam.
> 

Yea, this is something we wanted to fix.

> The E810 datasheet recommends an algorithm for reading the upper 64 bits
> of the GLTSYN_TIME register. It matches what's implemented in
> ice_ptp_read_src_clk_reg. It is robust against wrap-around, but not
> necessarily against the concurrent setting of the register (with
> GLTSYN_CMD_{INIT,ADJ}_TIME commands). Perhaps that's why
> ice_ptp_gettimex64 also takes PFTSYN_SEM.
> 

Yes. We tried removing this as well. However, as you point out later,
doing so would result in tearing and incorrect clock reads.

> The race with time setters can be prevented without relying on the PTP
> hardware semaphore. Using the "ice_adapter" from the previous patch,
> we can have a common spinlock for the PFs that share the clock hardware.
> It will protect the reading and writing to the GLTSYN_TIME register.
> The writing is performed indirectly, by the hardware, as a result of
> the driver writing GLTSYN_CMD_SYNC in ice_ptp_exec_tmr_cmd. I wasn't
> sure if the ice_flush there is enough to make sure GLTSYN_TIME has been
> updated, but it works well in my testing.
> 

I believe this is good enough. I don't know the exact timing involved
here, but the hardware synchronizes the update to all the PHYs and the
MAC and it is supposed to be on the order of nanoseconds.

> My test code can be seen here:
> https://gitlab.com/mschmidt2/linux/-/commits/ice-ptp-host-side-lock
> It consists of:
>  - kernel threads reading the time in a busy loop and looking at the
>    deltas between consecutive values, reporting new maxima.
>    in the consecutive values;
>  - a shell script that sets the time repeatedly;
>  - a bpftrace probe to produce a histogram of the measured deltas.
> Without the spinlock ptp_gltsyn_time_lock, it is easy to see tearing.
> Deltas in the [2G, 4G) range appear in the histograms.
> With the spinlock added, there is no tearing and the biggest delta I saw
> was in the range [1M, 2M), that is under 2 ms.
> 

Nice.


At first, I wasn't convinced we actually need cross-adapter spinlock
here. I thought all the flows that took hardware semaphore were on the
clock owner. Only the clock owner PF will access the GLTSYN_TIME
registers, (gettimex is only exposed through the ptp device, which hooks
into the clock owner). Similarly, only the clock owner does adjtime,
settime, etc.

However... It turns out that at least a couple of flows use the
semaphore on non-clock owners. Specifically E822 hardware has to
initialize the PHY after a link restart, which includes re-doing Vernier
calibration. Each PF handles this itself, but does require use of the
timer synchronization commands to do it. In this flow, the main timer is
not modified but we still use the semaphore and sync registers.

I don't think that impacts the E810 card, but we use the same code flow
here. The E822 series hardware doesn't use the AdminQ for the PHY
messages, so its faster but I think the locking improvement would still
be relevant for them as well.

Given all this, I think it makes sense to go this route.

I'd like to follow-up with possibly replacing the entire HW semaphore
with a mutex initialized here. That would remove a bunch of PCIe posted
transactions required to acquire the HW semaphore which would be a
further improvement here.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_adapter.c | 2 ++
>  drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++++++
>  drivers/net/ethernet/intel/ice/ice_ptp.c     | 8 +-------
>  drivers/net/ethernet/intel/ice/ice_ptp_hw.c  | 3 +++
>  4 files changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> index deb063401238..4b9f5d29811c 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adapter.c
> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> @@ -5,6 +5,7 @@
>  #include <linux/mutex.h>
>  #include <linux/pci.h>
>  #include <linux/slab.h>
> +#include <linux/spinlock.h>
>  #include <linux/xarray.h>
>  #include "ice_adapter.h"
>  
> @@ -38,6 +39,7 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
>  	if (!a)
>  		return NULL;
>  
> +	spin_lock_init(&a->ptp_gltsyn_time_lock);
>  	refcount_set(&a->refcount, 1);
>  
>  	if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
> index cb5a02eb24c1..9d11014ec02f 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adapter.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
> @@ -4,15 +4,21 @@
>  #ifndef _ICE_ADAPTER_H_
>  #define _ICE_ADAPTER_H_
>  
> +#include <linux/spinlock_types.h>
>  #include <linux/refcount_types.h>
>  
>  struct pci_dev;
>  
>  /**
>   * struct ice_adapter - PCI adapter resources shared across PFs
> + * @ptp_gltsyn_time_lock: Spinlock protecting access to the GLTSYN_TIME
> + *                        register of the PTP clock.
>   * @refcount: Reference count. struct ice_pf objects hold the references.
>   */
>  struct ice_adapter {
> +	/* For access to the GLTSYN_TIME register */
> +	spinlock_t ptp_gltsyn_time_lock;
> +
>  	refcount_t refcount;
>  };
>  
> diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> index c11eba07283c..b6c7246245c6 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> @@ -374,6 +374,7 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
>  	u8 tmr_idx;
>  
>  	tmr_idx = ice_get_ptp_src_clock_index(hw);
> +	guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
>  	/* Read the system timestamp pre PHC read */
>  	ptp_read_system_prets(sts);
>  
> @@ -1925,15 +1926,8 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
>  		   struct ptp_system_timestamp *sts)
>  {
>  	struct ice_pf *pf = ptp_info_to_pf(info);
> -	struct ice_hw *hw = &pf->hw;
> -
> -	if (!ice_ptp_lock(hw)) {
> -		dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
> -		return -EBUSY;
> -	}
>  
>  	ice_ptp_read_time(pf, ts, sts);
> -	ice_ptp_unlock(hw);
>  
>  	return 0;
>  }
> diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
> index 187ce9b54e1a..a47dbbfadb74 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
> @@ -274,6 +274,9 @@ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
>   */
>  static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
>  {
> +	struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
> +
> +	guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
>  	wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
>  	ice_flush(hw);
>  }

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 3/3] ice: fold ice_ptp_read_time into ice_ptp_gettimex64
  2024-02-26 15:11 ` [PATCH net-next 3/3] ice: fold ice_ptp_read_time into ice_ptp_gettimex64 Michal Schmidt
@ 2024-02-26 19:36   ` Jacob Keller
  0 siblings, 0 replies; 15+ messages in thread
From: Jacob Keller @ 2024-02-26 19:36 UTC (permalink / raw)
  To: Michal Schmidt, intel-wired-lan
  Cc: Jesse Brandeburg, Richard Cochran, Arkadiusz Kubalewski,
	Karol Kolacinski, netdev



On 2/26/2024 7:11 AM, Michal Schmidt wrote:
> This is a cleanup. It is unnecessary to have this function just to call
> another function.
> 
> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>

Obvious cleanup, thanks!

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 0/3] ice: lighter locking for PTP time reading
  2024-02-26 19:16 ` [PATCH net-next 0/3] ice: lighter locking for PTP time reading Jacob Keller
@ 2024-02-26 20:01   ` Michal Schmidt
  0 siblings, 0 replies; 15+ messages in thread
From: Michal Schmidt @ 2024-02-26 20:01 UTC (permalink / raw)
  To: Jacob Keller
  Cc: intel-wired-lan, Jesse Brandeburg, Richard Cochran,
	Arkadiusz Kubalewski, Karol Kolacinski, netdev

On Mon, Feb 26, 2024 at 8:17 PM Jacob Keller <jacob.e.keller@intel.com> wrote:
> On 2/26/2024 7:11 AM, Michal Schmidt wrote:
> > This series removes the use of the heavy-weight PTP hardware semaphore
> > in the gettimex64 path. Instead, serialization of access to the time
> > register is done using a host-side spinlock. The timer hardware is
> > shared between PFs on the PCI adapter, so the spinlock must be shared
> > between ice_pf instances too.
> >
> > Michal Schmidt (3):
> >   ice: add ice_adapter for shared data across PFs on the same NIC
> >   ice: avoid the PTP hardware semaphore in gettimex64 path
> >   ice: fold ice_ptp_read_time into ice_ptp_gettimex64
> >
>
> Glad to see some fix and improvement in this place. I had been
> considering switching the hardware semaphore entirely to be a shared
> mutex instead, but this direction also seems reasonable and fixes most
> of the issues. We could actually extend this to replace the semaphore
> with a mutex in order to avoid the PCIe transactions required to handle
> the hardware semaphore register.

Thanks for the review. I'm glad you mentioned replacing the hw
semaphore with a mutex, because I was already going in that direction
:)
Michal


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path
  2024-02-26 19:36   ` Jacob Keller
@ 2024-02-26 20:11     ` Michal Schmidt
  2024-02-26 21:13       ` Jacob Keller
  0 siblings, 1 reply; 15+ messages in thread
From: Michal Schmidt @ 2024-02-26 20:11 UTC (permalink / raw)
  To: Jacob Keller
  Cc: intel-wired-lan, Jesse Brandeburg, Richard Cochran,
	Arkadiusz Kubalewski, Karol Kolacinski, netdev

On Mon, Feb 26, 2024 at 8:36 PM Jacob Keller <jacob.e.keller@intel.com> wrote:
> On 2/26/2024 7:11 AM, Michal Schmidt wrote:
> > The writing is performed indirectly, by the hardware, as a result of
> > the driver writing GLTSYN_CMD_SYNC in ice_ptp_exec_tmr_cmd. I wasn't
> > sure if the ice_flush there is enough to make sure GLTSYN_TIME has been
> > updated, but it works well in my testing.
> >
>
> I believe this is good enough. I don't know the exact timing involved
> here, but the hardware synchronizes the update to all the PHYs and the
> MAC and it is supposed to be on the order of nanoseconds.

Thanks, that's good to know.

> > My test code can be seen here:
> > https://gitlab.com/mschmidt2/linux/-/commits/ice-ptp-host-side-lock
> > It consists of:
> >  - kernel threads reading the time in a busy loop and looking at the
> >    deltas between consecutive values, reporting new maxima.
> >    in the consecutive values;
> >  - a shell script that sets the time repeatedly;
> >  - a bpftrace probe to produce a histogram of the measured deltas.
> > Without the spinlock ptp_gltsyn_time_lock, it is easy to see tearing.
> > Deltas in the [2G, 4G) range appear in the histograms.
> > With the spinlock added, there is no tearing and the biggest delta I saw
> > was in the range [1M, 2M), that is under 2 ms.
> >
>
> Nice.
>
>
> At first, I wasn't convinced we actually need cross-adapter spinlock
> here. I thought all the flows that took hardware semaphore were on the
> clock owner. Only the clock owner PF will access the GLTSYN_TIME
> registers, (gettimex is only exposed through the ptp device, which hooks
> into the clock owner). Similarly, only the clock owner does adjtime,
> settime, etc.

Non-owners do not expose a ptp device to userspace, but they still do
ice_ptp_periodic_work -> ice_ptp_update_cached_phctime ->
ice_ptp_read_src_clk_reg, where they read GLTSYN_TIME.

> However... It turns out that at least a couple of flows use the
> semaphore on non-clock owners. Specifically E822 hardware has to
> initialize the PHY after a link restart, which includes re-doing Vernier
> calibration. Each PF handles this itself, but does require use of the
> timer synchronization commands to do it. In this flow, the main timer is
> not modified but we still use the semaphore and sync registers.
>
> I don't think that impacts the E810 card, but we use the same code flow
> here. The E822 series hardware doesn't use the AdminQ for the PHY
> messages, so its faster but I think the locking improvement would still
> be relevant for them as well.
>
> Given all this, I think it makes sense to go this route.
>
> I'd like to follow-up with possibly replacing the entire HW semaphore
> with a mutex initialized here. That would remove a bunch of PCIe posted
> transactions required to acquire the HW semaphore which would be a
> further improvement here.

Yes, I agree and I have already been looking into this.
Thanks,
Michal

> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
>
> > Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_adapter.c | 2 ++
> >  drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++++++
> >  drivers/net/ethernet/intel/ice/ice_ptp.c     | 8 +-------
> >  drivers/net/ethernet/intel/ice/ice_ptp_hw.c  | 3 +++
> >  4 files changed, 12 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > index deb063401238..4b9f5d29811c 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_adapter.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
> > @@ -5,6 +5,7 @@
> >  #include <linux/mutex.h>
> >  #include <linux/pci.h>
> >  #include <linux/slab.h>
> > +#include <linux/spinlock.h>
> >  #include <linux/xarray.h>
> >  #include "ice_adapter.h"
> >
> > @@ -38,6 +39,7 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
> >       if (!a)
> >               return NULL;
> >
> > +     spin_lock_init(&a->ptp_gltsyn_time_lock);
> >       refcount_set(&a->refcount, 1);
> >
> >       if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
> > diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
> > index cb5a02eb24c1..9d11014ec02f 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_adapter.h
> > +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
> > @@ -4,15 +4,21 @@
> >  #ifndef _ICE_ADAPTER_H_
> >  #define _ICE_ADAPTER_H_
> >
> > +#include <linux/spinlock_types.h>
> >  #include <linux/refcount_types.h>
> >
> >  struct pci_dev;
> >
> >  /**
> >   * struct ice_adapter - PCI adapter resources shared across PFs
> > + * @ptp_gltsyn_time_lock: Spinlock protecting access to the GLTSYN_TIME
> > + *                        register of the PTP clock.
> >   * @refcount: Reference count. struct ice_pf objects hold the references.
> >   */
> >  struct ice_adapter {
> > +     /* For access to the GLTSYN_TIME register */
> > +     spinlock_t ptp_gltsyn_time_lock;
> > +
> >       refcount_t refcount;
> >  };
> >
> > diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > index c11eba07283c..b6c7246245c6 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
> > @@ -374,6 +374,7 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
> >       u8 tmr_idx;
> >
> >       tmr_idx = ice_get_ptp_src_clock_index(hw);
> > +     guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
> >       /* Read the system timestamp pre PHC read */
> >       ptp_read_system_prets(sts);
> >
> > @@ -1925,15 +1926,8 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
> >                  struct ptp_system_timestamp *sts)
> >  {
> >       struct ice_pf *pf = ptp_info_to_pf(info);
> > -     struct ice_hw *hw = &pf->hw;
> > -
> > -     if (!ice_ptp_lock(hw)) {
> > -             dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
> > -             return -EBUSY;
> > -     }
> >
> >       ice_ptp_read_time(pf, ts, sts);
> > -     ice_ptp_unlock(hw);
> >
> >       return 0;
> >  }
> > diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
> > index 187ce9b54e1a..a47dbbfadb74 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
> > @@ -274,6 +274,9 @@ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
> >   */
> >  static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
> >  {
> > +     struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
> > +
> > +     guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
> >       wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
> >       ice_flush(hw);
> >  }
>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path
  2024-02-26 20:11     ` Michal Schmidt
@ 2024-02-26 21:13       ` Jacob Keller
  0 siblings, 0 replies; 15+ messages in thread
From: Jacob Keller @ 2024-02-26 21:13 UTC (permalink / raw)
  To: Michal Schmidt, Kolacinski, Karol
  Cc: intel-wired-lan, Jesse Brandeburg, Richard Cochran,
	Arkadiusz Kubalewski, Karol Kolacinski, netdev



On 2/26/2024 12:11 PM, Michal Schmidt wrote:
> On Mon, Feb 26, 2024 at 8:36 PM Jacob Keller <jacob.e.keller@intel.com> wrote:
>> On 2/26/2024 7:11 AM, Michal Schmidt wrote:
>>> The writing is performed indirectly, by the hardware, as a result of
>>> the driver writing GLTSYN_CMD_SYNC in ice_ptp_exec_tmr_cmd. I wasn't
>>> sure if the ice_flush there is enough to make sure GLTSYN_TIME has been
>>> updated, but it works well in my testing.
>>>
>>
>> I believe this is good enough. I don't know the exact timing involved
>> here, but the hardware synchronizes the update to all the PHYs and the
>> MAC and it is supposed to be on the order of nanoseconds.
> 
> Thanks, that's good to know.
> 
>>> My test code can be seen here:
>>> https://gitlab.com/mschmidt2/linux/-/commits/ice-ptp-host-side-lock
>>> It consists of:
>>>  - kernel threads reading the time in a busy loop and looking at the
>>>    deltas between consecutive values, reporting new maxima.
>>>    in the consecutive values;
>>>  - a shell script that sets the time repeatedly;
>>>  - a bpftrace probe to produce a histogram of the measured deltas.
>>> Without the spinlock ptp_gltsyn_time_lock, it is easy to see tearing.
>>> Deltas in the [2G, 4G) range appear in the histograms.
>>> With the spinlock added, there is no tearing and the biggest delta I saw
>>> was in the range [1M, 2M), that is under 2 ms.
>>>
>>
>> Nice.
>>
>>
>> At first, I wasn't convinced we actually need cross-adapter spinlock
>> here. I thought all the flows that took hardware semaphore were on the
>> clock owner. Only the clock owner PF will access the GLTSYN_TIME
>> registers, (gettimex is only exposed through the ptp device, which hooks
>> into the clock owner). Similarly, only the clock owner does adjtime,
>> settime, etc.
> 
> Non-owners do not expose a ptp device to userspace, but they still do
> ice_ptp_periodic_work -> ice_ptp_update_cached_phctime ->
> ice_ptp_read_src_clk_reg, where they read GLTSYN_TIME.
> 

I think we (Karol?) have some work to fix this by refactoring so that
the clock owner does this over auxiliary bus for all PFs, rather than
having 4/8 PFs each with their own background thread we have a single
background thread that reads the value once and updates the cache of all
the PFs. It looks like its still only in internal testing however.. the
current kernel code does what you said above.

Either way, we also have the other flows for E822 devices which also
need to execute timer commands (albiet ones which do not directly affect
the main timer).

It is much simpler to reason about correctness if we simply lock every
executed command and the reads of the main timer. This has the advantage
that we also do not need to lock the timer reads while waiting on
firmware to prep all the PHYs, so it solves a major problem we had
before as well.

Thanks!

>> However... It turns out that at least a couple of flows use the
>> semaphore on non-clock owners. Specifically E822 hardware has to
>> initialize the PHY after a link restart, which includes re-doing Vernier
>> calibration. Each PF handles this itself, but does require use of the
>> timer synchronization commands to do it. In this flow, the main timer is
>> not modified but we still use the semaphore and sync registers.
>>
>> I don't think that impacts the E810 card, but we use the same code flow
>> here. The E822 series hardware doesn't use the AdminQ for the PHY
>> messages, so its faster but I think the locking improvement would still
>> be relevant for them as well.
>>
>> Given all this, I think it makes sense to go this route.
>>
>> I'd like to follow-up with possibly replacing the entire HW semaphore
>> with a mutex initialized here. That would remove a bunch of PCIe posted
>> transactions required to acquire the HW semaphore which would be a
>> further improvement here.
> 
> Yes, I agree and I have already been looking into this.
> Thanks,
> Michal

Ok great. If you think you already have patches for this I can go ahead
and wait on this work instead of duplicating effort.
> 
>> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
>>
>>> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
>>> ---
>>>  drivers/net/ethernet/intel/ice/ice_adapter.c | 2 ++
>>>  drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++++++
>>>  drivers/net/ethernet/intel/ice/ice_ptp.c     | 8 +-------
>>>  drivers/net/ethernet/intel/ice/ice_ptp_hw.c  | 3 +++
>>>  4 files changed, 12 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
>>> index deb063401238..4b9f5d29811c 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_adapter.c
>>> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
>>> @@ -5,6 +5,7 @@
>>>  #include <linux/mutex.h>
>>>  #include <linux/pci.h>
>>>  #include <linux/slab.h>
>>> +#include <linux/spinlock.h>
>>>  #include <linux/xarray.h>
>>>  #include "ice_adapter.h"
>>>
>>> @@ -38,6 +39,7 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
>>>       if (!a)
>>>               return NULL;
>>>
>>> +     spin_lock_init(&a->ptp_gltsyn_time_lock);
>>>       refcount_set(&a->refcount, 1);
>>>
>>>       if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
>>> index cb5a02eb24c1..9d11014ec02f 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_adapter.h
>>> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
>>> @@ -4,15 +4,21 @@
>>>  #ifndef _ICE_ADAPTER_H_
>>>  #define _ICE_ADAPTER_H_
>>>
>>> +#include <linux/spinlock_types.h>
>>>  #include <linux/refcount_types.h>
>>>
>>>  struct pci_dev;
>>>
>>>  /**
>>>   * struct ice_adapter - PCI adapter resources shared across PFs
>>> + * @ptp_gltsyn_time_lock: Spinlock protecting access to the GLTSYN_TIME
>>> + *                        register of the PTP clock.
>>>   * @refcount: Reference count. struct ice_pf objects hold the references.
>>>   */
>>>  struct ice_adapter {
>>> +     /* For access to the GLTSYN_TIME register */
>>> +     spinlock_t ptp_gltsyn_time_lock;
>>> +
>>>       refcount_t refcount;
>>>  };
>>>
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
>>> index c11eba07283c..b6c7246245c6 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_ptp.c
>>> +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
>>> @@ -374,6 +374,7 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
>>>       u8 tmr_idx;
>>>
>>>       tmr_idx = ice_get_ptp_src_clock_index(hw);
>>> +     guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
>>>       /* Read the system timestamp pre PHC read */
>>>       ptp_read_system_prets(sts);
>>>
>>> @@ -1925,15 +1926,8 @@ ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
>>>                  struct ptp_system_timestamp *sts)
>>>  {
>>>       struct ice_pf *pf = ptp_info_to_pf(info);
>>> -     struct ice_hw *hw = &pf->hw;
>>> -
>>> -     if (!ice_ptp_lock(hw)) {
>>> -             dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
>>> -             return -EBUSY;
>>> -     }
>>>
>>>       ice_ptp_read_time(pf, ts, sts);
>>> -     ice_ptp_unlock(hw);
>>>
>>>       return 0;
>>>  }
>>> diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
>>> index 187ce9b54e1a..a47dbbfadb74 100644
>>> --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
>>> +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
>>> @@ -274,6 +274,9 @@ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
>>>   */
>>>  static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
>>>  {
>>> +     struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
>>> +
>>> +     guard(spinlock_irqsave)(&pf->adapter->ptp_gltsyn_time_lock);
>>>       wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
>>>       ice_flush(hw);
>>>  }
>>
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC
  2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
  2024-02-26 19:18   ` Jacob Keller
@ 2024-02-27  7:05   ` Jiri Pirko
  2024-02-27  8:11     ` Michal Schmidt
  2024-02-28  2:35   ` Jakub Kicinski
  2 siblings, 1 reply; 15+ messages in thread
From: Jiri Pirko @ 2024-02-27  7:05 UTC (permalink / raw)
  To: Michal Schmidt
  Cc: intel-wired-lan, Jesse Brandeburg, Richard Cochran,
	Arkadiusz Kubalewski, Karol Kolacinski, Jacob Keller, netdev

Mon, Feb 26, 2024 at 04:11:23PM CET, mschmidt@redhat.com wrote:
>There is a need for synchronization between ice PFs on the same physical
>adapter.
>
>Add a "struct ice_adapter" for holding data shared between PFs of the
>same multifunction PCI device. The struct is refcounted - each ice_pf
>holds a reference to it.
>
>Its first use will be for PTP. I expect it will be useful also to
>improve the ugliness that is ice_prot_id_tbl.
>
>Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
>---
> drivers/net/ethernet/intel/ice/Makefile      |  3 +-
> drivers/net/ethernet/intel/ice/ice.h         |  2 +
> drivers/net/ethernet/intel/ice/ice_adapter.c | 67 ++++++++++++++++++++
> drivers/net/ethernet/intel/ice/ice_adapter.h | 22 +++++++
> drivers/net/ethernet/intel/ice/ice_main.c    |  8 +++
> 5 files changed, 101 insertions(+), 1 deletion(-)
> create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
> create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
>
>diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
>index cddd82d4ca0f..4fa09c321440 100644
>--- a/drivers/net/ethernet/intel/ice/Makefile
>+++ b/drivers/net/ethernet/intel/ice/Makefile
>@@ -36,7 +36,8 @@ ice-y := ice_main.o	\
> 	 ice_repr.o	\
> 	 ice_tc_lib.o	\
> 	 ice_fwlog.o	\
>-	 ice_debugfs.o
>+	 ice_debugfs.o  \
>+	 ice_adapter.o
> ice-$(CONFIG_PCI_IOV) +=	\
> 	ice_sriov.o		\
> 	ice_virtchnl.o		\
>diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
>index 365c03d1c462..1ffecbdd361a 100644
>--- a/drivers/net/ethernet/intel/ice/ice.h
>+++ b/drivers/net/ethernet/intel/ice/ice.h
>@@ -77,6 +77,7 @@
> #include "ice_gnss.h"
> #include "ice_irq.h"
> #include "ice_dpll.h"
>+#include "ice_adapter.h"
> 
> #define ICE_BAR0		0
> #define ICE_REQ_DESC_MULTIPLE	32
>@@ -544,6 +545,7 @@ struct ice_agg_node {
> 
> struct ice_pf {
> 	struct pci_dev *pdev;
>+	struct ice_adapter *adapter;
> 
> 	struct devlink_region *nvm_region;
> 	struct devlink_region *sram_region;
>diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
>new file mode 100644
>index 000000000000..deb063401238
>--- /dev/null
>+++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
>@@ -0,0 +1,67 @@
>+// SPDX-License-Identifier: GPL-2.0-only
>+// SPDX-FileCopyrightText: Copyright Red Hat
>+
>+#include <linux/cleanup.h>
>+#include <linux/mutex.h>
>+#include <linux/pci.h>
>+#include <linux/slab.h>
>+#include <linux/xarray.h>
>+#include "ice_adapter.h"
>+
>+static DEFINE_MUTEX(ice_adapters_lock);

Why you need and extra mutex and not just rely on xarray lock?


>+static DEFINE_XARRAY(ice_adapters);
>+
>+static unsigned long ice_adapter_index(const struct pci_dev *pdev)
>+{
>+	unsigned int domain = pci_domain_nr(pdev->bus);
>+
>+	WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
>+	return ((unsigned long)domain << 13) |
>+	       ((unsigned long)pdev->bus->number << 5) |
>+	       PCI_SLOT(pdev->devfn);
>+}
>+
>+struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
>+{
>+	unsigned long index = ice_adapter_index(pdev);
>+	struct ice_adapter *a;
>+
>+	guard(mutex)(&ice_adapters_lock);
>+
>+	a = xa_load(&ice_adapters, index);
>+	if (a) {
>+		refcount_inc(&a->refcount);
>+		return a;
>+	}
>+
>+	a = kzalloc(sizeof(*a), GFP_KERNEL);
>+	if (!a)
>+		return NULL;
>+
>+	refcount_set(&a->refcount, 1);
>+
>+	if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
>+		kfree(a);
>+		return NULL;
>+	}
>+
>+	return a;
>+}
>+
>+void ice_adapter_put(const struct pci_dev *pdev)
>+{
>+	unsigned long index = ice_adapter_index(pdev);
>+	struct ice_adapter *a;
>+
>+	guard(mutex)(&ice_adapters_lock);
>+
>+	a = xa_load(&ice_adapters, index);
>+	if (WARN_ON(!a))
>+		return;
>+
>+	if (!refcount_dec_and_test(&a->refcount))
>+		return;
>+
>+	WARN_ON(xa_erase(&ice_adapters, index) != a);
>+	kfree(a);
>+}
>diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
>new file mode 100644
>index 000000000000..cb5a02eb24c1
>--- /dev/null
>+++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
>@@ -0,0 +1,22 @@
>+/* SPDX-License-Identifier: GPL-2.0-only */
>+/* SPDX-FileCopyrightText: Copyright Red Hat */
>+
>+#ifndef _ICE_ADAPTER_H_
>+#define _ICE_ADAPTER_H_
>+
>+#include <linux/refcount_types.h>
>+
>+struct pci_dev;
>+
>+/**
>+ * struct ice_adapter - PCI adapter resources shared across PFs
>+ * @refcount: Reference count. struct ice_pf objects hold the references.
>+ */
>+struct ice_adapter {
>+	refcount_t refcount;
>+};
>+
>+struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
>+void ice_adapter_put(const struct pci_dev *pdev);
>+
>+#endif /* _ICE_ADAPTER_H */
>diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
>index 9c2c8637b4a7..4a60957221fc 100644
>--- a/drivers/net/ethernet/intel/ice/ice_main.c
>+++ b/drivers/net/ethernet/intel/ice/ice_main.c
>@@ -5093,6 +5093,7 @@ static int
> ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> {
> 	struct device *dev = &pdev->dev;
>+	struct ice_adapter *adapter;
> 	struct ice_pf *pf;
> 	struct ice_hw *hw;
> 	int err;
>@@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> 
> 	pci_set_master(pdev);
> 
>+	adapter = ice_adapter_get(pdev);
>+	if (!adapter)
>+		return -ENOMEM;
>+
> 	pf->pdev = pdev;
>+	pf->adapter = adapter;
> 	pci_set_drvdata(pdev, pf);
> 	set_bit(ICE_DOWN, pf->state);
> 	/* Disable service task until DOWN bit is cleared */
>@@ -5196,6 +5202,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
> err_load:
> 	ice_deinit(pf);
> err_init:
>+	ice_adapter_put(pdev);
> 	pci_disable_device(pdev);
> 	return err;
> }
>@@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev)
> 	ice_setup_mc_magic_wake(pf);
> 	ice_set_wake(pf);
> 
>+	ice_adapter_put(pdev);
> 	pci_disable_device(pdev);
> }
> 
>-- 
>2.43.2
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC
  2024-02-27  7:05   ` Jiri Pirko
@ 2024-02-27  8:11     ` Michal Schmidt
  0 siblings, 0 replies; 15+ messages in thread
From: Michal Schmidt @ 2024-02-27  8:11 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: intel-wired-lan, Jesse Brandeburg, Richard Cochran,
	Arkadiusz Kubalewski, Karol Kolacinski, Jacob Keller, netdev

On 2/27/24 08:05, Jiri Pirko wrote:
> Mon, Feb 26, 2024 at 04:11:23PM CET, mschmidt@redhat.com wrote:
>> There is a need for synchronization between ice PFs on the same physical
>> adapter.
>>
>> Add a "struct ice_adapter" for holding data shared between PFs of the
>> same multifunction PCI device. The struct is refcounted - each ice_pf
>> holds a reference to it.
>>
>> Its first use will be for PTP. I expect it will be useful also to
>> improve the ugliness that is ice_prot_id_tbl.
>>
>> Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
>> ---
>> drivers/net/ethernet/intel/ice/Makefile      |  3 +-
>> drivers/net/ethernet/intel/ice/ice.h         |  2 +
>> drivers/net/ethernet/intel/ice/ice_adapter.c | 67 ++++++++++++++++++++
>> drivers/net/ethernet/intel/ice/ice_adapter.h | 22 +++++++
>> drivers/net/ethernet/intel/ice/ice_main.c    |  8 +++
>> 5 files changed, 101 insertions(+), 1 deletion(-)
>> create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.c
>> create mode 100644 drivers/net/ethernet/intel/ice/ice_adapter.h
>>
>> diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
>> index cddd82d4ca0f..4fa09c321440 100644
>> --- a/drivers/net/ethernet/intel/ice/Makefile
>> +++ b/drivers/net/ethernet/intel/ice/Makefile
>> @@ -36,7 +36,8 @@ ice-y := ice_main.o	\
>> 	 ice_repr.o	\
>> 	 ice_tc_lib.o	\
>> 	 ice_fwlog.o	\
>> -	 ice_debugfs.o
>> +	 ice_debugfs.o  \
>> +	 ice_adapter.o
>> ice-$(CONFIG_PCI_IOV) +=	\
>> 	ice_sriov.o		\
>> 	ice_virtchnl.o		\
>> diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
>> index 365c03d1c462..1ffecbdd361a 100644
>> --- a/drivers/net/ethernet/intel/ice/ice.h
>> +++ b/drivers/net/ethernet/intel/ice/ice.h
>> @@ -77,6 +77,7 @@
>> #include "ice_gnss.h"
>> #include "ice_irq.h"
>> #include "ice_dpll.h"
>> +#include "ice_adapter.h"
>>
>> #define ICE_BAR0		0
>> #define ICE_REQ_DESC_MULTIPLE	32
>> @@ -544,6 +545,7 @@ struct ice_agg_node {
>>
>> struct ice_pf {
>> 	struct pci_dev *pdev;
>> +	struct ice_adapter *adapter;
>>
>> 	struct devlink_region *nvm_region;
>> 	struct devlink_region *sram_region;
>> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c
>> new file mode 100644
>> index 000000000000..deb063401238
>> --- /dev/null
>> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c
>> @@ -0,0 +1,67 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +// SPDX-FileCopyrightText: Copyright Red Hat
>> +
>> +#include <linux/cleanup.h>
>> +#include <linux/mutex.h>
>> +#include <linux/pci.h>
>> +#include <linux/slab.h>
>> +#include <linux/xarray.h>
>> +#include "ice_adapter.h"
>> +
>> +static DEFINE_MUTEX(ice_adapters_lock);
> 
> Why you need and extra mutex and not just rely on xarray lock?

I suppose I could use xa_lock() and the __xa_{load,store} calls. 
Alright, let's see what it will look like...
Thanks,
Michal


>> +static DEFINE_XARRAY(ice_adapters);
>> +
>> +static unsigned long ice_adapter_index(const struct pci_dev *pdev)
>> +{
>> +	unsigned int domain = pci_domain_nr(pdev->bus);
>> +
>> +	WARN_ON((unsigned long)domain >> (BITS_PER_LONG - 13));
>> +	return ((unsigned long)domain << 13) |
>> +	       ((unsigned long)pdev->bus->number << 5) |
>> +	       PCI_SLOT(pdev->devfn);
>> +}
>> +
>> +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev)
>> +{
>> +	unsigned long index = ice_adapter_index(pdev);
>> +	struct ice_adapter *a;
>> +
>> +	guard(mutex)(&ice_adapters_lock);
>> +
>> +	a = xa_load(&ice_adapters, index);
>> +	if (a) {
>> +		refcount_inc(&a->refcount);
>> +		return a;
>> +	}
>> +
>> +	a = kzalloc(sizeof(*a), GFP_KERNEL);
>> +	if (!a)
>> +		return NULL;
>> +
>> +	refcount_set(&a->refcount, 1);
>> +
>> +	if (xa_is_err(xa_store(&ice_adapters, index, a, GFP_KERNEL))) {
>> +		kfree(a);
>> +		return NULL;
>> +	}
>> +
>> +	return a;
>> +}
>> +
>> +void ice_adapter_put(const struct pci_dev *pdev)
>> +{
>> +	unsigned long index = ice_adapter_index(pdev);
>> +	struct ice_adapter *a;
>> +
>> +	guard(mutex)(&ice_adapters_lock);
>> +
>> +	a = xa_load(&ice_adapters, index);
>> +	if (WARN_ON(!a))
>> +		return;
>> +
>> +	if (!refcount_dec_and_test(&a->refcount))
>> +		return;
>> +
>> +	WARN_ON(xa_erase(&ice_adapters, index) != a);
>> +	kfree(a);
>> +}
>> diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h
>> new file mode 100644
>> index 000000000000..cb5a02eb24c1
>> --- /dev/null
>> +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h
>> @@ -0,0 +1,22 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/* SPDX-FileCopyrightText: Copyright Red Hat */
>> +
>> +#ifndef _ICE_ADAPTER_H_
>> +#define _ICE_ADAPTER_H_
>> +
>> +#include <linux/refcount_types.h>
>> +
>> +struct pci_dev;
>> +
>> +/**
>> + * struct ice_adapter - PCI adapter resources shared across PFs
>> + * @refcount: Reference count. struct ice_pf objects hold the references.
>> + */
>> +struct ice_adapter {
>> +	refcount_t refcount;
>> +};
>> +
>> +struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev);
>> +void ice_adapter_put(const struct pci_dev *pdev);
>> +
>> +#endif /* _ICE_ADAPTER_H */
>> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
>> index 9c2c8637b4a7..4a60957221fc 100644
>> --- a/drivers/net/ethernet/intel/ice/ice_main.c
>> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
>> @@ -5093,6 +5093,7 @@ static int
>> ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
>> {
>> 	struct device *dev = &pdev->dev;
>> +	struct ice_adapter *adapter;
>> 	struct ice_pf *pf;
>> 	struct ice_hw *hw;
>> 	int err;
>> @@ -5145,7 +5146,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
>>
>> 	pci_set_master(pdev);
>>
>> +	adapter = ice_adapter_get(pdev);
>> +	if (!adapter)
>> +		return -ENOMEM;
>> +
>> 	pf->pdev = pdev;
>> +	pf->adapter = adapter;
>> 	pci_set_drvdata(pdev, pf);
>> 	set_bit(ICE_DOWN, pf->state);
>> 	/* Disable service task until DOWN bit is cleared */
>> @@ -5196,6 +5202,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
>> err_load:
>> 	ice_deinit(pf);
>> err_init:
>> +	ice_adapter_put(pdev);
>> 	pci_disable_device(pdev);
>> 	return err;
>> }
>> @@ -5302,6 +5309,7 @@ static void ice_remove(struct pci_dev *pdev)
>> 	ice_setup_mc_magic_wake(pf);
>> 	ice_set_wake(pf);
>>
>> +	ice_adapter_put(pdev);
>> 	pci_disable_device(pdev);
>> }
>>
>> -- 
>> 2.43.2
>>
>>
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC
  2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
  2024-02-26 19:18   ` Jacob Keller
  2024-02-27  7:05   ` Jiri Pirko
@ 2024-02-28  2:35   ` Jakub Kicinski
  2024-02-28 17:39     ` Keller, Jacob E
  2 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2024-02-28  2:35 UTC (permalink / raw)
  To: Michal Schmidt
  Cc: intel-wired-lan, Jesse Brandeburg, Richard Cochran,
	Arkadiusz Kubalewski, Karol Kolacinski, Jacob Keller, netdev

On Mon, 26 Feb 2024 16:11:23 +0100 Michal Schmidt wrote:
> There is a need for synchronization between ice PFs on the same physical
> adapter.
> 
> Add a "struct ice_adapter" for holding data shared between PFs of the
> same multifunction PCI device. The struct is refcounted - each ice_pf
> holds a reference to it.
> 
> Its first use will be for PTP. I expect it will be useful also to
> improve the ugliness that is ice_prot_id_tbl.

ice doesn't support any multi-host devices?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC
  2024-02-28  2:35   ` Jakub Kicinski
@ 2024-02-28 17:39     ` Keller, Jacob E
  0 siblings, 0 replies; 15+ messages in thread
From: Keller, Jacob E @ 2024-02-28 17:39 UTC (permalink / raw)
  To: Jakub Kicinski, mschmidt
  Cc: intel-wired-lan@lists.osuosl.org, Brandeburg, Jesse,
	Richard Cochran, Kubalewski, Arkadiusz, Kolacinski, Karol,
	netdev@vger.kernel.org



> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Tuesday, February 27, 2024 6:36 PM
> To: mschmidt <mschmidt@redhat.com>
> Cc: intel-wired-lan@lists.osuosl.org; Brandeburg, Jesse
> <jesse.brandeburg@intel.com>; Richard Cochran <richardcochran@gmail.com>;
> Kubalewski, Arkadiusz <arkadiusz.kubalewski@intel.com>; Kolacinski, Karol
> <karol.kolacinski@intel.com>; Keller, Jacob E <jacob.e.keller@intel.com>;
> netdev@vger.kernel.org
> Subject: Re: [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs
> on the same NIC
> 
> On Mon, 26 Feb 2024 16:11:23 +0100 Michal Schmidt wrote:
> > There is a need for synchronization between ice PFs on the same physical
> > adapter.
> >
> > Add a "struct ice_adapter" for holding data shared between PFs of the
> > same multifunction PCI device. The struct is refcounted - each ice_pf
> > holds a reference to it.
> >
> > Its first use will be for PTP. I expect it will be useful also to
> > improve the ugliness that is ice_prot_id_tbl.
> 
> ice doesn't support any multi-host devices?

No. I guess you could try to direct-assign one PF into a VM? But otherwise it doesn't have support for any multi-host. I also am not aware of any plans to add such a device to it in the future. I think all our future multi-host stuff would be with idpf.

Thanks,
Jake

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2024-02-28 17:40 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-02-26 15:11 [PATCH net-next 0/3] ice: lighter locking for PTP time reading Michal Schmidt
2024-02-26 15:11 ` [PATCH net-next 1/3] ice: add ice_adapter for shared data across PFs on the same NIC Michal Schmidt
2024-02-26 19:18   ` Jacob Keller
2024-02-27  7:05   ` Jiri Pirko
2024-02-27  8:11     ` Michal Schmidt
2024-02-28  2:35   ` Jakub Kicinski
2024-02-28 17:39     ` Keller, Jacob E
2024-02-26 15:11 ` [PATCH net-next 2/3] ice: avoid the PTP hardware semaphore in gettimex64 path Michal Schmidt
2024-02-26 19:36   ` Jacob Keller
2024-02-26 20:11     ` Michal Schmidt
2024-02-26 21:13       ` Jacob Keller
2024-02-26 15:11 ` [PATCH net-next 3/3] ice: fold ice_ptp_read_time into ice_ptp_gettimex64 Michal Schmidt
2024-02-26 19:36   ` Jacob Keller
2024-02-26 19:16 ` [PATCH net-next 0/3] ice: lighter locking for PTP time reading Jacob Keller
2024-02-26 20:01   ` Michal Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).