* [PATCH net-next v3 1/5] eth: fbnic: add software TX timestamping support
2024-10-03 12:39 [PATCH net-next v3 0/5] eth: fbnic: add timestamping support Vadim Fedorenko
@ 2024-10-03 12:39 ` Vadim Fedorenko
2024-10-04 22:55 ` Jacob Keller
2024-10-03 12:39 ` [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support Vadim Fedorenko
` (3 subsequent siblings)
4 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-03 12:39 UTC (permalink / raw)
To: Vadim Fedorenko, Jakub Kicinski, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck
Cc: Vadim Fedorenko, netdev, Richard Cochran
Add software TX timestamping support. RX software timestamping is
implemented in the core, so there is no need to provide a special flag
in the driver anymore.
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++++++++++
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 3 +++
2 files changed, 14 insertions(+)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 5d980e178941..ffc773014e0f 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -6,6 +6,16 @@
#include "fbnic_netdev.h"
#include "fbnic_tlv.h"
+static int
+fbnic_get_ts_info(struct net_device *netdev,
+ struct kernel_ethtool_ts_info *tsinfo)
+{
+ tsinfo->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE;
+
+ return 0;
+}
+
static void
fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
{
@@ -66,6 +76,7 @@ fbnic_get_eth_mac_stats(struct net_device *netdev,
static const struct ethtool_ops fbnic_ethtool_ops = {
.get_drvinfo = fbnic_get_drvinfo,
+ .get_ts_info = fbnic_get_ts_info,
.get_eth_mac_stats = fbnic_get_eth_mac_stats,
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 6a6d7e22f1a7..8337d49bad0b 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -205,6 +205,9 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
ring->tail = tail;
+ /* Record SW timestamp */
+ skb_tx_timestamp(skb);
+
/* Verify there is room for another packet */
fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);
--
2.43.5
* Re: [PATCH net-next v3 1/5] eth: fbnic: add software TX timestamping support
2024-10-03 12:39 ` [PATCH net-next v3 1/5] eth: fbnic: add software TX " Vadim Fedorenko
@ 2024-10-04 22:55 ` Jacob Keller
2024-10-04 23:18 ` Jacob Keller
2024-10-07 9:56 ` Vadim Fedorenko
0 siblings, 2 replies; 24+ messages in thread
From: Jacob Keller @ 2024-10-04 22:55 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
> Add software TX timestamping support. RX software timestamping is
> implemented in the core and there is no need to provide special flag
> in the driver anymore.
>
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
> drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++++++++++
> drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 3 +++
> 2 files changed, 14 insertions(+)
>
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> index 5d980e178941..ffc773014e0f 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
> @@ -6,6 +6,16 @@
> #include "fbnic_netdev.h"
> #include "fbnic_tlv.h"
>
> +static int
> +fbnic_get_ts_info(struct net_device *netdev,
> + struct kernel_ethtool_ts_info *tsinfo)
> +{
> + tsinfo->so_timestamping =
> + SOF_TIMESTAMPING_TX_SOFTWARE;
> +
> + return 0;
> +}
> +
You could use ethtool_op_get_ts_info(), but I imagine future patches
will update this for hardware timestamping, so I don't think that's a big
deal.
I think you *do* still want to report SOF_TIMESTAMPING_RX_SOFTWARE and
SOF_TIMESTAMPING_SOFTWARE to get the API correct... Perhaps that could
be improved in the core stack though.... Or did that already get changed
recently?
You should also set phc_index to -1 until you have a PTP clock device.
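For illustration, a minimal sketch of the callback shape being suggested
here (hypothetical, not the driver's final code; as the follow-ups below
note, the core now fills in the software RX flags and the -1 phc_index
by itself):

static int
fbnic_get_ts_info(struct net_device *netdev,
                  struct kernel_ethtool_ts_info *tsinfo)
{
        /* Sketch of the review suggestion: report the SW TX/RX
         * capabilities explicitly and -1 while no PTP clock is
         * registered.
         */
        tsinfo->phc_index = -1;

        tsinfo->so_timestamping =
                SOF_TIMESTAMPING_TX_SOFTWARE |
                SOF_TIMESTAMPING_RX_SOFTWARE |
                SOF_TIMESTAMPING_SOFTWARE;

        return 0;
}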
> static void
> fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
> {
> @@ -66,6 +76,7 @@ fbnic_get_eth_mac_stats(struct net_device *netdev,
>
> static const struct ethtool_ops fbnic_ethtool_ops = {
> .get_drvinfo = fbnic_get_drvinfo,
> + .get_ts_info = fbnic_get_ts_info,
> .get_eth_mac_stats = fbnic_get_eth_mac_stats,
> };
>
> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> index 6a6d7e22f1a7..8337d49bad0b 100644
> --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
> @@ -205,6 +205,9 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
>
> ring->tail = tail;
>
> + /* Record SW timestamp */
> + skb_tx_timestamp(skb);
> +
> /* Verify there is room for another packet */
> fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);
>
* Re: [PATCH net-next v3 1/5] eth: fbnic: add software TX timestamping support
2024-10-04 22:55 ` Jacob Keller
@ 2024-10-04 23:18 ` Jacob Keller
2024-10-07 9:56 ` Vadim Fedorenko
1 sibling, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2024-10-04 23:18 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/4/2024 3:55 PM, Jacob Keller wrote:
> I think you *do* still want to report SOF_TIMESTAMPING_RX_SOFTWARE and
> SOF_TIMESTAMPING_SOFTWARE to get the API correct... Perhaps that could
> be improved in the core stack though.... Or did that already get changed
> recently?
>
Ah, this did change recently, but the helper ethtool_op_get_ts_info()
was not updated to do the same. :D
For other reviewers who weren't aware, this is true since 12d337339d9f
("ethtool: RX software timestamp for all")
* Re: [PATCH net-next v3 1/5] eth: fbnic: add software TX timestamping support
2024-10-04 22:55 ` Jacob Keller
2024-10-04 23:18 ` Jacob Keller
@ 2024-10-07 9:56 ` Vadim Fedorenko
2024-10-07 23:52 ` Jacob Keller
1 sibling, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-07 9:56 UTC (permalink / raw)
To: Jacob Keller, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 04/10/2024 23:55, Jacob Keller wrote:
>
>
> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>> Add software TX timestamping support. RX software timestamping is
>> implemented in the core and there is no need to provide special flag
>> in the driver anymore.
>>
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>> drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++++++++++
>> drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 3 +++
>> 2 files changed, 14 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
>> index 5d980e178941..ffc773014e0f 100644
>> --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
>> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
>> @@ -6,6 +6,16 @@
>> #include "fbnic_netdev.h"
>> #include "fbnic_tlv.h"
>>
>> +static int
>> +fbnic_get_ts_info(struct net_device *netdev,
>> + struct kernel_ethtool_ts_info *tsinfo)
>> +{
>> + tsinfo->so_timestamping =
>> + SOF_TIMESTAMPING_TX_SOFTWARE;
>> +
>> + return 0;
>> +}
>> +
>
> You could use ethtool_op_get_ts_info(), but I imagine future patches
> will update this for hardware timestamping, so I don't think thats a big
> deal.
>
> I think you *do* still want to report SOF_TIMESTAMPING_RX_SOFTWARE and
> SOF_TIMESTAMPING_SOFTWARE to get the API correct... Perhaps that could
> be improved in the core stack though.... Or did that already get changed
> recently?
Yeah, as you found in the next mail, software RX timestamping was moved
to the core recently.
> You should also set phc_index to -1 until you have a PTP clock device.
That's definitely missing, thanks! I'll add it to the next version.
>
>> static void
>> fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
>> {
>> @@ -66,6 +76,7 @@ fbnic_get_eth_mac_stats(struct net_device *netdev,
>>
>> static const struct ethtool_ops fbnic_ethtool_ops = {
>> .get_drvinfo = fbnic_get_drvinfo,
>> + .get_ts_info = fbnic_get_ts_info,
>> .get_eth_mac_stats = fbnic_get_eth_mac_stats,
>> };
>>
>> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
>> index 6a6d7e22f1a7..8337d49bad0b 100644
>> --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
>> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
>> @@ -205,6 +205,9 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
>>
>> ring->tail = tail;
>>
>> + /* Record SW timestamp */
>> + skb_tx_timestamp(skb);
>> +
>> /* Verify there is room for another packet */
>> fbnic_maybe_stop_tx(skb->dev, ring, FBNIC_MAX_SKB_DESC);
>>
>
* Re: [PATCH net-next v3 1/5] eth: fbnic: add software TX timestamping support
2024-10-07 9:56 ` Vadim Fedorenko
@ 2024-10-07 23:52 ` Jacob Keller
0 siblings, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2024-10-07 23:52 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/7/2024 2:56 AM, Vadim Fedorenko wrote:
> On 04/10/2024 23:55, Jacob Keller wrote:
>>
>>
>> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>>> Add software TX timestamping support. RX software timestamping is
>>> implemented in the core and there is no need to provide special flag
>>> in the driver anymore.
>>>
>>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>>> ---
>>> drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++++++++++
>>> drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 3 +++
>>> 2 files changed, 14 insertions(+)
>>>
>>> diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
>>> index 5d980e178941..ffc773014e0f 100644
>>> --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
>>> +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
>>> @@ -6,6 +6,16 @@
>>> #include "fbnic_netdev.h"
>>> #include "fbnic_tlv.h"
>>>
>>> +static int
>>> +fbnic_get_ts_info(struct net_device *netdev,
>>> + struct kernel_ethtool_ts_info *tsinfo)
>>> +{
>>> + tsinfo->so_timestamping =
>>> + SOF_TIMESTAMPING_TX_SOFTWARE;
>>> +
>>> + return 0;
>>> +}
>>> +
>>
>> You could use ethtool_op_get_ts_info(), but I imagine future patches
>> will update this for hardware timestamping, so I don't think thats a big
>> deal.
>>
>> I think you *do* still want to report SOF_TIMESTAMPING_RX_SOFTWARE and
>> SOF_TIMESTAMPING_SOFTWARE to get the API correct... Perhaps that could
>> be improved in the core stack though.... Or did that already get changed
>> recently?
>
> Yeah, as you found in the next mail, software RX timestamping was moved
> to the core recently.
>
>> You should also set phc_index to -1 until you have a PTP clock device.
>
> That's definitely missing, thanks! I'll add it to the next version.
>
Since I had missed this, I reviewed the patch that moved Rx software
timestamping to the core. It also initializes phc_index to -1
automatically, so setting it in the driver shouldn't be necessary.
Apologies for the misleading comment.
* [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-03 12:39 [PATCH net-next v3 0/5] eth: fbnic: add timestamping support Vadim Fedorenko
2024-10-03 12:39 ` [PATCH net-next v3 1/5] eth: fbnic: add software TX " Vadim Fedorenko
@ 2024-10-03 12:39 ` Vadim Fedorenko
2024-10-04 23:05 ` Jacob Keller
2024-10-03 12:39 ` [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support Vadim Fedorenko
` (2 subsequent siblings)
4 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-03 12:39 UTC (permalink / raw)
To: Vadim Fedorenko, Jakub Kicinski, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck
Cc: Vadim Fedorenko, netdev, Richard Cochran
Create PHC device and provide callbacks needed for ptp_clock device.
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
drivers/net/ethernet/meta/fbnic/Makefile | 3 +-
drivers/net/ethernet/meta/fbnic/fbnic.h | 11 +
drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 38 +++
.../net/ethernet/meta/fbnic/fbnic_netdev.c | 11 +-
.../net/ethernet/meta/fbnic/fbnic_netdev.h | 15 +
drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 9 +-
drivers/net/ethernet/meta/fbnic/fbnic_time.c | 311 ++++++++++++++++++
7 files changed, 394 insertions(+), 4 deletions(-)
create mode 100644 drivers/net/ethernet/meta/fbnic/fbnic_time.c
diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile
index ed4533a73c57..8615d516a274 100644
--- a/drivers/net/ethernet/meta/fbnic/Makefile
+++ b/drivers/net/ethernet/meta/fbnic/Makefile
@@ -18,4 +18,5 @@ fbnic-y := fbnic_devlink.o \
fbnic_phylink.o \
fbnic_rpc.o \
fbnic_tlv.o \
- fbnic_txrx.o
+ fbnic_txrx.o \
+ fbnic_time.o
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h
index 0f9e8d79461c..ca59261f0155 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic.h
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -49,6 +50,16 @@ struct fbnic_dev {
/* Number of TCQs/RCQs available on hardware */
u16 max_num_queues;
+ /* Lock protecting writes to @time_high, @time_offset of fbnic_netdev,
+ * and the HW time CSR machinery.
+ */
+ spinlock_t time_lock;
+ /* Externally accessible PTP clock, may be NULL */
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+ /* Last @time_high refresh time in jiffies (to catch stalls) */
+ unsigned long last_read;
+
/* Local copy of hardware statistics */
struct fbnic_hw_stats hw_stats;
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
index 21db509acbc1..290b924b7749 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
@@ -413,6 +413,44 @@ enum {
#define FBNIC_TMI_DROP_CTRL 0x04401 /* 0x11004 */
#define FBNIC_TMI_DROP_CTRL_EN CSR_BIT(0)
#define FBNIC_CSR_END_TMI 0x0443f /* CSR section delimiter */
+
+/* Precision Time Protocol Registers */
+#define FBNIC_CSR_START_PTP 0x04800 /* CSR section delimiter */
+#define FBNIC_PTP_REG_BASE 0x04800 /* 0x12000 */
+
+#define FBNIC_PTP_CTRL 0x04800 /* 0x12000 */
+#define FBNIC_PTP_CTRL_EN CSR_BIT(0)
+#define FBNIC_PTP_CTRL_MONO_EN CSR_BIT(4)
+#define FBNIC_PTP_CTRL_TQS_OUT_EN CSR_BIT(8)
+#define FBNIC_PTP_CTRL_MAC_OUT_IVAL CSR_GENMASK(16, 12)
+#define FBNIC_PTP_CTRL_TICK_IVAL CSR_GENMASK(23, 20)
+
+#define FBNIC_PTP_ADJUST 0x04801 /* 0x12004 */
+#define FBNIC_PTP_ADJUST_INIT CSR_BIT(0)
+#define FBNIC_PTP_ADJUST_SUB_NUDGE CSR_BIT(8)
+#define FBNIC_PTP_ADJUST_ADD_NUDGE CSR_BIT(16)
+#define FBNIC_PTP_ADJUST_ADDEND_SET CSR_BIT(24)
+
+#define FBNIC_PTP_INIT_HI 0x04802 /* 0x12008 */
+#define FBNIC_PTP_INIT_LO 0x04803 /* 0x1200c */
+
+#define FBNIC_PTP_NUDGE_NS 0x04804 /* 0x12010 */
+#define FBNIC_PTP_NUDGE_SUBNS 0x04805 /* 0x12014 */
+
+#define FBNIC_PTP_ADD_VAL_NS 0x04806 /* 0x12018 */
+#define FBNIC_PTP_ADD_VAL_NS_MASK CSR_GENMASK(15, 0)
+#define FBNIC_PTP_ADD_VAL_SUBNS 0x04807 /* 0x1201c */
+
+#define FBNIC_PTP_CTR_VAL_HI 0x04808 /* 0x12020 */
+#define FBNIC_PTP_CTR_VAL_LO 0x04809 /* 0x12024 */
+
+#define FBNIC_PTP_MONO_PTP_CTR_HI 0x0480a /* 0x12028 */
+#define FBNIC_PTP_MONO_PTP_CTR_LO 0x0480b /* 0x1202c */
+
+#define FBNIC_PTP_CDC_FIFO_STATUS 0x0480c /* 0x12030 */
+#define FBNIC_PTP_SPARE 0x0480d /* 0x12034 */
+#define FBNIC_CSR_END_PTP 0x0480d /* CSR section delimiter */
+
/* Rx Buffer Registers */
#define FBNIC_CSR_START_RXB 0x08000 /* CSR section delimiter */
enum {
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index a400616a24d4..6e6d8988db54 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -42,18 +42,24 @@ int __fbnic_open(struct fbnic_net *fbn)
goto free_resources;
}
- err = fbnic_fw_init_heartbeat(fbd, false);
+ err = fbnic_time_start(fbn);
if (err)
goto release_ownership;
+ err = fbnic_fw_init_heartbeat(fbd, false);
+ if (err)
+ goto time_stop;
+
err = fbnic_pcs_irq_enable(fbd);
if (err)
- goto release_ownership;
+ goto time_stop;
/* Pull the BMC config and initialize the RPC */
fbnic_bmc_rpc_init(fbd);
fbnic_rss_reinit(fbd, fbn);
return 0;
+time_stop:
+ fbnic_time_stop(fbn);
release_ownership:
fbnic_fw_xmit_ownership_msg(fbn->fbd, false);
free_resources:
@@ -82,6 +88,7 @@ static int fbnic_stop(struct net_device *netdev)
fbnic_down(fbn);
fbnic_pcs_irq_disable(fbn->fbd);
+ fbnic_time_stop(fbn);
fbnic_fw_xmit_ownership_msg(fbn->fbd, false);
fbnic_free_resources(fbn);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index 6c27da09a612..f530e3235634 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -33,6 +33,15 @@ struct fbnic_net {
u8 fec;
u8 link_mode;
+ /* Cached top bits of the HW time counter for 40b -> 64b conversion */
+ u32 time_high;
+ /* Protect readers of @time_offset, writers take @time_lock. */
+ struct u64_stats_sync time_seq;
+ /* Offset in ns between free running NIC PHC and time set via PTP
+ * clock callbacks
+ */
+ s64 time_offset;
+
u16 num_tx_queues;
u16 num_rx_queues;
@@ -60,6 +69,12 @@ void fbnic_reset_queues(struct fbnic_net *fbn,
unsigned int tx, unsigned int rx);
void fbnic_set_ethtool_ops(struct net_device *dev);
+int fbnic_ptp_setup(struct fbnic_dev *fbd);
+void fbnic_ptp_destroy(struct fbnic_dev *fbd);
+void fbnic_time_init(struct fbnic_net *fbn);
+int fbnic_time_start(struct fbnic_net *fbn);
+void fbnic_time_stop(struct fbnic_net *fbn);
+
void __fbnic_set_rx_mode(struct net_device *netdev);
void fbnic_clear_rx_mode(struct net_device *netdev);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
index a4809fe0fc24..32e5b4cc55bd 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
@@ -300,14 +300,20 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto init_failure_mode;
}
+ err = fbnic_ptp_setup(fbd);
+ if (err)
+ goto ifm_free_netdev;
+
err = fbnic_netdev_register(netdev);
if (err) {
dev_err(&pdev->dev, "Netdev registration failed: %d\n", err);
- goto ifm_free_netdev;
+ goto ifm_destroy_ptp;
}
return 0;
+ifm_destroy_ptp:
+ fbnic_ptp_destroy(fbd);
ifm_free_netdev:
fbnic_netdev_free(fbd);
init_failure_mode:
@@ -342,6 +348,7 @@ static void fbnic_remove(struct pci_dev *pdev)
fbnic_netdev_unregister(netdev);
cancel_delayed_work_sync(&fbd->service_task);
+ fbnic_ptp_destroy(fbd);
fbnic_netdev_free(fbd);
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_time.c b/drivers/net/ethernet/meta/fbnic/fbnic_time.c
new file mode 100644
index 000000000000..f79b1388cb6e
--- /dev/null
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_time.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bitfield.h>
+#include <linux/jiffies.h>
+#include <linux/limits.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timer.h>
+
+#include "fbnic.h"
+#include "fbnic_csr.h"
+#include "fbnic_netdev.h"
+
+/* FBNIC timing & PTP implementation
+ * Datapath uses truncated 40b timestamps for scheduling and event reporting.
+ * We need to promote those to full 64b, hence we periodically cache the top
+ * 32bit of the HW time counter. Since this makes our time reporting non-atomic
+ * we leave the HW clock free running and adjust time offsets in SW as needed.
+ * Time offset is 64bit - we need a seq counter for 32bit machines.
+ * Time offset and the cache of top bits are independent so we don't need
+ * a coherent snapshot of both - READ_ONCE()/WRITE_ONCE() + writer side lock
+ * are enough.
+ */
+
+/* Period of refresh of top bits of timestamp, give ourselves a 8x margin.
+ * This should translate to once a minute.
+ * The use of nsecs_to_jiffies() should be safe for a <=40b nsec value.
+ */
+#define FBNIC_TS_HIGH_REFRESH_JIF nsecs_to_jiffies((1ULL << 40) / 16)
+
+static struct fbnic_dev *fbnic_from_ptp_info(struct ptp_clock_info *ptp)
+{
+ return container_of(ptp, struct fbnic_dev, ptp_info);
+}
+
+/* This function is "slow" because we could try guessing which high part
+ * is correct based on low instead of re-reading, and skip reading @hi
+ * twice altogether if @lo is far enough from 0.
+ */
+static u64 __fbnic_time_get_slow(struct fbnic_dev *fbd)
+{
+ u32 hi, lo;
+
+ lockdep_assert_held(&fbd->time_lock);
+
+ do {
+ hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
+ lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
+ } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
+
+ return (u64)hi << 32 | lo;
+}
+
+static void __fbnic_time_set_addend(struct fbnic_dev *fbd, u64 addend)
+{
+ lockdep_assert_held(&fbd->time_lock);
+
+ fbnic_wr32(fbd, FBNIC_PTP_ADD_VAL_NS,
+ FIELD_PREP(FBNIC_PTP_ADD_VAL_NS_MASK, addend >> 32));
+ fbnic_wr32(fbd, FBNIC_PTP_ADD_VAL_SUBNS, (u32)addend);
+}
+
+static void fbnic_ptp_fresh_check(struct fbnic_dev *fbd)
+{
+ if (time_is_after_jiffies(fbd->last_read +
+ FBNIC_TS_HIGH_REFRESH_JIF * 3 / 2))
+ return;
+
+ dev_warn(fbd->dev, "NIC timestamp refresh stall, delayed by %lu sec\n",
+ (jiffies - fbd->last_read - FBNIC_TS_HIGH_REFRESH_JIF) / HZ);
+}
+
+static void fbnic_ptp_refresh_time(struct fbnic_dev *fbd, struct fbnic_net *fbn)
+{
+ unsigned long flags;
+ u32 hi;
+
+ spin_lock_irqsave(&fbd->time_lock, flags);
+ hi = fbnic_rd32(fbn->fbd, FBNIC_PTP_CTR_VAL_HI);
+ if (!fbnic_present(fbd))
+ goto out; /* Don't bother handling, reset is pending */
+ WRITE_ONCE(fbn->time_high, hi);
+ fbd->last_read = jiffies;
+ out:
+ spin_unlock_irqrestore(&fbd->time_lock, flags);
+}
+
+static long fbnic_ptp_do_aux_work(struct ptp_clock_info *ptp)
+{
+ struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
+ struct fbnic_net *fbn;
+
+ fbn = netdev_priv(fbd->netdev);
+
+ fbnic_ptp_fresh_check(fbd);
+ fbnic_ptp_refresh_time(fbd, fbn);
+
+ return FBNIC_TS_HIGH_REFRESH_JIF;
+}
+
+static int fbnic_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ int scale = 16; /* scaled_ppm has 16 fractional places */
+ struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
+ u64 scaled_delta, dclk_period;
+ unsigned long flags;
+ s64 delta;
+ int sgn;
+
+ sgn = scaled_ppm >= 0 ? 1 : -1;
+ scaled_ppm *= sgn;
+
+ /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
+ dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
+
+ while (scaled_ppm > U64_MAX / dclk_period) {
+ scaled_ppm >>= 1;
+ scale--;
+ }
+
+ scaled_delta = (u64)scaled_ppm * dclk_period;
+ delta = div_u64(scaled_delta, 1000 * 1000) >> scale;
+ delta *= sgn;
+
+ spin_lock_irqsave(&fbd->time_lock, flags);
+ __fbnic_time_set_addend(fbd, dclk_period + delta);
+ fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_ADDEND_SET);
+
+ /* Flush, make sure FBNIC_PTP_ADD_VAL_* is stable for at least 4 clks */
+ fbnic_rd32(fbd, FBNIC_PTP_SPARE);
+ spin_unlock_irqrestore(&fbd->time_lock, flags);
+
+ return fbnic_present(fbd) ? 0 : -EIO;
+}
+
+static int fbnic_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
+ struct fbnic_net *fbn;
+ unsigned long flags;
+
+ fbn = netdev_priv(fbd->netdev);
+
+ spin_lock_irqsave(&fbd->time_lock, flags);
+ u64_stats_update_begin(&fbn->time_seq);
+ WRITE_ONCE(fbn->time_offset, READ_ONCE(fbn->time_offset) + delta);
+ u64_stats_update_end(&fbn->time_seq);
+ spin_unlock_irqrestore(&fbd->time_lock, flags);
+
+ return 0;
+}
+
+static int
+fbnic_ptp_gettimex64(struct ptp_clock_info *ptp, struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
+ struct fbnic_net *fbn;
+ unsigned long flags;
+ u64 time_ns;
+ u32 hi, lo;
+
+ fbn = netdev_priv(fbd->netdev);
+
+ spin_lock_irqsave(&fbd->time_lock, flags);
+
+ do {
+ hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
+ ptp_read_system_prets(sts);
+ lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
+ ptp_read_system_postts(sts);
+ /* Similarly to comment above __fbnic_time_get_slow()
+ * - this can be optimized if needed.
+ */
+ } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
+
+ time_ns = ((u64)hi << 32 | lo) + fbn->time_offset;
+ spin_unlock_irqrestore(&fbd->time_lock, flags);
+
+ if (!fbnic_present(fbd))
+ return -EIO;
+
+ *ts = ns_to_timespec64(time_ns);
+
+ return 0;
+}
+
+static int
+fbnic_ptp_settime64(struct ptp_clock_info *ptp, const struct timespec64 *ts)
+{
+ struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
+ struct fbnic_net *fbn;
+ unsigned long flags;
+ u64 dev_ns, host_ns;
+ int ret;
+
+ fbn = netdev_priv(fbd->netdev);
+
+ host_ns = timespec64_to_ns(ts);
+
+ spin_lock_irqsave(&fbd->time_lock, flags);
+
+ dev_ns = __fbnic_time_get_slow(fbd);
+
+ if (fbnic_present(fbd)) {
+ u64_stats_update_begin(&fbn->time_seq);
+ WRITE_ONCE(fbn->time_offset, host_ns - dev_ns);
+ u64_stats_update_end(&fbn->time_seq);
+ ret = 0;
+ } else {
+ ret = -EIO;
+ }
+ spin_unlock_irqrestore(&fbd->time_lock, flags);
+
+ return ret;
+}
+
+static const struct ptp_clock_info fbnic_ptp_info = {
+ .owner = THIS_MODULE,
+ /* 1,000,000,000 - 1 PPB to ensure increment is positive
+ * after max negative adjustment.
+ */
+ .max_adj = 999999999,
+ .do_aux_work = fbnic_ptp_do_aux_work,
+ .adjfine = fbnic_ptp_adjfine,
+ .adjtime = fbnic_ptp_adjtime,
+ .gettimex64 = fbnic_ptp_gettimex64,
+ .settime64 = fbnic_ptp_settime64,
+};
+
+static void fbnic_ptp_reset(struct fbnic_dev *fbd)
+{
+ struct fbnic_net *fbn = netdev_priv(fbd->netdev);
+ u64 dclk_period;
+
+ fbnic_wr32(fbd, FBNIC_PTP_CTRL,
+ FBNIC_PTP_CTRL_EN |
+ FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
+
+ /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
+ dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
+
+ __fbnic_time_set_addend(fbd, dclk_period);
+
+ fbnic_wr32(fbd, FBNIC_PTP_INIT_HI, 0);
+ fbnic_wr32(fbd, FBNIC_PTP_INIT_LO, 0);
+
+ fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_INIT);
+
+ fbnic_wr32(fbd, FBNIC_PTP_CTRL,
+ FBNIC_PTP_CTRL_EN |
+ FBNIC_PTP_CTRL_TQS_OUT_EN |
+ FIELD_PREP(FBNIC_PTP_CTRL_MAC_OUT_IVAL, 3) |
+ FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
+
+ fbnic_rd32(fbd, FBNIC_PTP_SPARE);
+
+ fbn->time_offset = 0;
+ fbn->time_high = 0;
+}
+
+void fbnic_time_init(struct fbnic_net *fbn)
+{
+ /* This is not really a statistic, but the locking primitive fits
+ * our use case perfectly, we need an atomic 8 bytes READ_ONCE() /
+ * WRITE_ONCE() behavior.
+ */
+ u64_stats_init(&fbn->time_seq);
+}
+
+int fbnic_time_start(struct fbnic_net *fbn)
+{
+ fbnic_ptp_refresh_time(fbn->fbd, fbn);
+ /* Assume that fbnic_ptp_do_aux_work() will never be called if not
+ * scheduled here
+ */
+ return ptp_schedule_worker(fbn->fbd->ptp, FBNIC_TS_HIGH_REFRESH_JIF);
+}
+
+void fbnic_time_stop(struct fbnic_net *fbn)
+{
+ ptp_cancel_worker_sync(fbn->fbd->ptp);
+ fbnic_ptp_fresh_check(fbn->fbd);
+}
+
+int fbnic_ptp_setup(struct fbnic_dev *fbd)
+{
+ struct device *dev = fbd->dev;
+ unsigned long flags;
+
+ spin_lock_init(&fbd->time_lock);
+
+ spin_lock_irqsave(&fbd->time_lock, flags); /* Appease lockdep */
+ fbnic_ptp_reset(fbd);
+ spin_unlock_irqrestore(&fbd->time_lock, flags);
+
+ memcpy(&fbd->ptp_info, &fbnic_ptp_info, sizeof(fbnic_ptp_info));
+
+ fbd->ptp = ptp_clock_register(&fbd->ptp_info, dev);
+ if (IS_ERR(fbd->ptp))
+ dev_err(dev, "Failed to register PTP: %pe\n", fbd->ptp);
+
+ return PTR_ERR_OR_ZERO(fbd->ptp);
+}
+
+void fbnic_ptp_destroy(struct fbnic_dev *fbd)
+{
+ if (!fbd->ptp)
+ return;
+ ptp_clock_unregister(fbd->ptp);
+}
--
2.43.5
* Re: [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-03 12:39 ` [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support Vadim Fedorenko
@ 2024-10-04 23:05 ` Jacob Keller
2024-10-07 13:07 ` Vadim Fedorenko
0 siblings, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2024-10-04 23:05 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
> +/* FBNIC timing & PTP implementation
> + * Datapath uses truncated 40b timestamps for scheduling and event reporting.
> + * We need to promote those to full 64b, hence we periodically cache the top
> + * 32bit of the HW time counter. Since this makes our time reporting non-atomic
> + * we leave the HW clock free running and adjust time offsets in SW as needed.
> + * Time offset is 64bit - we need a seq counter for 32bit machines.
> + * Time offset and the cache of top bits are independent so we don't need
> + * a coherent snapshot of both - READ_ONCE()/WRITE_ONCE() + writer side lock
> + * are enough.
> + */
> +
If you're going to implement adjustments only in software anyways, can
you use a timecounter+cyclecounter instead of re-implementing?
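For reference, a rough sketch of the timecounter/cyclecounter route
being suggested (hypothetical fbd->cc and fbd->tc fields, 1:1
mult/shift since the counter already runs in nanoseconds, locking
elided; a sketch of the suggestion, not the driver's code):

#include <linux/timecounter.h>

static u64 fbnic_cc_read(const struct cyclecounter *cc)
{
        struct fbnic_dev *fbd = container_of(cc, struct fbnic_dev, cc);

        /* Free-running 64b nanosecond counter; time_lock elided here */
        return __fbnic_time_get_slow(fbd);
}

static void fbnic_timecounter_setup(struct fbnic_dev *fbd)
{
        fbd->cc.read = fbnic_cc_read;
        fbd->cc.mask = CYCLECOUNTER_MASK(64);
        fbd->cc.mult = 1;       /* counter is already in ns */
        fbd->cc.shift = 0;

        timecounter_init(&fbd->tc, &fbd->cc, ktime_get_real_ns());
}

gettimex64() would then reduce to timecounter_read(&fbd->tc), adjtime()
to timecounter_adjtime(&fbd->tc, delta), and the 40b datapath stamps
could be extended to 64b and converted with timecounter_cyc2time().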
> +/* Period of refresh of top bits of timestamp, give ourselves a 8x margin.
> + * This should translate to once a minute.
> + * The use of nsecs_to_jiffies() should be safe for a <=40b nsec value.
> + */
> +#define FBNIC_TS_HIGH_REFRESH_JIF nsecs_to_jiffies((1ULL << 40) / 16)
> +
> +static struct fbnic_dev *fbnic_from_ptp_info(struct ptp_clock_info *ptp)
> +{
> + return container_of(ptp, struct fbnic_dev, ptp_info);
> +}
> +
> +/* This function is "slow" because we could try guessing which high part
> + * is correct based on low instead of re-reading, and skip reading @hi
> + * twice altogether if @lo is far enough from 0.
> + */
> +static u64 __fbnic_time_get_slow(struct fbnic_dev *fbd)
> +{
> + u32 hi, lo;
> +
> + lockdep_assert_held(&fbd->time_lock);
> +
> + do {
> + hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
> + lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
> + } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
> +
How long does it take hi to overflow? You may be able to get away
without looping.
I think another way to implement this is to read lo, then hi, then lo
again, and if lo2 is smaller than lo, you know hi overflowed and you can
re-read hi
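Roughly, a sketch of that read ordering against the same CSRs
(hypothetical helper name):

static u64 __fbnic_time_get_alt(struct fbnic_dev *fbd)
{
        u32 lo, hi, lo2;

        lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
        hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
        lo2 = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
        if (lo2 < lo) {
                /* @lo wrapped between the two reads, so @hi may be
                 * stale; re-read it and pair it with the post-wrap @lo2.
                 */
                hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
                lo = lo2;
        }

        return (u64)hi << 32 | lo;
}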
> +static int fbnic_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
> +{
> + int scale = 16; /* scaled_ppm has 16 fractional places */
> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
> + u64 scaled_delta, dclk_period;
> + unsigned long flags;
> + s64 delta;
> + int sgn;
> +
> + sgn = scaled_ppm >= 0 ? 1 : -1;
> + scaled_ppm *= sgn;
> +
> + /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
> + dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
> +
> + while (scaled_ppm > U64_MAX / dclk_period) {
> + scaled_ppm >>= 1;
> + scale--;
> + }
> +
> + scaled_delta = (u64)scaled_ppm * dclk_period;
> + delta = div_u64(scaled_delta, 1000 * 1000) >> scale;
> + delta *= sgn;
Please use adjust_by_scaled_ppm or diff_by_scaled_ppm. They make use of
mul_u64_u64_div_u64 where feasible to do the temporary multiplication
step as 128-bit arithmetic.
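As a sketch, the adjfine body could then shrink to something like this
(adjust_by_scaled_ppm() comes from linux/ptp_clock_kernel.h; same lock
and addend write sequence as in the patch):

static int fbnic_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
{
        struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
        u64 dclk_period, addend;
        unsigned long flags;

        /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
        dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
        /* Scale the nominal period by (1 + scaled_ppm / 2^16 / 10^6) */
        addend = adjust_by_scaled_ppm(dclk_period, scaled_ppm);

        spin_lock_irqsave(&fbd->time_lock, flags);
        __fbnic_time_set_addend(fbd, addend);
        fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_ADDEND_SET);
        /* Flush, keep FBNIC_PTP_ADD_VAL_* stable for at least 4 clks */
        fbnic_rd32(fbd, FBNIC_PTP_SPARE);
        spin_unlock_irqrestore(&fbd->time_lock, flags);

        return fbnic_present(fbd) ? 0 : -EIO;
}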
> +
> + spin_lock_irqsave(&fbd->time_lock, flags);
> + __fbnic_time_set_addend(fbd, dclk_period + delta);
> + fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_ADDEND_SET);
> +
> + /* Flush, make sure FBNIC_PTP_ADD_VAL_* is stable for at least 4 clks */
> + fbnic_rd32(fbd, FBNIC_PTP_SPARE);
> + spin_unlock_irqrestore(&fbd->time_lock, flags);
> +
> + return fbnic_present(fbd) ? 0 : -EIO;
> +}
> +
> +static int fbnic_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
> +{
> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
> + struct fbnic_net *fbn;
> + unsigned long flags;
> +
> + fbn = netdev_priv(fbd->netdev);
> +
> + spin_lock_irqsave(&fbd->time_lock, flags);
> + u64_stats_update_begin(&fbn->time_seq);
> + WRITE_ONCE(fbn->time_offset, READ_ONCE(fbn->time_offset) + delta);
> + u64_stats_update_end(&fbn->time_seq);
> + spin_unlock_irqrestore(&fbd->time_lock, flags);
> +
> + return 0;
> +}
> +
> +static int
> +fbnic_ptp_gettimex64(struct ptp_clock_info *ptp, struct timespec64 *ts,
> + struct ptp_system_timestamp *sts)
> +{
> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
> + struct fbnic_net *fbn;
> + unsigned long flags;
> + u64 time_ns;
> + u32 hi, lo;
> +
> + fbn = netdev_priv(fbd->netdev);
> +
> + spin_lock_irqsave(&fbd->time_lock, flags);
> +
> + do {
> + hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
> + ptp_read_system_prets(sts);
> + lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
> + ptp_read_system_postts(sts);
> + /* Similarly to comment above __fbnic_time_get_slow()
> + * - this can be optimized if needed.
> + */
> + } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
> +
> + time_ns = ((u64)hi << 32 | lo) + fbn->time_offset;
> + spin_unlock_irqrestore(&fbd->time_lock, flags);
> +
> + if (!fbnic_present(fbd))
> + return -EIO;
> +
> + *ts = ns_to_timespec64(time_ns);
> +
> + return 0;
> +}
> +
> +static int
> +fbnic_ptp_settime64(struct ptp_clock_info *ptp, const struct timespec64 *ts)
> +{
> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
> + struct fbnic_net *fbn;
> + unsigned long flags;
> + u64 dev_ns, host_ns;
> + int ret;
> +
> + fbn = netdev_priv(fbd->netdev);
> +
> + host_ns = timespec64_to_ns(ts);
> +
> + spin_lock_irqsave(&fbd->time_lock, flags);
> +
> + dev_ns = __fbnic_time_get_slow(fbd);
> +
> + if (fbnic_present(fbd)) {
> + u64_stats_update_begin(&fbn->time_seq);
> + WRITE_ONCE(fbn->time_offset, host_ns - dev_ns);
> + u64_stats_update_end(&fbn->time_seq);
> + ret = 0;
> + } else {
> + ret = -EIO;
> + }
> + spin_unlock_irqrestore(&fbd->time_lock, flags);
> +
> + return ret;
> +}
> +
Since all your operations are using a software offset and leaving the
timer free-running, I think this really would make more sense using a
timecounter and cyclecounter.
> +static const struct ptp_clock_info fbnic_ptp_info = {
> + .owner = THIS_MODULE,
> + /* 1,000,000,000 - 1 PPB to ensure increment is positive
> + * after max negative adjustment.
> + */
> + .max_adj = 999999999,
> + .do_aux_work = fbnic_ptp_do_aux_work,
> + .adjfine = fbnic_ptp_adjfine,
> + .adjtime = fbnic_ptp_adjtime,
> + .gettimex64 = fbnic_ptp_gettimex64,
> + .settime64 = fbnic_ptp_settime64,
> +};
> +
> +static void fbnic_ptp_reset(struct fbnic_dev *fbd)
> +{
> + struct fbnic_net *fbn = netdev_priv(fbd->netdev);
> + u64 dclk_period;
> +
> + fbnic_wr32(fbd, FBNIC_PTP_CTRL,
> + FBNIC_PTP_CTRL_EN |
> + FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
> +
> + /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
> + dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
> +
> + __fbnic_time_set_addend(fbd, dclk_period);
> +
> + fbnic_wr32(fbd, FBNIC_PTP_INIT_HI, 0);
> + fbnic_wr32(fbd, FBNIC_PTP_INIT_LO, 0);
> +
> + fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_INIT);
> +
> + fbnic_wr32(fbd, FBNIC_PTP_CTRL,
> + FBNIC_PTP_CTRL_EN |
> + FBNIC_PTP_CTRL_TQS_OUT_EN |
> + FIELD_PREP(FBNIC_PTP_CTRL_MAC_OUT_IVAL, 3) |
> + FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
> +
> + fbnic_rd32(fbd, FBNIC_PTP_SPARE);
> +
> + fbn->time_offset = 0;
> + fbn->time_high = 0;
Not entirely sure how it works for you, but we found that most users
expect to minimize the time loss or changes due to a reset, and we
re-apply the last known configuration during a reset instead of letting
the clock reset back to base state.
* Re: [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-04 23:05 ` Jacob Keller
@ 2024-10-07 13:07 ` Vadim Fedorenko
2024-10-07 23:09 ` Jakub Kicinski
0 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-07 13:07 UTC (permalink / raw)
To: Jacob Keller, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 05/10/2024 00:05, Jacob Keller wrote:
>
>
> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>> +/* FBNIC timing & PTP implementation
>> + * Datapath uses truncated 40b timestamps for scheduling and event reporting.
>> + * We need to promote those to full 64b, hence we periodically cache the top
>> + * 32bit of the HW time counter. Since this makes our time reporting non-atomic
>> + * we leave the HW clock free running and adjust time offsets in SW as needed.
>> + * Time offset is 64bit - we need a seq counter for 32bit machines.
>> + * Time offset and the cache of top bits are independent so we don't need
>> + * a coherent snapshot of both - READ_ONCE()/WRITE_ONCE() + writer side lock
>> + * are enough.
>> + */
>> +
>
> If you're going to implement adjustments only in software anyways, can
> you use a timecounter+cyclecounter instead of re-implementing?
Thanks for pointing this out, I'll make it with timecounter/cyclecounter
>
>> +/* Period of refresh of top bits of timestamp, give ourselves a 8x margin.
>> + * This should translate to once a minute.
>> + * The use of nsecs_to_jiffies() should be safe for a <=40b nsec value.
>> + */
>> +#define FBNIC_TS_HIGH_REFRESH_JIF nsecs_to_jiffies((1ULL << 40) / 16)
>> +
>> +static struct fbnic_dev *fbnic_from_ptp_info(struct ptp_clock_info *ptp)
>> +{
>> + return container_of(ptp, struct fbnic_dev, ptp_info);
>> +}
>> +
>> +/* This function is "slow" because we could try guessing which high part
>> + * is correct based on low instead of re-reading, and skip reading @hi
>> + * twice altogether if @lo is far enough from 0.
>> + */
>> +static u64 __fbnic_time_get_slow(struct fbnic_dev *fbd)
>> +{
>> + u32 hi, lo;
>> +
>> + lockdep_assert_held(&fbd->time_lock);
>> +
>> + do {
>> + hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
>> + lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
>> + } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
>> +
>
> How long does it take hi to overflow? You may be able to get away
> without looping.
According to the comment above it may take up to 8 minutes to overflow,
but the cache is refreshed every minute, so we do not expect this loop
to retry often.
> I think another way to implement this is to read lo, then hi, then lo
> again, and if lo2 is smaller than lo, you know hi overflowed and you can
> re-read hi
That's an option too, I'll think of it, thanks!
>
>> +static int fbnic_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
>> +{
>> + int scale = 16; /* scaled_ppm has 16 fractional places */
>> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
>> + u64 scaled_delta, dclk_period;
>> + unsigned long flags;
>> + s64 delta;
>> + int sgn;
>> +
>> + sgn = scaled_ppm >= 0 ? 1 : -1;
>> + scaled_ppm *= sgn;
>> +
>> + /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
>> + dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
>> +
>> + while (scaled_ppm > U64_MAX / dclk_period) {
>> + scaled_ppm >>= 1;
>> + scale--;
>> + }
>> +
>> + scaled_delta = (u64)scaled_ppm * dclk_period;
>> + delta = div_u64(scaled_delta, 1000 * 1000) >> scale;
>> + delta *= sgn;
>
>
> Please use adjust_by_scaled_ppm or diff_by_scaled_ppm. It makes use of
> mul_u64_u64_div_u64 where feasible to do the temporary multiplication
> step as 128 bit arithmetic.
>
Got it, will use it in the next version.
>> +
>> + spin_lock_irqsave(&fbd->time_lock, flags);
>> + __fbnic_time_set_addend(fbd, dclk_period + delta);
>> + fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_ADDEND_SET);
>> +
>> + /* Flush, make sure FBNIC_PTP_ADD_VAL_* is stable for at least 4 clks */
>> + fbnic_rd32(fbd, FBNIC_PTP_SPARE);
>> + spin_unlock_irqrestore(&fbd->time_lock, flags);
>> +
>> + return fbnic_present(fbd) ? 0 : -EIO;
>> +}
>> +
>> +static int fbnic_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
>> +{
>> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
>> + struct fbnic_net *fbn;
>> + unsigned long flags;
>> +
>> + fbn = netdev_priv(fbd->netdev);
>> +
>> + spin_lock_irqsave(&fbd->time_lock, flags);
>> + u64_stats_update_begin(&fbn->time_seq);
>> + WRITE_ONCE(fbn->time_offset, READ_ONCE(fbn->time_offset) + delta);
>> + u64_stats_update_end(&fbn->time_seq);
>> + spin_unlock_irqrestore(&fbd->time_lock, flags);
>> +
>> + return 0;
>> +}
>> +
>> +static int
>> +fbnic_ptp_gettimex64(struct ptp_clock_info *ptp, struct timespec64 *ts,
>> + struct ptp_system_timestamp *sts)
>> +{
>> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
>> + struct fbnic_net *fbn;
>> + unsigned long flags;
>> + u64 time_ns;
>> + u32 hi, lo;
>> +
>> + fbn = netdev_priv(fbd->netdev);
>> +
>> + spin_lock_irqsave(&fbd->time_lock, flags);
>> +
>> + do {
>> + hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
>> + ptp_read_system_prets(sts);
>> + lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
>> + ptp_read_system_postts(sts);
>> + /* Similarly to comment above __fbnic_time_get_slow()
>> + * - this can be optimized if needed.
>> + */
>> + } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
>> +
>> + time_ns = ((u64)hi << 32 | lo) + fbn->time_offset;
>> + spin_unlock_irqrestore(&fbd->time_lock, flags);
>> +
>> + if (!fbnic_present(fbd))
>> + return -EIO;
>> +
>> + *ts = ns_to_timespec64(time_ns);
>> +
>> + return 0;
>> +}
>> +
>> +static int
>> +fbnic_ptp_settime64(struct ptp_clock_info *ptp, const struct timespec64 *ts)
>> +{
>> + struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
>> + struct fbnic_net *fbn;
>> + unsigned long flags;
>> + u64 dev_ns, host_ns;
>> + int ret;
>> +
>> + fbn = netdev_priv(fbd->netdev);
>> +
>> + host_ns = timespec64_to_ns(ts);
>> +
>> + spin_lock_irqsave(&fbd->time_lock, flags);
>> +
>> + dev_ns = __fbnic_time_get_slow(fbd);
>> +
>> + if (fbnic_present(fbd)) {
>> + u64_stats_update_begin(&fbn->time_seq);
>> + WRITE_ONCE(fbn->time_offset, host_ns - dev_ns);
>> + u64_stats_update_end(&fbn->time_seq);
>> + ret = 0;
>> + } else {
>> + ret = -EIO;
>> + }
>> + spin_unlock_irqrestore(&fbd->time_lock, flags);
>> +
>> + return ret;
>> +}
>> +
>
> Since all your operations are using a software offset and leaving the
> timer free-running, I think this really would make more sense using a
> timecounter and cyclecounter.
>
Yeah, looks like it's better to use common interfaces.
>> +static const struct ptp_clock_info fbnic_ptp_info = {
>> + .owner = THIS_MODULE,
>> + /* 1,000,000,000 - 1 PPB to ensure increment is positive
>> + * after max negative adjustment.
>> + */
>> + .max_adj = 999999999,
>> + .do_aux_work = fbnic_ptp_do_aux_work,
>> + .adjfine = fbnic_ptp_adjfine,
>> + .adjtime = fbnic_ptp_adjtime,
>> + .gettimex64 = fbnic_ptp_gettimex64,
>> + .settime64 = fbnic_ptp_settime64,
>> +};
>> +
>> +static void fbnic_ptp_reset(struct fbnic_dev *fbd)
>> +{
>> + struct fbnic_net *fbn = netdev_priv(fbd->netdev);
>> + u64 dclk_period;
>> +
>> + fbnic_wr32(fbd, FBNIC_PTP_CTRL,
>> + FBNIC_PTP_CTRL_EN |
>> + FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
>> +
>> + /* d_clock is 600 MHz; which in Q16.32 fixed point ns is: */
>> + dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
>> +
>> + __fbnic_time_set_addend(fbd, dclk_period);
>> +
>> + fbnic_wr32(fbd, FBNIC_PTP_INIT_HI, 0);
>> + fbnic_wr32(fbd, FBNIC_PTP_INIT_LO, 0);
>> +
>> + fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_INIT);
>> +
>> + fbnic_wr32(fbd, FBNIC_PTP_CTRL,
>> + FBNIC_PTP_CTRL_EN |
>> + FBNIC_PTP_CTRL_TQS_OUT_EN |
>> + FIELD_PREP(FBNIC_PTP_CTRL_MAC_OUT_IVAL, 3) |
>> + FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
>> +
>> + fbnic_rd32(fbd, FBNIC_PTP_SPARE);
>> +
>> + fbn->time_offset = 0;
>> + fbn->time_high = 0;
>
> Not entirely sure how it works for you, but we found that most users
> expect to minimize the time loss or changes due to a reset, and we
> re-apply the last known configuration during a reset instead of letting
> the clock reset back to base state.
In case of reset the counter will be re-initialized, which means the
stored offset would be invalid anyway. It's better to reset the values
back to the base state because it makes it easier for the app to decide
to step the clock rather than adjust the frequency, and it will make
the clocks align faster.
* Re: [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-07 13:07 ` Vadim Fedorenko
@ 2024-10-07 23:09 ` Jakub Kicinski
2024-10-07 23:49 ` Jacob Keller
2024-10-07 23:57 ` Jacob Keller
0 siblings, 2 replies; 24+ messages in thread
From: Jakub Kicinski @ 2024-10-07 23:09 UTC (permalink / raw)
To: Vadim Fedorenko
Cc: Jacob Keller, Vadim Fedorenko, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck, netdev, Richard Cochran
On Mon, 7 Oct 2024 14:07:17 +0100 Vadim Fedorenko wrote:
> On 05/10/2024 00:05, Jacob Keller wrote:
> > On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
> >> +/* FBNIC timing & PTP implementation
> >> + * Datapath uses truncated 40b timestamps for scheduling and event reporting.
> >> + * We need to promote those to full 64b, hence we periodically cache the top
> >> + * 32bit of the HW time counter. Since this makes our time reporting non-atomic
> >> + * we leave the HW clock free running and adjust time offsets in SW as needed.
> >> + * Time offset is 64bit - we need a seq counter for 32bit machines.
> >> + * Time offset and the cache of top bits are independent so we don't need
> >> + * a coherent snapshot of both - READ_ONCE()/WRITE_ONCE() + writer side lock
> >> + * are enough.
> >> + */
> >> +
> >
> > If you're going to implement adjustments only in software anyways, can
> > you use a timecounter+cyclecounter instead of re-implementing?
>
> Thanks for pointing this out, I'll make it with timecounter/cyclecounter
Please don't, the clock is syntonized, we only do simple offsetting.
> >> +/* Period of refresh of top bits of timestamp, give ourselves a 8x margin.
> >> + * This should translate to once a minute.
> >> + * The use of nsecs_to_jiffies() should be safe for a <=40b nsec value.
> >> + */
> >> +#define FBNIC_TS_HIGH_REFRESH_JIF nsecs_to_jiffies((1ULL << 40) / 16)
> >> +
> >> +static struct fbnic_dev *fbnic_from_ptp_info(struct ptp_clock_info *ptp)
> >> +{
> >> + return container_of(ptp, struct fbnic_dev, ptp_info);
> >> +}
> >> +
> >> +/* This function is "slow" because we could try guessing which high part
> >> + * is correct based on low instead of re-reading, and skip reading @hi
> >> + * twice altogether if @lo is far enough from 0.
> >> + */
> >> +static u64 __fbnic_time_get_slow(struct fbnic_dev *fbd)
> >> +{
> >> + u32 hi, lo;
> >> +
> >> + lockdep_assert_held(&fbd->time_lock);
> >> +
> >> + do {
> >> + hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
> >> + lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
> >> + } while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
> >> +
> >
> > How long does it take hi to overflow? You may be able to get away
> > without looping.
>
> According to comment above it may take up to 8 minutes to overflow, but
> the updates to the cache should be done every minute. We do not expect
> this cycle to happen often.
>
> > I think another way to implement this is to read lo, then hi, then lo
> > again, and if lo2 is smaller than lo, you know hi overflowed and you can
> > re-read hi
>
> That's an option too, I'll think of it, thanks!
The triple read is less neat in case hi jumps by more than 1.
* Re: [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-07 23:09 ` Jakub Kicinski
@ 2024-10-07 23:49 ` Jacob Keller
2024-10-08 1:16 ` Jakub Kicinski
2024-10-07 23:57 ` Jacob Keller
1 sibling, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2024-10-07 23:49 UTC (permalink / raw)
To: Jakub Kicinski, Vadim Fedorenko
Cc: Vadim Fedorenko, David Ahern, Paolo Abeni, David S. Miller,
Alexander Duyck, netdev, Richard Cochran
On 10/7/2024 4:09 PM, Jakub Kicinski wrote:
> On Mon, 7 Oct 2024 14:07:17 +0100 Vadim Fedorenko wrote:
>> On 05/10/2024 00:05, Jacob Keller wrote:
>>> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>>>> +/* FBNIC timing & PTP implementation
>>>> + * Datapath uses truncated 40b timestamps for scheduling and event reporting.
>>>> + * We need to promote those to full 64b, hence we periodically cache the top
>>>> + * 32bit of the HW time counter. Since this makes our time reporting non-atomic
>>>> + * we leave the HW clock free running and adjust time offsets in SW as needed.
>>>> + * Time offset is 64bit - we need a seq counter for 32bit machines.
>>>> + * Time offset and the cache of top bits are independent so we don't need
>>>> + * a coherent snapshot of both - READ_ONCE()/WRITE_ONCE() + writer side lock
>>>> + * are enough.
>>>> + */
>>>> +
>>>
>>> If you're going to implement adjustments only in software anyways, can
>>> you use a timecounter+cyclecounter instead of re-implementing?
>>
>> Thanks for pointing this out, I'll make it with timecounter/cyclecounter
>
> Please don't, the clock is synthonized, we only do simple offsetting.
>
I still think it makes sense to re-use the logic for converting cycles
to full 64bit time values if possible.
If you're already doing offset adjustment, you still have to apply the
same logic to every timestamp, which is exactly what a timecounter does
for you.
You can even use a timecounter and cyclecounter without using its
ability to do syntonizing, by just setting the cyclecounter values
appropriately, and leaving the syntonizing to the hardware mechanism.
I think the result is much easier to understand and follow than
re-implementing a different mechanism for offset correction with bespoke
code.
* Re: [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-07 23:49 ` Jacob Keller
@ 2024-10-08 1:16 ` Jakub Kicinski
0 siblings, 0 replies; 24+ messages in thread
From: Jakub Kicinski @ 2024-10-08 1:16 UTC (permalink / raw)
To: Jacob Keller
Cc: Vadim Fedorenko, Vadim Fedorenko, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck, netdev, Richard Cochran
On Mon, 7 Oct 2024 16:49:45 -0700 Jacob Keller wrote:
> >> Thanks for pointing this out, I'll make it with timecounter/cyclecounter
> >
> > Please don't, the clock is synthonized, we only do simple offsetting.
> >
> I still think it makes sense to re-use the logic for converting cycles
> to full 64bit time values if possible.
>
> If you're already doing offset adjustment, you still have to apply the
> same logic to every timestamp, which is exactly what a timecounter does
> for you.
"exactly what a timecounter does for you" is an overstatement.
timecounter tracks the overflows by itself and syntonizes.
We have the 64b value, just need to combine the top bits.
> You can even use a timecounter and cyclecounter without using its
> ability to do syntonizing, by just setting the cyclecounter values
> appropriately, and leaving the syntonizing to the hardware mechanism.
>
> I think the result is much easier to understand and follow than
> re-implementing a different mechanism for offset correction with bespoke
> code.
This is fast path code, I hope we can get a simple addition and
an overflow check right.
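A minimal sketch of that addition-plus-overflow-check for a 40b
datapath stamp (hypothetical helper; assumes @time_high caches the
upper 32 bits of the 64b counter as in the patch, and elides the
@time_seq handling needed for 32-bit machines):

static u64 fbnic_ts40_to_ns_sketch(struct fbnic_net *fbn, u64 ts40)
{
        u32 high = READ_ONCE(fbn->time_high);
        u64 time_ns;
        u8 ts_top;

        /* Bits 39:32 of the stamp overlap the low byte of the cached
         * high word; a large backwards gap means the 40b stamp wrapped
         * after the cache was refreshed, so carry into bit 40.
         */
        ts_top = ts40 >> 32;
        if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
                high += 0x100;

        time_ns = (u64)(high >> 8) << 40 | ts40;

        return time_ns + READ_ONCE(fbn->time_offset);
}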
* Re: [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support
2024-10-07 23:09 ` Jakub Kicinski
2024-10-07 23:49 ` Jacob Keller
@ 2024-10-07 23:57 ` Jacob Keller
1 sibling, 0 replies; 24+ messages in thread
From: Jacob Keller @ 2024-10-07 23:57 UTC (permalink / raw)
To: Jakub Kicinski, Vadim Fedorenko
Cc: Vadim Fedorenko, David Ahern, Paolo Abeni, David S. Miller,
Alexander Duyck, netdev, Richard Cochran
On 10/7/2024 4:09 PM, Jakub Kicinski wrote:
>>> I think another way to implement this is to read lo, then hi, then lo
>>> again, and if lo2 is smaller than lo, you know hi overflowed and you can
>>> re-read hi
>>
>> That's an option too, I'll think of it, thanks!
>
> The triple read is less neat in case hi jumps by more than 1.
>
Wouldn't that require the lower bits to accumulate more than an entire
cycle for that to happen?
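(For scale: the low word covers 2^32 ns, roughly 4.3 s, so hi jumping
by more than 1 would indeed require the three register reads to span
more than one full period of lo.)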
* [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-03 12:39 [PATCH net-next v3 0/5] eth: fbnic: add timestamping support Vadim Fedorenko
2024-10-03 12:39 ` [PATCH net-next v3 1/5] eth: fbnic: add software TX " Vadim Fedorenko
2024-10-03 12:39 ` [PATCH net-next v3 2/5] eth: fbnic: add initial PHC support Vadim Fedorenko
@ 2024-10-03 12:39 ` Vadim Fedorenko
2024-10-04 23:14 ` Jacob Keller
2024-10-04 23:18 ` Jacob Keller
2024-10-03 12:39 ` [PATCH net-next v3 4/5] eth: fbnic: add TX " Vadim Fedorenko
2024-10-03 12:39 ` [PATCH net-next v3 5/5] eth: fbnic: add ethtool timestamping statistics Vadim Fedorenko
4 siblings, 2 replies; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-03 12:39 UTC (permalink / raw)
To: Vadim Fedorenko, Jakub Kicinski, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck
Cc: Vadim Fedorenko, netdev, Richard Cochran
Add callbacks to support timestamping configuration via ethtool.
Add processing of RX timestamps.
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 1 +
.../net/ethernet/meta/fbnic/fbnic_ethtool.c | 16 +++-
.../net/ethernet/meta/fbnic/fbnic_netdev.c | 80 +++++++++++++++++++
.../net/ethernet/meta/fbnic/fbnic_netdev.h | 3 +
drivers/net/ethernet/meta/fbnic/fbnic_rpc.c | 31 +++++++
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 61 ++++++++++++++
drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 1 +
7 files changed, 192 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
index 290b924b7749..79cdd231d327 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
@@ -586,6 +586,7 @@ enum {
};
#define FBNIC_RPC_ACT_TBL0_DMA_HINT CSR_GENMASK(24, 16)
+#define FBNIC_RPC_ACT_TBL0_TS_ENA CSR_BIT(28)
#define FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID CSR_BIT(30)
#define FBNIC_RPC_ACT_TBL1_DEFAULT 0x0840b /* 0x2102c */
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index ffc773014e0f..3afb7227574a 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -10,8 +10,22 @@ static int
fbnic_get_ts_info(struct net_device *netdev,
struct kernel_ethtool_ts_info *tsinfo)
{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+
+ tsinfo->phc_index = ptp_clock_index(fbn->fbd->ptp);
+
tsinfo->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE;
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ tsinfo->rx_filters =
+ BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
+ BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+ BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+ BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+ BIT(HWTSTAMP_FILTER_ALL);
return 0;
}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index 6e6d8988db54..c08798fad203 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -324,6 +324,84 @@ void fbnic_clear_rx_mode(struct net_device *netdev)
__dev_mc_unsync(netdev, NULL);
}
+static int fbnic_hwtstamp_get(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+
+ *config = fbn->hwtstamp_config;
+
+ return 0;
+}
+
+static int fbnic_hwtstamp_set(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ int old_rx_filter;
+
+ if (config->source != HWTSTAMP_SOURCE_NETDEV)
+ return -EOPNOTSUPP;
+
+ if (!kernel_hwtstamp_config_changed(config, &fbn->hwtstamp_config))
+ return 0;
+
+ /* Upscale the filters */
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ break;
+ case HWTSTAMP_FILTER_NTP_ALL:
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* Configure */
+ old_rx_filter = fbn->hwtstamp_config.rx_filter;
+ memcpy(&fbn->hwtstamp_config, config, sizeof(*config));
+
+ if (old_rx_filter != config->rx_filter && netif_running(fbn->netdev)) {
+ fbnic_rss_reinit(fbn->fbd, fbn);
+ fbnic_write_rules(fbn->fbd);
+ }
+
+ /* Save / report back filter configuration
+ * Note that our filter configuration is inexact. Instead of
+ * filtering for a specific UDP port or L2 Ethertype we are
+ * filtering in all UDP or all non-IP packets for timestamping. So
+ * if anything other than FILTER_ALL is requested we report
+ * FILTER_SOME indicating that we will be timestamping a few
+ * additional packets.
+ */
+ if (config->rx_filter > HWTSTAMP_FILTER_ALL)
+ config->rx_filter = HWTSTAMP_FILTER_SOME;
+
+ return 0;
+}
+
static void fbnic_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats64)
{
@@ -401,6 +479,8 @@ static const struct net_device_ops fbnic_netdev_ops = {
.ndo_set_mac_address = fbnic_set_mac,
.ndo_set_rx_mode = fbnic_set_rx_mode,
.ndo_get_stats64 = fbnic_get_stats64,
+ .ndo_hwtstamp_get = fbnic_hwtstamp_get,
+ .ndo_hwtstamp_set = fbnic_hwtstamp_set,
};
static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
index f530e3235634..b8417b300778 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h
@@ -54,6 +54,9 @@ struct fbnic_net {
struct fbnic_queue_stats rx_stats;
u64 link_down_events;
+ /* Time stamping filter config */
+ struct kernel_hwtstamp_config hwtstamp_config;
+
struct list_head napis;
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
index c8aa29fc052b..337b8b3aef2f 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
@@ -244,6 +244,12 @@ void fbnic_bmc_rpc_init(struct fbnic_dev *fbd)
((_ip) ? FBNIC_RPC_TCAM_ACT1_IP_VALID : 0) | \
((_v6) ? FBNIC_RPC_TCAM_ACT1_IP_IS_V6 : 0))
+#define FBNIC_TSTAMP_MASK(_all, _udp, _ether) \
+ (((_all) ? ((1u << FBNIC_NUM_HASH_OPT) - 1) : 0) | \
+ ((_udp) ? (1u << FBNIC_UDP6_HASH_OPT) | \
+ (1u << FBNIC_UDP4_HASH_OPT) : 0) | \
+ ((_ether) ? (1u << FBNIC_ETHER_HASH_OPT) : 0))
+
void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn)
{
static const u32 act1_value[FBNIC_NUM_HASH_OPT] = {
@@ -255,6 +261,7 @@ void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn)
FBNIC_ACT1_INIT(0, 0, 1, 0), /* IP4 */
0 /* Ether */
};
+ u32 tstamp_mask = 0;
unsigned int i;
/* To support scenarios where a BMC is present we must write the
@@ -264,6 +271,28 @@ void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn)
BUILD_BUG_ON(FBNIC_RSS_EN_NUM_UNICAST * 2 != FBNIC_RSS_EN_NUM_ENTRIES);
BUILD_BUG_ON(ARRAY_SIZE(act1_value) != FBNIC_NUM_HASH_OPT);
+ /* Set timestamp mask with 1b per flow type */
+ if (fbn->hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+ switch (fbn->hwtstamp_config.rx_filter) {
+ case HWTSTAMP_FILTER_ALL:
+ tstamp_mask = FBNIC_TSTAMP_MASK(1, 1, 1);
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ tstamp_mask = FBNIC_TSTAMP_MASK(0, 1, 1);
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ tstamp_mask = FBNIC_TSTAMP_MASK(0, 1, 0);
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ tstamp_mask = FBNIC_TSTAMP_MASK(0, 0, 1);
+ break;
+ default:
+ netdev_warn(fbn->netdev, "Unsupported hwtstamp_rx_filter\n");
+ break;
+ }
+ }
+
/* Program RSS hash enable mask for host in action TCAM/table. */
for (i = fbnic_bmc_present(fbd) ? 0 : FBNIC_RSS_EN_NUM_UNICAST;
i < FBNIC_RSS_EN_NUM_ENTRIES; i++) {
@@ -287,6 +316,8 @@ void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn)
if (!dest)
dest = FBNIC_RPC_ACT_TBL0_DROP;
+ else if (tstamp_mask & (1u << flow_type))
+ dest |= FBNIC_RPC_ACT_TBL0_TS_ENA;
if (act1_value[flow_type] & FBNIC_RPC_TCAM_ACT1_L4_VALID)
dest |= FIELD_PREP(FBNIC_RPC_ACT_TBL0_DMA_HINT,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index 8337d49bad0b..b64c1d4f925e 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -43,6 +43,44 @@ static void fbnic_ring_wr32(struct fbnic_ring *ring, unsigned int csr, u32 val)
writel(val, csr_base + csr);
}
+/**
+ * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
+ * @fbn: netdev priv of the FB NIC
+ * @ts40: timestamp read from a descriptor
+ *
+ * Return: u64 value of PHC time in nanoseconds
+ *
+ * Convert truncated 40 bit device timestamp as read from a descriptor
+ * to the full PHC time in nanoseconds.
+ */
+static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
+{
+ unsigned int s;
+ u64 time_ns;
+ s64 offset;
+ u8 ts_top;
+ u32 high;
+
+ do {
+ s = u64_stats_fetch_begin(&fbn->time_seq);
+ offset = READ_ONCE(fbn->time_offset);
+ } while (u64_stats_fetch_retry(&fbn->time_seq, s));
+
+ high = READ_ONCE(fbn->time_high);
+
+ /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
+ time_ns = (u64)(high >> 8) << 40 | ts40;
+
+ /* Compare bits 32-39 between periodic reads and ts40,
+ * see if HW clock may have wrapped since last read
+ */
+ ts_top = ts40 >> 32;
+ if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
+ time_ns += 1ULL << 40;
+
+ return time_ns + offset;
+}
+
static unsigned int fbnic_desc_unused(struct fbnic_ring *ring)
{
return (ring->head - ring->tail - 1) & ring->size_mask;
@@ -710,6 +748,10 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv,
/* Set MAC header specific fields */
skb->protocol = eth_type_trans(skb, nv->napi.dev);
+ /* Add timestamp if present */
+ if (pkt->hwtstamp)
+ skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp;
+
return skb;
}
@@ -720,6 +762,23 @@ static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd)
PKT_HASH_TYPE_L2;
}
+static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
+ struct fbnic_pkt_buff *pkt)
+{
+ struct fbnic_net *fbn;
+ u64 ns, ts;
+
+ if (!FIELD_GET(FBNIC_RCD_OPT_META_TS, rcd))
+ return;
+
+ fbn = netdev_priv(nv->napi.dev);
+ ts = FIELD_GET(FBNIC_RCD_OPT_META_TS_MASK, rcd);
+ ns = fbnic_ts40_to_ns(fbn, ts);
+
+ /* Add timestamp to shared info */
+ pkt->hwtstamp = ns_to_ktime(ns);
+}
+
static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
u64 rcd, struct sk_buff *skb,
struct fbnic_q_triad *qt)
@@ -784,6 +843,8 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
if (FIELD_GET(FBNIC_RCD_OPT_META_TYPE_MASK, rcd))
break;
+ fbnic_rx_tstamp(nv, rcd, pkt);
+
/* We currently ignore the action table index */
break;
case FBNIC_RCD_TYPE_META:
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index 2f91f68d11d5..682d875f08c0 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -47,6 +47,7 @@ struct fbnic_net;
struct fbnic_pkt_buff {
struct xdp_buff buff;
+ ktime_t hwtstamp;
u32 data_truesize;
u16 data_len;
u16 nr_frags;
--
2.43.5
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-03 12:39 ` [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support Vadim Fedorenko
@ 2024-10-04 23:14 ` Jacob Keller
2024-10-08 16:47 ` Vadim Fedorenko
2024-10-04 23:18 ` Jacob Keller
1 sibling, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2024-10-04 23:14 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
> Add callbacks to support timestamping configuration via ethtool.
> Add processing of RX timestamps.
>
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>
> +/**
> + * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
> + * @fbn: netdev priv of the FB NIC
> + * @ts40: timestamp read from a descriptor
> + *
> + * Return: u64 value of PHC time in nanoseconds
> + *
> + * Convert truncated 40 bit device timestamp as read from a descriptor
> + * to the full PHC time in nanoseconds.
> + */
> +static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
> +{
> + unsigned int s;
> + u64 time_ns;
> + s64 offset;
> + u8 ts_top;
> + u32 high;
> +
> + do {
> + s = u64_stats_fetch_begin(&fbn->time_seq);
> + offset = READ_ONCE(fbn->time_offset);
> + } while (u64_stats_fetch_retry(&fbn->time_seq, s));
> +
> + high = READ_ONCE(fbn->time_high);
> +
> + /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
> + time_ns = (u64)(high >> 8) << 40 | ts40;
> +
> + /* Compare bits 32-39 between periodic reads and ts40,
> + * see if HW clock may have wrapped since last read
> + */
> + ts_top = ts40 >> 32;
> + if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
> + time_ns += 1ULL << 40;
> +
> + return time_ns + offset;
> +}
> +
This logic doesn't seem to match the logic used by the cyclecounter
code, and it's not clear to me if this is safe against a race between
time_high updating and the packet timestamp arriving.
the timestamp could arrive either before or after the time_high update,
and the logic needs to ensure the appropriate high bits are applied in
both cases.
Again, I think your use case makes sense to just implement with a
timecounter and cyclecounter, since you're not modifying the hardware
cycle counter and are leaving it as free-running.
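Roughly what I have in mind, as an untested sketch; the struct members
cc, tc and ptp_info plus fbnic_read_hw_clock() below are placeholders,
not existing fbnic symbols:

#include <linux/timecounter.h>

/* Expose the free-running 40-bit counter through the cyclecounter API */
static u64 fbnic_cc_read(const struct cyclecounter *cc)
{
	struct fbnic_net *fbn = container_of(cc, struct fbnic_net, cc);

	return fbnic_read_hw_clock(fbn);	/* placeholder register read */
}

static void fbnic_time_init_sketch(struct fbnic_net *fbn)
{
	fbn->cc.read = fbnic_cc_read;
	fbn->cc.mask = CYCLECOUNTER_MASK(40);
	fbn->cc.mult = 1;	/* counter already counts nanoseconds */
	fbn->cc.shift = 0;

	timecounter_init(&fbn->tc, &fbn->cc, ktime_get_real_ns());
}

/* Periodic read, well inside the ~18 minute wrap period of 2^40 ns,
 * keeps the timecounter from missing a wrap (locking omitted here).
 */
static long fbnic_ptp_do_aux_work_sketch(struct ptp_clock_info *ptp)
{
	struct fbnic_net *fbn = container_of(ptp, struct fbnic_net, ptp_info);

	timecounter_read(&fbn->tc);
	return HZ;
}

/* Per-descriptor conversion then collapses to a single call */
static u64 fbnic_ts40_to_ns_sketch(struct fbnic_net *fbn, u64 ts40)
{
	return timecounter_cyc2time(&fbn->tc, ts40);
}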
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-04 23:14 ` Jacob Keller
@ 2024-10-08 16:47 ` Vadim Fedorenko
2024-10-08 17:01 ` Jacob Keller
0 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-08 16:47 UTC (permalink / raw)
To: Jacob Keller, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 05/10/2024 00:14, Jacob Keller wrote:
>
>
> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>> Add callbacks to support timestamping configuration via ethtool.
>> Add processing of RX timestamps.
>>
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>>
>> +/**
>> + * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
>> + * @fbn: netdev priv of the FB NIC
>> + * @ts40: timestamp read from a descriptor
>> + *
>> + * Return: u64 value of PHC time in nanoseconds
>> + *
>> + * Convert truncated 40 bit device timestamp as read from a descriptor
>> + * to the full PHC time in nanoseconds.
>> + */
>> +static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
>> +{
>> + unsigned int s;
>> + u64 time_ns;
>> + s64 offset;
>> + u8 ts_top;
>> + u32 high;
>> +
>> + do {
>> + s = u64_stats_fetch_begin(&fbn->time_seq);
>> + offset = READ_ONCE(fbn->time_offset);
>> + } while (u64_stats_fetch_retry(&fbn->time_seq, s));
>> +
>> + high = READ_ONCE(fbn->time_high);
>> +
>> + /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
>> + time_ns = (u64)(high >> 8) << 40 | ts40;
>> +
>> + /* Compare bits 32-39 between periodic reads and ts40,
>> + * see if HW clock may have wrapped since last read
>> + */
>> + ts_top = ts40 >> 32;
>> + if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
>> + time_ns += 1ULL << 40;
>> +
>> + return time_ns + offset;
>> +}
>> +
>
> This logic doesn't seem to match the logic used by the cyclecounter
> code, and it's not clear to me if this is safe against a race between
> time_high updating and the packet timestamp arriving.
>
> the timestamp could arrive either before or after the time_high update,
> and the logic needs to ensure the appropriate high bits are applied in
> both cases.
To avoid this race condition we decided to make sure that incoming
timestamps are always later than the cached high bits. That will make the
logic above correct.
> Again, I think your use case makes sense to just implement with a
> timecounter and cyclecounter, since you're not modifying the hardware
> cycle counter and are leaving it as free-running.
After discussion with Jakub we decided to keep the simple logic without
timecounter + cyclecounter, as it's pretty straightforward.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-08 16:47 ` Vadim Fedorenko
@ 2024-10-08 17:01 ` Jacob Keller
2024-10-08 17:13 ` Vadim Fedorenko
0 siblings, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2024-10-08 17:01 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/8/2024 9:47 AM, Vadim Fedorenko wrote:
> On 05/10/2024 00:14, Jacob Keller wrote:
>>
>>
>> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>>> Add callbacks to support timestamping configuration via ethtool.
>>> Add processing of RX timestamps.
>>>
>>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>>>
>>> +/**
>>> + * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
>>> + * @fbn: netdev priv of the FB NIC
>>> + * @ts40: timestamp read from a descriptor
>>> + *
>>> + * Return: u64 value of PHC time in nanoseconds
>>> + *
>>> + * Convert truncated 40 bit device timestamp as read from a descriptor
>>> + * to the full PHC time in nanoseconds.
>>> + */
>>> +static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
>>> +{
>>> + unsigned int s;
>>> + u64 time_ns;
>>> + s64 offset;
>>> + u8 ts_top;
>>> + u32 high;
>>> +
>>> + do {
>>> + s = u64_stats_fetch_begin(&fbn->time_seq);
>>> + offset = READ_ONCE(fbn->time_offset);
>>> + } while (u64_stats_fetch_retry(&fbn->time_seq, s));
>>> +
>>> + high = READ_ONCE(fbn->time_high);
>>> +
>>> + /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
>>> + time_ns = (u64)(high >> 8) << 40 | ts40;
>>> +
>>> + /* Compare bits 32-39 between periodic reads and ts40,
>>> + * see if HW clock may have wrapped since last read
>>> + */
>>> + ts_top = ts40 >> 32;
>>> + if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
>>> + time_ns += 1ULL << 40;
>>> +
>>> + return time_ns + offset;
>>> +}
>>> +
>>
>> This logic doesn't seem to match the logic used by the cyclecounter
>> code, and it's not clear to me if this is safe against a race between
>> time_high updating and the packet timestamp arriving.
>>
>> the timestamp could arrive either before or after the time_high update,
>> and the logic needs to ensure the appropriate high bits are applied in
>> both cases.
>
> To avoid this race condition we decided to make sure that incoming
> timestamps are always later than the cached high bits. That will make the
> logic above correct.
>
How do you do that? Timestamps come in asynchronously. The value is
captured by hardware. How do you guarantee that it was captured only
after an update to the cached high bits?
I guess if it arrives before the roll-over, you handle that by the range
check to see if the clock wrapped around.
Hmm.
But what happens if an Rx timestamp races with an update to the high
value and comes in just before the 40-bit time would have overflowed,
while the cached time_high value is captured just after it overflowed?
Do you have some mechanism to ensure that this is impossible? i.e.
either ensuring that the conversion uses the old time_high value, or
ensuring that Rx timestamps can't come in during an update?
Otherwise, I think the logic here could accidentally combine a time
value from an Rx timestamp taken just prior to the time_high update and
just prior to a rollover. It would then see a huge gap between the
values and trigger the addition of another 1 << 40, which would push
the result even farther from what the real value should have been.
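To put made-up numbers on the "captured just after it overflowed" case
(a standalone illustration compiled in userspace, nothing read from real
hardware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Packet latched just before a 2^40 ns boundary ... */
	uint64_t real_pkt_time = (0xABULL << 40) | 0xFFFFFFFF00ULL;
	uint64_t ts40 = real_pkt_time & ((1ULL << 40) - 1);
	/* ... but the periodic read lands just after the boundary. */
	uint64_t cached_time = real_pkt_time + 0x200;
	uint32_t high = cached_time >> 32;

	/* Same combination as fbnic_ts40_to_ns() */
	uint64_t time_ns = (uint64_t)(high >> 8) << 40 | ts40;
	uint8_t ts_top = ts40 >> 32;

	if (ts_top < (uint8_t)high && (uint8_t)high - ts_top > UINT8_MAX / 2)
		time_ns += 1ULL << 40;

	/* Prints 1099511627776, i.e. the timestamp comes out a full
	 * 2^40 ns (~18 minutes) in the future because the post-wrap
	 * high bits get paired with the pre-wrap low 40 bits.
	 */
	printf("error: %lld ns\n", (long long)(time_ns - real_pkt_time));
	return 0;
}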
>> Again, I think your use case makes sense to just implement with a
>> timecounter and cyclecounter, since you're not modifying the hardware
>> cycle counter and are leaving it as free-running.
>
> After discussion with Jakub we decided to keep the simple logic without
> timecounter + cyclecounter, as it's pretty straightforward.
Fair enough.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-08 17:01 ` Jacob Keller
@ 2024-10-08 17:13 ` Vadim Fedorenko
0 siblings, 0 replies; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-08 17:13 UTC (permalink / raw)
To: Jacob Keller, Jakub Kicinski, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 08/10/2024 18:01, Jacob Keller wrote:
>
>
> On 10/8/2024 9:47 AM, Vadim Fedorenko wrote:
>> On 05/10/2024 00:14, Jacob Keller wrote:
>>>
>>>
>>> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>>>> Add callbacks to support timestamping configuration via ethtool.
>>>> Add processing of RX timestamps.
>>>>
>>>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>>>>
>>>> +/**
>>>> + * fbnic_ts40_to_ns() - convert descriptor timestamp to PHC time
>>>> + * @fbn: netdev priv of the FB NIC
>>>> + * @ts40: timestamp read from a descriptor
>>>> + *
>>>> + * Return: u64 value of PHC time in nanoseconds
>>>> + *
>>>> + * Convert truncated 40 bit device timestamp as read from a descriptor
>>>> + * to the full PHC time in nanoseconds.
>>>> + */
>>>> +static __maybe_unused u64 fbnic_ts40_to_ns(struct fbnic_net *fbn, u64 ts40)
>>>> +{
>>>> + unsigned int s;
>>>> + u64 time_ns;
>>>> + s64 offset;
>>>> + u8 ts_top;
>>>> + u32 high;
>>>> +
>>>> + do {
>>>> + s = u64_stats_fetch_begin(&fbn->time_seq);
>>>> + offset = READ_ONCE(fbn->time_offset);
>>>> + } while (u64_stats_fetch_retry(&fbn->time_seq, s));
>>>> +
>>>> + high = READ_ONCE(fbn->time_high);
>>>> +
>>>> + /* Bits 63..40 from periodic clock reads, 39..0 from ts40 */
>>>> + time_ns = (u64)(high >> 8) << 40 | ts40;
>>>> +
>>>> + /* Compare bits 32-39 between periodic reads and ts40,
>>>> + * see if HW clock may have wrapped since last read
>>>> + */
>>>> + ts_top = ts40 >> 32;
>>>> + if (ts_top < (u8)high && (u8)high - ts_top > U8_MAX / 2)
>>>> + time_ns += 1ULL << 40;
>>>> +
>>>> + return time_ns + offset;
>>>> +}
>>>> +
>>>
>>> This logic doesn't seem to match the logic used by the cyclecounter
>>> code, and it's not clear to me if this is safe against a race between
>>> time_high updating and the packet timestamp arriving.
>>>
>>> the timestamp could arrive either before or after the time_high update,
>>> and the logic needs to ensure the appropriate high bits are applied in
>>> both cases.
>>
>> To avoid this race condition we decided to make sure that incoming
>> timestamps are always later than the cached high bits. That will make the
>> logic above correct.
>>
>
> How do you do that? Timestamps come in asynchronously. The value is
> captured by hardware. How do you guarantee that it was captured only
> after an update to the cached high bits?
>
> I guess if it arrives before the roll-over, you handle that by the range
> check to see if the clock wrapped around.
>
> Hmm.
>
> But what happens if an Rx timestamp races with an update to the high
> value and comes in just before the 40-bit time would have overflowed,
> while the cached time_high value is captured just after it overflowed?
>
> Do you have some mechanism to ensure that this is impossible? i.e.
> either ensuring that the conversion uses the old time_high value, or
> ensuring that Rx timestamps can't come in during an update?
>
> Otherwise, I think the logic here could accidentally combine a time
> value from an Rx timestamp taken just prior to the time_high update and
> just prior to a rollover. It would then see a huge gap between the
> values and trigger the addition of another 1 << 40, which would push
> the result even farther from what the real value should have been.
Yes, you are absolutely correct; we missed the case where the logic can
add an extra (1 << 40) on top of already miscalculated high bits. This
can only happen when the lower 8 bits of the cached high value overflow.
But if we keep the cached high value slightly below the real value, that
can never happen. Subtracting 16 from the high value makes the cached
value lag by roughly one minute. If packet processing were ever delayed
by a whole minute, the user-space app would long since have given up
waiting for the packet/timestamp and would ignore the stale timestamp
anyway. In all other cases the logic in fbnic_ts40_to_ns() works
correctly.
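In rough kernel-C the refresh would be something like this (sketch only;
fbnic_read_hw_clock_ns() stands in for however we actually read the full
PHC time, and the time_seq write-side locking is elided):

/* Periodic refresh: publish the cached high word 16 units behind the
 * real clock (16 * 2^32 ns, roughly 68 seconds), so any timestamp the
 * hardware can still hand us is ahead of the cached value and the wrap
 * check in fbnic_ts40_to_ns() only fires for a genuine wrap.
 */
static void fbnic_time_refresh_sketch(struct fbnic_net *fbn)
{
	u64 hw_ns = fbnic_read_hw_clock_ns(fbn);	/* placeholder */

	WRITE_ONCE(fbn->time_high, upper_32_bits(hw_ns) - 16);
}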
>>> Again, I think your use case makes sense to just implement with a
>>> timecounter and cyclecounter, since you're not modifying the hardware
>>> cycle counter and are leaving it as free-running.
>>
>> After discussion with Jakub we decided to keep the simple logic without
>> timecounter + cyclecounter, as it's pretty straightforward.
>
> Fair enough.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-03 12:39 ` [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support Vadim Fedorenko
2024-10-04 23:14 ` Jacob Keller
@ 2024-10-04 23:18 ` Jacob Keller
2024-10-07 10:26 ` Vadim Fedorenko
1 sibling, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2024-10-04 23:18 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
> Add callbacks to support timestamping configuration via ethtool.
> Add processing of RX timestamps.
>
> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
> +static int fbnic_hwtstamp_set(struct net_device *netdev,
> + struct kernel_hwtstamp_config *config,
> + struct netlink_ext_ack *extack)
> +{
> + struct fbnic_net *fbn = netdev_priv(netdev);
> + int old_rx_filter;
> +
> + if (config->source != HWTSTAMP_SOURCE_NETDEV)
> + return -EOPNOTSUPP;
> +
> + if (!kernel_hwtstamp_config_changed(config, &fbn->hwtstamp_config))
> + return 0;
> +
> + /* Upscale the filters */
> + switch (config->rx_filter) {
> + case HWTSTAMP_FILTER_NONE:
> + case HWTSTAMP_FILTER_ALL:
> + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
> + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
> + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
> + case HWTSTAMP_FILTER_PTP_V2_EVENT:
> + break;
> + case HWTSTAMP_FILTER_NTP_ALL:
> + config->rx_filter = HWTSTAMP_FILTER_ALL;
> + break;
> + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
> + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
> + config->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
> + break;
> + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
> + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
> + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
> + break;
> + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
> + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
> + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
> + break;
> + case HWTSTAMP_FILTER_PTP_V2_SYNC:
> + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
> + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
> + break;
> + default:
> + return -ERANGE;
> + }
> +
> + /* Configure */
> + old_rx_filter = fbn->hwtstamp_config.rx_filter;
> + memcpy(&fbn->hwtstamp_config, config, sizeof(*config));
> +
> + if (old_rx_filter != config->rx_filter && netif_running(fbn->netdev)) {
> + fbnic_rss_reinit(fbn->fbd, fbn);
> + fbnic_write_rules(fbn->fbd);
> + }
> +
> + /* Save / report back filter configuration
> + * Note that our filter configuration is inexact. Instead of
> + * filtering for a specific UDP port or L2 Ethertype we are
> + * filtering in all UDP or all non-IP packets for timestamping. So
> + * if anything other than FILTER_ALL is requested we report
> + * FILTER_SOME indicating that we will be timestamping a few
> + * additional packets.
> + */
Is there any benefit to implementing anything other than
HWTSTAMP_FILTER_ALL?
Those are typically considered legacy with the primary reason being to
support hardware which does not support timestamping all frames.
I suppose if you have measurements showing that supporting them is valuable
(e.g. because of the performance impact of timestamping all frames?) it makes
sense to support them. But otherwise I'm not sure it's worth the extra complexity.
Upgrading the filtering modes to HWTSTAMP_FILTER_ALL is acceptable and
is done by a few drivers.
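i.e. if the filter granularity turns out not to buy you much, the set
path collapses to roughly this (sketch only, reusing the names from your
patch):

static int fbnic_hwtstamp_set_all(struct net_device *netdev,
				  struct kernel_hwtstamp_config *config,
				  struct netlink_ext_ack *extack)
{
	struct fbnic_net *fbn = netdev_priv(netdev);

	/* Timestamp either nothing or everything; report the upgrade
	 * back to user space via rx_filter.
	 */
	if (config->rx_filter != HWTSTAMP_FILTER_NONE)
		config->rx_filter = HWTSTAMP_FILTER_ALL;

	fbn->hwtstamp_config = *config;

	/* reprogram the RPC rules as in your patch if the filter changed */
	return 0;
}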
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-04 23:18 ` Jacob Keller
@ 2024-10-07 10:26 ` Vadim Fedorenko
2024-10-07 23:51 ` Jacob Keller
0 siblings, 1 reply; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-07 10:26 UTC (permalink / raw)
To: Jacob Keller, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 05/10/2024 00:18, Jacob Keller wrote:
>
>
> On 10/3/2024 5:39 AM, Vadim Fedorenko wrote:
>> Add callbacks to support timestamping configuration via ethtool.
>> Add processing of RX timestamps.
>>
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>> +static int fbnic_hwtstamp_set(struct net_device *netdev,
>> + struct kernel_hwtstamp_config *config,
>> + struct netlink_ext_ack *extack)
>> +{
>> + struct fbnic_net *fbn = netdev_priv(netdev);
>> + int old_rx_filter;
>> +
>> + if (config->source != HWTSTAMP_SOURCE_NETDEV)
>> + return -EOPNOTSUPP;
>> +
>> + if (!kernel_hwtstamp_config_changed(config, &fbn->hwtstamp_config))
>> + return 0;
>> +
>> + /* Upscale the filters */
>> + switch (config->rx_filter) {
>> + case HWTSTAMP_FILTER_NONE:
>> + case HWTSTAMP_FILTER_ALL:
>> + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
>> + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
>> + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
>> + case HWTSTAMP_FILTER_PTP_V2_EVENT:
>> + break;
>> + case HWTSTAMP_FILTER_NTP_ALL:
>> + config->rx_filter = HWTSTAMP_FILTER_ALL;
>> + break;
>> + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
>> + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
>> + config->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
>> + break;
>> + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
>> + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
>> + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
>> + break;
>> + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
>> + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
>> + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
>> + break;
>> + case HWTSTAMP_FILTER_PTP_V2_SYNC:
>> + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
>> + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
>> + break;
>> + default:
>> + return -ERANGE;
>> + }
>> +
>> + /* Configure */
>> + old_rx_filter = fbn->hwtstamp_config.rx_filter;
>> + memcpy(&fbn->hwtstamp_config, config, sizeof(*config));
>> +
>> + if (old_rx_filter != config->rx_filter && netif_running(fbn->netdev)) {
>> + fbnic_rss_reinit(fbn->fbd, fbn);
>> + fbnic_write_rules(fbn->fbd);
>> + }
>> +
>> + /* Save / report back filter configuration
>> + * Note that our filter configuration is inexact. Instead of
>> + * filtering for a specific UDP port or L2 Ethertype we are
>> + * filtering in all UDP or all non-IP packets for timestamping. So
>> + * if anything other than FILTER_ALL is requested we report
>> + * FILTER_SOME indicating that we will be timestamping a few
>> + * additional packets.
>> + */
>
> Is there any benefit to implementing anything other than
> HWTSTAMP_FILTER_ALL?
>
> Those are typically considered legacy with the primary reason being to
> support hardware which does not support timestamping all frames.
>
>> I suppose if you have measurements showing that supporting them is valuable
>> (e.g. because of the performance impact of timestamping all frames?) it makes
>> sense to support them. But otherwise I'm not sure it's worth the extra complexity.
>
> Upgrading the filtering modes to HWTSTAMP_FILTER_ALL is acceptable and
> is done by a few drivers.
Even though the hardware is able to timestamp TX packets at line rate,
we would like to avoid generating twice as many interrupts in cases
where we don't need every packet timestamped. And as mentioned in the
comment, we don't have very precise HW filters, but we would still like
to avoid timestamping TCP packets, since TCP is the most heavily used
protocol on the host.
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-07 10:26 ` Vadim Fedorenko
@ 2024-10-07 23:51 ` Jacob Keller
2024-10-08 9:58 ` Vadim Fedorenko
0 siblings, 1 reply; 24+ messages in thread
From: Jacob Keller @ 2024-10-07 23:51 UTC (permalink / raw)
To: Vadim Fedorenko, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 10/7/2024 3:26 AM, Vadim Fedorenko wrote:
> On 05/10/2024 00:18, Jacob Keller wrote:
>> Is there any benefit to implementing anything other than
>> HWTSTAMP_FILTER_ALL?
>>
>> Those are typically considered legacy with the primary reason being to
>> support hardware which does not support timestamping all frames.
>>
>> I suppose if you have measurements showing that supporting them is valuable
>> (e.g. because of the performance impact of timestamping all frames?) it makes
>> sense to support them. But otherwise I'm not sure it's worth the extra complexity.
>>
>> Upgrading the filtering modes to HWTSTAMP_FILTER_ALL is acceptable and
>> is done by a few drivers.
>
> Even though the hardware is able to timestamp TX packets at line rate,
> we would like to avoid generating twice as many interrupts in cases
> where we don't need every packet timestamped. And as mentioned in the
> comment, we don't have very precise HW filters, but we would still like
> to avoid timestamping TCP packets, since TCP is the most heavily used
> protocol on the host.
Tx timestamps don't use the filters in the first place. The filter only
applies to Rx timestamps. You should only initiate a Tx timestamp when
requested, which will generally not be the case for TCP.
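(For reference, the Tx opt-in is per socket; a minimal sketch of the
standard SO_TIMESTAMPING usage, nothing fbnic-specific:)

#include <linux/net_tstamp.h>
#include <sys/socket.h>
#include <stdio.h>

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37
#endif

static int enable_hw_tx_timestamps(int fd)
{
	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE |
		    SOF_TIMESTAMPING_OPT_ID;

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
		       &flags, sizeof(flags)) < 0) {
		perror("SO_TIMESTAMPING");
		return -1;
	}

	/* Completed timestamps come back on the socket error queue as
	 * SCM_TIMESTAMPING cmsgs (ts[2] is the raw hardware time),
	 * read with recvmsg(fd, ..., MSG_ERRQUEUE).
	 */
	return 0;
}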
Are you saying that Rx timestamps generate interrupts?
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support
2024-10-07 23:51 ` Jacob Keller
@ 2024-10-08 9:58 ` Vadim Fedorenko
0 siblings, 0 replies; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-08 9:58 UTC (permalink / raw)
To: Jacob Keller, Vadim Fedorenko, Jakub Kicinski, David Ahern,
Paolo Abeni, David S. Miller, Alexander Duyck
Cc: netdev, Richard Cochran
On 08/10/2024 00:51, Jacob Keller wrote:
>
>
> On 10/7/2024 3:26 AM, Vadim Fedorenko wrote:
>> On 05/10/2024 00:18, Jacob Keller wrote:
>>> Is there any benefit to implementing anything other than
>>> HWTSTAMP_FILTER_ALL?
>>>
>>> Those are typically considered legacy with the primary reason being to
>>> support hardware which does not support timestamping all frames.
>>>
>>> I suppose if you have measurements showing that supporting them is valuable
>>> (e.g. because of the performance impact of timestamping all frames?) it makes
>>> sense to support them. But otherwise I'm not sure it's worth the extra complexity.
>>>
>>> Upgrading the filtering modes to HWTSTAMP_FILTER_ALL is acceptable and
>>> is done by a few drivers.
>>
>> Even though the hardware is able to timestamp TX packets at line rate,
>> we would like to avoid generating twice as many interrupts in cases
>> where we don't need every packet timestamped. And as mentioned in the
>> comment, we don't have very precise HW filters, but we would still like
>> to avoid timestamping TCP packets, since TCP is the most heavily used
>> protocol on the host.
>
> Tx timestamps don't use the filters in the first place. The filter only
> applies to Rx timestamps. You should only initiate a Tx timestamp when
> requested, which will generally not be the case for TCP.
>
> Are you saying that Rx timestamps generate interrupts?
Sorry for the confusion with TX timestamping.
For RX we use an additional buffer to carry the timestamp metadata, and
we have to process that metadata even if it is not needed later in the
stack. On 100G links that adds delay we would like to avoid.
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH net-next v3 4/5] eth: fbnic: add TX packets timestamping support
2024-10-03 12:39 [PATCH net-next v3 0/5] eth: fbnic: add timestamping support Vadim Fedorenko
` (2 preceding siblings ...)
2024-10-03 12:39 ` [PATCH net-next v3 3/5] eth: fbnic: add RX packets timestamping support Vadim Fedorenko
@ 2024-10-03 12:39 ` Vadim Fedorenko
2024-10-03 12:39 ` [PATCH net-next v3 5/5] eth: fbnic: add ethtool timestamping statistics Vadim Fedorenko
4 siblings, 0 replies; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-03 12:39 UTC (permalink / raw)
To: Vadim Fedorenko, Jakub Kicinski, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck
Cc: Vadim Fedorenko, netdev, Richard Cochran
Add TX timestamping configuration to the ethtool interface. Add
processing of TX timestamp completions as well as the configuration to
request the HW to generate TX timestamp completions.
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
.../net/ethernet/meta/fbnic/fbnic_ethtool.c | 5 +
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 93 ++++++++++++++++++-
2 files changed, 95 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 3afb7227574a..24e059443264 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -16,9 +16,14 @@ fbnic_get_ts_info(struct net_device *netdev,
tsinfo->so_timestamping =
SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_RAW_HARDWARE;
+ tsinfo->tx_types =
+ BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
tsinfo->rx_filters =
BIT(HWTSTAMP_FILTER_NONE) |
BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index b64c1d4f925e..d6f0f3faa82f 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -12,9 +12,14 @@
#include "fbnic_netdev.h"
#include "fbnic_txrx.h"
+enum {
+ FBNIC_XMIT_CB_TS = 0x01,
+};
+
struct fbnic_xmit_cb {
u32 bytecount;
u8 desc_count;
+ u8 flags;
int hw_head;
};
@@ -148,11 +153,32 @@ static void fbnic_unmap_page_twd(struct device *dev, __le64 *twd)
#define FBNIC_TWD_TYPE(_type) \
cpu_to_le64(FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_##_type))
+static bool fbnic_tx_tstamp(struct sk_buff *skb)
+{
+ struct fbnic_net *fbn;
+
+ if (!unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ return false;
+
+ fbn = netdev_priv(skb->dev);
+ if (fbn->hwtstamp_config.tx_type == HWTSTAMP_TX_OFF)
+ return false;
+
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ FBNIC_XMIT_CB(skb)->flags |= FBNIC_XMIT_CB_TS;
+ FBNIC_XMIT_CB(skb)->hw_head = -1;
+
+ return true;
+}
+
static bool
fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
{
unsigned int l2len, i3len;
+ if (fbnic_tx_tstamp(skb))
+ *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_TS);
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL))
return false;
@@ -372,6 +398,12 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
if (desc_cnt > clean_desc)
break;
+ if (unlikely(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS)) {
+ FBNIC_XMIT_CB(skb)->hw_head = hw_head;
+ if (likely(!discard))
+ break;
+ }
+
ring->tx_buf[head] = NULL;
clean_desc -= desc_cnt;
@@ -425,6 +457,53 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
FBNIC_TX_DESC_WAKEUP);
}
+static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
+ struct fbnic_ring *ring,
+ u64 tcd, int *ts_head, int *head0)
+{
+ struct skb_shared_hwtstamps hwtstamp;
+ struct fbnic_net *fbn;
+ struct sk_buff *skb;
+ int head;
+ u64 ns;
+
+ head = (*ts_head < 0) ? ring->head : *ts_head;
+
+ do {
+ unsigned int desc_cnt;
+
+ if (head == ring->tail) {
+ if (unlikely(net_ratelimit()))
+ netdev_err(nv->napi.dev,
+ "Tx timestamp without matching packet\n");
+ return;
+ }
+
+ skb = ring->tx_buf[head];
+ desc_cnt = FBNIC_XMIT_CB(skb)->desc_count;
+
+ head += desc_cnt;
+ head &= ring->size_mask;
+ } while (!(FBNIC_XMIT_CB(skb)->flags & FBNIC_XMIT_CB_TS));
+
+ fbn = netdev_priv(nv->napi.dev);
+ ns = fbnic_ts40_to_ns(fbn, FIELD_GET(FBNIC_TCD_TYPE1_TS_MASK, tcd));
+
+ memset(&hwtstamp, 0, sizeof(hwtstamp));
+ hwtstamp.hwtstamp = ns_to_ktime(ns);
+
+ *ts_head = head;
+
+ FBNIC_XMIT_CB(skb)->flags &= ~FBNIC_XMIT_CB_TS;
+ if (*head0 < 0) {
+ head = FBNIC_XMIT_CB(skb)->hw_head;
+ if (head >= 0)
+ *head0 = head;
+ }
+
+ skb_tstamp_tx(skb, &hwtstamp);
+}
+
static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
struct page *page)
{
@@ -458,10 +537,12 @@ static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx,
}
static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget,
- struct fbnic_q_triad *qt, s32 head0)
+ struct fbnic_q_triad *qt, s32 ts_head, s32 head0)
{
if (head0 >= 0)
fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0);
+ else if (ts_head >= 0)
+ fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head);
}
static void
@@ -469,9 +550,9 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
int napi_budget)
{
struct fbnic_ring *cmpl = &qt->cmpl;
+ s32 head0 = -1, ts_head = -1;
__le64 *raw_tcd, done;
u32 head = cmpl->head;
- s32 head0 = -1;
done = (head & (cmpl->size_mask + 1)) ? 0 : cpu_to_le64(FBNIC_TCD_DONE);
raw_tcd = &cmpl->desc[head & cmpl->size_mask];
@@ -494,6 +575,12 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
* they are skipped for now.
*/
break;
+ case FBNIC_TCD_TYPE_1:
+ if (WARN_ON_ONCE(tcd & FBNIC_TCD_TWQ1))
+ break;
+
+ fbnic_clean_tsq(nv, &qt->sub0, tcd, &ts_head, &head0);
+ break;
default:
break;
}
@@ -513,7 +600,7 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt,
}
/* Unmap and free processed buffers */
- fbnic_clean_twq(nv, napi_budget, qt, head0);
+ fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0);
}
static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget,
--
2.43.5
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH net-next v3 5/5] eth: fbnic: add ethtool timestamping statistics
2024-10-03 12:39 [PATCH net-next v3 0/5] eth: fbnic: add timestamping support Vadim Fedorenko
` (3 preceding siblings ...)
2024-10-03 12:39 ` [PATCH net-next v3 4/5] eth: fbnic: add TX " Vadim Fedorenko
@ 2024-10-03 12:39 ` Vadim Fedorenko
4 siblings, 0 replies; 24+ messages in thread
From: Vadim Fedorenko @ 2024-10-03 12:39 UTC (permalink / raw)
To: Vadim Fedorenko, Jakub Kicinski, David Ahern, Paolo Abeni,
David S. Miller, Alexander Duyck
Cc: Vadim Fedorenko, netdev, Richard Cochran
Add counters for packets with HW timestamp requests and for lost
timestamps with no associated skbs. Use the ethtool interface to report
these counters.
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
.../net/ethernet/meta/fbnic/fbnic_ethtool.c | 24 +++++++++++++++++++
drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 9 ++++++-
drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 2 ++
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 24e059443264..1117d5a32867 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -93,9 +93,33 @@ fbnic_get_eth_mac_stats(struct net_device *netdev,
&mac_stats->eth_mac.FrameTooLongErrors);
}
+static void fbnic_get_ts_stats(struct net_device *netdev,
+ struct ethtool_ts_stats *ts_stats)
+{
+ struct fbnic_net *fbn = netdev_priv(netdev);
+ u64 ts_packets, ts_lost;
+ struct fbnic_ring *ring;
+ unsigned int start;
+ int i;
+
+ ts_stats->pkts = fbn->tx_stats.ts_packets;
+ ts_stats->lost = fbn->tx_stats.ts_lost;
+ for (i = 0; i < fbn->num_tx_queues; i++) {
+ ring = fbn->tx[i];
+ do {
+ start = u64_stats_fetch_begin(&ring->stats.syncp);
+ ts_packets = ring->stats.ts_packets;
+ ts_lost = ring->stats.ts_lost;
+ } while (u64_stats_fetch_retry(&ring->stats.syncp, start));
+ ts_stats->pkts += ts_packets;
+ ts_stats->lost += ts_lost;
+ }
+}
+
static const struct ethtool_ops fbnic_ethtool_ops = {
.get_drvinfo = fbnic_get_drvinfo,
.get_ts_info = fbnic_get_ts_info,
+ .get_ts_stats = fbnic_get_ts_stats,
.get_eth_mac_stats = fbnic_get_eth_mac_stats,
};
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index d6f0f3faa82f..5e2c3512b4ab 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -383,7 +383,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
struct fbnic_ring *ring, bool discard,
unsigned int hw_head)
{
- u64 total_bytes = 0, total_packets = 0;
+ u64 total_bytes = 0, total_packets = 0, ts_lost = 0;
unsigned int head = ring->head;
struct netdev_queue *txq;
unsigned int clean_desc;
@@ -402,6 +402,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
FBNIC_XMIT_CB(skb)->hw_head = hw_head;
if (likely(!discard))
break;
+ ts_lost++;
}
ring->tx_buf[head] = NULL;
@@ -441,6 +442,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
if (unlikely(discard)) {
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.dropped += total_packets;
+ ring->stats.ts_lost += ts_lost;
u64_stats_update_end(&ring->stats.syncp);
netdev_tx_completed_queue(txq, total_packets, total_bytes);
@@ -502,6 +504,9 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
}
skb_tstamp_tx(skb, &hwtstamp);
+ u64_stats_update_begin(&ring->stats.syncp);
+ ring->stats.ts_packets++;
+ u64_stats_update_end(&ring->stats.syncp);
}
static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx,
@@ -1058,6 +1063,8 @@ static void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
fbn->tx_stats.bytes += stats->bytes;
fbn->tx_stats.packets += stats->packets;
fbn->tx_stats.dropped += stats->dropped;
+ fbn->tx_stats.ts_lost += stats->ts_lost;
+ fbn->tx_stats.ts_packets += stats->ts_packets;
}
static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
index 682d875f08c0..8d626287c3f4 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h
@@ -57,6 +57,8 @@ struct fbnic_queue_stats {
u64 packets;
u64 bytes;
u64 dropped;
+ u64 ts_packets;
+ u64 ts_lost;
struct u64_stats_sync syncp;
};
--
2.43.5
^ permalink raw reply related [flat|nested] 24+ messages in thread