* [PATCH v2 net-next 4/7] net: dsa: sja1105: Implement the .gettimex64 system call for PTP
From: Vladimir Oltean @ 2019-09-10 1:34 UTC (permalink / raw)
To: f.fainelli, vivien.didelot, andrew, davem, richardcochran
Cc: netdev, Vladimir Oltean
In-Reply-To: <20190910013501.3262-1-olteanv@gmail.com>
Through the PTP_SYS_OFFSET_EXTENDED ioctl, it is possible for userspace
applications (i.e. phc2sys) to compensate for the delays incurred while
reading the PHC's time.
For now implement this ioctl in the driver, although the performance
improvements are minimal. The goal with this patch is to rework the
infrastructure in the driver for SPI transfers to be timestamped. Other
patches depend on this change.
The "performance" implementation of this ioctl will come later, once the
API in the SPI subsystem is agreed upon. The change in the sja1105
driver will be minimal then.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
drivers/net/dsa/sja1105/sja1105.h | 3 ++-
drivers/net/dsa/sja1105/sja1105_main.c | 8 +++---
drivers/net/dsa/sja1105/sja1105_ptp.c | 20 ++++++++------
drivers/net/dsa/sja1105/sja1105_ptp.h | 6 +++--
drivers/net/dsa/sja1105/sja1105_spi.c | 36 +++++++++++++++++++-------
5 files changed, 48 insertions(+), 25 deletions(-)
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index e4955a025e46..c80be59dafbd 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -131,7 +131,8 @@ int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
void *packed_buf, size_t size_bytes);
int sja1105_spi_send_int(const struct sja1105_private *priv,
sja1105_spi_rw_mode_t rw, u64 reg_addr,
- u64 *value, u64 size_bytes);
+ u64 *value, u64 size_bytes,
+ struct ptp_system_timestamp *ptp_sts);
int sja1105_spi_send_long_packed_buf(const struct sja1105_private *priv,
sja1105_spi_rw_mode_t rw, u64 base_addr,
void *packed_buf, u64 buf_len);
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index c92326871fec..b886e1a09dfb 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1849,7 +1849,7 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
mutex_lock(&priv->ptp_lock);
- ticks = sja1105_ptpclkval_read(priv);
+ ticks = sja1105_ptpclkval_read(priv, NULL);
rc = sja1105_ptpegr_ts_poll(priv, slot, &ts);
if (rc < 0) {
@@ -2002,7 +2002,7 @@ static void sja1105_rxtstamp_work(struct work_struct *work)
mutex_lock(&priv->ptp_lock);
- ticks = sja1105_ptpclkval_read(priv);
+ ticks = sja1105_ptpclkval_read(priv, NULL);
while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) {
struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
@@ -2097,8 +2097,8 @@ static int sja1105_check_device_id(struct sja1105_private *priv)
u64 part_no;
int rc;
- rc = sja1105_spi_send_int(priv, SPI_READ, regs->device_id,
- &device_id, SJA1105_SIZE_DEVICE_ID);
+ rc = sja1105_spi_send_int(priv, SPI_READ, regs->device_id, &device_id,
+ SJA1105_SIZE_DEVICE_ID, NULL);
if (rc < 0)
return rc;
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index bcdfdda46b9c..04693c702b09 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
+#include <linux/spi/spi.h>
#include "sja1105.h"
/* The adjfine API clamps ppb between [-32,768,000, 32,768,000], and
@@ -214,15 +215,16 @@ int sja1105_ptp_reset(struct sja1105_private *priv)
return rc;
}
-static int sja1105_ptp_gettime(struct ptp_clock_info *ptp,
- struct timespec64 *ts)
+static int sja1105_ptp_gettimex(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
{
struct sja1105_private *priv = ptp_to_sja1105(ptp);
u64 ticks;
mutex_lock(&priv->ptp_lock);
- ticks = sja1105_ptpclkval_read(priv);
+ ticks = sja1105_ptpclkval_read(priv, sts);
*ts = ns_to_timespec64(sja1105_ticks_to_ns(ticks));
mutex_unlock(&priv->ptp_lock);
@@ -247,7 +249,8 @@ static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 val)
{
const struct sja1105_regs *regs = priv->info->regs;
- return sja1105_spi_send_int(priv, SPI_WRITE, regs->ptpclk, &val, 8);
+ return sja1105_spi_send_int(priv, SPI_WRITE, regs->ptpclk, &val, 8,
+ NULL);
}
/* Write to PTPCLKVAL while PTPCLKADD is 0 */
@@ -291,7 +294,7 @@ static int sja1105_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
mutex_lock(&priv->ptp_lock);
rc = sja1105_spi_send_int(priv, SPI_WRITE, regs->ptpclkrate,
- &clkrate, 4);
+ &clkrate, 4, NULL);
mutex_unlock(&priv->ptp_lock);
@@ -299,14 +302,15 @@ static int sja1105_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
}
/* Caller must hold priv->ptp_lock */
-u64 sja1105_ptpclkval_read(struct sja1105_private *priv)
+u64 sja1105_ptpclkval_read(struct sja1105_private *priv,
+ struct ptp_system_timestamp *sts)
{
const struct sja1105_regs *regs = priv->info->regs;
u64 ptpclkval = 0;
int rc;
rc = sja1105_spi_send_int(priv, SPI_READ, regs->ptpclk,
- &ptpclkval, 8);
+ &ptpclkval, 8, sts);
if (rc < 0)
dev_err_ratelimited(priv->ds->dev,
"failed to read ptp time: %d\n",
@@ -347,7 +351,7 @@ int sja1105_ptp_clock_register(struct sja1105_private *priv)
.name = "SJA1105 PHC",
.adjfine = sja1105_ptp_adjfine,
.adjtime = sja1105_ptp_adjtime,
- .gettime64 = sja1105_ptp_gettime,
+ .gettimex64 = sja1105_ptp_gettimex,
.settime64 = sja1105_ptp_settime,
.max_adj = SJA1105_MAX_ADJ_PPB,
};
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 51e21d951548..80c33e5e4503 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -39,7 +39,8 @@ u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now,
int sja1105_ptp_reset(struct sja1105_private *priv);
-u64 sja1105_ptpclkval_read(struct sja1105_private *priv);
+u64 sja1105_ptpclkval_read(struct sja1105_private *priv,
+ struct ptp_system_timestamp *sts);
#else
@@ -70,7 +71,8 @@ static inline int sja1105_ptp_reset(struct sja1105_private *priv)
return 0;
}
-static inline u64 sja1105_ptpclkval_read(struct sja1105_private *priv)
+static inline u64 sja1105_ptpclkval_read(struct sja1105_private *priv,
+ struct ptp_system_timestamp *sts)
{
return 0;
}
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 1953d8c54af6..26985f1209ad 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -15,7 +15,8 @@
(SJA1105_SIZE_SPI_MSG_HEADER + SJA1105_SIZE_SPI_MSG_MAXLEN)
static int sja1105_spi_transfer(const struct sja1105_private *priv,
- const void *tx, void *rx, int size)
+ const void *tx, void *rx, int size,
+ struct ptp_system_timestamp *ptp_sts)
{
struct spi_device *spi = priv->spidev;
struct spi_transfer transfer = {
@@ -35,12 +36,16 @@ static int sja1105_spi_transfer(const struct sja1105_private *priv,
spi_message_init(&msg);
spi_message_add_tail(&transfer, &msg);
+ ptp_read_system_prets(ptp_sts);
+
rc = spi_sync(spi, &msg);
if (rc < 0) {
dev_err(&spi->dev, "SPI transfer failed: %d\n", rc);
return rc;
}
+ ptp_read_system_postts(ptp_sts);
+
return rc;
}
@@ -66,9 +71,11 @@ sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg)
* @size_bytes is smaller than SIZE_SPI_MSG_MAXLEN. Larger packed buffers
* are chunked in smaller pieces by sja1105_spi_send_long_packed_buf below.
*/
-int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
- sja1105_spi_rw_mode_t rw, u64 reg_addr,
- void *packed_buf, size_t size_bytes)
+static int
+__sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
+ sja1105_spi_rw_mode_t rw, u64 reg_addr,
+ void *packed_buf, size_t size_bytes,
+ struct ptp_system_timestamp *ptp_sts)
{
u8 tx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0};
u8 rx_buf[SJA1105_SIZE_SPI_TRANSFER_MAX] = {0};
@@ -90,7 +97,7 @@ int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
memcpy(tx_buf + SJA1105_SIZE_SPI_MSG_HEADER,
packed_buf, size_bytes);
- rc = sja1105_spi_transfer(priv, tx_buf, rx_buf, msg_len);
+ rc = sja1105_spi_transfer(priv, tx_buf, rx_buf, msg_len, ptp_sts);
if (rc < 0)
return rc;
@@ -101,6 +108,14 @@ int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
return 0;
}
+int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
+ sja1105_spi_rw_mode_t rw, u64 reg_addr,
+ void *packed_buf, size_t size_bytes)
+{
+ return __sja1105_spi_send_packed_buf(priv, rw, reg_addr, packed_buf,
+ size_bytes, NULL);
+}
+
/* If @rw is:
* - SPI_WRITE: creates and sends an SPI write message at absolute
* address reg_addr, taking size_bytes from *packed_buf
@@ -114,7 +129,8 @@ int sja1105_spi_send_packed_buf(const struct sja1105_private *priv,
*/
int sja1105_spi_send_int(const struct sja1105_private *priv,
sja1105_spi_rw_mode_t rw, u64 reg_addr,
- u64 *value, u64 size_bytes)
+ u64 *value, u64 size_bytes,
+ struct ptp_system_timestamp *ptp_sts)
{
u8 packed_buf[SJA1105_SIZE_SPI_MSG_MAXLEN];
int rc;
@@ -126,8 +142,8 @@ int sja1105_spi_send_int(const struct sja1105_private *priv,
sja1105_pack(packed_buf, value, 8 * size_bytes - 1, 0,
size_bytes);
- rc = sja1105_spi_send_packed_buf(priv, rw, reg_addr, packed_buf,
- size_bytes);
+ rc = __sja1105_spi_send_packed_buf(priv, rw, reg_addr, packed_buf,
+ size_bytes, ptp_sts);
if (rw == SPI_READ)
sja1105_unpack(packed_buf, value, 8 * size_bytes - 1, 0,
@@ -291,7 +307,7 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv,
int rc;
rc = sja1105_spi_send_int(priv, SPI_READ, regs->port_control,
- &inhibit_cmd, SJA1105_SIZE_PORT_CTRL);
+ &inhibit_cmd, SJA1105_SIZE_PORT_CTRL, NULL);
if (rc < 0)
return rc;
@@ -301,7 +317,7 @@ int sja1105_inhibit_tx(const struct sja1105_private *priv,
inhibit_cmd &= ~port_bitmap;
return sja1105_spi_send_int(priv, SPI_WRITE, regs->port_control,
- &inhibit_cmd, SJA1105_SIZE_PORT_CTRL);
+ &inhibit_cmd, SJA1105_SIZE_PORT_CTRL, NULL);
}
struct sja1105_status {
--
2.17.1
^ permalink raw reply related
* [PATCH v2 net-next 3/7] net: dsa: sja1105: Switch to hardware operations for PTP
From: Vladimir Oltean @ 2019-09-10 1:34 UTC (permalink / raw)
To: f.fainelli, vivien.didelot, andrew, davem, richardcochran
Cc: netdev, Vladimir Oltean
In-Reply-To: <20190910013501.3262-1-olteanv@gmail.com>
Adjusting the hardware clock (PTPCLKVAL, PTPCLKADD, PTPCLKRATE) is a
requirement for the auxiliary PTP functionality of the switch
(TTEthernet, PPS input, PPS output).
Now that the sync precision issues have been identified (and fixed in
the spi-fsl-dspi driver), we can get rid of the timecounter/cyclecounter
implementation, which is reliant on the free-running PTPTSCLK.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
drivers/net/dsa/sja1105/sja1105.h | 16 +--
drivers/net/dsa/sja1105/sja1105_main.c | 18 ++-
drivers/net/dsa/sja1105/sja1105_ptp.c | 181 ++++++++++++-------------
drivers/net/dsa/sja1105/sja1105_ptp.h | 22 +++
drivers/net/dsa/sja1105/sja1105_spi.c | 2 -
5 files changed, 122 insertions(+), 117 deletions(-)
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index d8a92646e80a..e4955a025e46 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -32,7 +32,6 @@ struct sja1105_regs {
u64 ptp_control;
u64 ptpclk;
u64 ptpclkrate;
- u64 ptptsclk;
u64 ptpegr_ts[SJA1105_NUM_PORTS];
u64 pad_mii_tx[SJA1105_NUM_PORTS];
u64 pad_mii_id[SJA1105_NUM_PORTS];
@@ -50,8 +49,15 @@ struct sja1105_regs {
u64 qlevel[SJA1105_NUM_PORTS];
};
+enum sja1105_ptp_clk_mode {
+ PTP_ADD_MODE = 1,
+ PTP_SET_MODE = 0,
+};
+
struct sja1105_ptp_cmd {
u64 resptp; /* reset */
+ u64 corrclk4ts; /* use the corrected clock for timestamps */
+ u64 ptpclkadd; /* enum sja1105_ptp_clk_mode */
};
struct sja1105_info {
@@ -96,13 +102,7 @@ struct sja1105_private {
struct sja1105_ptp_cmd ptp_cmd;
struct ptp_clock_info ptp_caps;
struct ptp_clock *clock;
- /* The cycle counter translates the PTP timestamps (based on
- * a free-running counter) into a software time domain.
- */
- struct cyclecounter tstamp_cc;
- struct timecounter tstamp_tc;
- struct delayed_work refresh_work;
- /* Serializes all operations on the cycle counter */
+ /* Serializes all operations on the PTP hardware clock */
struct mutex ptp_lock;
/* Serializes transmission of management frames so that
* the switch doesn't confuse them with one another.
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 0b0205abc3d2..c92326871fec 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1818,7 +1818,7 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
struct skb_shared_hwtstamps shwt = {0};
int slot = sp->mgmt_slot;
struct sk_buff *clone;
- u64 now, ts;
+ u64 ticks, ts;
int rc;
/* The tragic fact about the switch having 4x2 slots for installing
@@ -1849,7 +1849,7 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
mutex_lock(&priv->ptp_lock);
- now = priv->tstamp_cc.read(&priv->tstamp_cc);
+ ticks = sja1105_ptpclkval_read(priv);
rc = sja1105_ptpegr_ts_poll(priv, slot, &ts);
if (rc < 0) {
@@ -1858,10 +1858,9 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
goto out_unlock_ptp;
}
- ts = sja1105_tstamp_reconstruct(priv, now, ts);
- ts = timecounter_cyc2time(&priv->tstamp_tc, ts);
+ ts = sja1105_tstamp_reconstruct(priv, ticks, ts);
- shwt.hwtstamp = ns_to_ktime(ts);
+ shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts));
skb_complete_tx_timestamp(clone, &shwt);
out_unlock_ptp:
@@ -1999,11 +1998,11 @@ static void sja1105_rxtstamp_work(struct work_struct *work)
struct sja1105_tagger_data *data = to_tagger(work);
struct sja1105_private *priv = to_sja1105(data);
struct sk_buff *skb;
- u64 now;
+ u64 ticks;
mutex_lock(&priv->ptp_lock);
- now = priv->tstamp_cc.read(&priv->tstamp_cc);
+ ticks = sja1105_ptpclkval_read(priv);
while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) {
struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
@@ -2012,10 +2011,9 @@ static void sja1105_rxtstamp_work(struct work_struct *work)
*shwt = (struct skb_shared_hwtstamps) {0};
ts = SJA1105_SKB_CB(skb)->meta_tstamp;
- ts = sja1105_tstamp_reconstruct(priv, now, ts);
- ts = timecounter_cyc2time(&priv->tstamp_tc, ts);
+ ts = sja1105_tstamp_reconstruct(priv, ticks, ts);
- shwt->hwtstamp = ns_to_ktime(ts);
+ shwt->hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts));
netif_rx_ni(skb);
}
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 13f9f5799e46..bcdfdda46b9c 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -13,24 +13,6 @@
#define SJA1105_MAX_ADJ_PPB 32000000
#define SJA1105_SIZE_PTP_CMD 4
-/* Timestamps are in units of 8 ns clock ticks (equivalent to a fixed
- * 125 MHz clock) so the scale factor (MULT / SHIFT) needs to be 8.
- * Furthermore, wisely pick SHIFT as 28 bits, which translates
- * MULT into 2^31 (0x80000000). This is the same value around which
- * the hardware PTPCLKRATE is centered, so the same ppb conversion
- * arithmetic can be reused.
- */
-#define SJA1105_CC_SHIFT 28
-#define SJA1105_CC_MULT (8 << SJA1105_CC_SHIFT)
-
-/* Having 33 bits of cycle counter left until a 64-bit overflow during delta
- * conversion, we multiply this by the 8 ns counter resolution and arrive at
- * a comfortable 68.71 second refresh interval until the delta would cause
- * an integer overflow, in absence of any other readout.
- * Approximate to 1 minute.
- */
-#define SJA1105_REFRESH_INTERVAL (HZ * 60)
-
/* This range is actually +/- SJA1105_MAX_ADJ_PPB
* divided by 1000 (ppb -> ppm) and with a 16-bit
* "fractional" part (actually fixed point).
@@ -41,7 +23,7 @@
*
* This forgoes a "ppb" numeric representation (up to NSEC_PER_SEC)
* and defines the scaling factor between scaled_ppm and the actual
- * frequency adjustments (both cycle counter and hardware).
+ * frequency adjustments of the PHC.
*
* ptpclkrate = scaled_ppm * 2^31 / (10^6 * 2^16)
* simplifies to
@@ -49,10 +31,9 @@
*/
#define SJA1105_CC_MULT_NUM (1 << 9)
#define SJA1105_CC_MULT_DEM 15625
+#define SJA1105_CC_MULT 0x80000000
#define ptp_to_sja1105(d) container_of((d), struct sja1105_private, ptp_caps)
-#define cc_to_sja1105(d) container_of((d), struct sja1105_private, tstamp_cc)
-#define dw_to_sja1105(d) container_of((d), struct sja1105_private, refresh_work)
int sja1105_get_ts_info(struct dsa_switch *ds, int port,
struct ethtool_ts_info *info)
@@ -86,6 +67,8 @@ int sja1105et_ptp_cmd(const void *ctx, const void *data)
sja1105_pack(buf, &valid, 31, 31, size);
sja1105_pack(buf, &cmd->resptp, 2, 2, size);
+ sja1105_pack(buf, &cmd->corrclk4ts, 1, 1, size);
+ sja1105_pack(buf, &cmd->ptpclkadd, 0, 0, size);
return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control,
buf, SJA1105_SIZE_PTP_CMD);
@@ -103,6 +86,8 @@ int sja1105pqrs_ptp_cmd(const void *ctx, const void *data)
sja1105_pack(buf, &valid, 31, 31, size);
sja1105_pack(buf, &cmd->resptp, 3, 3, size);
+ sja1105_pack(buf, &cmd->corrclk4ts, 2, 2, size);
+ sja1105_pack(buf, &cmd->ptpclkadd, 0, 0, size);
return sja1105_spi_send_packed_buf(priv, SPI_WRITE, regs->ptp_control,
buf, SJA1105_SIZE_PTP_CMD);
@@ -215,17 +200,14 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
int sja1105_ptp_reset(struct sja1105_private *priv)
{
struct sja1105_ptp_cmd cmd = priv->ptp_cmd;
- struct dsa_switch *ds = priv->ds;
int rc;
mutex_lock(&priv->ptp_lock);
cmd.resptp = 1;
- dev_dbg(ds->dev, "Resetting PTP clock\n");
- rc = priv->info->ptp_cmd(priv, &cmd);
- timecounter_init(&priv->tstamp_tc, &priv->tstamp_cc,
- ktime_to_ns(ktime_get_real()));
+ dev_dbg(priv->ds->dev, "Resetting PTP clock\n");
+ rc = priv->info->ptp_cmd(priv, &cmd);
mutex_unlock(&priv->ptp_lock);
@@ -236,124 +218,130 @@ static int sja1105_ptp_gettime(struct ptp_clock_info *ptp,
struct timespec64 *ts)
{
struct sja1105_private *priv = ptp_to_sja1105(ptp);
- u64 ns;
+ u64 ticks;
mutex_lock(&priv->ptp_lock);
- ns = timecounter_read(&priv->tstamp_tc);
- mutex_unlock(&priv->ptp_lock);
- *ts = ns_to_timespec64(ns);
+ ticks = sja1105_ptpclkval_read(priv);
+ *ts = ns_to_timespec64(sja1105_ticks_to_ns(ticks));
+
+ mutex_unlock(&priv->ptp_lock);
return 0;
}
+/* Caller must hold priv->ptp_lock */
+static int sja1105_ptp_mode_set(struct sja1105_private *priv,
+ enum sja1105_ptp_clk_mode mode)
+{
+ if (priv->ptp_cmd.ptpclkadd == mode)
+ return 0;
+
+ priv->ptp_cmd.ptpclkadd = mode;
+
+ return priv->info->ptp_cmd(priv, &priv->ptp_cmd);
+}
+
+/* Caller must hold priv->ptp_lock */
+static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 val)
+{
+ const struct sja1105_regs *regs = priv->info->regs;
+
+ return sja1105_spi_send_int(priv, SPI_WRITE, regs->ptpclk, &val, 8);
+}
+
+/* Write to PTPCLKVAL while PTPCLKADD is 0 */
static int sja1105_ptp_settime(struct ptp_clock_info *ptp,
const struct timespec64 *ts)
{
+ u64 ticks = ns_to_sja1105_ticks(timespec64_to_ns(ts));
struct sja1105_private *priv = ptp_to_sja1105(ptp);
- u64 ns = timespec64_to_ns(ts);
+ int rc;
mutex_lock(&priv->ptp_lock);
- timecounter_init(&priv->tstamp_tc, &priv->tstamp_cc, ns);
+
+ rc = sja1105_ptp_mode_set(priv, PTP_SET_MODE);
+ if (rc < 0) {
+ dev_err(priv->ds->dev, "Failed to put PTPCLK in set mode\n");
+ goto out;
+ }
+
+ rc = sja1105_ptpclkval_write(priv, ticks);
+
+out:
mutex_unlock(&priv->ptp_lock);
- return 0;
+ return rc;
}
static int sja1105_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
{
struct sja1105_private *priv = ptp_to_sja1105(ptp);
+ const struct sja1105_regs *regs = priv->info->regs;
s64 clkrate;
+ int rc;
clkrate = (s64)scaled_ppm * SJA1105_CC_MULT_NUM;
clkrate = div_s64(clkrate, SJA1105_CC_MULT_DEM);
- mutex_lock(&priv->ptp_lock);
-
- /* Force a readout to update the timer *before* changing its frequency.
- *
- * This way, its corrected time curve can at all times be modeled
- * as a linear "A * x + B" function, where:
- *
- * - B are past frequency adjustments and offset shifts, all
- * accumulated into the cycle_last variable.
- *
- * - A is the new frequency adjustments we're just about to set.
- *
- * Reading now makes B accumulate the correct amount of time,
- * corrected at the old rate, before changing it.
- *
- * Hardware timestamps then become simple points on the curve and
- * are approximated using the above function. This is still better
- * than letting the switch take the timestamps using the hardware
- * rate-corrected clock (PTPCLKVAL) - the comparison in this case would
- * be that we're shifting the ruler at the same time as we're taking
- * measurements with it.
- *
- * The disadvantage is that it's possible to receive timestamps when
- * a frequency adjustment took place in the near past.
- * In this case they will be approximated using the new ppb value
- * instead of a compound function made of two segments (one at the old
- * and the other at the new rate) - introducing some inaccuracy.
- */
- timecounter_read(&priv->tstamp_tc);
-
- priv->tstamp_cc.mult = SJA1105_CC_MULT + clkrate;
+ /* Take a +/- value and re-center it around 2^31. */
+ clkrate = SJA1105_CC_MULT + clkrate;
+ clkrate &= GENMASK_ULL(31, 0);
- mutex_unlock(&priv->ptp_lock);
-
- return 0;
-}
+ mutex_lock(&priv->ptp_lock);
-static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
-{
- struct sja1105_private *priv = ptp_to_sja1105(ptp);
+ rc = sja1105_spi_send_int(priv, SPI_WRITE, regs->ptpclkrate,
+ &clkrate, 4);
- mutex_lock(&priv->ptp_lock);
- timecounter_adjtime(&priv->tstamp_tc, delta);
mutex_unlock(&priv->ptp_lock);
- return 0;
+ return rc;
}
-static u64 sja1105_ptptsclk_read(const struct cyclecounter *cc)
+/* Caller must hold priv->ptp_lock */
+u64 sja1105_ptpclkval_read(struct sja1105_private *priv)
{
- struct sja1105_private *priv = cc_to_sja1105(cc);
const struct sja1105_regs *regs = priv->info->regs;
- u64 ptptsclk = 0;
+ u64 ptpclkval = 0;
int rc;
- rc = sja1105_spi_send_int(priv, SPI_READ, regs->ptptsclk,
- &ptptsclk, 8);
+ rc = sja1105_spi_send_int(priv, SPI_READ, regs->ptpclk,
+ &ptpclkval, 8);
if (rc < 0)
dev_err_ratelimited(priv->ds->dev,
- "failed to read ptp cycle counter: %d\n",
+ "failed to read ptp time: %d\n",
rc);
- return ptptsclk;
+
+ return ptpclkval;
}
-static void sja1105_ptp_overflow_check(struct work_struct *work)
+/* Write to PTPCLKVAL while PTPCLKADD is 1 */
+static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
- struct delayed_work *dw = to_delayed_work(work);
- struct sja1105_private *priv = dw_to_sja1105(dw);
- struct timespec64 ts;
+ struct sja1105_private *priv = ptp_to_sja1105(ptp);
+ s64 ticks = ns_to_sja1105_ticks(delta);
+ int rc;
- sja1105_ptp_gettime(&priv->ptp_caps, &ts);
+ mutex_lock(&priv->ptp_lock);
- schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
+ rc = sja1105_ptp_mode_set(priv, PTP_ADD_MODE);
+ if (rc < 0) {
+ dev_err(priv->ds->dev, "Failed to put PTPCLK in add mode\n");
+ goto out;
+ }
+
+ rc = sja1105_ptpclkval_write(priv, ticks);
+
+out:
+ mutex_unlock(&priv->ptp_lock);
+
+ return rc;
}
int sja1105_ptp_clock_register(struct sja1105_private *priv)
{
struct dsa_switch *ds = priv->ds;
- /* Set up the cycle counter */
- priv->tstamp_cc = (struct cyclecounter) {
- .read = sja1105_ptptsclk_read,
- .mask = CYCLECOUNTER_MASK(64),
- .shift = SJA1105_CC_SHIFT,
- .mult = SJA1105_CC_MULT,
- };
priv->ptp_caps = (struct ptp_clock_info) {
.owner = THIS_MODULE,
.name = "SJA1105 PHC",
@@ -370,8 +358,8 @@ int sja1105_ptp_clock_register(struct sja1105_private *priv)
if (IS_ERR_OR_NULL(priv->clock))
return PTR_ERR(priv->clock);
- INIT_DELAYED_WORK(&priv->refresh_work, sja1105_ptp_overflow_check);
- schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
+ priv->ptp_cmd.corrclk4ts = true;
+ priv->ptp_cmd.ptpclkadd = PTP_SET_MODE;
return sja1105_ptp_reset(priv);
}
@@ -381,7 +369,6 @@ void sja1105_ptp_clock_unregister(struct sja1105_private *priv)
if (IS_ERR_OR_NULL(priv->clock))
return;
- cancel_delayed_work_sync(&priv->refresh_work);
ptp_clock_unregister(priv->clock);
priv->clock = NULL;
}
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index af456b0a4d27..51e21d951548 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -4,6 +4,21 @@
#ifndef _SJA1105_PTP_H
#define _SJA1105_PTP_H
+/* Timestamps are in units of 8 ns clock ticks (equivalent to
+ * a fixed 125 MHz clock).
+ */
+#define SJA1105_TICK_NS 8
+
+static inline s64 ns_to_sja1105_ticks(s64 ns)
+{
+ return ns / SJA1105_TICK_NS;
+}
+
+static inline s64 sja1105_ticks_to_ns(s64 ticks)
+{
+ return ticks * SJA1105_TICK_NS;
+}
+
#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
int sja1105_ptp_clock_register(struct sja1105_private *priv);
@@ -24,6 +39,8 @@ u64 sja1105_tstamp_reconstruct(struct sja1105_private *priv, u64 now,
int sja1105_ptp_reset(struct sja1105_private *priv);
+u64 sja1105_ptpclkval_read(struct sja1105_private *priv);
+
#else
static inline int sja1105_ptp_clock_register(struct sja1105_private *priv)
@@ -53,6 +70,11 @@ static inline int sja1105_ptp_reset(struct sja1105_private *priv)
return 0;
}
+static inline u64 sja1105_ptpclkval_read(struct sja1105_private *priv)
+{
+ return 0;
+}
+
#define sja1105et_ptp_cmd NULL
#define sja1105pqrs_ptp_cmd NULL
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 84dc603138cf..1953d8c54af6 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -517,7 +517,6 @@ static struct sja1105_regs sja1105et_regs = {
.ptp_control = 0x17,
.ptpclk = 0x18, /* Spans 0x18 to 0x19 */
.ptpclkrate = 0x1A,
- .ptptsclk = 0x1B, /* Spans 0x1B to 0x1C */
};
static struct sja1105_regs sja1105pqrs_regs = {
@@ -548,7 +547,6 @@ static struct sja1105_regs sja1105pqrs_regs = {
.ptp_control = 0x18,
.ptpclk = 0x19,
.ptpclkrate = 0x1B,
- .ptptsclk = 0x1C,
};
struct sja1105_info sja1105e_info = {
--
2.17.1
^ permalink raw reply related
* [PATCH v2 net-next 5/7] net: dsa: sja1105: Restore PTP time after switch reset
From: Vladimir Oltean @ 2019-09-10 1:34 UTC (permalink / raw)
To: f.fainelli, vivien.didelot, andrew, davem, richardcochran
Cc: netdev, Vladimir Oltean
In-Reply-To: <20190910013501.3262-1-olteanv@gmail.com>
The PTP time of the switch is not preserved when uploading a new static
configuration. Work around this hardware oddity by reading its PTP time
before a static config upload, and restoring it afterwards.
Static config changes are expected to occur at runtime even in scenarios
directly related to PTP, i.e. the Time-Aware Scheduler of the switch is
programmed in this way.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
drivers/net/dsa/sja1105/sja1105_main.c | 32 ++++++++++++-
drivers/net/dsa/sja1105/sja1105_ptp.c | 66 ++++++++++++++++++--------
drivers/net/dsa/sja1105/sja1105_ptp.h | 25 ++++++++++
drivers/net/dsa/sja1105/sja1105_spi.c | 4 --
4 files changed, 101 insertions(+), 26 deletions(-)
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index b886e1a09dfb..5a110b40f5f3 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1384,8 +1384,13 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
*/
static int sja1105_static_config_reload(struct sja1105_private *priv)
{
+ struct ptp_system_timestamp ptp_sts_before;
+ struct ptp_system_timestamp ptp_sts_after;
struct sja1105_mac_config_entry *mac;
int speed_mbps[SJA1105_NUM_PORTS];
+ s64 t1, t2, t3, t4;
+ s64 ptpclkval;
+ s64 t12, t34;
int rc, i;
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
@@ -1400,10 +1405,35 @@ static int sja1105_static_config_reload(struct sja1105_private *priv)
mac[i].speed = SJA1105_SPEED_AUTO;
}
+ /* No PTP operations can run right now */
+ mutex_lock(&priv->ptp_lock);
+
+ ptpclkval = __sja1105_ptp_gettimex(priv, &ptp_sts_before);
+
/* Reset switch and send updated static configuration */
rc = sja1105_static_config_upload(priv);
if (rc < 0)
- goto out;
+ goto out_unlock_ptp;
+
+ rc = __sja1105_ptp_settime(priv, 0, &ptp_sts_after);
+ if (rc < 0)
+ goto out_unlock_ptp;
+
+ t1 = timespec64_to_ns(&ptp_sts_before.pre_ts);
+ t2 = timespec64_to_ns(&ptp_sts_before.post_ts);
+ t3 = timespec64_to_ns(&ptp_sts_after.pre_ts);
+ t4 = timespec64_to_ns(&ptp_sts_after.post_ts);
+ /* Mid point, corresponds to pre-reset PTPCLKVAL */
+ t12 = t1 + (t2 - t1) / 2;
+ /* Mid point, corresponds to post-reset PTPCLKVAL, aka 0 */
+ t34 = t3 + (t4 - t3) / 2;
+ /* Advance PTPCLKVAL by the time it took since its readout */
+ ptpclkval += (t34 - t12);
+
+ __sja1105_ptp_adjtime(priv, ptpclkval);
+
+out_unlock_ptp:
+ mutex_unlock(&priv->ptp_lock);
/* Configure the CGU (PLLs) for MII and RMII PHYs.
* For these interfaces there is no dynamic configuration
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 04693c702b09..a7722c0944fb 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -215,17 +215,26 @@ int sja1105_ptp_reset(struct sja1105_private *priv)
return rc;
}
+/* Caller must hold priv->ptp_lock */
+u64 __sja1105_ptp_gettimex(struct sja1105_private *priv,
+ struct ptp_system_timestamp *sts)
+{
+ u64 ticks;
+
+ ticks = sja1105_ptpclkval_read(priv, sts);
+
+ return sja1105_ticks_to_ns(ticks);
+}
+
static int sja1105_ptp_gettimex(struct ptp_clock_info *ptp,
struct timespec64 *ts,
struct ptp_system_timestamp *sts)
{
struct sja1105_private *priv = ptp_to_sja1105(ptp);
- u64 ticks;
mutex_lock(&priv->ptp_lock);
- ticks = sja1105_ptpclkval_read(priv, sts);
- *ts = ns_to_timespec64(sja1105_ticks_to_ns(ticks));
+ *ts = ns_to_timespec64(__sja1105_ptp_gettimex(priv, sts));
mutex_unlock(&priv->ptp_lock);
@@ -245,33 +254,42 @@ static int sja1105_ptp_mode_set(struct sja1105_private *priv,
}
/* Caller must hold priv->ptp_lock */
-static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 val)
+static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 val,
+ struct ptp_system_timestamp *ptp_sts)
{
const struct sja1105_regs *regs = priv->info->regs;
return sja1105_spi_send_int(priv, SPI_WRITE, regs->ptpclk, &val, 8,
- NULL);
+ ptp_sts);
}
/* Write to PTPCLKVAL while PTPCLKADD is 0 */
-static int sja1105_ptp_settime(struct ptp_clock_info *ptp,
- const struct timespec64 *ts)
+int __sja1105_ptp_settime(struct sja1105_private *priv, u64 ns,
+ struct ptp_system_timestamp *ptp_sts)
{
- u64 ticks = ns_to_sja1105_ticks(timespec64_to_ns(ts));
- struct sja1105_private *priv = ptp_to_sja1105(ptp);
+ u64 ticks = ns_to_sja1105_ticks(ns);
int rc;
- mutex_lock(&priv->ptp_lock);
-
rc = sja1105_ptp_mode_set(priv, PTP_SET_MODE);
if (rc < 0) {
dev_err(priv->ds->dev, "Failed to put PTPCLK in set mode\n");
- goto out;
+ return rc;
}
- rc = sja1105_ptpclkval_write(priv, ticks);
+ return sja1105_ptpclkval_write(priv, ticks, ptp_sts);
+}
+
+static int sja1105_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct sja1105_private *priv = ptp_to_sja1105(ptp);
+ u64 ns = timespec64_to_ns(ts);
+ int rc;
+
+ mutex_lock(&priv->ptp_lock);
+
+ rc = __sja1105_ptp_settime(priv, ns, NULL);
-out:
mutex_unlock(&priv->ptp_lock);
return rc;
@@ -320,23 +338,29 @@ u64 sja1105_ptpclkval_read(struct sja1105_private *priv,
}
/* Write to PTPCLKVAL while PTPCLKADD is 1 */
-static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+int __sja1105_ptp_adjtime(struct sja1105_private *priv, s64 delta)
{
- struct sja1105_private *priv = ptp_to_sja1105(ptp);
s64 ticks = ns_to_sja1105_ticks(delta);
int rc;
- mutex_lock(&priv->ptp_lock);
-
rc = sja1105_ptp_mode_set(priv, PTP_ADD_MODE);
if (rc < 0) {
dev_err(priv->ds->dev, "Failed to put PTPCLK in add mode\n");
- goto out;
+ return rc;
}
- rc = sja1105_ptpclkval_write(priv, ticks);
+ return sja1105_ptpclkval_write(priv, ticks, NULL);
+}
+
+static int sja1105_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct sja1105_private *priv = ptp_to_sja1105(ptp);
+ int rc;
+
+ mutex_lock(&priv->ptp_lock);
+
+ rc = __sja1105_ptp_adjtime(priv, delta);
-out:
mutex_unlock(&priv->ptp_lock);
return rc;
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 80c33e5e4503..c699611e585d 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -42,6 +42,14 @@ int sja1105_ptp_reset(struct sja1105_private *priv);
u64 sja1105_ptpclkval_read(struct sja1105_private *priv,
struct ptp_system_timestamp *sts);
+u64 __sja1105_ptp_gettimex(struct sja1105_private *priv,
+ struct ptp_system_timestamp *sts);
+
+int __sja1105_ptp_settime(struct sja1105_private *priv, u64 ns,
+ struct ptp_system_timestamp *ptp_sts);
+
+int __sja1105_ptp_adjtime(struct sja1105_private *priv, s64 delta);
+
#else
static inline int sja1105_ptp_clock_register(struct sja1105_private *priv)
@@ -77,6 +85,23 @@ static inline u64 sja1105_ptpclkval_read(struct sja1105_private *priv,
return 0;
}
+static inline u64 __sja1105_ptp_gettimex(struct sja1105_private *priv,
+ struct ptp_system_timestamp *sts)
+{
+ return 0;
+}
+
+static inline int __sja1105_ptp_settime(struct sja1105_private *priv, u64 ns,
+ struct ptp_system_timestamp *ptp_sts)
+{
+ return 0;
+}
+
+static inline int __sja1105_ptp_adjtime(struct sja1105_private *priv, s64 delta)
+{
+ return 0;
+}
+
#define sja1105et_ptp_cmd NULL
#define sja1105pqrs_ptp_cmd NULL
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 26985f1209ad..eae9c9baa189 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -496,10 +496,6 @@ int sja1105_static_config_upload(struct sja1105_private *priv)
dev_info(dev, "Succeeded after %d tried\n", RETRIES - retries);
}
- rc = sja1105_ptp_reset(priv);
- if (rc < 0)
- dev_err(dev, "Failed to reset PTP clock: %d\n", rc);
-
dev_info(dev, "Reset switch and programmed static config\n");
out:
--
2.17.1
^ permalink raw reply related
* [PATCH v2 net-next 2/7] net: dsa: sja1105: Change the PTP command access pattern
From: Vladimir Oltean @ 2019-09-10 1:34 UTC (permalink / raw)
To: f.fainelli, vivien.didelot, andrew, davem, richardcochran
Cc: netdev, Vladimir Oltean
In-Reply-To: <20190910013501.3262-1-olteanv@gmail.com>
The PTP command register contains enable bits for:
- Putting the 64-bit PTPCLKVAL register in add/subtract or write mode
- Taking timestamps off of the corrected vs free-running clock
- Starting/stopping the TTEthernet scheduling
- Starting/stopping PPS output
- Resetting the switch
When a command needs to be issued (e.g. "change the PTPCLKVAL from write
mode to add/subtract mode"), one cannot simply write to the command
register setting the PTPCLKADD bit to 1, because that would zeroize the
other settings. One also cannot do a read-modify-write (that would be
too easy for this hardware) because not all bits of the command register
are readable over SPI.
So this leaves us with the only option of keeping the value of the PTP
command register in the driver, and operating on that.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
drivers/net/dsa/sja1105/sja1105.h | 5 +++++
drivers/net/dsa/sja1105/sja1105_ptp.c | 6 +-----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 78094db32622..d8a92646e80a 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -50,6 +50,10 @@ struct sja1105_regs {
u64 qlevel[SJA1105_NUM_PORTS];
};
+struct sja1105_ptp_cmd {
+ u64 resptp; /* reset */
+};
+
struct sja1105_info {
u64 device_id;
/* Needed for distinction between P and R, and between Q and S
@@ -89,6 +93,7 @@ struct sja1105_private {
struct spi_device *spidev;
struct dsa_switch *ds;
struct sja1105_port ports[SJA1105_NUM_PORTS];
+ struct sja1105_ptp_cmd ptp_cmd;
struct ptp_clock_info ptp_caps;
struct ptp_clock *clock;
/* The cycle counter translates the PTP timestamps (based on
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index d0c93d0449dc..13f9f5799e46 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -54,10 +54,6 @@
#define cc_to_sja1105(d) container_of((d), struct sja1105_private, tstamp_cc)
#define dw_to_sja1105(d) container_of((d), struct sja1105_private, refresh_work)
-struct sja1105_ptp_cmd {
- u64 resptp; /* reset */
-};
-
int sja1105_get_ts_info(struct dsa_switch *ds, int port,
struct ethtool_ts_info *info)
{
@@ -218,8 +214,8 @@ int sja1105_ptpegr_ts_poll(struct sja1105_private *priv, int port, u64 *ts)
int sja1105_ptp_reset(struct sja1105_private *priv)
{
+ struct sja1105_ptp_cmd cmd = priv->ptp_cmd;
struct dsa_switch *ds = priv->ds;
- struct sja1105_ptp_cmd cmd = {0};
int rc;
mutex_lock(&priv->ptp_lock);
--
2.17.1
^ permalink raw reply related
* [PATCH v2 net-next 0/7] Hardware operations for the SJA1105 DSA PTP clock
From: Vladimir Oltean @ 2019-09-10 1:34 UTC (permalink / raw)
To: f.fainelli, vivien.didelot, andrew, davem, richardcochran
Cc: netdev, Vladimir Oltean
This series performs the following changes to the PTP portion of the
driver:
- Deletes the timecounter/cyclecounter implementation of a free-running
PHC and replaces it with actual hardware corrections of the PTP
timestamping clock.
- Deletes the approximation of timecounter_init() with the argument of
ktime_to_ns(ktime_get_real()) (aka system clock) that is currently
done in sja1105_ptp_reset. Now that the PTP clock can keep the wall
time in hardware, it makes sense to do so (and thus arises the need
to restore the PTP time after resetting the switch).
- Profits from the fact that timecounter/cyclecounter code has been
removed from sja1105_main.c, and goes a step further by doing some
more cleanup and making the PTP Kconfig more self-contained. The
cleanup also covers preparing the driver for the gettimex API
(PTP_SYS_OFFSET_EXTENDED ioctl) - an API which at the moment does
not have the best implementation available in the kernel for this
switch, but for which the addition of a better API is trivial after
the cleanup.
drivers/net/dsa/sja1105/sja1105.h | 18 +-
drivers/net/dsa/sja1105/sja1105_main.c | 66 ++++--
drivers/net/dsa/sja1105/sja1105_ptp.c | 283 +++++++++++++------------
drivers/net/dsa/sja1105/sja1105_ptp.h | 78 +++++++
drivers/net/dsa/sja1105/sja1105_spi.c | 42 ++--
5 files changed, 310 insertions(+), 177 deletions(-)
--
These are the PTP patches that have been split apart from:
https://www.spinics.net/lists/netdev/msg597214.html
("tc-taprio offload for SJA1105 DSA").
As such I have marked them as v2. There are no changes since the
tc-taprio patchset, it is only for better review-ability.
Vladimir Oltean (7):
net: dsa: sja1105: Get rid of global declaration of struct
ptp_clock_info
net: dsa: sja1105: Change the PTP command access pattern
net: dsa: sja1105: Switch to hardware operations for PTP
net: dsa: sja1105: Implement the .gettimex64 system call for PTP
net: dsa: sja1105: Restore PTP time after switch reset
net: dsa: sja1105: Disallow management xmit during switch reset
net: dsa: sja1105: Move PTP data to its own private structure
2.17.1
^ permalink raw reply
* [PATCH v2 net-next 1/7] net: dsa: sja1105: Get rid of global declaration of struct ptp_clock_info
From: Vladimir Oltean @ 2019-09-10 1:34 UTC (permalink / raw)
To: f.fainelli, vivien.didelot, andrew, davem, richardcochran
Cc: netdev, Vladimir Oltean
In-Reply-To: <20190910013501.3262-1-olteanv@gmail.com>
We need priv->ptp_caps to hold a structure and not just a pointer,
because we use container_of in the various PTP callbacks.
Therefore, the sja1105_ptp_caps structure declared in the global memory
of the driver serves no further purpose after copying it into
priv->ptp_caps.
So just populate priv->ptp_caps with the needed operations and remove
sja1105_ptp_caps.
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
---
drivers/net/dsa/sja1105/sja1105_ptp.c | 29 +++++++++++++--------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index d8e8dd59f3d1..d0c93d0449dc 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -347,29 +347,28 @@ static void sja1105_ptp_overflow_check(struct work_struct *work)
schedule_delayed_work(&priv->refresh_work, SJA1105_REFRESH_INTERVAL);
}
-static const struct ptp_clock_info sja1105_ptp_caps = {
- .owner = THIS_MODULE,
- .name = "SJA1105 PHC",
- .adjfine = sja1105_ptp_adjfine,
- .adjtime = sja1105_ptp_adjtime,
- .gettime64 = sja1105_ptp_gettime,
- .settime64 = sja1105_ptp_settime,
- .max_adj = SJA1105_MAX_ADJ_PPB,
-};
-
int sja1105_ptp_clock_register(struct sja1105_private *priv)
{
struct dsa_switch *ds = priv->ds;
/* Set up the cycle counter */
priv->tstamp_cc = (struct cyclecounter) {
- .read = sja1105_ptptsclk_read,
- .mask = CYCLECOUNTER_MASK(64),
- .shift = SJA1105_CC_SHIFT,
- .mult = SJA1105_CC_MULT,
+ .read = sja1105_ptptsclk_read,
+ .mask = CYCLECOUNTER_MASK(64),
+ .shift = SJA1105_CC_SHIFT,
+ .mult = SJA1105_CC_MULT,
};
+ priv->ptp_caps = (struct ptp_clock_info) {
+ .owner = THIS_MODULE,
+ .name = "SJA1105 PHC",
+ .adjfine = sja1105_ptp_adjfine,
+ .adjtime = sja1105_ptp_adjtime,
+ .gettime64 = sja1105_ptp_gettime,
+ .settime64 = sja1105_ptp_settime,
+ .max_adj = SJA1105_MAX_ADJ_PPB,
+ };
+
mutex_init(&priv->ptp_lock);
- priv->ptp_caps = sja1105_ptp_caps;
priv->clock = ptp_clock_register(&priv->ptp_caps, ds->dev);
if (IS_ERR_OR_NULL(priv->clock))
--
2.17.1
^ permalink raw reply related
* Re: [PATCH v1 net-next 00/15] tc-taprio offload for SJA1105 DSA
From: Vladimir Oltean @ 2019-09-10 1:06 UTC (permalink / raw)
To: Gomes, Vinicius
Cc: David Miller, f.fainelli@gmail.com, vivien.didelot@gmail.com,
andrew@lunn.ch, Patel, Vedang, richardcochran@gmail.com,
Voon, Weifeng, jiri@mellanox.com, m-karicheri2@ti.com,
Jose.Abreu@synopsys.com, ilias.apalodimas@linaro.org,
jhs@mojatatu.com, xiyou.wangcong@gmail.com,
kurt.kanzenbach@linutronix.de, netdev@vger.kernel.org
In-Reply-To: <BN6PR11MB00500FA0D6B5D39E794B44BB86B70@BN6PR11MB0050.namprd11.prod.outlook.com>
Hi Vinicius!
On 10/09/2019, Gomes, Vinicius <vinicius.gomes@intel.com> wrote:
> Hi Vladimir,
>
>> This is a warning that I will toss this patch series if it receives no
>> series review in
>> the next couple of days.
>
> Sorry about the delay on reviewing this. On top on the usual business, some
> changes to the
> IT infrastructure here have hit my email workflow pretty hard.
>
No problem, I've also been traveling and hence delaying patching some
taprio issues we discussed last week.
> I am taking a look at the datasheet in the meantime, it's been a long time
> since I looked at it,
> the idea is to help review the scheduler from hell :-)
>
Ok, but don't get hung up on it :)
> One thing that wasn't clear is what you did to test this series.
>
Right, this is one particular aspect I didn't really insist on a lot,
and I hope I'm not going to lose everybody when explaining it, because
it requires a bit of understanding of how sja1105 integrates with DSA
overall.
The basic idea is that none of the switch's ports is special in any
way from a hardware perspective, and that includes the "CPU port". But
to support the DSA paradigm of annotating frames that go towards the
CPU with information about the source port they came from, I am
repurposing VLAN tags with a customized EtherType (0xdadb instead of
0x8100).
This is relevant because to a 802.1Q bridge, the QoS hints come from:
- The 3-bit PCP field from the VLAN header
- A default, port-based VLAN PCP in case RX traffic is untagged (I
would also like to have a knob to change this, currently hardcoded to
0 in the driver!)
So to inject a frame into a sja1105 TX queue means to annotate it with
a VLAN PCP which maps to that queue. In the datasheet there is a
VLAN_PMAP register that manages the ingress-priority ->
egress-priority -> egress-queue mapping. I'm keeping that hardcoded to
1:1:1 for sanity.
Now back to the driver's use of the VLAN header.
- When the sja1105 operates as a bridge with vlan_filtering=1, the
VLANs are installed by the user (via the bridge command from
iproute2), parsed by the switch and VLAN-tagged traffic is expected to
be received from the connected ports. So it honors the VLAN PCP in
this mode.
- When it isn't (it is either a VLAN-unaware bridge, or 4x standalone
ports), then the VLAN header (with custom EtherType) is used to route
frames from the CPU towards the correct egress switch port. A
consequence of it still being parsed by the switch as VLAN is that the
host Linux system is able to specify the VLAN PCP to mean "inject in
this egress queue".
Now because the EtherType changes between these modes of operation,
the switch can either expose the VLAN PCP to (a) the host Linux netdev
queues (as DSA sees them*), or (b) to the devices connected to its
external ports.
* When operating in vlan_filtering=1 mode, technically the sja1105
becomes a "managed dumb switch" (control traffic: PTP, STP etc still
works, but for general purpose traffic you must now open your socket
on the DSA master netdevice, not the switch ports). So the DSA master
netdevice is in fact just another node connected to the switch in this
mode, for all the hardware cares. So technically you _can_ still do
QoS from the host Linux if you put a VLAN sub-interface on top of the
DSA master netdevice.
Now, to finally answer your question. I have used the sja1105 as a
bridge between two endpoints who are sending/receiving VLAN-tagged
traffic in a 3-board network synchronized by PTP. There is a schedule
configured on the switch that is aligned to the beginning of the
second, and the cycle time is known. PTP uses traffic class 7, and the
scheduled traffic uses traffic class 5.
The traffic sender is not too complicated: it's a raw L2 socket that
is sending scheduled traffic based on calls to
clock_nanosleep(CLOCK_REALTIME) and an a-priori knowledge of the
network schedule (it's invoked from a script), minus an advance time.
The reason I'm not sharing too many details about the traffic sender
now is that I just configured the advance time experimentally and
there's no hard guarantee that its egress latency will be smaller and
that the frames will always be sent on time. But the sender's
CLOCK_REALTIME is in sync with its /dev/ptp0 by phc2sys, that's why I
can poll it instead of polling the hardware clock.
Then I am taking TX and RX timestamps for the scheduled traffic on the
sender and on the receiver. I can do a reasonable diff between the 2
timestamps because the PHCs are kept in sync by PTP, and that is my
path delay. I expect it to be more or less 2x a single link's path
delay (sender -> bridge + bridge -> receiver), and not in any case a
multiple of the cycle time (which is a sign that cycles were missed).
As for the sja1105-as-endpoint use case, I checked that I can inject
traffic into each particular queue, but I didn't really explore it
further.
I'll make sure this subtlety is more clearly formulated in the next
version of the patch.
> Cheers,
> --
> Vinicius
>
>
>
Actually let me ask you a few questions as well:
- I'm trying to understand what is the correct use of the tc-mqprio
"queues" argument. I've only tested it with "1@0 1@1 1@2 1@3 1@4 1@5
1@6 1@7", which I believe is equivalent to not specifying it at all? I
believe it should be interpreted as: "allocate this many netdev queues
for each traffic class", where "traffic class" means a group of queues
having the same priority (equal to the traffic class's number), but
engaged in a strict priority scheme with other groups of queues
(traffic classes). Right?
- DSA can only formally support multi-queue, because its connection to
the Linux host is through an Ethernet MAC (FIFO). Even if the DSA
master netdevice may be multi-queue, allocating and separating those
queues for each front-panel switch port is a task best left to the
user/administrator. This means that DSA should reject all other
"queues" mappings except the trivial one I pointed to above?
- I'm looking at the "tc_mask_to_queue_mask" function that I'm
carrying along from your initial offload RFC. Are you sure this is the
right approach? I don't feel a need to translate from traffic class to
netdev queues, considering that in the general case, a traffic class
is a group of queues, and 802.1Qbv doesn't really specify that you can
gate individual queues from a traffic class. In the software
implementation you are only looking at netdev_get_prio_tc_map, which
is not equivalent as far as my understanding goes, but saner.
Actually 802.1Q-2018 does not really clarify this either. It looks to
me like they use the term "queue" and "traffic class" interchangeably.
See two examples below (emphasis mine):
Q.2 Using gate operations to create protected windows
The enhancements for scheduled traffic described in 8.6.8.4 allow
transmission to be switched on and off on a timed basis for each
_traffic class_ that is implemented on a port. This switching is
achieved by means of individual on/off transmission gates associated
with each _traffic class_ and a list of gate operations that control
the gates; an individual SetGateStates operation has a time delay
parameter that indicates the delay after the gate operation is
executed until the next operation is to occur, and a GateState
parameter that defines a vector of up to eight state values (open or
closed) that is to be applied to each gate when the operation is
executed. The gate operations allow any combination of open/closed
states to be defined, and the mechanism makes no assumptions about
which _traffic classes_ are being “protected” and which are
“unprotected”; any such assumptions are left to the designer of the
sequence of gate operations.
Table 8-7—Gate operations
The GateState parameter indicates a value, open or closed, for each of
the Port’s _queues_.
- What happens with the "clockid" argument now that hardware offload
is possible? Do we allow "/dev/ptp0" to be specified as input?
Actually this question is relevant to your txtime-assist mode as well:
doesn't it assume that there is an implicit phc2sys instance running
to keep the system time in sync with the PHC?
Thanks,
-Vladimir
^ permalink raw reply
* Re: [PATCH net] sctp: fix the missing put_user when dumping transport thresholds
From: Marcelo Ricardo Leitner @ 2019-09-10 0:03 UTC (permalink / raw)
To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman
In-Reply-To: <3fa4f7700c93f06530c80bc666d1696cb7c077de.1568014409.git.lucien.xin@gmail.com>
On Mon, Sep 09, 2019 at 03:33:29PM +0800, Xin Long wrote:
> This issue causes SCTP_PEER_ADDR_THLDS sockopt not to be able to dump
> a transport thresholds info.
>
> Fix it by adding 'goto' put_user in sctp_getsockopt_paddr_thresholds.
>
> Fixes: 8add543e369d ("sctp: add SCTP_FUTURE_ASSOC for SCTP_PEER_ADDR_THLDS sockopt")
> Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
^ permalink raw reply
* RE: [PATCH v1 net-next 00/15] tc-taprio offload for SJA1105 DSA
From: Gomes, Vinicius @ 2019-09-09 23:49 UTC (permalink / raw)
To: David Miller, olteanv@gmail.com
Cc: f.fainelli@gmail.com, vivien.didelot@gmail.com, andrew@lunn.ch,
Patel, Vedang, richardcochran@gmail.com, Voon, Weifeng,
jiri@mellanox.com, m-karicheri2@ti.com, Jose.Abreu@synopsys.com,
ilias.apalodimas@linaro.org, jhs@mojatatu.com,
xiyou.wangcong@gmail.com, kurt.kanzenbach@linutronix.de,
netdev@vger.kernel.org
In-Reply-To: <20190907.155549.1880685136488421385.davem@davemloft.net>
Hi Vladimir,
> This is a warning that I will toss this patch series if it receives no series review in
> the next couple of days.
Sorry about the delay on reviewing this. On top on the usual business, some changes to the
IT infrastructure here have hit my email workflow pretty hard.
I am taking a look at the datasheet in the meantime, it's been a long time since I looked at it,
the idea is to help review the scheduler from hell :-)
One thing that wasn't clear is what you did to test this series.
Cheers,
--
Vinicius
^ permalink raw reply
* Re: general protection fault in qdisc_put
From: Cong Wang @ 2019-09-09 23:14 UTC (permalink / raw)
To: Linus Torvalds
Cc: syzbot, Akinobu Mita, Andrew Morton, David Miller, Dmitry Vyukov,
Jamal Hadi Salim, Jiri Pirko, Linux List Kernel Mailing,
Michal Hocko, Netdev, syzkaller-bugs
In-Reply-To: <CAHk-=wgZneAegyitz7f+JLjB6=28ewtvT7M4xy_a-wqsTjOX_w@mail.gmail.com>
On Sun, Sep 8, 2019 at 10:19 AM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> I see two solutions:
>
> (a) move the
>
> q->qdisc = &noop_qdisc;
>
> up earlier in sfb_init(), so that qdisc is always initialized
> after sfb_init(), even on failure.
>
> (b) just make qdisc_put(NULL) just silently work as a no-op.
>
> (c) change all the semantics to not call ->destroy if ->init failed.
>
> Honestly, (a) seems very fragile - do all the other init routines do
> this? And (c) sounds like a big change, and very fragile too.
>
> So I'd suggest that qdisc_put() be made to just ignore a NULL pointer
> (and maybe an error pointer too?).
I think (a) is the best solution here.
(c) changes too much, we already rely on this behavior.
(b) is not bad either, just very slightly more risky.
Alternatively, we can add a quick NULL check inside
sfb_destroy().
I can send out a patch if you don't.
Thanks for looking at this!
^ permalink raw reply
* Re: [PATCH v2] net: enable wireless core features with LEGACY_WEXT_ALLCONFIG
From: Greg KH @ 2019-09-09 22:58 UTC (permalink / raw)
To: Mark Salyzyn
Cc: linux-kernel, kernel-team, Johannes Berg, David S. Miller,
Marcel Holtmann, linux-wireless, netdev, stable
In-Reply-To: <b7027a5d-5d75-677b-0e9b-cd70e5e30092@android.com>
On Mon, Sep 09, 2019 at 07:24:29AM -0700, Mark Salyzyn wrote:
> On 9/6/19 4:30 PM, Greg KH wrote:
> > On Fri, Sep 06, 2019 at 12:24:00PM -0700, Mark Salyzyn wrote:
> > > In embedded environments the requirements are to be able to pick and
> > > chose which features one requires built into the kernel. If an
> > > embedded environment wants to supports loading modules that have been
> > > kbuilt out of tree, there is a need to enable hidden configurations
> > > for legacy wireless core features to provide the API surface for
> > > them to load.
> > >
> > > Introduce CONFIG_LEGACY_WEXT_ALLCONFIG to select all legacy wireless
> > > extension core features by activating in turn all the associated
> > > hidden configuration options, without having to specifically select
> > > any wireless module(s).
> > >
> > > Signed-off-by: Mark Salyzyn <salyzyn@android.com>
> > > Cc: kernel-team@android.com
> > > Cc: Johannes Berg <johannes@sipsolutions.net>
> > > Cc: "David S. Miller" <davem@davemloft.net>
> > > Cc: Marcel Holtmann <marcel@holtmann.org>
> > > Cc: linux-wireless@vger.kernel.org
> > > Cc: netdev@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > Cc: stable@vger.kernel.org # 4.19
> > > ---
> > > v2: change name and documentation to CONFIG_LEGACY_WEXT_ALLCONFIG
> > > ---
> > > net/wireless/Kconfig | 14 ++++++++++++++
> > > 1 file changed, 14 insertions(+)
> > >
> > > diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
> > > index 67f8360dfcee..0d646cf28de5 100644
> > > --- a/net/wireless/Kconfig
> > > +++ b/net/wireless/Kconfig
> > > @@ -17,6 +17,20 @@ config WEXT_SPY
> > > config WEXT_PRIV
> > > bool
> > > +config LEGACY_WEXT_ALLCONFIG
> > > + bool "allconfig for legacy wireless extensions"
> > > + select WIRELESS_EXT
> > > + select WEXT_CORE
> > > + select WEXT_PROC
> > > + select WEXT_SPY
> > > + select WEXT_PRIV
> > > + help
> > > + Config option used to enable all the legacy wireless extensions to
> > > + the core functionality used by add-in modules.
> > > +
> > > + If you are not building a kernel to be used for a variety of
> > > + out-of-kernel built wireless modules, say N here.
> > > +
> > > config CFG80211
> > > tristate "cfg80211 - wireless configuration API"
> > > depends on RFKILL || !RFKILL
> > > --
> > > 2.23.0.187.g17f5b7556c-goog
> > >
> > How is this patch applicable to stable kernels???
>
> A) worth a shot ;-}
Not nice, please, you know better :)
> B) there is a shortcoming in _all_ kernel versions with respect to hidden
> configurations options like this, hoping to set one precedent in how to
> handle them if acceptable to the community.
That's fine, but it's a new feature, not for stable.
> C) [AGENDA ALERT] Android _will_ be back-porting this to android-4.19 kernel
> anyway, would help maintenance if via stable. <holding hat in hand>
That's fine, lots of distros backport loads of stuff for new features
for stuff that is upstream. That's trivial to do, don't try to abuse
the stable tree for new features like this please. It only makes
maintainers grumpy when you do so :(
> D) Not an ABI or interface break, does not introduce instability, but rather
> keeps downstream kernels of any distributions from having to hack in their
> own alternate means of dealing with this problem leading to further
> fragmentation.
Again, new feature, not fixing a bug, so not applicable for stable.
For penance I require a handwritten copy of:
https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH] libbpf: Don't error out if getsockopt() fails for XDP_OPTIONS
From: Toke Høiland-Jørgensen @ 2019-09-09 23:06 UTC (permalink / raw)
To: Yonghong Song, Alexei Starovoitov, Daniel Borkmann,
netdev@vger.kernel.org, bpf@vger.kernel.org
In-Reply-To: <8e909219-a225-b242-aaa5-bee1180aed48@fb.com>
Yonghong Song <yhs@fb.com> writes:
> On 9/9/19 10:46 AM, Toke Høiland-Jørgensen wrote:
>> The xsk_socket__create() function fails and returns an error if it cannot
>> get the XDP_OPTIONS through getsockopt(). However, support for XDP_OPTIONS
>> was not added until kernel 5.3, so this means that creating XSK sockets
>> always fails on older kernels.
>>
>> Since the option is just used to set the zero-copy flag in the xsk struct,
>> there really is no need to error out if the getsockopt() call fails.
>>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> ---
>> tools/lib/bpf/xsk.c | 8 ++------
>> 1 file changed, 2 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
>> index 680e63066cf3..598e487d9ce8 100644
>> --- a/tools/lib/bpf/xsk.c
>> +++ b/tools/lib/bpf/xsk.c
>> @@ -603,12 +603,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
>>
>> optlen = sizeof(opts);
>> err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen);
>> - if (err) {
>> - err = -errno;
>> - goto out_mmap_tx;
>> - }
>> -
>> - xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
>> + if (!err)
>> + xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
>>
>> if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
>> err = xsk_setup_xdp_prog(xsk);
>
> Since 'zc' is not used by anybody, maybe all codes 'zc' related can be
> removed? It can be added back back once there is an interface to use
> 'zc'?
Fine with me; up to the maintainers what they prefer, I guess? :)
-Toke
^ permalink raw reply
* Re: [PATCH v4 bpf-next 1/4] capability: introduce CAP_BPF and CAP_TRACING
From: Andy Lutomirski @ 2019-09-09 22:52 UTC (permalink / raw)
To: Alexei Starovoitov, James Morris, LSM List, Kees Cook, Jann Horn,
Steven Rostedt
Cc: David S. Miller, Daniel Borkmann, Peter Zijlstra,
Network Development, bpf, kernel-team, Linux API
In-Reply-To: <20190906231053.1276792-2-ast@kernel.org>
On Fri, Sep 6, 2019 at 4:10 PM Alexei Starovoitov <ast@kernel.org> wrote:
>
> Split BPF and perf/tracing operations that are allowed under
> CAP_SYS_ADMIN into corresponding CAP_BPF and CAP_TRACING.
> For backward compatibility include them in CAP_SYS_ADMIN as well.
>
> The end result provides simple safety model for applications that use BPF:
> - for tracing program types
> BPF_PROG_TYPE_{KPROBE, TRACEPOINT, PERF_EVENT, RAW_TRACEPOINT, etc}
> use CAP_BPF and CAP_TRACING
> - for networking program types
> BPF_PROG_TYPE_{SCHED_CLS, XDP, CGROUP_SKB, SK_SKB, etc}
> use CAP_BPF and CAP_NET_ADMIN
>
> There are few exceptions from this simple rule:
> - bpf_trace_printk() is allowed in networking programs, but it's using
> ftrace mechanism, hence this helper needs additional CAP_TRACING.
> - cpumap is used by XDP programs. Currently it's kept under CAP_SYS_ADMIN,
> but could be relaxed to CAP_NET_ADMIN in the future.
> - BPF_F_ZERO_SEED flag for hash/lru map is allowed under CAP_SYS_ADMIN only
> to discourage production use.
> - BPF HW offload is allowed under CAP_SYS_ADMIN.
> - cg_sysctl, cg_device, lirc program types are neither networking nor tracing.
> They can be loaded under CAP_BPF, but attach is allowed under CAP_NET_ADMIN.
> This will be cleaned up in the future.
>
> userid=nobody + (CAP_TRACING | CAP_NET_ADMIN) + CAP_BPF is safer than
> typical setup with userid=root and sudo by existing bpf applications.
> It's not secure, since these capabilities:
> - allow bpf progs access arbitrary memory
> - let tasks access any bpf map
> - let tasks attach/detach any bpf prog
>
> bpftool, bpftrace, bcc tools binaries should not be installed with
> cap_bpf+cap_tracing, since unpriv users will be able to read kernel secrets.
>
> CAP_BPF, CAP_NET_ADMIN, CAP_TRACING are roughly equal in terms of
> damage they can make to the system.
> Example:
> CAP_NET_ADMIN can stop network traffic. CAP_BPF can write into map
> and if that map is used by firewall-like bpf prog the network traffic
> may stop.
> CAP_BPF allows many bpf prog_load commands in parallel. The verifier
> may consume large amount of memory and significantly slow down the system.
> CAP_TRACING allows many kprobes that can slow down the system.
Do we want to split CAP_TRACE_KERNEL and CAP_TRACE_USER? It's not
entirely clear to me that it's useful.
>
> In the future more fine-grained bpf permissions may be added.
>
> Existing unprivileged BPF operations are not affected.
> In particular unprivileged users are allowed to load socket_filter and cg_skb
> program types and to create array, hash, prog_array, map-in-map map types.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
> include/linux/capability.h | 18 +++++++++++
> include/uapi/linux/capability.h | 49 ++++++++++++++++++++++++++++-
> security/selinux/include/classmap.h | 4 +--
> 3 files changed, 68 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/capability.h b/include/linux/capability.h
> index ecce0f43c73a..13eb49c75797 100644
> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -247,6 +247,24 @@ static inline bool ns_capable_setid(struct user_namespace *ns, int cap)
> return true;
> }
> #endif /* CONFIG_MULTIUSER */
> +
> +static inline bool capable_bpf(void)
> +{
> + return capable(CAP_SYS_ADMIN) || capable(CAP_BPF);
> +}
> +static inline bool capable_tracing(void)
> +{
> + return capable(CAP_SYS_ADMIN) || capable(CAP_TRACING);
> +}
> +static inline bool capable_bpf_tracing(void)
> +{
> + return capable(CAP_SYS_ADMIN) || (capable(CAP_BPF) && capable(CAP_TRACING));
> +}
> +static inline bool capable_bpf_net_admin(void)
> +{
> + return (capable(CAP_SYS_ADMIN) || capable(CAP_BPF)) && capable(CAP_NET_ADMIN);
> +}
> +
These helpers are all wrong, unfortunately, since they will produce
inappropriate audit events. capable_bpf() should look more like this:
if (capable_noaudit(CAP_BPF))
return capable(CAP_BPF);
if (capable_noaudit(CAP_SYS_ADMIN))
return capable(CAP_SYS_ADMIN);
return capable(CAP_BPF);
James, etc: should there instead be new helpers to do this more
generically rather than going through the noaudit contortions? My
code above is horrible.
^ permalink raw reply
* Re: Default qdisc not correctly initialized with custom MTU
From: Cong Wang @ 2019-09-09 22:52 UTC (permalink / raw)
To: Holger Hoffstätte; +Cc: Netdev
In-Reply-To: <211c7151-7500-f895-7fd7-2c868dd48579@applied-asynchrony.com>
On Mon, Sep 9, 2019 at 5:44 AM Holger Hoffstätte
<holger@applied-asynchrony.com> wrote:
> I can't help but feel this is a slight bug in terms of initialization order,
> and that the default qdisc should only be created when it's first being
> used/attached to a link, not when the sysctls are configured.
Yeah, this is because the fq_codel qdisc is initialized once and
doesn't get any notification when the netdev's MTU get changed.
We can "fix" this by adding a NETDEV_CHANGEMTU notifier to
qdisc's, but I don't know if it is really worth the effort.
Is there any reason you can't change that order?
Thanks.
^ permalink raw reply
* Re: [PATCH] net/ibmvnic: Fix missing { in __ibmvnic_reset
From: Juliet Kim @ 2019-09-09 22:49 UTC (permalink / raw)
To: Michal Suchanek, netdev, David S. Miller
Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Thomas Falcon, John Allen, linuxppc-dev, linux-kernel
In-Reply-To: <20190909204451.7929-1-msuchanek@suse.de>
On 9/9/19 3:44 PM, Michal Suchanek wrote:
> Commit 1c2977c09499 ("net/ibmvnic: free reset work of removed device from queue")
> adds a } without corresponding { causing build break.
>
> Fixes: 1c2977c09499 ("net/ibmvnic: free reset work of removed device from queue")
> Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Reviewed-by: Juliet Kim <julietk@linux.vnet.ibm.com>
> ---
> drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
> index 6644cabc8e75..5cb55ea671e3 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -1984,7 +1984,7 @@ static void __ibmvnic_reset(struct work_struct *work)
> rwi = get_next_rwi(adapter);
> while (rwi) {
> if (adapter->state == VNIC_REMOVING ||
> - adapter->state == VNIC_REMOVED)
> + adapter->state == VNIC_REMOVED) {
> kfree(rwi);
> rc = EBUSY;
> break;
^ permalink raw reply
* [net-next v2 00/15][pull request] Intel Wired LAN Driver Updates 2019-09-09
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann
This series contains a variety of cold and hot savoury changes to Intel
drivers. Some of the fixes could be considered for stable even though
the author did not request it.
Hulk Robert cleans up (i.e. removes) a function that has no caller for
the iavf driver.
Radoslaw fixes an issue when there is no link in the VM after the
hypervisor is restored from a low-power state due to the driver not
properly restoring features in the device that had been disabled during
the suspension for ixgbevf.
Kai-Heng Feng modified e1000e to use mod_delayed_work() to help resolve
a hot plug speed detection issue by adding a deterministic 1 second
delay before running watchdog task after an interrupt.
Sasha moves functions around to avoid forward declarations, since the
forward declarations are not necessary for these static functions in
igc. Also added a check for igc during driver probe to validate the NVM
checksum. Cleaned up code defines that were not being used in the igc
driver. Adds support for IP generic transmit checksum offload in the
igc driver.
Updated the iavf kernel documentation by a developer with no life.
Jake provides another fm10k update to a local variable for ease of code
readability.
Mitch fixes the iavf driver to allow the VF to override the MAC address
set by the host, if the VF is in "trusted" mode.
Mauro S. M. Rodrigues provides several changes for i40e driver, first
with resolving hw_dbg usage and referencing a i40e_hw attribute. Also
implemented a debug macro using pr_debug, since the use of netdev_dbg
could cause a NULL pointer dereference during probe. Finally cleaned up
code that is no longer used or needed.
Firo Yang provides a change in the ixgbe driver to ensure we sync the
first fragment unconditionally to help resolve an issue seen in the XEN
environment when the upper network stack could receive an incomplete
network packet.
Mariusz adds a missing device to the i40e PCI table in the driver.
v2: Mauro S. M. Rodrigues updated patches 10 & 11 of the series based on
feedback from Jakub Kicinski. Also updated patch 13 description so
that the "Fixes:" tag was no wrapped.
The following are changes since commit 6703a605b5ab33502d7a327de880188013d7c377:
Merge branch 'net-tls-small-TX-offload-optimizations'
and are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 10GbE
Firo Yang (1):
ixgbe: sync the first fragment unconditionally
Jacob Keller (1):
fm10k: use a local variable for the frag pointer
Jeff Kirsher (1):
Documentation: iavf: Update the Intel LAN driver doc for iavf
Kai-Heng Feng (1):
e1000e: Make speed detection on hotplugging cable more reliable
Mariusz Stachura (1):
i40e: Add support for X710 device
Mauro S. M. Rodrigues (3):
i40e: fix hw_dbg usage in i40e_hmc_get_object_va
i40e: Implement debug macro hw_dbg using dev_dbg
i40e: Remove EMPR traces from debugfs facility
Mitch Williams (1):
iavf: allow permanent MAC address to change
Radoslaw Tyl (1):
ixgbevf: Link lost in VM on ixgbevf when restoring from freeze or
suspend
Sasha Neftin (4):
igc: Remove useless forward declaration
igc: Add NVM checksum validation
igc: Remove unneeded PCI bus defines
igc: Add tx_csum offload functionality
YueHaibing (1):
iavf: remove unused debug function iavf_debug_d
.../networking/device_drivers/intel/iavf.rst | 115 ++++++++---
drivers/net/ethernet/intel/e1000e/netdev.c | 12 +-
drivers/net/ethernet/intel/fm10k/fm10k_main.c | 8 +-
drivers/net/ethernet/intel/i40e/i40e.h | 1 -
drivers/net/ethernet/intel/i40e/i40e_common.c | 1 +
.../net/ethernet/intel/i40e/i40e_debugfs.c | 4 -
drivers/net/ethernet/intel/i40e/i40e_hmc.c | 1 +
.../net/ethernet/intel/i40e/i40e_lan_hmc.c | 21 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_osdep.h | 5 +-
drivers/net/ethernet/intel/iavf/iavf.h | 1 -
drivers/net/ethernet/intel/iavf/iavf_main.c | 26 ---
drivers/net/ethernet/intel/igc/igc.h | 4 +
drivers/net/ethernet/intel/igc/igc_base.h | 8 +
drivers/net/ethernet/intel/igc/igc_defines.h | 9 +-
drivers/net/ethernet/intel/igc/igc_mac.c | 73 ++++---
drivers/net/ethernet/intel/igc/igc_main.c | 106 ++++++++++
drivers/net/ethernet/intel/igc/igc_phy.c | 192 +++++++++---------
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 +-
.../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 +
20 files changed, 373 insertions(+), 232 deletions(-)
--
2.21.0
^ permalink raw reply
* [net-next v2 01/15] iavf: remove unused debug function iavf_debug_d
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem
Cc: YueHaibing, netdev, nhorman, sassmann, Hulk Robot, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: YueHaibing <yuehaibing@huawei.com>
There is no caller of function iavf_debug_d() in tree since
commit 75051ce4c5d8 ("iavf: Fix up debug print macro"),
so it can be removed.
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/iavf/iavf_main.c | 22 ---------------------
1 file changed, 22 deletions(-)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 9d2b50964a08..554aa619ff02 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -142,28 +142,6 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
return 0;
}
-/**
- * iavf_debug_d - OS dependent version of debug printing
- * @hw: pointer to the HW structure
- * @mask: debug level mask
- * @fmt_str: printf-type format description
- **/
-void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...)
-{
- char buf[512];
- va_list argptr;
-
- if (!(mask & ((struct iavf_hw *)hw)->debug_mask))
- return;
-
- va_start(argptr, fmt_str);
- vsnprintf(buf, sizeof(buf), fmt_str, argptr);
- va_end(argptr);
-
- /* the debug string is already formatted with a newline */
- pr_info("%s", buf);
-}
-
/**
* iavf_schedule_reset - Set the flags and schedule a reset event
* @adapter: board private structure
--
2.21.0
^ permalink raw reply related
* [net-next v2 07/15] igc: Add NVM checksum validation
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: Sasha Neftin <sasha.neftin@intel.com>
Add NVM checksum validation during probe functionality.
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/igc/igc_main.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 251552855c40..965d1c939f0f 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4133,6 +4133,15 @@ static int igc_probe(struct pci_dev *pdev,
*/
hw->mac.ops.reset_hw(hw);
+ if (igc_get_flash_presence_i225(hw)) {
+ if (hw->nvm.ops.validate(hw) < 0) {
+ dev_err(&pdev->dev,
+ "The NVM Checksum Is Not Valid\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+ }
+
if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
/* copy the MAC address out of the NVM */
if (hw->mac.ops.read_mac_addr(hw))
--
2.21.0
^ permalink raw reply related
* [net-next v2 09/15] igc: Remove unneeded PCI bus defines
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: Sasha Neftin <sasha.neftin@intel.com>
PCIe device control 2 defines does not use internally.
This patch comes to clean up those.
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/igc/igc_defines.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 11b99acf4abe..549134ecd105 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -10,10 +10,6 @@
#define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */
-/* PCI Bus Info */
-#define PCIE_DEVICE_CONTROL2 0x28
-#define PCIE_DEVICE_CONTROL2_16ms 0x0005
-
/* Physical Func Reset Done Indication */
#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000
--
2.21.0
^ permalink raw reply related
* [net-next v2 15/15] i40e: Add support for X710 device
From: Jeff Kirsher @ 2019-09-09 22:48 UTC (permalink / raw)
To: davem
Cc: Mariusz Stachura, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: Mariusz Stachura <mariusz.stachura@intel.com>
Add I40E_DEV_ID_10G_BASE_T_BC to i40e_pci_tbl
Signed-off-by: Mariusz Stachura <mariusz.stachura@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3c8a2f55c43a..e9f2f276bf27 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -73,6 +73,7 @@ static const struct pci_device_id i40e_pci_tbl[] = {
{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
+ {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0},
--
2.21.0
^ permalink raw reply related
* [net-next v2 12/15] i40e: Remove EMPR traces from debugfs facility
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem
Cc: Mauro S. M. Rodrigues, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: "Mauro S. M. Rodrigues" <maurosr@linux.vnet.ibm.com>
Since commit
'5098850c9b9b ("i40e/i40evf: i40e_register.h updates")'
it is no longer possible to trigger an EMP Reset from debugfs, but it's
possible to request it either way, to end up with a bad reset request:
echo empr > /sys/kernel/debug/i40e/0002\:01\:00.1/command
i40e 0002:01:00.1: debugfs: forcing EMPR
i40e 0002:01:00.1: bad reset request 0x00010000
So let's remove this piece of code and show the available valid commands
as it is when any invalid command is issued.
Signed-off-by: "Mauro S. M. Rodrigues" <maurosr@linux.vnet.ibm.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e.h | 1 -
drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 4 ----
2 files changed, 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 3e535d3263b3..f1a1bd324b50 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -131,7 +131,6 @@ enum i40e_state_t {
__I40E_PF_RESET_REQUESTED,
__I40E_CORE_RESET_REQUESTED,
__I40E_GLOBAL_RESET_REQUESTED,
- __I40E_EMP_RESET_REQUESTED,
__I40E_EMP_RESET_INTR_RECEIVED,
__I40E_SUSPENDED,
__I40E_PTP_TX_IN_PROGRESS,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 41232898d8ae..99ea543dd245 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1125,10 +1125,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
dev_info(&pf->pdev->dev, "debugfs: forcing GlobR\n");
i40e_do_reset_safe(pf, BIT(__I40E_GLOBAL_RESET_REQUESTED));
- } else if (strncmp(cmd_buf, "empr", 4) == 0) {
- dev_info(&pf->pdev->dev, "debugfs: forcing EMPR\n");
- i40e_do_reset_safe(pf, BIT(__I40E_EMP_RESET_REQUESTED));
-
} else if (strncmp(cmd_buf, "read", 4) == 0) {
u32 address;
u32 value;
--
2.21.0
^ permalink raw reply related
* [net-next v2 14/15] igc: Add tx_csum offload functionality
From: Jeff Kirsher @ 2019-09-09 22:48 UTC (permalink / raw)
To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: Sasha Neftin <sasha.neftin@intel.com>
Add IP generic TX checksum offload functionality.
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/igc/igc.h | 4 +
drivers/net/ethernet/intel/igc/igc_base.h | 8 ++
drivers/net/ethernet/intel/igc/igc_defines.h | 5 +
drivers/net/ethernet/intel/igc/igc_main.c | 97 ++++++++++++++++++++
4 files changed, 114 insertions(+)
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 0f5534ce27b0..7e16345d836e 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -135,6 +135,9 @@ extern char igc_driver_version[];
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IGC_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+/* VLAN info */
+#define IGC_TX_FLAGS_VLAN_MASK 0xffff0000
+
/* igc_test_staterr - tests bits within Rx descriptor status and error fields */
static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc,
const u32 stat_err_bits)
@@ -254,6 +257,7 @@ struct igc_ring {
u16 count; /* number of desc. in the ring */
u8 queue_index; /* logical index of the ring*/
u8 reg_idx; /* physical index of the ring */
+ bool launchtime_enable; /* true if LaunchTime is enabled */
/* everything past this point are written often */
u16 next_to_clean;
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index 58d1109d7f3f..ea627ce52525 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -22,6 +22,14 @@ union igc_adv_tx_desc {
} wb;
};
+/* Context descriptors */
+struct igc_adv_tx_context_desc {
+ __le32 vlan_macip_lens;
+ __le32 launch_time;
+ __le32 type_tucmd_mlhl;
+ __le32 mss_l4len_idx;
+};
+
/* Adv Transmit Descriptor Config Masks */
#define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */
#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 549134ecd105..f3f2325fe567 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -397,4 +397,9 @@
#define IGC_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4))
#define IGC_VLAPQF_QUEUE_MASK 0x03
+#define IGC_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
+#define IGC_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type:1=IPv4 */
+#define IGC_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet Type of TCP */
+#define IGC_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */
+
#endif /* _IGC_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 965d1c939f0f..63b62d74f961 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -5,6 +5,11 @@
#include <linux/types.h>
#include <linux/if_vlan.h>
#include <linux/aer.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+
+#include <net/ipv6.h>
#include "igc.h"
#include "igc_hw.h"
@@ -790,8 +795,96 @@ static int igc_set_mac(struct net_device *netdev, void *p)
return 0;
}
+static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
+ struct igc_tx_buffer *first,
+ u32 vlan_macip_lens, u32 type_tucmd,
+ u32 mss_l4len_idx)
+{
+ struct igc_adv_tx_context_desc *context_desc;
+ u16 i = tx_ring->next_to_use;
+ struct timespec64 ts;
+
+ context_desc = IGC_TX_CTXTDESC(tx_ring, i);
+
+ i++;
+ tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+ /* set bits to identify this as an advanced context descriptor */
+ type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
+
+ /* For 82575, context index must be unique per ring. */
+ if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+ mss_l4len_idx |= tx_ring->reg_idx << 4;
+
+ context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
+ context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
+ context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
+
+ /* We assume there is always a valid Tx time available. Invalid times
+ * should have been handled by the upper layers.
+ */
+ if (tx_ring->launchtime_enable) {
+ ts = ns_to_timespec64(first->skb->tstamp);
+ first->skb->tstamp = 0;
+ context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32);
+ } else {
+ context_desc->launch_time = 0;
+ }
+}
+
+static inline bool igc_ipv6_csum_is_sctp(struct sk_buff *skb)
+{
+ unsigned int offset = 0;
+
+ ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL);
+
+ return offset == skb_checksum_start_offset(skb);
+}
+
static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
{
+ struct sk_buff *skb = first->skb;
+ u32 vlan_macip_lens = 0;
+ u32 type_tucmd = 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+csum_failed:
+ if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
+ !tx_ring->launchtime_enable)
+ return;
+ goto no_csum;
+ }
+
+ switch (skb->csum_offset) {
+ case offsetof(struct tcphdr, check):
+ type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
+ /* fall through */
+ case offsetof(struct udphdr, check):
+ break;
+ case offsetof(struct sctphdr, checksum):
+ /* validate that this is actually an SCTP request */
+ if ((first->protocol == htons(ETH_P_IP) &&
+ (ip_hdr(skb)->protocol == IPPROTO_SCTP)) ||
+ (first->protocol == htons(ETH_P_IPV6) &&
+ igc_ipv6_csum_is_sctp(skb))) {
+ type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
+ break;
+ }
+ /* fall through */
+ default:
+ skb_checksum_help(skb);
+ goto csum_failed;
+ }
+
+ /* update TX checksum flag */
+ first->tx_flags |= IGC_TX_FLAGS_CSUM;
+ vlan_macip_lens = skb_checksum_start_offset(skb) -
+ skb_network_offset(skb);
+no_csum:
+ vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
+ vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
+
+ igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
}
static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
@@ -4116,6 +4209,9 @@ static int igc_probe(struct pci_dev *pdev,
if (err)
goto err_sw_init;
+ /* Add supported features to the features list*/
+ netdev->features |= NETIF_F_HW_CSUM;
+
/* setup the private structure */
err = igc_sw_init(adapter);
if (err)
@@ -4123,6 +4219,7 @@ static int igc_probe(struct pci_dev *pdev,
/* copy netdev features into list of user selectable features */
netdev->hw_features |= NETIF_F_NTUPLE;
+ netdev->hw_features |= netdev->features;
/* MTU range: 68 - 9216 */
netdev->min_mtu = ETH_MIN_MTU;
--
2.21.0
^ permalink raw reply related
* [net-next v2 13/15] ixgbe: sync the first fragment unconditionally
From: Jeff Kirsher @ 2019-09-09 22:48 UTC (permalink / raw)
To: davem
Cc: Firo Yang, netdev, nhorman, sassmann, Alexander Duyck,
Andrew Bowers, Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: Firo Yang <firo.yang@suse.com>
In Xen environment, if Xen-swiotlb is enabled, ixgbe driver
could possibly allocate a page, DMA memory buffer, for the first
fragment which is not suitable for Xen-swiotlb to do DMA operations.
Xen-swiotlb have to internally allocate another page for doing DMA
operations. This mechanism requires syncing the data from the internal
page to the page which ixgbe sends to upper network stack. However,
since commit f3213d932173 ("ixgbe: Update driver to make use of DMA
attributes in Rx path"), the unmap operation is performed with
DMA_ATTR_SKIP_CPU_SYNC. As a result, the sync is not performed.
Since the sync isn't performed, the upper network stack could receive
a incomplete network packet. By incomplete, it means the linear data
on the first fragment(between skb->head and skb->end) is invalid. So
we have to copy the data from the internal xen-swiotlb page to the page
which ixgbe sends to upper network stack through the sync operation.
More details from Alexander Duyck:
Specifically since we are mapping the frame with
DMA_ATTR_SKIP_CPU_SYNC we have to unmap with that as well. As a result
a sync is not performed on an unmap and must be done manually as we
skipped it for the first frag. As such we need to always sync before
possibly performing a page unmap operation.
Fixes: f3213d932173 ("ixgbe: Update driver to make use of DMA attributes in Rx path")
Signed-off-by: Firo Yang <firo.yang@suse.com>
Reviewed-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 9bcae44e9883..99df595abfba 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1825,13 +1825,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
struct sk_buff *skb)
{
- /* if the page was released unmap it, else just sync our portion */
- if (unlikely(IXGBE_CB(skb)->page_released)) {
- dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
- ixgbe_rx_pg_size(rx_ring),
- DMA_FROM_DEVICE,
- IXGBE_RX_DMA_ATTR);
- } else if (ring_uses_build_skb(rx_ring)) {
+ if (ring_uses_build_skb(rx_ring)) {
unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -1848,6 +1842,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
skb_frag_size(frag),
DMA_FROM_DEVICE);
}
+
+ /* If the page was released, just unmap it. */
+ if (unlikely(IXGBE_CB(skb)->page_released)) {
+ dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
+ ixgbe_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IXGBE_RX_DMA_ATTR);
+ }
}
/**
--
2.21.0
^ permalink raw reply related
* [net-next v2 10/15] i40e: fix hw_dbg usage in i40e_hmc_get_object_va
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem
Cc: Mauro S. M. Rodrigues, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: "Mauro S. M. Rodrigues" <maurosr@linux.vnet.ibm.com>
The mentioned function references a i40e_hw attribute, as parameter for
hw_dbg, but it doesn't exist in the function scope.
Fixes it by changing parameters from i40e_hmc_info to i40e_hw which can
retrieve the necessary i40e_hmc_info.
v2:
- Fixed reverse xmas tree code style issue as suggested by Jakub Kicinski
Signed-off-by: "Mauro S. M. Rodrigues" <maurosr@linux.vnet.ibm.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
.../net/ethernet/intel/i40e/i40e_lan_hmc.c | 21 ++++++++++---------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index 994011c38fb4..be24d42280d8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */
+#include "i40e.h"
#include "i40e_osdep.h"
#include "i40e_register.h"
#include "i40e_type.h"
@@ -963,7 +964,7 @@ static i40e_status i40e_set_hmc_context(u8 *context_bytes,
/**
* i40e_hmc_get_object_va - retrieves an object's virtual address
- * @hmc_info: pointer to i40e_hmc_info struct
+ * @hw: the hardware struct, from which we obtain the i40e_hmc_info pointer
* @object_base: pointer to u64 to get the va
* @rsrc_type: the hmc resource type
* @obj_idx: hmc object index
@@ -972,16 +973,16 @@ static i40e_status i40e_set_hmc_context(u8 *context_bytes,
* base pointer. This function is used for LAN Queue contexts.
**/
static
-i40e_status i40e_hmc_get_object_va(struct i40e_hmc_info *hmc_info,
- u8 **object_base,
- enum i40e_hmc_lan_rsrc_type rsrc_type,
- u32 obj_idx)
+i40e_status i40e_hmc_get_object_va(struct i40e_hw *hw, u8 **object_base,
+ enum i40e_hmc_lan_rsrc_type rsrc_type,
+ u32 obj_idx)
{
+ struct i40e_hmc_info *hmc_info = &hw->hmc;
u32 obj_offset_in_sd, obj_offset_in_pd;
- i40e_status ret_code = 0;
struct i40e_hmc_sd_entry *sd_entry;
struct i40e_hmc_pd_entry *pd_entry;
u32 pd_idx, pd_lmt, rel_pd_idx;
+ i40e_status ret_code = 0;
u64 obj_offset_in_fpm;
u32 sd_idx, sd_lmt;
@@ -1047,7 +1048,7 @@ i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
i40e_status err;
u8 *context_bytes;
- err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
I40E_HMC_LAN_TX, queue);
if (err < 0)
return err;
@@ -1068,7 +1069,7 @@ i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
i40e_status err;
u8 *context_bytes;
- err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
I40E_HMC_LAN_TX, queue);
if (err < 0)
return err;
@@ -1088,7 +1089,7 @@ i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
i40e_status err;
u8 *context_bytes;
- err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
I40E_HMC_LAN_RX, queue);
if (err < 0)
return err;
@@ -1109,7 +1110,7 @@ i40e_status i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
i40e_status err;
u8 *context_bytes;
- err = i40e_hmc_get_object_va(&hw->hmc, &context_bytes,
+ err = i40e_hmc_get_object_va(hw, &context_bytes,
I40E_HMC_LAN_RX, queue);
if (err < 0)
return err;
--
2.21.0
^ permalink raw reply related
* [net-next v2 11/15] i40e: Implement debug macro hw_dbg using dev_dbg
From: Jeff Kirsher @ 2019-09-09 22:47 UTC (permalink / raw)
To: davem
Cc: Mauro S. M. Rodrigues, netdev, nhorman, sassmann, Andrew Bowers,
Jeff Kirsher
In-Reply-To: <20190909224802.29595-1-jeffrey.t.kirsher@intel.com>
From: "Mauro S. M. Rodrigues" <maurosr@linux.vnet.ibm.com>
There are several uses of hw_dbg in the code, producing no output. This
patch implements it using dev_debug.
Initially the intention was to implement it using netdev_dbg, analogously
to what is done in ixgbe for instance. That approach was avoided due to
some early usages of hw_dbg, like i40e_pf_reset, before the VSI structure
initialization causing NULL pointer dereference during the driver probe if
the debug messages were turned on as soon as the module is probed.
v2:
- Use dev_dbg instead of pr_debug, and take advantage of dev_name
instead of crafting pretty much the same device name locally as suggested
by Jakub Kicinski.
Signed-off-by: "Mauro S. M. Rodrigues" <maurosr@linux.vnet.ibm.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_common.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_hmc.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_osdep.h | 5 ++++-
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 46e649c09f72..d37c6e0e5f08 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */
+#include "i40e.h"
#include "i40e_type.h"
#include "i40e_adminq.h"
#include "i40e_prototype.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index 19ce93d7fd0a..163ee8c6311c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2013 - 2018 Intel Corporation. */
+#include "i40e.h"
#include "i40e_osdep.h"
#include "i40e_register.h"
#include "i40e_status.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index a07574bff550..c302ef2524f8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -18,7 +18,10 @@
* actual OS primitives
*/
-#define hw_dbg(hw, S, A...) do {} while (0)
+#define hw_dbg(hw, S, A...) \
+do { \
+ dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A); \
+} while (0)
#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
#define rd32(a, reg) readl((a)->hw_addr + (reg))
--
2.21.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox