* [PATCH net-next v3 08/12] net: stmmac: Add support for SA Insertion/Replacement in XGMAC cores
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
Add the support for Source Address Insertion and Replacement in XGMAC
cores. Two methods are supported: Descriptor based and register based.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 ++
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 11 +++++++++++
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 10 +++++++++-
drivers/net/ethernet/stmicro/stmmac/hwif.h | 7 +++++++
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++++
6 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 7fed3d2d4a95..3fb023953023 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -337,6 +337,8 @@
#define XGMAC_TDES3_CPC GENMASK(27, 26)
#define XGMAC_TDES3_CPC_SHIFT 26
#define XGMAC_TDES3_TCMSSV BIT(26)
+#define XGMAC_TDES3_SAIC GENMASK(25, 23)
+#define XGMAC_TDES3_SAIC_SHIFT 23
#define XGMAC_TDES3_THL GENMASK(22, 19)
#define XGMAC_TDES3_THL_SHIFT 19
#define XGMAC_TDES3_TSE BIT(18)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index a161285340c6..d0e7b62cc2ae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1075,6 +1075,16 @@ static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index,
return 0;
}
+static void dwxgmac2_sarc_configure(void __iomem *ioaddr, int val)
+{
+ u32 value = readl(ioaddr + XGMAC_TX_CONFIG);
+
+ value &= ~XGMAC_CONFIG_SARC;
+ value |= val << XGMAC_CONFIG_SARC_SHIFT;
+
+ writel(value, ioaddr + XGMAC_TX_CONFIG);
+}
+
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
.set_mac = dwxgmac2_set_mac,
@@ -1113,6 +1123,7 @@ const struct stmmac_ops dwxgmac210_ops = {
.rxp_config = dwxgmac3_rxp_config,
.get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp,
.flex_pps_config = dwxgmac2_flex_pps_config,
+ .sarc_configure = dwxgmac2_sarc_configure,
};
int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 41985a2d7380..ab11e73ac6b1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -148,7 +148,7 @@ static void dwxgmac2_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
p->des2 |= cpu_to_le32(len & XGMAC_TDES2_B1L);
- tdes3 = tot_pkt_len & XGMAC_TDES3_FL;
+ tdes3 |= tot_pkt_len & XGMAC_TDES3_FL;
if (is_fs)
tdes3 |= XGMAC_TDES3_FD;
else
@@ -298,6 +298,13 @@ static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
p->des3 = cpu_to_le32(upper_32_bits(addr));
}
+static void dwxgmac2_set_sarc(struct dma_desc *p, u32 sarc_type)
+{
+ sarc_type <<= XGMAC_TDES3_SAIC_SHIFT;
+
+ p->des3 |= cpu_to_le32(sarc_type & XGMAC_TDES3_SAIC);
+}
+
const struct stmmac_desc_ops dwxgmac210_desc_ops = {
.tx_status = dwxgmac2_get_tx_status,
.rx_status = dwxgmac2_get_rx_status,
@@ -324,4 +331,5 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = {
.get_rx_hash = dwxgmac2_get_rx_hash,
.get_rx_header_len = dwxgmac2_get_rx_header_len,
.set_sec_addr = dwxgmac2_set_sec_addr,
+ .set_sarc = dwxgmac2_set_sarc,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index ed9fda50ee22..e54864cde01b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -91,6 +91,7 @@ struct stmmac_desc_ops {
enum pkt_hash_types *type);
int (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
+ void (*set_sarc)(struct dma_desc *p, u32 sarc_type);
};
#define stmmac_init_rx_desc(__priv, __args...) \
@@ -147,6 +148,8 @@ struct stmmac_desc_ops {
stmmac_do_callback(__priv, desc, get_rx_header_len, __args)
#define stmmac_set_desc_sec_addr(__priv, __args...) \
stmmac_do_void_callback(__priv, desc, set_sec_addr, __args)
+#define stmmac_set_desc_sarc(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_sarc, __args)
struct stmmac_dma_cfg;
struct dma_features;
@@ -350,6 +353,8 @@ struct stmmac_ops {
bool is_double);
/* TX Timestamp */
int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
+ /* Source Address Insertion / Replacement */
+ void (*sarc_configure)(void __iomem *ioaddr, int val);
};
#define stmmac_core_init(__priv, __args...) \
@@ -426,6 +431,8 @@ struct stmmac_ops {
stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args)
#define stmmac_get_mac_tx_timestamp(__priv, __args...) \
stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args)
+#define stmmac_sarc_configure(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, sarc_configure, __args)
/* PTP and HW Timer helpers */
struct stmmac_hwtimestamp {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 4597811fd325..dcb2e29a5717 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -139,6 +139,7 @@ struct stmmac_priv {
bool tx_path_in_lpi_mode;
bool tso;
int sph;
+ u32 sarc_type;
unsigned int dma_buf_sz;
unsigned int rx_copybreak;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f2a198eda20b..8e38b053d9ab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3004,6 +3004,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
+ if (priv->sarc_type)
+ stmmac_set_desc_sarc(priv, first, priv->sarc_type);
+
skb_tx_timestamp(skb);
if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
@@ -3217,6 +3220,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
priv->xstats.tx_set_ic_bit++;
}
+ if (priv->sarc_type)
+ stmmac_set_desc_sarc(priv, first, priv->sarc_type);
+
skb_tx_timestamp(skb);
/* Ready to fill the first descriptor and set the OWN bit w/o any
--
2.7.4
^ permalink raw reply related
* [PATCH net-next v3 02/12] net: stmmac: Prepare to add Split Header support
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
In order to add Split Header support, stmmac_rx() needs to take into
account that packet may be split accross multiple descriptors.
Refactor the logic of this function in order to support this scenario.
Changes from v2:
- Fixup if condition detection (Jakub)
- Don't stop NAPI with unfinished packet (Jakub)
- Use napi_alloc_skb() (Jakub)
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 6 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 149 +++++++++++++---------
2 files changed, 95 insertions(+), 60 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 80276587048a..56158e1448ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -74,6 +74,12 @@ struct stmmac_rx_queue {
u32 rx_zeroc_thresh;
dma_addr_t dma_rx_phy;
u32 rx_tail_addr;
+ unsigned int state_saved;
+ struct {
+ struct sk_buff *skb;
+ unsigned int len;
+ unsigned int error;
+ } state;
};
struct stmmac_channel {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b2e5f4ecd551..05f0fa7a6f02 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3353,9 +3353,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
{
struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
struct stmmac_channel *ch = &priv->channel[queue];
+ unsigned int count = 0, error = 0, len = 0;
+ int status = 0, coe = priv->hw->rx_csum;
unsigned int next_entry = rx_q->cur_rx;
- int coe = priv->hw->rx_csum;
- unsigned int count = 0;
+ struct sk_buff *skb = NULL;
if (netif_msg_rx_status(priv)) {
void *rx_head;
@@ -3369,10 +3370,28 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
}
while (count < limit) {
+ enum pkt_hash_types hash_type;
struct stmmac_rx_buffer *buf;
+ unsigned int prev_len = 0;
struct dma_desc *np, *p;
- int entry, status;
+ int entry;
+ u32 hash;
+ if (!count && rx_q->state_saved) {
+ skb = rx_q->state.skb;
+ error = rx_q->state.error;
+ len = rx_q->state.len;
+ } else {
+ rx_q->state_saved = false;
+ skb = NULL;
+ error = 0;
+ len = 0;
+ }
+
+ if (count >= limit)
+ break;
+
+read_again:
entry = next_entry;
buf = &rx_q->buf_pool[entry];
@@ -3407,28 +3426,24 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
page_pool_recycle_direct(rx_q->page_pool, buf->page);
priv->dev->stats.rx_errors++;
buf->page = NULL;
+ error = 1;
+ }
+
+ if (unlikely(error && (status & rx_not_ls)))
+ goto read_again;
+ if (unlikely(error)) {
+ if (skb)
+ dev_kfree_skb(skb);
+ continue;
+ }
+
+ /* Buffer is good. Go on. */
+
+ if (likely(status & rx_not_ls)) {
+ len += priv->dma_buf_sz;
} else {
- enum pkt_hash_types hash_type;
- struct sk_buff *skb;
- unsigned int des;
- int frame_len;
- u32 hash;
-
- stmmac_get_desc_addr(priv, p, &des);
- frame_len = stmmac_get_rx_frame_len(priv, p, coe);
-
- /* If frame length is greater than skb buffer size
- * (preallocated during init) then the packet is
- * ignored
- */
- if (frame_len > priv->dma_buf_sz) {
- if (net_ratelimit())
- netdev_err(priv->dev,
- "len %d larger than size (%d)\n",
- frame_len, priv->dma_buf_sz);
- priv->dev->stats.rx_length_errors++;
- continue;
- }
+ prev_len = len;
+ len = stmmac_get_rx_frame_len(priv, p, coe);
/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
* Type frames (LLC/LLC-SNAP)
@@ -3439,57 +3454,71 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
*/
if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
unlikely(status != llc_snap))
- frame_len -= ETH_FCS_LEN;
-
- if (netif_msg_rx_status(priv)) {
- netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n",
- p, entry, des);
- netdev_dbg(priv->dev, "frame size %d, COE: %d\n",
- frame_len, status);
- }
+ len -= ETH_FCS_LEN;
+ }
- skb = netdev_alloc_skb_ip_align(priv->dev, frame_len);
- if (unlikely(!skb)) {
+ if (!skb) {
+ skb = napi_alloc_skb(&ch->rx_napi, len);
+ if (!skb) {
priv->dev->stats.rx_dropped++;
continue;
}
- dma_sync_single_for_cpu(priv->device, buf->addr,
- frame_len, DMA_FROM_DEVICE);
+ dma_sync_single_for_cpu(priv->device, buf->addr, len,
+ DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, page_address(buf->page),
- frame_len);
- skb_put(skb, frame_len);
+ len);
+ skb_put(skb, len);
- if (netif_msg_pktdata(priv)) {
- netdev_dbg(priv->dev, "frame received (%dbytes)",
- frame_len);
- print_pkt(skb->data, frame_len);
- }
+ /* Data payload copied into SKB, page ready for recycle */
+ page_pool_recycle_direct(rx_q->page_pool, buf->page);
+ buf->page = NULL;
+ } else {
+ unsigned int buf_len = len - prev_len;
- stmmac_get_rx_hwtstamp(priv, p, np, skb);
+ if (likely(status & rx_not_ls))
+ buf_len = priv->dma_buf_sz;
- stmmac_rx_vlan(priv->dev, skb);
+ dma_sync_single_for_cpu(priv->device, buf->addr,
+ buf_len, DMA_FROM_DEVICE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ buf->page, 0, buf_len,
+ priv->dma_buf_sz);
- skb->protocol = eth_type_trans(skb, priv->dev);
+ /* Data payload appended into SKB */
+ page_pool_release_page(rx_q->page_pool, buf->page);
+ buf->page = NULL;
+ }
- if (unlikely(!coe))
- skb_checksum_none_assert(skb);
- else
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (likely(status & rx_not_ls))
+ goto read_again;
- if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
- skb_set_hash(skb, hash, hash_type);
+ /* Got entire packet into SKB. Finish it. */
- skb_record_rx_queue(skb, queue);
- napi_gro_receive(&ch->rx_napi, skb);
+ stmmac_get_rx_hwtstamp(priv, p, np, skb);
+ stmmac_rx_vlan(priv->dev, skb);
+ skb->protocol = eth_type_trans(skb, priv->dev);
- /* Data payload copied into SKB, page ready for recycle */
- page_pool_recycle_direct(rx_q->page_pool, buf->page);
- buf->page = NULL;
+ if (unlikely(!coe))
+ skb_checksum_none_assert(skb);
+ else
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
- priv->dev->stats.rx_packets++;
- priv->dev->stats.rx_bytes += frame_len;
- }
+ if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
+ skb_set_hash(skb, hash, hash_type);
+
+ skb_record_rx_queue(skb, queue);
+ napi_gro_receive(&ch->rx_napi, skb);
+
+ priv->dev->stats.rx_packets++;
+ priv->dev->stats.rx_bytes += len;
+ }
+
+ if (status & rx_not_ls) {
+ rx_q->state_saved = true;
+ rx_q->state.skb = skb;
+ rx_q->state.error = error;
+ rx_q->state.len = len;
}
stmmac_rx_refill(priv, queue);
--
2.7.4
^ permalink raw reply related
* [PATCH net-next v3 04/12] net: stmmac: Add Split Header support and enable it in XGMAC cores
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
Add the support for Split Header feature in the RX path and enable it in
XGMAC cores.
This does not impact neither beneficts bandwidth but it does reduces CPU
usage because without the feature all the entire packet is memcpy'ed,
while that with the feature only the header is.
With Split Header disabled 'perf stat -d' gives:
86870.624945 task-clock (msec) # 0.429 CPUs utilized
1073352 context-switches # 0.012 M/sec
1 cpu-migrations # 0.000 K/sec
213 page-faults # 0.002 K/sec
327113872376 cycles # 3.766 GHz (62.53%)
56618161216 instructions # 0.17 insn per cycle (75.06%)
10742205071 branches # 123.658 M/sec (75.36%)
584309242 branch-misses # 5.44% of all branches (75.19%)
17594787965 L1-dcache-loads # 202.540 M/sec (74.88%)
4003773131 L1-dcache-load-misses # 22.76% of all L1-dcache hits (74.89%)
1313301468 LLC-loads # 15.118 M/sec (49.75%)
355906510 LLC-load-misses # 27.10% of all LL-cache hits (49.92%)
With Split Header enabled 'perf stat -d' gives:
49324.456539 task-clock (msec) # 0.245 CPUs utilized
2542387 context-switches # 0.052 M/sec
1 cpu-migrations # 0.000 K/sec
213 page-faults # 0.004 K/sec
177092791469 cycles # 3.590 GHz (62.30%)
68555756017 instructions # 0.39 insn per cycle (75.16%)
12697019382 branches # 257.418 M/sec (74.81%)
442081897 branch-misses # 3.48% of all branches (74.79%)
20337958358 L1-dcache-loads # 412.330 M/sec (75.46%)
3820210140 L1-dcache-load-misses # 18.78% of all L1-dcache hits (75.35%)
1257719198 LLC-loads # 25.499 M/sec (49.73%)
685543923 LLC-load-misses # 54.51% of all LL-cache hits (49.86%)
Changes from v2:
- Reword commit message (Jakub)
Changes from v1:
- Add performance info (David)
- Add misssing dma_sync_single_for_device()
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 6 ++
.../net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 18 +++++-
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 18 ++++++
drivers/net/ethernet/stmicro/stmmac/hwif.h | 9 +++
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 75 +++++++++++++++++++++-
7 files changed, 128 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e1e6f67041ec..527f961579f4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -356,6 +356,7 @@ struct dma_features {
unsigned int addr64;
unsigned int rssen;
unsigned int vlhash;
+ unsigned int sphen;
};
/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 429c94e40c73..995d533b9316 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -32,6 +32,9 @@
#define XGMAC_CONFIG_ARPEN BIT(31)
#define XGMAC_CONFIG_GPSL GENMASK(29, 16)
#define XGMAC_CONFIG_GPSL_SHIFT 16
+#define XGMAC_CONFIG_HDSMS GENMASK(14, 12)
+#define XGMAC_CONFIG_HDSMS_SHIFT 12
+#define XGMAC_CONFIG_HDSMS_256 (0x2 << XGMAC_CONFIG_HDSMS_SHIFT)
#define XGMAC_CONFIG_S2KP BIT(11)
#define XGMAC_CONFIG_LM BIT(10)
#define XGMAC_CONFIG_IPC BIT(9)
@@ -101,6 +104,7 @@
#define XGMAC_HW_FEATURE1 0x00000120
#define XGMAC_HWFEAT_RSSEN BIT(20)
#define XGMAC_HWFEAT_TSOEN BIT(18)
+#define XGMAC_HWFEAT_SPHEN BIT(17)
#define XGMAC_HWFEAT_ADDR64 GENMASK(15, 14)
#define XGMAC_HWFEAT_TXFIFOSIZE GENMASK(10, 6)
#define XGMAC_HWFEAT_RXFIFOSIZE GENMASK(4, 0)
@@ -258,6 +262,7 @@
#define XGMAC_TCEIE BIT(0)
#define XGMAC_DMA_ECC_INT_STATUS 0x0000306c
#define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x)))
+#define XGMAC_SPH BIT(24)
#define XGMAC_PBLx8 BIT(16)
#define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x)))
#define XGMAC_TxPBL GENMASK(21, 16)
@@ -318,6 +323,7 @@
#define XGMAC_TDES3_CIC_SHIFT 16
#define XGMAC_TDES3_TPL GENMASK(17, 0)
#define XGMAC_TDES3_FL GENMASK(14, 0)
+#define XGMAC_RDES2_HL GENMASK(9, 0)
#define XGMAC_RDES3_OWN BIT(31)
#define XGMAC_RDES3_CTXT BIT(30)
#define XGMAC_RDES3_IOC BIT(30)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 2c1ed8c2a9d3..41985a2d7380 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -29,6 +29,8 @@ static int dwxgmac2_get_rx_status(void *data, struct stmmac_extra_stats *x,
if (unlikely(rdes3 & XGMAC_RDES3_OWN))
return dma_own;
+ if (unlikely(rdes3 & XGMAC_RDES3_CTXT))
+ return discard_frame;
if (likely(!(rdes3 & XGMAC_RDES3_LD)))
return rx_not_ls;
if (unlikely((rdes3 & XGMAC_RDES3_ES) && (rdes3 & XGMAC_RDES3_LD)))
@@ -54,7 +56,7 @@ static void dwxgmac2_set_tx_owner(struct dma_desc *p)
static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
- p->des3 = cpu_to_le32(XGMAC_RDES3_OWN);
+ p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);
if (!disable_rx_ic)
p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC);
@@ -284,6 +286,18 @@ static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash,
return -EINVAL;
}
+static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len)
+{
+ *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL;
+ return 0;
+}
+
+static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
+{
+ p->des2 = cpu_to_le32(lower_32_bits(addr));
+ p->des3 = cpu_to_le32(upper_32_bits(addr));
+}
+
const struct stmmac_desc_ops dwxgmac210_desc_ops = {
.tx_status = dwxgmac2_get_tx_status,
.rx_status = dwxgmac2_get_rx_status,
@@ -308,4 +322,6 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = {
.set_addr = dwxgmac2_set_addr,
.clear = dwxgmac2_clear,
.get_rx_hash = dwxgmac2_get_rx_hash,
+ .get_rx_header_len = dwxgmac2_get_rx_header_len,
+ .set_sec_addr = dwxgmac2_set_sec_addr,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 18cbf4ab4ad2..0f3de4895cf7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -366,6 +366,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
+ dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17;
dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14;
switch (dma_cap->addr64) {
@@ -472,6 +473,22 @@ static void dwxgmac2_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
}
+static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan)
+{
+ u32 value = readl(ioaddr + XGMAC_RX_CONFIG);
+
+ value &= ~XGMAC_CONFIG_HDSMS;
+ value |= XGMAC_CONFIG_HDSMS_256; /* Segment max 256 bytes */
+ writel(value, ioaddr + XGMAC_RX_CONFIG);
+
+ value = readl(ioaddr + XGMAC_DMA_CH_CONTROL(chan));
+ if (en)
+ value |= XGMAC_SPH;
+ else
+ value &= ~XGMAC_SPH;
+ writel(value, ioaddr + XGMAC_DMA_CH_CONTROL(chan));
+}
+
const struct stmmac_dma_ops dwxgmac210_dma_ops = {
.reset = dwxgmac2_dma_reset,
.init = dwxgmac2_dma_init,
@@ -498,4 +515,5 @@ const struct stmmac_dma_ops dwxgmac210_dma_ops = {
.enable_tso = dwxgmac2_enable_tso,
.qmode = dwxgmac2_qmode,
.set_bfsize = dwxgmac2_set_bfsize,
+ .enable_sph = dwxgmac2_enable_sph,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 7e1523c6f456..ed9fda50ee22 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -89,6 +89,8 @@ struct stmmac_desc_ops {
/* RSS */
int (*get_rx_hash)(struct dma_desc *p, u32 *hash,
enum pkt_hash_types *type);
+ int (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
+ void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
};
#define stmmac_init_rx_desc(__priv, __args...) \
@@ -141,6 +143,10 @@ struct stmmac_desc_ops {
stmmac_do_void_callback(__priv, desc, clear, __args)
#define stmmac_get_rx_hash(__priv, __args...) \
stmmac_do_callback(__priv, desc, get_rx_hash, __args)
+#define stmmac_get_rx_header_len(__priv, __args...) \
+ stmmac_do_callback(__priv, desc, get_rx_header_len, __args)
+#define stmmac_set_desc_sec_addr(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_sec_addr, __args)
struct stmmac_dma_cfg;
struct dma_features;
@@ -191,6 +197,7 @@ struct stmmac_dma_ops {
void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode);
void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
+ void (*enable_sph)(void __iomem *ioaddr, bool en, u32 chan);
};
#define stmmac_reset(__priv, __args...) \
@@ -247,6 +254,8 @@ struct stmmac_dma_ops {
stmmac_do_void_callback(__priv, dma, qmode, __args)
#define stmmac_set_dma_bfsize(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
+#define stmmac_enable_sph(__priv, __args...) \
+ stmmac_do_void_callback(__priv, dma, enable_sph, __args)
struct mac_device_info;
struct net_device;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 56158e1448ac..4597811fd325 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -58,7 +58,9 @@ struct stmmac_tx_queue {
struct stmmac_rx_buffer {
struct page *page;
+ struct page *sec_page;
dma_addr_t addr;
+ dma_addr_t sec_addr;
};
struct stmmac_rx_queue {
@@ -136,6 +138,7 @@ struct stmmac_priv {
int hwts_tx_en;
bool tx_path_in_lpi_mode;
bool tso;
+ int sph;
unsigned int dma_buf_sz;
unsigned int rx_copybreak;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 05f0fa7a6f02..60e5f3584790 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1201,6 +1201,17 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
if (!buf->page)
return -ENOMEM;
+ if (priv->sph) {
+ buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+ if (!buf->sec_page)
+ return -ENOMEM;
+
+ buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+ stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+ } else {
+ buf->sec_page = NULL;
+ }
+
buf->addr = page_pool_get_dma_addr(buf->page);
stmmac_set_desc_addr(priv, p, buf->addr);
if (priv->dma_buf_sz == BUF_SIZE_16KiB)
@@ -1223,6 +1234,10 @@ static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i)
if (buf->page)
page_pool_put_page(rx_q->page_pool, buf->page, false);
buf->page = NULL;
+
+ if (buf->sec_page)
+ page_pool_put_page(rx_q->page_pool, buf->sec_page, false);
+ buf->sec_page = NULL;
}
/**
@@ -2596,6 +2611,12 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
}
+ /* Enable Split Header */
+ if (priv->sph && priv->hw->rx_csum) {
+ for (chan = 0; chan < rx_cnt; chan++)
+ stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
+ }
+
/* Start the ball rolling... */
stmmac_start_all_dma(priv);
@@ -3315,6 +3336,17 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
break;
}
+ if (priv->sph && !buf->sec_page) {
+ buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+ if (!buf->sec_page)
+ break;
+
+ buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+
+ dma_sync_single_for_device(priv->device, buf->sec_addr,
+ len, DMA_FROM_DEVICE);
+ }
+
buf->addr = page_pool_get_dma_addr(buf->page);
/* Sync whole allocation to device. This will invalidate old
@@ -3324,6 +3356,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
DMA_FROM_DEVICE);
stmmac_set_desc_addr(priv, p, buf->addr);
+ stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
stmmac_refill_desc3(priv, rx_q, p);
rx_q->rx_count_frames++;
@@ -3370,10 +3403,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
}
while (count < limit) {
+ unsigned int hlen = 0, prev_len = 0;
enum pkt_hash_types hash_type;
struct stmmac_rx_buffer *buf;
- unsigned int prev_len = 0;
struct dma_desc *np, *p;
+ unsigned int sec_len;
int entry;
u32 hash;
@@ -3392,6 +3426,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
break;
read_again:
+ sec_len = 0;
entry = next_entry;
buf = &rx_q->buf_pool[entry];
@@ -3418,6 +3453,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
np = rx_q->dma_rx + next_entry;
prefetch(np);
+ prefetch(page_address(buf->page));
if (priv->extend_desc)
stmmac_rx_extended_status(priv, &priv->dev->stats,
@@ -3458,6 +3494,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
}
if (!skb) {
+ int ret = stmmac_get_rx_header_len(priv, p, &hlen);
+
+ if (priv->sph && !ret && (hlen > 0)) {
+ sec_len = len;
+ if (!(status & rx_not_ls))
+ sec_len = sec_len - hlen;
+ len = hlen;
+
+ prefetch(page_address(buf->sec_page));
+ }
+
skb = napi_alloc_skb(&ch->rx_napi, len);
if (!skb) {
priv->dev->stats.rx_dropped++;
@@ -3490,6 +3537,20 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
buf->page = NULL;
}
+ if (sec_len > 0) {
+ dma_sync_single_for_cpu(priv->device, buf->sec_addr,
+ sec_len, DMA_FROM_DEVICE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ buf->sec_page, 0, sec_len,
+ priv->dma_buf_sz);
+
+ len += sec_len;
+
+ /* Data payload appended into SKB */
+ page_pool_release_page(rx_q->page_pool, buf->sec_page);
+ buf->sec_page = NULL;
+ }
+
if (likely(status & rx_not_ls))
goto read_again;
@@ -3664,6 +3725,8 @@ static int stmmac_set_features(struct net_device *netdev,
netdev_features_t features)
{
struct stmmac_priv *priv = netdev_priv(netdev);
+ bool sph_en;
+ u32 chan;
/* Keep the COE Type in case of csum is supporting */
if (features & NETIF_F_RXCSUM)
@@ -3675,6 +3738,10 @@ static int stmmac_set_features(struct net_device *netdev,
*/
stmmac_rx_ipc(priv, priv->hw);
+ sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+ for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
+ stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
+
return 0;
}
@@ -4367,6 +4434,12 @@ int stmmac_dvr_probe(struct device *device,
dev_info(priv->device, "TSO feature enabled\n");
}
+ if (priv->dma_cap.sphen) {
+ ndev->hw_features |= NETIF_F_GRO;
+ priv->sph = true;
+ dev_info(priv->device, "SPH feature enabled\n");
+ }
+
if (priv->dma_cap.addr64) {
ret = dma_set_mask_and_coherent(device,
DMA_BIT_MASK(priv->dma_cap.addr64));
--
2.7.4
^ permalink raw reply related
* [PATCH net-next v3 05/12] net: stmmac: Add a counter for Split Header packets
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
Add a counter that increments each time a packet with split header is
received.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
3 files changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 527f961579f4..1303ec81fd3d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -75,6 +75,7 @@ struct stmmac_extra_stats {
unsigned long rx_missed_cntr;
unsigned long rx_overflow_cntr;
unsigned long rx_vlan;
+ unsigned long rx_split_hdr_pkt_n;
/* Tx/Rx IRQ error info */
unsigned long tx_undeflow_irq;
unsigned long tx_process_stopped_irq;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2423160ab582..eb784fdb6d32 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -65,6 +65,7 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
STMMAC_STAT(rx_missed_cntr),
STMMAC_STAT(rx_overflow_cntr),
STMMAC_STAT(rx_vlan),
+ STMMAC_STAT(rx_split_hdr_pkt_n),
/* Tx/Rx IRQ error info */
STMMAC_STAT(tx_undeflow_irq),
STMMAC_STAT(tx_process_stopped_irq),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 60e5f3584790..f2a198eda20b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3503,6 +3503,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
len = hlen;
prefetch(page_address(buf->sec_page));
+ priv->xstats.rx_split_hdr_pkt_n++;
}
skb = napi_alloc_skb(&ch->rx_napi, len);
--
2.7.4
^ permalink raw reply related
* [PATCH net-next v3 11/12] net: stmmac: Add support for VLAN Insertion Offload
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
Adds the logic to insert a given VLAN ID in a packet. This is offloaded
to HW and its descriptor based. For now, only XGMAC implements the
necessary callbacks.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/common.h | 8 ++++
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 15 +++++++
.../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 14 ++++++
.../net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 35 +++++++++++++++
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 2 +
drivers/net/ethernet/stmicro/stmmac/hwif.h | 10 +++++
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 52 +++++++++++++++++++++-
7 files changed, 135 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1303ec81fd3d..49aa56ca09cc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -358,6 +358,8 @@ struct dma_features {
unsigned int rssen;
unsigned int vlhash;
unsigned int sphen;
+ unsigned int vlins;
+ unsigned int dvlan;
};
/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -389,6 +391,12 @@ struct dma_features {
#define STMMAC_RSS_HASH_KEY_SIZE 40
#define STMMAC_RSS_MAX_TABLE_SIZE 256
+/* VLAN */
+#define STMMAC_VLAN_NONE 0x0
+#define STMMAC_VLAN_REMOVE 0x1
+#define STMMAC_VLAN_INSERT 0x2
+#define STMMAC_VLAN_REPLACE 0x3
+
extern const struct stmmac_desc_ops enh_desc_ops;
extern const struct stmmac_desc_ops ndesc_ops;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 79c145ba25a8..7357b8bdc128 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -63,6 +63,11 @@
#define XGMAC_VLAN_ETV BIT(16)
#define XGMAC_VLAN_VID GENMASK(15, 0)
#define XGMAC_VLAN_HASH_TABLE 0x00000058
+#define XGMAC_VLAN_INCL 0x00000060
+#define XGMAC_VLAN_VLTI BIT(20)
+#define XGMAC_VLAN_CSVL BIT(19)
+#define XGMAC_VLAN_VLC GENMASK(17, 16)
+#define XGMAC_VLAN_VLC_SHIFT 16
#define XGMAC_RXQ_CTRL0 0x000000a0
#define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2)
#define XGMAC_RXQEN_SHIFT(x) ((x) * 2)
@@ -128,6 +133,7 @@
#define XGMAC_HWFEAT_RXQCNT GENMASK(3, 0)
#define XGMAC_HW_FEATURE3 0x00000128
#define XGMAC_HWFEAT_ASP GENMASK(15, 14)
+#define XGMAC_HWFEAT_DVLAN BIT(13)
#define XGMAC_HWFEAT_FRPES GENMASK(12, 11)
#define XGMAC_HWFEAT_FRPPB GENMASK(10, 9)
#define XGMAC_HWFEAT_FRPSEL BIT(3)
@@ -337,10 +343,14 @@
#define XGMAC_REGSIZE ((0x0000317c + (0x80 * 15)) / 4)
/* Descriptors */
+#define XGMAC_TDES2_IVT GENMASK(31, 16)
+#define XGMAC_TDES2_IVT_SHIFT 16
#define XGMAC_TDES2_IOC BIT(31)
#define XGMAC_TDES2_TTSE BIT(30)
#define XGMAC_TDES2_B2L GENMASK(29, 16)
#define XGMAC_TDES2_B2L_SHIFT 16
+#define XGMAC_TDES2_VTIR GENMASK(15, 14)
+#define XGMAC_TDES2_VTIR_SHIFT 14
#define XGMAC_TDES2_B1L GENMASK(13, 0)
#define XGMAC_TDES3_OWN BIT(31)
#define XGMAC_TDES3_CTXT BIT(30)
@@ -353,10 +363,15 @@
#define XGMAC_TDES3_SAIC_SHIFT 23
#define XGMAC_TDES3_THL GENMASK(22, 19)
#define XGMAC_TDES3_THL_SHIFT 19
+#define XGMAC_TDES3_IVTIR GENMASK(19, 18)
+#define XGMAC_TDES3_IVTIR_SHIFT 18
#define XGMAC_TDES3_TSE BIT(18)
+#define XGMAC_TDES3_IVLTV BIT(17)
#define XGMAC_TDES3_CIC GENMASK(17, 16)
#define XGMAC_TDES3_CIC_SHIFT 16
#define XGMAC_TDES3_TPL GENMASK(17, 0)
+#define XGMAC_TDES3_VLTV BIT(16)
+#define XGMAC_TDES3_VT GENMASK(15, 0)
#define XGMAC_TDES3_FL GENMASK(14, 0)
#define XGMAC_RDES2_HL GENMASK(9, 0)
#define XGMAC_RDES3_OWN BIT(31)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index d8483d088711..e534a3aaf4a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1150,6 +1150,19 @@ static void dwxgmac2_sarc_configure(void __iomem *ioaddr, int val)
writel(value, ioaddr + XGMAC_TX_CONFIG);
}
+static void dwxgmac2_enable_vlan(struct mac_device_info *hw, u32 type)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+
+ value = readl(ioaddr + XGMAC_VLAN_INCL);
+ value |= XGMAC_VLAN_VLTI;
+ value |= XGMAC_VLAN_CSVL; /* Only use SVLAN */
+ value &= ~XGMAC_VLAN_VLC;
+ value |= (type << XGMAC_VLAN_VLC_SHIFT) & XGMAC_VLAN_VLC;
+ writel(value, ioaddr + XGMAC_VLAN_INCL);
+}
+
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
.set_mac = dwxgmac2_set_mac,
@@ -1189,6 +1202,7 @@ const struct stmmac_ops dwxgmac210_ops = {
.get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp,
.flex_pps_config = dwxgmac2_flex_pps_config,
.sarc_configure = dwxgmac2_sarc_configure,
+ .enable_vlan = dwxgmac2_enable_vlan,
};
int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index ab11e73ac6b1..ae48154f933c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -305,6 +305,39 @@ static void dwxgmac2_set_sarc(struct dma_desc *p, u32 sarc_type)
p->des3 |= cpu_to_le32(sarc_type & XGMAC_TDES3_SAIC);
}
+static void dwxgmac2_set_vlan_tag(struct dma_desc *p, u16 tag, u16 inner_tag,
+ u32 inner_type)
+{
+ p->des0 = 0;
+ p->des1 = 0;
+ p->des2 = 0;
+ p->des3 = 0;
+
+ /* Inner VLAN */
+ if (inner_type) {
+ u32 des = inner_tag << XGMAC_TDES2_IVT_SHIFT;
+
+ des &= XGMAC_TDES2_IVT;
+ p->des2 = cpu_to_le32(des);
+
+ des = inner_type << XGMAC_TDES3_IVTIR_SHIFT;
+ des &= XGMAC_TDES3_IVTIR;
+ p->des3 = cpu_to_le32(des | XGMAC_TDES3_IVLTV);
+ }
+
+ /* Outer VLAN */
+ p->des3 |= cpu_to_le32(tag & XGMAC_TDES3_VT);
+ p->des3 |= cpu_to_le32(XGMAC_TDES3_VLTV);
+
+ p->des3 |= cpu_to_le32(XGMAC_TDES3_CTXT);
+}
+
+static void dwxgmac2_set_vlan(struct dma_desc *p, u32 type)
+{
+ type <<= XGMAC_TDES2_VTIR_SHIFT;
+ p->des2 |= cpu_to_le32(type & XGMAC_TDES2_VTIR);
+}
+
const struct stmmac_desc_ops dwxgmac210_desc_ops = {
.tx_status = dwxgmac2_get_tx_status,
.rx_status = dwxgmac2_get_rx_status,
@@ -332,4 +365,6 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = {
.get_rx_header_len = dwxgmac2_get_rx_header_len,
.set_sec_addr = dwxgmac2_set_sec_addr,
.set_sarc = dwxgmac2_set_sarc,
+ .set_vlan_tag = dwxgmac2_set_vlan_tag,
+ .set_vlan = dwxgmac2_set_vlan,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index f2d5901fbaff..64956465c030 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -359,6 +359,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 0 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE0);
+ dma_cap->vlins = (hw_cap & XGMAC_HWFEAT_SAVLANINS) >> 27;
dma_cap->rx_coe = (hw_cap & XGMAC_HWFEAT_RXCOESEL) >> 16;
dma_cap->tx_coe = (hw_cap & XGMAC_HWFEAT_TXCOESEL) >> 14;
dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13;
@@ -413,6 +414,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 3 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3);
dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14;
+ dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13;
dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11;
dma_cap->frpbs = (hw_cap & XGMAC_HWFEAT_FRPPB) >> 9;
dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index e54864cde01b..9435b312495d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -92,6 +92,9 @@ struct stmmac_desc_ops {
int (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
void (*set_sarc)(struct dma_desc *p, u32 sarc_type);
+ void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag,
+ u32 inner_type);
+ void (*set_vlan)(struct dma_desc *p, u32 type);
};
#define stmmac_init_rx_desc(__priv, __args...) \
@@ -150,6 +153,10 @@ struct stmmac_desc_ops {
stmmac_do_void_callback(__priv, desc, set_sec_addr, __args)
#define stmmac_set_desc_sarc(__priv, __args...) \
stmmac_do_void_callback(__priv, desc, set_sarc, __args)
+#define stmmac_set_desc_vlan_tag(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_vlan_tag, __args)
+#define stmmac_set_desc_vlan(__priv, __args...) \
+ stmmac_do_void_callback(__priv, desc, set_vlan, __args)
struct stmmac_dma_cfg;
struct dma_features;
@@ -351,6 +358,7 @@ struct stmmac_ops {
/* VLAN */
void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash,
bool is_double);
+ void (*enable_vlan)(struct mac_device_info *hw, u32 type);
/* TX Timestamp */
int (*get_mac_tx_timestamp)(struct mac_device_info *hw, u64 *ts);
/* Source Address Insertion / Replacement */
@@ -429,6 +437,8 @@ struct stmmac_ops {
stmmac_do_callback(__priv, mac, rss_configure, __args)
#define stmmac_update_vlan_hash(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args)
+#define stmmac_enable_vlan(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, enable_vlan, __args)
#define stmmac_get_mac_tx_timestamp(__priv, __args...) \
stmmac_do_callback(__priv, mac, get_mac_tx_timestamp, __args)
#define stmmac_sarc_configure(__priv, __args...) \
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 8e38b053d9ab..bd1078433448 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2617,6 +2617,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
}
+ /* VLAN Tag Insertion */
+ if (priv->dma_cap.vlins)
+ stmmac_enable_vlan(priv, priv->hw, STMMAC_VLAN_INSERT);
+
/* Start the ball rolling... */
stmmac_start_all_dma(priv);
@@ -2794,6 +2798,33 @@ static int stmmac_release(struct net_device *dev)
return 0;
}
+static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb,
+ struct stmmac_tx_queue *tx_q)
+{
+ u16 tag = 0x0, inner_tag = 0x0;
+ u32 inner_type = 0x0;
+ struct dma_desc *p;
+
+ if (!priv->dma_cap.vlins)
+ return false;
+ if (!skb_vlan_tag_present(skb))
+ return false;
+ if (skb->vlan_proto == htons(ETH_P_8021AD)) {
+ inner_tag = skb_vlan_tag_get(skb);
+ inner_type = STMMAC_VLAN_INSERT;
+ }
+
+ tag = skb_vlan_tag_get(skb);
+
+ p = tx_q->dma_tx + tx_q->cur_tx;
+ if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type))
+ return false;
+
+ stmmac_set_tx_owner(priv, p);
+ tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+ return true;
+}
+
/**
* stmmac_tso_allocator - close entry point of the driver
* @priv: driver private structure
@@ -2873,12 +2904,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_priv *priv = netdev_priv(dev);
int nfrags = skb_shinfo(skb)->nr_frags;
u32 queue = skb_get_queue_mapping(skb);
- unsigned int first_entry;
struct stmmac_tx_queue *tx_q;
+ unsigned int first_entry;
int tmp_pay_len = 0;
u32 pay_len, mss;
u8 proto_hdr_len;
dma_addr_t des;
+ bool has_vlan;
int i;
tx_q = &priv->tx_queue[queue];
@@ -2920,12 +2952,18 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
skb->data_len);
}
+ /* Check if VLAN can be inserted by HW */
+ has_vlan = stmmac_vlan_insert(priv, skb, tx_q);
+
first_entry = tx_q->cur_tx;
WARN_ON(tx_q->tx_skbuff[first_entry]);
desc = tx_q->dma_tx + first_entry;
first = desc;
+ if (has_vlan)
+ stmmac_set_desc_vlan(priv, first, STMMAC_VLAN_INSERT);
+
/* first descriptor: fill Headers on Buf1 */
des = dma_map_single(priv->device, skb->data, skb_headlen(skb),
DMA_TO_DEVICE);
@@ -3085,6 +3123,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int first_entry;
unsigned int enh_desc;
dma_addr_t des;
+ bool has_vlan;
int entry;
tx_q = &priv->tx_queue[queue];
@@ -3110,6 +3149,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_BUSY;
}
+ /* Check if VLAN can be inserted by HW */
+ has_vlan = stmmac_vlan_insert(priv, skb, tx_q);
+
entry = tx_q->cur_tx;
first_entry = entry;
WARN_ON(tx_q->tx_skbuff[first_entry]);
@@ -3123,6 +3165,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
first = desc;
+ if (has_vlan)
+ stmmac_set_desc_vlan(priv, first, STMMAC_VLAN_INSERT);
+
enh_desc = priv->plat->enh_desc;
/* To program the descriptors according to the size of the frame */
if (enh_desc)
@@ -4473,6 +4518,11 @@ int stmmac_dvr_probe(struct device *device,
ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
}
+ if (priv->dma_cap.vlins) {
+ ndev->features |= NETIF_F_HW_VLAN_CTAG_TX;
+ if (priv->dma_cap.dvlan)
+ ndev->features |= NETIF_F_HW_VLAN_STAG_TX;
+ }
#endif
priv->msg_enable = netif_msg_init(debug, default_msg_level);
--
2.7.4
^ permalink raw reply related
* [PATCH net-next v3 03/12] net: stmmac: xgmac: Correctly return that RX descriptor is not last one
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
Return the correct value when RX descriptor is not the last one.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index 58b69fa97837..2c1ed8c2a9d3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -26,16 +26,15 @@ static int dwxgmac2_get_rx_status(void *data, struct stmmac_extra_stats *x,
struct dma_desc *p)
{
unsigned int rdes3 = le32_to_cpu(p->des3);
- int ret = good_frame;
if (unlikely(rdes3 & XGMAC_RDES3_OWN))
return dma_own;
if (likely(!(rdes3 & XGMAC_RDES3_LD)))
+ return rx_not_ls;
+ if (unlikely((rdes3 & XGMAC_RDES3_ES) && (rdes3 & XGMAC_RDES3_LD)))
return discard_frame;
- if (unlikely(rdes3 & XGMAC_RDES3_ES))
- ret = discard_frame;
- return ret;
+ return good_frame;
}
static int dwxgmac2_get_tx_len(struct dma_desc *p)
--
2.7.4
^ permalink raw reply related
* [PATCH net-next v3 00/12] net: stmmac: Improvements for -next
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
Couple of improvements for -next tree. More info in commit logs.
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
Jose Abreu (12):
net: stmmac: Get correct timestamp values from XGMAC
net: stmmac: Prepare to add Split Header support
net: stmmac: xgmac: Correctly return that RX descriptor is not last
one
net: stmmac: Add Split Header support and enable it in XGMAC cores
net: stmmac: Add a counter for Split Header packets
net: stmmac: dwxgmac: Add Flexible PPS support
net: stmmac: Add ethtool register dump for XGMAC cores
net: stmmac: Add support for SA Insertion/Replacement in XGMAC cores
net: stmmac: selftests: Add tests for SA Insertion/Replacement
net: stmmac: xgmac: Add EEE support
net: stmmac: Add support for VLAN Insertion Offload
net: stmmac: selftests: Add selftest for VLAN TX Offload
drivers/net/ethernet/stmicro/stmmac/common.h | 10 +
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 56 ++++
.../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 182 ++++++++++++-
.../net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 85 +++++-
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 31 ++-
drivers/net/ethernet/stmicro/stmmac/hwif.h | 30 +++
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 10 +
.../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 24 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 286 ++++++++++++++++-----
.../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 194 +++++++++++++-
10 files changed, 817 insertions(+), 91 deletions(-)
--
2.7.4
^ permalink raw reply
* [PATCH net-next v3 06/12] net: stmmac: dwxgmac: Add Flexible PPS support
From: Jose Abreu @ 2019-08-17 18:54 UTC (permalink / raw)
To: netdev
Cc: Joao Pinto, Jakub Kicinski, Jose Abreu, Giuseppe Cavallaro,
Alexandre Torgue, David S. Miller, Maxime Coquelin, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <cover.1566067802.git.joabreu@synopsys.com>
Add the support for Flexible PPS in XGMAC cores.
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
---
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 19 ++++++++
.../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 56 ++++++++++++++++++++++
2 files changed, 75 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 995d533b9316..dbac63972faf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -149,6 +149,25 @@
#define XGMAC_TXTIMESTAMP_NSEC 0x00000d30
#define XGMAC_TXTSSTSLO GENMASK(30, 0)
#define XGMAC_TXTIMESTAMP_SEC 0x00000d34
+#define XGMAC_PPS_CONTROL 0x00000d70
+#define XGMAC_PPS_MAXIDX(x) ((((x) + 1) * 8) - 1)
+#define XGMAC_PPS_MINIDX(x) ((x) * 8)
+#define XGMAC_PPSx_MASK(x) \
+ GENMASK(XGMAC_PPS_MAXIDX(x), XGMAC_PPS_MINIDX(x))
+#define XGMAC_TRGTMODSELx(x, val) \
+ GENMASK(XGMAC_PPS_MAXIDX(x) - 1, XGMAC_PPS_MAXIDX(x) - 2) & \
+ ((val) << (XGMAC_PPS_MAXIDX(x) - 2))
+#define XGMAC_PPSCMDx(x, val) \
+ GENMASK(XGMAC_PPS_MINIDX(x) + 3, XGMAC_PPS_MINIDX(x)) & \
+ ((val) << XGMAC_PPS_MINIDX(x))
+#define XGMAC_PPSCMD_START 0x2
+#define XGMAC_PPSCMD_STOP 0x5
+#define XGMAC_PPSEN0 BIT(4)
+#define XGMAC_PPSx_TARGET_TIME_SEC(x) (0x00000d80 + (x) * 0x10)
+#define XGMAC_PPSx_TARGET_TIME_NSEC(x) (0x00000d84 + (x) * 0x10)
+#define XGMAC_TRGTBUSY0 BIT(31)
+#define XGMAC_PPSx_INTERVAL(x) (0x00000d88 + (x) * 0x10)
+#define XGMAC_PPSx_WIDTH(x) (0x00000d8c + (x) * 0x10)
/* MTL Registers */
#define XGMAC_MTL_OPMODE 0x00001000
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index ba5183f38f84..f843e3640f50 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -8,6 +8,7 @@
#include <linux/crc32.h>
#include <linux/iopoll.h>
#include "stmmac.h"
+#include "stmmac_ptp.h"
#include "dwxgmac2.h"
static void dwxgmac2_core_init(struct mac_device_info *hw,
@@ -1011,6 +1012,60 @@ static int dwxgmac2_get_mac_tx_timestamp(struct mac_device_info *hw, u64 *ts)
return 0;
}
+static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index,
+ struct stmmac_pps_cfg *cfg, bool enable,
+ u32 sub_second_inc, u32 systime_flags)
+{
+ u32 tnsec = readl(ioaddr + XGMAC_PPSx_TARGET_TIME_NSEC(index));
+ u32 val = readl(ioaddr + XGMAC_PPS_CONTROL);
+ u64 period;
+
+ if (!cfg->available)
+ return -EINVAL;
+ if (tnsec & XGMAC_TRGTBUSY0)
+ return -EBUSY;
+ if (!sub_second_inc || !systime_flags)
+ return -EINVAL;
+
+ val &= ~XGMAC_PPSx_MASK(index);
+
+ if (!enable) {
+ val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_STOP);
+ writel(val, ioaddr + XGMAC_PPS_CONTROL);
+ return 0;
+ }
+
+ val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START);
+ val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START);
+ val |= XGMAC_PPSEN0;
+
+ writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index));
+
+ if (!(systime_flags & PTP_TCR_TSCTRLSSR))
+ cfg->start.tv_nsec = (cfg->start.tv_nsec * 1000) / 465;
+ writel(cfg->start.tv_nsec, ioaddr + XGMAC_PPSx_TARGET_TIME_NSEC(index));
+
+ period = cfg->period.tv_sec * 1000000000;
+ period += cfg->period.tv_nsec;
+
+ do_div(period, sub_second_inc);
+
+ if (period <= 1)
+ return -EINVAL;
+
+ writel(period - 1, ioaddr + XGMAC_PPSx_INTERVAL(index));
+
+ period >>= 1;
+ if (period <= 1)
+ return -EINVAL;
+
+ writel(period - 1, ioaddr + XGMAC_PPSx_WIDTH(index));
+
+ /* Finally, activate it */
+ writel(val, ioaddr + XGMAC_PPS_CONTROL);
+ return 0;
+}
+
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
.set_mac = dwxgmac2_set_mac,
@@ -1048,6 +1103,7 @@ const struct stmmac_ops dwxgmac210_ops = {
.update_vlan_hash = dwxgmac2_update_vlan_hash,
.rxp_config = dwxgmac3_rxp_config,
.get_mac_tx_timestamp = dwxgmac2_get_mac_tx_timestamp,
+ .flex_pps_config = dwxgmac2_flex_pps_config,
};
int dwxgmac2_setup(struct stmmac_priv *priv)
--
2.7.4
^ permalink raw reply related
* Re: Unable to create htb tc classes more than 64K
From: Cong Wang @ 2019-08-17 18:24 UTC (permalink / raw)
To: Akshat Kakkar; +Cc: NetFilter, lartc, netdev
In-Reply-To: <CAA5aLPiqyhnWjY7A3xsaNJ71sDOf=Rqej8d+7=_PyJPmV9uApA@mail.gmail.com>
On Sat, Aug 17, 2019 at 5:46 AM Akshat Kakkar <akshat.1984@gmail.com> wrote:
>
> I agree that it is because of 16bit of minor I'd of class which
> restricts it to 64K.
> Point is, can we use multilevel qdisc and classes to extend it to more
> no. of classes i.e. to more than 64K classes
If your goal is merely having as many classes as you can, then yes.
>
> One scheme can be like
> 100: root qdisc
> |
> / | \
> / | \
> / | \
> / | \
> 100:1 100:2 100:3 child classes
> | | |
> | | |
> | | |
> 1: 2: 3: qdisc
> / \ / \ / \
> / \ / \
> 1:1 1:2 3:1 3:2 leaf classes
>
> with all qdisc and classes defined as htb.
>
> Is this correct approach? Any alternative??
Again, depends on what your goal is.
>
> Besides, in order to direct traffic to leaf classes 1:1, 1:2, 2:1,
> 2:2, 3:1, 3:2 .... , instead of using filters I am using ipset with
> skbprio and iptables map-set match rule.
> But even after all this it don't work. Why?
Again, the filters you use to classify the packets could only
work for the classes on the same level, no the next level.
Thanks.
^ permalink raw reply
* Re: [PATCH RFC net-next 3/3] net: dsa: mv88e6xxx: setup SERDES irq also for CPU/DSA ports
From: Marek Behun @ 2019-08-17 18:15 UTC (permalink / raw)
To: Vivien Didelot; +Cc: netdev, Andrew Lunn, Vladimir Oltean, Florian Fainelli
In-Reply-To: <20190817200342.567c13c4@nic.cz>
On Sat, 17 Aug 2019 20:03:42 +0200
Marek Behun <marek.behun@nic.cz> wrote:
> One way would be to rename the mv88e6xxx_setup_port function to
> mv88e6xxx_setup_port_regs, or mv88e6xxx_port_pre_setup, or something
> like that. Would the names mv88e6xxx_port_setup and
> mv88e6xxx_setup_port_regs still be very confusing and error prone?
> I think maybe yes...
>
> Other solution would be to, instead of the .port_setup()
> and .port_teardown() DSA ops, create the .after_setup()
> and .before_teardown() ops I mentioned in the previous mail.
>
> And yet another (in my opinion very improper) solution could be that
> the .setup() method could call dsa_port_setup() from within itself, to
> ensure that the needed structres exist.
I thought of another solution, one that does not need new DSA
operations. What if dsa_port_enable was called for CPU/DSA ports after
in dsa_port_setup_switches, after all ports are setup, and
dsa_port_disable called for CPU/DSA ports in dsa_port_teardown_switches?
This seems to me as cleaner solution.
Marek
^ permalink raw reply
* Re: [PATCH RFC net-next 3/3] net: dsa: mv88e6xxx: setup SERDES irq also for CPU/DSA ports
From: Marek Behun @ 2019-08-17 18:03 UTC (permalink / raw)
To: Vivien Didelot; +Cc: netdev, Andrew Lunn, Vladimir Oltean, Florian Fainelli
In-Reply-To: <20190816190537.GB14714@t480s.localdomain>
Hi Vivien,
On Fri, 16 Aug 2019 19:05:37 -0400
Vivien Didelot <vivien.didelot@gmail.com> wrote:
> I think the DSA switch port_setup/port_teardown operations are fine, but the
> idea would be that the drivers must no longer setup their ports directly
> in their .setup function. So for mv88e6xxx precisely, we should rename
> mv88e6xxx_setup_port to mv88e6xxx_port_setup, and move all the port-related
> code from mv88e6xxx_setup into mv88e6xxx_port_setup.
I looked into the driver, and found out that mv88e6xxx_setup calls many
other setup functions after the calls to mv88e6xxx_setup_port for each
port:
1. setup errata
2. cache cmode
3. for each port setup_port
4. irl setup
5. mac setup
6. phy setup
7. vtu setup
8. pvt setup
9. atu setup
10. broadcast setuo
11. pot setup
12. rmu setup
13. rsvd2cpu setup
14. trunk setup
15. devmap setup
16. pri setup
17. ptp setup
18. hwtstamp setup
19. stats setup
The problem is that some of these steps (after step 3) may depend on
some of the work done by step 3. Some of these functions iterate again
over the port array (mv88e6xxx_hwtstamp_setup, for example).
We cannot simply move step 3 to be called from DSA after
mv88e6xxx_setup.
I now do not know exactly what to do about the error prone naming of
setup_port vs port_setup.
One way would be to rename the mv88e6xxx_setup_port function to
mv88e6xxx_setup_port_regs, or mv88e6xxx_port_pre_setup, or something
like that. Would the names mv88e6xxx_port_setup and
mv88e6xxx_setup_port_regs still be very confusing and error prone?
I think maybe yes...
Other solution would be to, instead of the .port_setup()
and .port_teardown() DSA ops, create the .after_setup()
and .before_teardown() ops I mentioned in the previous mail.
And yet another (in my opinion very improper) solution could be that
the .setup() method could call dsa_port_setup() from within itself, to
ensure that the needed structres exist.
Please let me know what you think about this.
The first solution to me currently seems as the easiest.
Marek
^ permalink raw reply
* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: Neal Cardwell @ 2019-08-17 17:10 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, netdev, Soheil Hassas Yeganeh, Eric Dumazet,
Jason Baron, Vladimir Rutsky
In-Reply-To: <20190817042622.91497-1-edumazet@google.com>
On Sat, Aug 17, 2019 at 12:26 AM Eric Dumazet <edumazet@google.com> wrote:
>
> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
> when needed.
>
> However, Jason patch had a bug, because the 'nonblocking' status
> as far as sk_stream_wait_memory() is concerned is governed
> by MSG_DONTWAIT flag passed at sendmsg() time :
>
> long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
>
> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
> value.
>
> This patch removes the 'noblock' variable since we must always
> set SOCK_NOSPACE if -EAGAIN is returned.
>
> It also renames the do_nonblock label since we might reach this
> code path even if we were in blocking mode.
>
> Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jason Baron <jbaron@akamai.com>
> Reported-by: Vladimir Rutsky <rutsky@google.com>
> ---
> net/core/stream.c | 16 +++++++++-------
> 1 file changed, 9 insertions(+), 7 deletions(-)
Acked-by: Neal Cardwell <ncardwell@google.com>
Thanks, Eric!
neal
^ permalink raw reply
* Re: 5.3-rc3-ish VM crash: RIP: 0010:tcp_trim_head+0x20/0xe0
From: Eric Dumazet @ 2019-08-17 16:35 UTC (permalink / raw)
To: Sander Eikelenboom, Eric Dumazet, netdev, linux-kernel
In-Reply-To: <674de4ab-c37f-7787-f95a-3ae0f52bc196@eikelenboom.it>
On 8/17/19 10:24 AM, Sander Eikelenboom wrote:
> On 12/08/2019 19:56, Eric Dumazet wrote:
>>
>>
>> On 8/12/19 2:50 PM, Sander Eikelenboom wrote:
>>> L.S.,
>>>
>>> While testing a somewhere-after-5.3-rc3 kernel (which included the latest net merge (33920f1ec5bf47c5c0a1d2113989bdd9dfb3fae9),
>>> one of my Xen VM's (which gets quite some network load) crashed.
>>> See below for the stacktrace.
>>>
>>> Unfortunately I haven't got a clear trigger, so bisection doesn't seem to be an option at the moment.
>>> I haven't encountered this on 5.2, so it seems to be an regression against 5.2.
>>>
>>> Any ideas ?
>>>
>>> --
>>> Sander
>>>
>>>
>>> [16930.653595] general protection fault: 0000 [#1] SMP NOPTI
>>> [16930.653624] CPU: 0 PID: 3275 Comm: rsync Not tainted 5.3.0-rc3-20190809-doflr+ #1
>>> [16930.653657] RIP: 0010:tcp_trim_head+0x20/0xe0
>>> [16930.653677] Code: 2e 0f 1f 84 00 00 00 00 00 90 41 54 41 89 d4 55 48 89 fd 53 48 89 f3 f6 46 7e 01 74 2f 8b 86 bc 00 00 00 48 03 86 c0 00 00 00 <8b> 40 20 66 83 f8 01 74 19 31 d2 31 f6 b9 20 0a 00 00 48 89 df e8
>>> [16930.653741] RSP: 0000:ffffc90000003ad8 EFLAGS: 00010286
>>> [16930.653762] RAX: fffe888005bf62c0 RBX: ffff8880115fb800 RCX: 000000008010000b
>>
>> crash in " mov 0x20(%rax),%eax" and RAX=fffe888005bf62c0 (not a valid kernel address)
>>
>> Look like one bit corruption maybe.
>>
>> Nothing comes to mind really between 5.2 and 53 that could explain this.
>>
>>> [16930.653791] RDX: 00000000000005a0 RSI: ffff8880115fb800 RDI: ffff888016b00880
>>> [16930.653819] RBP: ffff888016b00880 R08: 0000000000000001 R09: 0000000000000000
>>> [16930.653848] R10: ffff88800ae00800 R11: 00000000bfe632e6 R12: 00000000000005a0
>>> [16930.653875] R13: 0000000000000001 R14: 00000000bfe62d46 R15: 0000000000000004
>>> [16930.653913] FS: 00007fe71fe2cb80(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
>>> [16930.653943] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [16930.653965] CR2: 000055de0f3e7000 CR3: 0000000011f32000 CR4: 00000000000006f0
>>> [16930.653993] Call Trace:
>>> [16930.654005] <IRQ>
>>> [16930.654018] tcp_ack+0xbb0/0x1230
>>> [16930.654033] tcp_rcv_established+0x2e8/0x630
>>> [16930.654053] tcp_v4_do_rcv+0x129/0x1d0
>>> [16930.654070] tcp_v4_rcv+0xac9/0xcb0
>>> [16930.654088] ip_protocol_deliver_rcu+0x27/0x1b0
>>> [16930.654109] ip_local_deliver_finish+0x3f/0x50
>>> [16930.654128] ip_local_deliver+0x4d/0xe0
>>> [16930.654145] ? ip_protocol_deliver_rcu+0x1b0/0x1b0
>>> [16930.654163] ip_rcv+0x4c/0xd0
>>> [16930.654179] __netif_receive_skb_one_core+0x79/0x90
>>> [16930.654200] netif_receive_skb_internal+0x2a/0xa0
>>> [16930.654219] napi_gro_receive+0xe7/0x140
>>> [16930.654237] xennet_poll+0x9be/0xae0
>>> [16930.654254] net_rx_action+0x136/0x340
>>> [16930.654271] __do_softirq+0xdd/0x2cf
>>> [16930.654287] irq_exit+0x7a/0xa0
>>> [16930.654304] xen_evtchn_do_upcall+0x27/0x40
>>> [16930.654320] xen_hvm_callback_vector+0xf/0x20
>>> [16930.654339] </IRQ>
>>> [16930.654349] RIP: 0033:0x55de0d87db99
>>> [16930.654364] Code: 00 00 48 89 7c 24 f8 45 39 fe 45 0f 42 fe 44 89 7c 24 f4 eb 09 0f 1f 40 00 83 e9 01 74 3e 89 f2 48 63 f8 4c 01 d2 44 38 1c 3a <75> 25 44 38 6c 3a ff 75 1e 41 0f b6 3c 24 40 38 3a 75 14 41 0f b6
>>> [16930.654432] RSP: 002b:00007ffd5531eec8 EFLAGS: 00000a87 ORIG_RAX: ffffffffffffff0c
>>> [16930.655004] RAX: 0000000000000002 RBX: 000055de0f3e8e50 RCX: 000000000000007f
>>> [16930.655034] RDX: 000055de0f3dc2d2 RSI: 0000000000003492 RDI: 0000000000000002
>>> [16930.655062] RBP: 0000000000007fff R08: 00000000000080ea R09: 00000000000001f0
>>> [16930.655089] R10: 000055de0f3d8e40 R11: 0000000000000094 R12: 000055de0f3e0f2a
>>> [16930.655116] R13: 0000000000000010 R14: 0000000000007f16 R15: 0000000000000080
>>> [16930.655144] Modules linked in:
>>> [16930.655200] ---[ end trace 533367c95501b645 ]---
>>> [16930.655223] RIP: 0010:tcp_trim_head+0x20/0xe0
>>> [16930.655243] Code: 2e 0f 1f 84 00 00 00 00 00 90 41 54 41 89 d4 55 48 89 fd 53 48 89 f3 f6 46 7e 01 74 2f 8b 86 bc 00 00 00 48 03 86 c0 00 00 00 <8b> 40 20 66 83 f8 01 74 19 31 d2 31 f6 b9 20 0a 00 00 48 89 df e8
>>> [16930.655312] RSP: 0000:ffffc90000003ad8 EFLAGS: 00010286
>>> [16930.655331] RAX: fffe888005bf62c0 RBX: ffff8880115fb800 RCX: 000000008010000b
>>> [16930.655360] RDX: 00000000000005a0 RSI: ffff8880115fb800 RDI: ffff888016b00880
>>> [16930.655387] RBP: ffff888016b00880 R08: 0000000000000001 R09: 0000000000000000
>>> [16930.655414] R10: ffff88800ae00800 R11: 00000000bfe632e6 R12: 00000000000005a0
>>> [16930.655441] R13: 0000000000000001 R14: 00000000bfe62d46 R15: 0000000000000004
>>> [16930.655475] FS: 00007fe71fe2cb80(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
>>> [16930.655502] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [16930.655525] CR2: 000055de0f3e7000 CR3: 0000000011f32000 CR4: 00000000000006f0
>>> [16930.655553] Kernel panic - not syncing: Fatal exception in interrupt
>>> [16930.655789] Kernel Offset: disabled
>>>
>
> Hi Eric,
>
> Got another VM crash, with a slightly different stacktrace this time around.
> Still networking though.
>
> --
> Sander
>
> [112522.697498] general protection fault: 0000 [#1] SMP NOPTI
> [112522.697555] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc4-20190812-doflr+ #1
> [112522.697592] RIP: 0010:skb_shift+0x63/0x430
> [112522.697608] Code: bc 00 00 00 48 03 8f c0 00 00 00 f6 41 03 08 74 07 48 83 79 28 00 75 d0 8b 8e bc 00 00 00 48 03 8e c0 00 00 00 48 85 f6 74 0a <f6> 41 03 08 0f 85 09 03 00 00 49 89 fd 8b bf bc 00 00 00 41 89
crash in "testb $0x8,0x3(%rcx)" with RCX==fffe8880117da6c0
Same strange looking address on x86_64
I have no idea.
> [112522.697673] RSP: 0018:ffffc900000039b0 EFLAGS: 00010286
> [112522.697693] RAX: 00000000000005a0 RBX: ffff8880117fb800 RCX: fffe8880117da6c0
> [112522.697721] RDX: 00000000000005a0 RSI: ffff8880117fb800 RDI: ffff88800ae58000
> [112522.697748] RBP: ffffc900000039e8 R08: 000000000004cfe0 R09: 00000000000005a0
> [112522.697775] R10: 00000000000005a0 R11: ffff8880117fb800 R12: 0000000000000000
> [112522.697803] R13: 00000000c95a98c2 R14: 0000000000000000 R15: ffff88800ae58000
> [112522.697839] FS: 0000000000000000(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
> [112522.697869] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [112522.697895] CR2: 00007f9210d8e078 CR3: 000000000b660000 CR4: 00000000000006f0
> [112522.697925] Call Trace:
> [112522.697938] <IRQ>
> [112522.697951] tcp_sacktag_walk+0x2af/0x480
> [112522.697967] tcp_sacktag_write_queue+0x34d/0x820
> [112522.697986] ? ip_forward_options.cold.0+0x1c/0x1c
> [112522.698007] tcp_ack+0xb8c/0x1230
> [112522.698023] ? tcp_event_new_data_sent+0x4a/0x90
> [112522.698043] tcp_rcv_established+0x14c/0x630
> [112522.698064] tcp_v4_do_rcv+0x129/0x1d0
> [112522.698081] tcp_v4_rcv+0xac9/0xcb0
> [112522.698099] ip_protocol_deliver_rcu+0x27/0x1b0
> [112522.698119] ip_local_deliver_finish+0x3f/0x50
> [112522.698139] ip_local_deliver+0x4d/0xe0
> [112522.698155] ? ip_protocol_deliver_rcu+0x1b0/0x1b0
> [112522.698177] ip_rcv+0x4c/0xd0
> [112522.698194] __netif_receive_skb_one_core+0x79/0x90
> [112522.698215] netif_receive_skb_internal+0x2a/0xa0
> [112522.698237] napi_gro_receive+0xe7/0x140
> [112522.698255] xennet_poll+0x9be/0xae0
> [112522.698271] net_rx_action+0x136/0x340
> [112522.698288] __do_softirq+0xdd/0x2cf
> [112522.698304] irq_exit+0x7a/0xa0
> [112522.698321] xen_evtchn_do_upcall+0x27/0x40
> [112522.698340] xen_hvm_callback_vector+0xf/0x20
> [112522.698359] </IRQ>
> [112522.698373] RIP: 0010:native_safe_halt+0xe/0x10
> [112522.698392] Code: 48 8b 04 25 c0 6b 01 00 f0 80 48 02 20 48 8b 00 a8 08 75 c4 eb 80 90 90 90 90 90 90 e9 07 00 00 00 0f 00 2d 54 fb 41 00 fb f4 <c3> 90 e9 07 00 00 00 0f 00 2d 44 fb 41 00 f4 c3 90 90 41 55 41 54
> [112522.699522] RSP: 0018:ffffffff82a03e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff0c
> [112522.699552] RAX: 0001a54800000000 RBX: 0000000000000000 RCX: 0000000000000001
> [112522.699580] RDX: 0000000002b9f9b6 RSI: 0000000000000087 RDI: 0000000000000000
> [112522.699608] RBP: 0000000000000000 R08: 000000001eb5c3cb R09: ffffffff82a08460
> [112522.699634] R10: 000000000002e46e R11: 0000000000000000 R12: 0000000000000000
> [112522.699662] R13: 0000000000000000 R14: ffffffff8326e0a0 R15: 0000000000000000
> [112522.699692] default_idle+0x17/0x140
> [112522.699709] do_idle+0x1ee/0x210
> [112522.699726] cpu_startup_entry+0x14/0x20
> [112522.699743] start_kernel+0x4e9/0x50b
> [112522.699760] secondary_startup_64+0xa4/0xb0
> [112522.699780] Modules linked in:
> [112522.699829] ---[ end trace 3b8db3603485e952 ]---
> [112522.699850] RIP: 0010:skb_shift+0x63/0x430
> [112522.699866] Code: bc 00 00 00 48 03 8f c0 00 00 00 f6 41 03 08 74 07 48 83 79 28 00 75 d0 8b 8e bc 00 00 00 48 03 8e c0 00 00 00 48 85 f6 74 0a <f6> 41 03 08 0f 85 09 03 00 00 49 89 fd 8b bf bc 00 00 00 41 89 d4
> [112522.699938] RSP: 0018:ffffc900000039b0 EFLAGS: 00010286
> [112522.699959] RAX: 00000000000005a0 RBX: ffff8880117fb800 RCX: fffe8880117da6c0
> [112522.699986] RDX: 00000000000005a0 RSI: ffff8880117fb800 RDI: ffff88800ae58000
> [112522.700013] RBP: ffffc900000039e8 R08: 000000000004cfe0 R09: 00000000000005a0
> [112522.700041] R10: 00000000000005a0 R11: ffff8880117fb800 R12: 0000000000000000
> [112522.700067] R13: 00000000c95a98c2 R14: 0000000000000000 R15: ffff88800ae58000
> [112522.700111] FS: 0000000000000000(0000) GS:ffff88801f200000(0000) knlGS:0000000000000000
> [112522.700140] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [112522.700165] CR2: 00007f9210d8e078 CR3: 000000000b660000 CR4: 00000000000006f0
> [112522.700201] Kernel panic - not syncing: Fatal exception in interrupt
> [112522.702992] Kernel Offset: disabled
>
>
^ permalink raw reply
* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: Eric Dumazet @ 2019-08-17 16:26 UTC (permalink / raw)
To: Jason Baron, Eric Dumazet, David S . Miller
Cc: netdev, Soheil Hassas Yeganeh, Neal Cardwell, Eric Dumazet,
Vladimir Rutsky
In-Reply-To: <b9ab6b03-664c-eb81-0fbd-6f696276d9aa@akamai.com>
On 8/17/19 4:19 PM, Jason Baron wrote:
>
>
> On 8/17/19 12:26 AM, Eric Dumazet wrote:
>> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
>> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
>> when needed.
>>
>> However, Jason patch had a bug, because the 'nonblocking' status
>> as far as sk_stream_wait_memory() is concerned is governed
>> by MSG_DONTWAIT flag passed at sendmsg() time :
>>
>> long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
>>
>> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
>> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
>> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
>> value.
>
> Is MSG_DONTWAIT not set in this case? The original patch was intended
> only for the explicit non-blocking case. The epoll manpage says:
> "EPOLLET flag should use nonblocking file descriptors". So the original
> intention was not to impact the blocking case. This seems to me like
> a different use-case.
>
I guess the problem is how we define 'non-blocking' ...
SO_SNDTIMEO can be used by application to implement a variation of non-blocking,
by waiting for a socket event with a short timeout, to maybe recover
from memory pressure conditions in a more efficient way than simply looping.
Note that the man page for epoll() only _suggests_ to use nonblocking file descriptors.
<quote>
The suggested way to use epoll as an edge-triggered (EPOLLET)
interface is as follows:
i with nonblocking file descriptors; and
ii by waiting for an event only after read(2) or
write(2) return EAGAIN.
</quote>
^ permalink raw reply
* Re: [PATCH 1/2] PTP: introduce new versions of IOCTLs
From: Joe Perches @ 2019-08-17 16:17 UTC (permalink / raw)
To: Richard Cochran, Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190817155927.GA1540@localhost>
On Sat, 2019-08-17 at 08:59 -0700, Richard Cochran wrote:
> On Wed, Aug 14, 2019 at 10:47:11AM +0300, Felipe Balbi wrote:
> > The current version of the IOCTL have a small problem which prevents us
> > from extending the API by making use of reserved fields. In these new
> > IOCTLs, we are now making sure that flags and rsv fields are zero which
> > will allow us to extend the API in the future.
> >
> > Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
> > ---
> > drivers/ptp/ptp_chardev.c | 58 ++++++++++++++++++++++++++++++++--
> > include/uapi/linux/ptp_clock.h | 12 +++++++
> > 2 files changed, 68 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
[]
> > @@ -123,9 +123,11 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
> > struct timespec64 ts;
> > int enable, err = 0;
> >
> > + memset(&req, 0, sizeof(req));
>
> Nit: please leave a blank line between memset() and switch/case.
or just initialize the declaration of req with = {}
Is there a case where this initialization is
unnecessary such that it impacts performance
given the use in ptp_ioctl?
caps for instance is memset to zero only in
PTP_CLOCK_GETCAP
req is used in only 3 of the case blocks.
case PTP_EXTTS_REQUEST:
case PTP_PEROUT_REQUEST:
case PTP_ENABLE_PPS:
Maybe it would be better to move the memset(&req...)
into each of the case blocks.
> > switch (cmd) {
> >
> > case PTP_CLOCK_GETCAPS:
> > + case PTP_CLOCK_GETCAPS2:
> > memset(&caps, 0, sizeof(caps));
> > caps.max_adj = ptp->info->max_adj;
> > caps.n_alarm = ptp->info->n_alarm;
>
> Reviewed-by: Richard Cochran <richardcochran@gmail.com>
>
^ permalink raw reply
* Re: [PATCH 2/2] PTP: add support for one-shot output
From: Richard Cochran @ 2019-08-17 16:03 UTC (permalink / raw)
To: Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190814074712.10684-2-felipe.balbi@linux.intel.com>
On Wed, Aug 14, 2019 at 10:47:12AM +0300, Felipe Balbi wrote:
> diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
> index 039cd62ec706..9412b16cc8ed 100644
> --- a/include/uapi/linux/ptp_clock.h
> +++ b/include/uapi/linux/ptp_clock.h
> @@ -67,7 +67,9 @@ struct ptp_perout_request {
> struct ptp_clock_time start; /* Absolute start time. */
> struct ptp_clock_time period; /* Desired period, zero means disable. */
> unsigned int index; /* Which channel to configure. */
> - unsigned int flags; /* Reserved for future use. */
> +
> +#define PTP_PEROUT_ONE_SHOT BIT(0)
> + unsigned int flags; /* Bit 0 -> oneshot output. */
The .flags field doesn't need this comment. The individual BIT macro
names should be clear enough, and if not, then comment the macros.
> unsigned int rsv[4]; /* Reserved for future use. */
> };
>
> --
> 2.22.0
>
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
^ permalink raw reply
* Re: [PATCH 1/2] PTP: introduce new versions of IOCTLs
From: Richard Cochran @ 2019-08-17 15:59 UTC (permalink / raw)
To: Felipe Balbi; +Cc: Christopher S Hall, netdev, linux-kernel
In-Reply-To: <20190814074712.10684-1-felipe.balbi@linux.intel.com>
On Wed, Aug 14, 2019 at 10:47:11AM +0300, Felipe Balbi wrote:
> The current version of the IOCTL have a small problem which prevents us
> from extending the API by making use of reserved fields. In these new
> IOCTLs, we are now making sure that flags and rsv fields are zero which
> will allow us to extend the API in the future.
>
> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
> ---
> drivers/ptp/ptp_chardev.c | 58 ++++++++++++++++++++++++++++++++--
> include/uapi/linux/ptp_clock.h | 12 +++++++
> 2 files changed, 68 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 18ffe449efdf..204212fc3f8c 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -123,9 +123,11 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
> struct timespec64 ts;
> int enable, err = 0;
>
> + memset(&req, 0, sizeof(req));
Nit: please leave a blank line between memset() and switch/case.
> switch (cmd) {
>
> case PTP_CLOCK_GETCAPS:
> + case PTP_CLOCK_GETCAPS2:
> memset(&caps, 0, sizeof(caps));
> caps.max_adj = ptp->info->max_adj;
> caps.n_alarm = ptp->info->n_alarm;
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Andy Lutomirski @ 2019-08-17 15:44 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Thomas Gleixner, Jordan Glover, Andy Lutomirski,
Daniel Colascione, Song Liu, Kees Cook, Networking, bpf,
Alexei Starovoitov, Daniel Borkmann, Kernel Team, Lorenz Bauer,
Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190817150245.xxzxqjpvgqsxmloe@ast-mbp>
> On Aug 17, 2019, at 8:02 AM, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>
> Can any of the mechanisms 1/2/3 address the concern in mds.rst?
>
seccomp() can. It’s straightforward to use seccomp to disable bpf() outright for a process tree. In this regard, bpf() isn’t particularly unique — it’s a system call that exposes some attack surface and that isn’t required by most programs for basic functionality.
At LPC this year, there will be a discussion about seccomp improvements that will, among other things, offer fiber-grained control. It’s quite likely, for example, that seccomp will soon be able to enable and disable specific map types or attach types. The exact mechanism isn’t decided yet, but I think everyone expects that this is mostly a design problem, not an implementation problem.
This is off topic for the current thread, but it could be useful to allow bpf programs to be loaded from files directly (i.e. pass an fd to a file into bpf() to load the program), which would enable LSMs to check that the file is appropriately labeled. This would dramatically raise the bar for exploitation of verifier bugs or speculation attacks, since anyone trying to exploit it would need to get the bpf payload through LSM policy first.
> I believe Andy wants to expand the attack surface when
> kernel.unprivileged_bpf_disabled=0
> Before that happens I'd like the community to work on addressing the text above.
>
Not by much. BPF maps are already largely exposed to unprivileged code (when unprivileged_bpf_disabled=0). The attack surface is there, and they’re arguably even more exposed than they should be. My patch 1 earlier was about locking these interfaces down.
Similarly, my suggestions about reworking cgroup attach and program load don’t actually allow fully unprivileged users to run arbitrary bpf() programs [0] — under my proposal, to attach a bpf cgroup program, you need a delegated cgroup. The mechanism could be extended by a requirement that a privileged cgroup manager explicitly enable certain attach types for a delegated subtree.
A cgroup knob to turn unprivileged bpf on and off for tasks in the cgroup might actually be quite useful.
[0] on some thought, the test run mechanism should probably remain root-only.
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Christian Brauner @ 2019-08-17 15:42 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190817153652.zfcsklt474j72dzm@ast-mbp.dhcp.thefacebook.com>
On August 17, 2019 5:36:54 PM GMT+02:00, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>On Sat, Aug 17, 2019 at 05:16:53PM +0200, Christian Brauner wrote:
>> On August 17, 2019 5:08:45 PM GMT+02:00, Alexei Starovoitov
><alexei.starovoitov@gmail.com> wrote:
>> >On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
>> >>
>> >> (The one usecase I'd care about is to extend seccomp to do
>> >pointer-based
>> >> syscall filtering. Whether or not that'd require (unprivileged)
>ebpf
>> >is
>> >> up for discussion at KSummit.)
>> >
>> >Kees have been always against using ebpf in seccomp. I believe he
>still
>> >holds this opinion. Until he changes his mind let's stop bringing
>> >seccomp
>> >as a use case for unpriv bpf.
>>
>> That's why I said "whether or not".
>> For the record, I do prefer a non-unpriv-ebpf way.
>> It's still something that will most surely come up in the discussion
>though.
>
>It's very un-kernely way to defer to in-person meetings.
>If there is anything to discuss please discuss it on the public mailing
>list.
https://lists.linuxfoundation.org/pipermail/ksummit-discuss/2019-July/006699.html
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Alexei Starovoitov @ 2019-08-17 15:36 UTC (permalink / raw)
To: Christian Brauner
Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <61B88085-9FBB-41E6-9783-324E445E428D@ubuntu.com>
On Sat, Aug 17, 2019 at 05:16:53PM +0200, Christian Brauner wrote:
> On August 17, 2019 5:08:45 PM GMT+02:00, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
> >On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
> >>
> >> (The one usecase I'd care about is to extend seccomp to do
> >pointer-based
> >> syscall filtering. Whether or not that'd require (unprivileged) ebpf
> >is
> >> up for discussion at KSummit.)
> >
> >Kees have been always against using ebpf in seccomp. I believe he still
> >holds this opinion. Until he changes his mind let's stop bringing
> >seccomp
> >as a use case for unpriv bpf.
>
> That's why I said "whether or not".
> For the record, I do prefer a non-unpriv-ebpf way.
> It's still something that will most surely come up in the discussion though.
It's very un-kernely way to defer to in-person meetings.
If there is anything to discuss please discuss it on the public mailing list.
^ permalink raw reply
* Re: kernel BUG at include/linux/skbuff.h:LINE! (2)
From: syzbot @ 2019-08-17 15:28 UTC (permalink / raw)
To: andriy.shevchenko, davem, edumazet, f.fainelli, idosch,
kimbrownkd, linux-kernel, linux-sctp, marcelo.leitner, netdev,
nhorman, syzkaller-bugs, tglx, vyasevich, wanghai26, yuehaibing
In-Reply-To: <0000000000008182a50590404a02@google.com>
syzbot has bisected this bug to:
commit bc389fd101e57b36aacfaec2df8fe479eabb44ea
Author: David S. Miller <davem@davemloft.net>
Date: Tue Jul 2 21:12:30 2019 +0000
Merge branch 'macsec-fix-some-bugs-in-the-receive-path'
bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=125c5c4c600000
start commit: 459c5fb4 Merge branch 'mscc-PTP-support'
git tree: net-next
final crash: https://syzkaller.appspot.com/x/report.txt?x=115c5c4c600000
console output: https://syzkaller.appspot.com/x/log.txt?x=165c5c4c600000
kernel config: https://syzkaller.appspot.com/x/.config?x=d4cf1ffb87d590d7
dashboard link: https://syzkaller.appspot.com/bug?extid=eb349eeee854e389c36d
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=111849e2600000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=1442c25a600000
Reported-by: syzbot+eb349eeee854e389c36d@syzkaller.appspotmail.com
Fixes: bc389fd101e5 ("Merge
branch 'macsec-fix-some-bugs-in-the-receive-path'")
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Christian Brauner @ 2019-08-17 15:16 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190817150843.4vsmzpwpcvzndjld@ast-mbp>
On August 17, 2019 5:08:45 PM GMT+02:00, Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:
>On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
>>
>> (The one usecase I'd care about is to extend seccomp to do
>pointer-based
>> syscall filtering. Whether or not that'd require (unprivileged) ebpf
>is
>> up for discussion at KSummit.)
>
>Kees have been always against using ebpf in seccomp. I believe he still
>holds this opinion. Until he changes his mind let's stop bringing
>seccomp
>as a use case for unpriv bpf.
That's why I said "whether or not".
For the record, I do prefer a non-unpriv-ebpf way.
It's still something that will most surely come up in the discussion though.
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Alexei Starovoitov @ 2019-08-17 15:08 UTC (permalink / raw)
To: Christian Brauner
Cc: Andy Lutomirski, Kees Cook, Andy Lutomirski, Song Liu, Networking,
bpf, Alexei Starovoitov, Daniel Borkmann, Kernel Team,
Lorenz Bauer, Jann Horn, Greg KH, Linux API, LSM List
In-Reply-To: <20190816222252.a7zizw7azkxnv3ot@wittgenstein>
On Sat, Aug 17, 2019 at 12:22:53AM +0200, Christian Brauner wrote:
>
> (The one usecase I'd care about is to extend seccomp to do pointer-based
> syscall filtering. Whether or not that'd require (unprivileged) ebpf is
> up for discussion at KSummit.)
Kees have been always against using ebpf in seccomp. I believe he still
holds this opinion. Until he changes his mind let's stop bringing seccomp
as a use case for unpriv bpf.
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Alexei Starovoitov @ 2019-08-17 15:02 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Jordan Glover, Andy Lutomirski, Daniel Colascione, Song Liu,
Kees Cook, Networking, bpf, Alexei Starovoitov, Daniel Borkmann,
Kernel Team, Lorenz Bauer, Jann Horn, Greg KH, Linux API,
LSM List
In-Reply-To: <alpine.DEB.2.21.1908162211270.1923@nanos.tec.linutronix.de>
On Fri, Aug 16, 2019 at 10:28:29PM +0200, Thomas Gleixner wrote:
> Alexei,
>
> On Fri, 16 Aug 2019, Alexei Starovoitov wrote:
> > It's both of the above when 'systemd' is not taken literally.
> > To earlier Thomas's point: the use case is not only about systemd.
> > There are other containers management systems.
>
> <SNIP>
>
> > These daemons need to drop privileges to make the system safer == less
> > prone to corruption due to bugs in themselves. Not necessary security
> > bugs.
>
> Let's take a step back.
>
> While real usecases are helpful to understand a design decision, the design
> needs to be usecase independent.
>
> The kernel provides mechanisms, not policies. My impression of this whole
> discussion is that it is policy driven. That's the wrong approach.
not sure what you mean by 'policy driven'.
Proposed CAP_BPF is a policy?
My desire to do kernel.unprivileged_bpf_disabled=1 is driven by
text in Documentation/x86/mds.rst which says:
"There is one exception, which is untrusted BPF. The functionality of
untrusted BPF is limited, but it needs to be thoroughly investigated
whether it can be used to create such a construct."
commit 6a9e52927251 ("x86/speculation/mds: Add mds_clear_cpu_buffers()")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Jon Masters <jcm@redhat.com>
Tested-by: Jon Masters <jcm@redhat.com>
The way I read this text:
- there is a concern that mds is exploitable via bpf
- there is a desire to investigate to address this concern
I'm committed to help with the investigation.
In the mean time I propose a path to do
kernel.unprivileged_bpf_disabled=1 which is CAP_BPF.
Can kernel.unprivileged_bpf_disabled=1 be used now?
Yes, but it will weaken overall system security because things that
use unpriv to load bpf and CAP_NET_ADMIN to attach bpf would need
to move to stronger CAP_SYS_ADMIN.
With CAP_BPF both load and attach would happen under CAP_BPF
instead of CAP_SYS_ADMIN.
> So let's look at the mechanisms which we have at hand:
>
> 1) Capabilities
>
> 2) SUID and dropping priviledges
>
> 3) Seccomp and LSM
>
> Now the real interesting questions are:
>
> A) What kind of restrictions does BPF allow? Is it a binary on/off or is
> there a more finegrained control of BPF functionality?
>
> TBH, I can't tell.
>
> B) Depending on the answer to #A what is the control possibility for
> #1/#2/#3 ?
Can any of the mechanisms 1/2/3 address the concern in mds.rst?
I believe Andy wants to expand the attack surface when
kernel.unprivileged_bpf_disabled=0
Before that happens I'd like the community to work on addressing the text above.
^ permalink raw reply
* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: Jason Baron @ 2019-08-17 14:19 UTC (permalink / raw)
To: Eric Dumazet, David S . Miller
Cc: netdev, Soheil Hassas Yeganeh, Neal Cardwell, Eric Dumazet,
Vladimir Rutsky
In-Reply-To: <20190817042622.91497-1-edumazet@google.com>
On 8/17/19 12:26 AM, Eric Dumazet wrote:
> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
> when needed.
>
> However, Jason patch had a bug, because the 'nonblocking' status
> as far as sk_stream_wait_memory() is concerned is governed
> by MSG_DONTWAIT flag passed at sendmsg() time :
>
> long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
>
> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
> value.
Is MSG_DONTWAIT not set in this case? The original patch was intended
only for the explicit non-blocking case. The epoll manpage says:
"EPOLLET flag should use nonblocking file descriptors". So the original
intention was not to impact the blocking case. This seems to me like
a different use-case.
Thanks,
-Jason
> This patch removes the 'noblock' variable since we must always
> set SOCK_NOSPACE if -EAGAIN is returned.
>
> It also renames the do_nonblock label since we might reach this
> code path even if we were in blocking mode.
>
> Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jason Baron <jbaron@akamai.com>
> Reported-by: Vladimir Rutsky <rutsky@google.com>
> ---
> net/core/stream.c | 16 +++++++++-------
> 1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/net/core/stream.c b/net/core/stream.c
> index e94bb02a56295ec2db34ab423a8c7c890df0a696..4f1d4aa5fb38d989a9c81f32dfce3f31bbc1fa47 100644
> --- a/net/core/stream.c
> +++ b/net/core/stream.c
> @@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
> int err = 0;
> long vm_wait = 0;
> long current_timeo = *timeo_p;
> - bool noblock = (*timeo_p ? false : true);
> DEFINE_WAIT_FUNC(wait, woken_wake_function);
>
> if (sk_stream_memory_free(sk))
> @@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
>
> if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
> goto do_error;
> - if (!*timeo_p) {
> - if (noblock)
> - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
> - goto do_nonblock;
> - }
> + if (!*timeo_p)
> + goto do_eagain;
> if (signal_pending(current))
> goto do_interrupted;
> sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
> @@ -169,7 +165,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
> do_error:
> err = -EPIPE;
> goto out;
> -do_nonblock:
> +do_eagain:
> + /* Make sure that whenever EAGAIN is returned, EPOLLOUT event can
> + * be generated later.
> + * When TCP receives ACK packets that make room, tcp_check_space()
> + * only calls tcp_new_space() if SOCK_NOSPACE is set.
> + */
> + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
> err = -EAGAIN;
> goto out;
> do_interrupted:
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox