* [PATCH net 11/18] net/ena: fix potential access to freed memory during device reset
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
If the ena driver detects that the device is not behave as expected,
it tries to reset the device.
The reset flow calls ena_down, which will frees all the resources
the driver allocates and then it will reset the device.
This flow can cause memory corruption if the device is still writes
to the driver's memory space.
To overcome this potential race, move the reset before the device
resources are freed.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 56 +++++++++++++++++++++-------
1 file changed, 43 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index dd7c74b..3bc8f43 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -80,14 +80,18 @@ static void ena_tx_timeout(struct net_device *dev)
{
struct ena_adapter *adapter = netdev_priv(dev);
+ /* Change the state of the device to trigger reset
+ * Check that we are not in the middle or a trigger already
+ */
+
+ if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
u64_stats_update_begin(&adapter->syncp);
adapter->dev_stats.tx_timeout++;
u64_stats_update_end(&adapter->syncp);
netif_err(adapter, tx_err, dev, "Transmit time out\n");
-
- /* Change the state of the device to trigger reset */
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
}
static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
@@ -1124,7 +1128,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
- if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+ if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
napi_complete_done(napi, 0);
return 0;
}
@@ -1713,12 +1718,22 @@ static void ena_down(struct ena_adapter *adapter)
adapter->dev_stats.interface_down++;
u64_stats_update_end(&adapter->syncp);
- /* After this point the napi handler won't enable the tx queue */
- ena_napi_disable_all(adapter);
netif_carrier_off(adapter->netdev);
netif_tx_disable(adapter->netdev);
+ /* After this point the napi handler won't enable the tx queue */
+ ena_napi_disable_all(adapter);
+
/* After destroy the queue there won't be any new interrupts */
+
+ if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
+ int rc;
+
+ rc = ena_com_dev_reset(adapter->ena_dev);
+ if (rc)
+ dev_err(&adapter->pdev->dev, "Device reset failed\n");
+ }
+
ena_destroy_all_io_queues(adapter);
ena_disable_io_intr_sync(adapter);
@@ -2081,6 +2096,14 @@ static void ena_netpoll(struct net_device *netdev)
struct ena_adapter *adapter = netdev_priv(netdev);
int i;
+ /* Dont schedule NAPI if the driver is in the middle of reset
+ * or netdev is down.
+ */
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
for (i = 0; i < adapter->num_queues; i++)
napi_schedule(&adapter->ena_napi[i].napi);
}
@@ -2468,6 +2491,14 @@ static void ena_fw_reset_device(struct work_struct *work)
bool dev_up, wd_state;
int rc;
+ if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ dev_err(&pdev->dev,
+ "device reset schedule while reset bit is off\n");
+ return;
+ }
+
+ netif_carrier_off(netdev);
+
del_timer_sync(&adapter->timer_service);
rtnl_lock();
@@ -2481,12 +2512,6 @@ static void ena_fw_reset_device(struct work_struct *work)
*/
ena_close(netdev);
- rc = ena_com_dev_reset(ena_dev);
- if (rc) {
- dev_err(&pdev->dev, "Device reset failed\n");
- goto err;
- }
-
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
@@ -2499,6 +2524,8 @@ static void ena_fw_reset_device(struct work_struct *work)
ena_com_mmio_reg_read_request_destroy(ena_dev);
+ clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
/* Finish with the destroy part. Start the init part */
rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
@@ -2562,6 +2589,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
return;
+ if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+ return;
+
if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
return;
@@ -2703,7 +2733,7 @@ static void ena_timer_service(unsigned long data)
if (host_info)
ena_update_host_info(host_info, adapter->netdev);
- if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
netif_err(adapter, drv, adapter->netdev,
"Trigger reset is on\n");
ena_dump_stats_to_dmesg(adapter);
--
1.9.1
^ permalink raw reply related
* [PATCH net 01/18] net/ena: remove RFS support from device feature list
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Remove NETIF_F_NTUPLE from netdev->features.
The ENA device driver does not support RFS acceleration.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index bfeaec5..33a760e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2729,7 +2729,6 @@ static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
netdev->features =
dev_features |
NETIF_F_SG |
- NETIF_F_NTUPLE |
NETIF_F_RXHASH |
NETIF_F_HIGHDMA;
--
1.9.1
^ permalink raw reply related
* [PATCH net 16/18] net/ena: fix error handling when probe fails
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
When driver fails in probe, it will release all resources, including
adapter.
In case of probe failure, ena_remove should not try to free the adapter
resources.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index bff082a..ba395aa 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3179,6 +3179,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err_free_region:
ena_release_bars(ena_dev, pdev);
err_free_ena_dev:
+ pci_set_drvdata(pdev, NULL);
vfree(ena_dev);
err_disable_device:
pci_disable_device(pdev);
--
1.9.1
^ permalink raw reply related
* [PATCH net 10/18] net/ena: use READ_ONCE to access completion descriptors
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Completion descriptors are accessed from the driver and from the device.
To avoid reading the old value, use READ_ONCE macro.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_com.h | 1 +
drivers/net/ethernet/amazon/ena/ena_eth_com.c | 8 ++++----
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 6883ee5..f8cdce0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -33,6 +33,7 @@
#ifndef ENA_COM
#define ENA_COM
+#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index 539c536..f999305 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -45,7 +45,7 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+ (head_masked * io_cq->cdesc_entry_size_in_bytes));
- desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+ desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
if (desc_phase != expected_phase)
@@ -141,7 +141,7 @@ static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
ena_com_cq_inc_head(io_cq);
count++;
- last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+ last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
} while (!last);
@@ -489,13 +489,13 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
* expected, it mean that the device still didn't update
* this completion.
*/
- cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+ cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
if (cdesc_phase != expected_phase)
return -EAGAIN;
ena_com_cq_inc_head(io_cq);
- *req_id = cdesc->req_id;
+ *req_id = READ_ONCE(cdesc->req_id);
return 0;
}
--
1.9.1
^ permalink raw reply related
* [PATCH net 08/18] net/ena: change sizeof() argument to be the type pointer
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Instead of using:
memset(ptr, 0x0, sizeof(struct ...))
use:
memset(ptr, 0x0, sizeor(*ptr))
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_com.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 366c2c5..edb2e81 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -329,7 +329,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
size_t size;
int dev_node = 0;
- memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+ memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
io_sq->desc_entry_size =
(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
@@ -383,7 +383,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
size_t size;
int prev_node = 0;
- memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+ memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr));
/* Use the basic completion descriptor for Rx */
io_cq->cdesc_entry_size_in_bytes =
@@ -681,7 +681,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
u8 direction;
int ret;
- memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+ memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
direction = ENA_ADMIN_SQ_DIRECTION_TX;
@@ -963,7 +963,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
u8 direction;
int ret;
- memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd));
+ memset(&create_cmd, 0x0, sizeof(create_cmd));
create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ;
@@ -1155,7 +1155,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
struct ena_admin_acq_create_cq_resp_desc cmd_completion;
int ret;
- memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd));
+ memset(&create_cmd, 0x0, sizeof(create_cmd));
create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ;
@@ -1263,7 +1263,7 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
struct ena_admin_acq_destroy_cq_resp_desc destroy_resp;
int ret;
- memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+ memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
destroy_cmd.cq_idx = io_cq->idx;
destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ;
@@ -1613,8 +1613,8 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
io_sq = &ena_dev->io_sq_queues[ctx->qid];
io_cq = &ena_dev->io_cq_queues[ctx->qid];
- memset(io_sq, 0x0, sizeof(struct ena_com_io_sq));
- memset(io_cq, 0x0, sizeof(struct ena_com_io_cq));
+ memset(io_sq, 0x0, sizeof(*io_sq));
+ memset(io_cq, 0x0, sizeof(*io_cq));
/* Init CQ */
io_cq->q_depth = ctx->queue_size;
--
1.9.1
^ permalink raw reply related
* [PATCH net 06/18] net/ena: fix ethtool RSS flow configuration
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
ena_flow_data_to_flow_hash and ena_flow_hash_to_flow_type
treat the ena_flow_hash_to_flow_type enum as power of two values.
Change the values of ena_admin_flow_hash_fields to be power of two values.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 51b2a92..c78f0b2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -633,22 +633,22 @@ enum ena_admin_flow_hash_proto {
/* RSS flow hash fields */
enum ena_admin_flow_hash_fields {
/* Ethernet Dest Addr */
- ENA_ADMIN_RSS_L2_DA = 0,
+ ENA_ADMIN_RSS_L2_DA = 0x1,
/* Ethernet Src Addr */
- ENA_ADMIN_RSS_L2_SA = 1,
+ ENA_ADMIN_RSS_L2_SA = 0x2,
/* ipv4/6 Dest Addr */
- ENA_ADMIN_RSS_L3_DA = 2,
+ ENA_ADMIN_RSS_L3_DA = 0x4,
/* ipv4/6 Src Addr */
- ENA_ADMIN_RSS_L3_SA = 5,
+ ENA_ADMIN_RSS_L3_SA = 0x8,
/* tcp/udp Dest Port */
- ENA_ADMIN_RSS_L4_DP = 6,
+ ENA_ADMIN_RSS_L4_DP = 0x10,
/* tcp/udp Src Port */
- ENA_ADMIN_RSS_L4_SP = 7,
+ ENA_ADMIN_RSS_L4_SP = 0x20,
};
struct ena_admin_proto_input {
--
1.9.1
^ permalink raw reply related
* [PATCH net 18/18] net/ena: change driver's default timeouts and increase driver version
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Driver version was increased to 1.1.2
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_com.c | 4 ++--
drivers/net/ethernet/amazon/ena/ena_netdev.h | 9 +++++----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index b2891f9..9b158f0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -36,9 +36,9 @@
/*****************************************************************************/
/* Timeout in micro-sec */
-#define ADMIN_CMD_TIMEOUT_US (1000000)
+#define ADMIN_CMD_TIMEOUT_US (3000000)
-#define ENA_ASYNC_QUEUE_DEPTH 4
+#define ENA_ASYNC_QUEUE_DEPTH 16
#define ENA_ADMIN_QUEUE_DEPTH 32
#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index c081fd3..de1e5ac 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -39,12 +39,13 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
#include "ena_com.h"
#include "ena_eth_com.h"
#define DRV_MODULE_VER_MAJOR 1
-#define DRV_MODULE_VER_MINOR 0
+#define DRV_MODULE_VER_MINOR 1
#define DRV_MODULE_VER_SUBMINOR 2
#define DRV_MODULE_NAME "ena"
@@ -100,7 +101,7 @@
/* Number of queues to check for missing queues per timer service */
#define ENA_MONITORED_TX_QUEUES 4
/* Max timeout packets before device reset */
-#define MAX_NUM_OF_TIMEOUTED_PACKETS 32
+#define MAX_NUM_OF_TIMEOUTED_PACKETS 128
#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
@@ -116,9 +117,9 @@
#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q))
/* ENA device should send keep alive msg every 1 sec.
- * We wait for 3 sec just to be on the safe side.
+ * We wait for 6 sec just to be on the safe side.
*/
-#define ENA_DEVICE_KALIVE_TIMEOUT (3 * HZ)
+#define ENA_DEVICE_KALIVE_TIMEOUT (6 * HZ)
#define ENA_MMIO_DISABLE_REG_READ BIT(0)
--
1.9.1
^ permalink raw reply related
* [PATCH net 17/18] net/ena: fix NULL dereference when removing the driver after device reset faild
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
If for some reason the device stop responding and the device reset failed
to recover the device, the mmio register read datastructure will not be
reinitialized.
On driver removal, the driver will also tries to reset the device
but this time the mmio data structure will be NULL.
To solve this issue perform the device reset in the remove function only if
the device is runnig.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index ba395aa..12d1dca 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2591,6 +2591,8 @@ static void ena_fw_reset_device(struct work_struct *work)
err:
rtnl_unlock();
+ clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
dev_err(&pdev->dev,
"Reset attempt failed. Can not reset the device\n");
}
@@ -3251,7 +3253,9 @@ static void ena_remove(struct pci_dev *pdev)
cancel_work_sync(&adapter->resume_io_task);
- ena_com_dev_reset(ena_dev);
+ /* Reset the device only if the device is running. */
+ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+ ena_com_dev_reset(ena_dev);
ena_free_mgmnt_irq(adapter);
--
1.9.1
^ permalink raw reply related
* [PATCH net 12/18] net/ena: refactor skb allocation
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
To increase readability, refactor skb allocation to dedicated function
This change does not impact the performance since the compiler optimize
the code and elimitate the if condition.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 46 ++++++++++++++++------------
1 file changed, 27 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 3bc8f43..b478c61 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -787,6 +787,28 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
return tx_pkts;
}
+static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
+{
+ struct sk_buff *skb;
+
+ if (frags)
+ skb = napi_get_frags(rx_ring->napi);
+ else
+ skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+ rx_ring->rx_copybreak);
+
+ if (unlikely(!skb)) {
+ u64_stats_update_begin(&rx_ring->syncp);
+ rx_ring->rx_stats.skb_alloc_fail++;
+ u64_stats_update_end(&rx_ring->syncp);
+ netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+ "Failed to allocate skb. frags: %d\n", frags);
+ return NULL;
+ }
+
+ return skb;
+}
+
static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
struct ena_com_rx_buf_info *ena_bufs,
u32 descs,
@@ -795,8 +817,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
struct sk_buff *skb;
struct ena_rx_buffer *rx_info =
&rx_ring->rx_buffer_info[*next_to_clean];
- u32 len;
- u32 buf = 0;
+ u32 len, buf = 0;
void *va;
len = ena_bufs[0].len;
@@ -815,16 +836,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
prefetch(va + NET_IP_ALIGN);
if (len <= rx_ring->rx_copybreak) {
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_copybreak);
- if (unlikely(!skb)) {
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->rx_stats.skb_alloc_fail++;
- u64_stats_update_end(&rx_ring->syncp);
- netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
- "Failed to allocate skb\n");
+ skb = ena_alloc_skb(rx_ring, false);
+ if (unlikely(!skb))
return NULL;
- }
netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
"rx allocated small packet. len %d. data_len %d\n",
@@ -848,15 +862,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
return skb;
}
- skb = napi_get_frags(rx_ring->napi);
- if (unlikely(!skb)) {
- netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
- "Failed allocating skb\n");
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->rx_stats.skb_alloc_fail++;
- u64_stats_update_end(&rx_ring->syncp);
+ skb = ena_alloc_skb(rx_ring, true);
+ if (unlikely(!skb))
return NULL;
- }
do {
dma_unmap_page(rx_ring->dev,
--
1.9.1
^ permalink raw reply related
* [PATCH net 09/18] net/ena: change condition for host attribute configuration
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Move the host info config to be the first admin command that is executed.
This change require the driver to remove the 'feature check'
from host info configuration flow.
The check is removed since the supported features bitmask field
is retrieved only after calling ENA_ADMIN_DEVICE_ATTRIBUTES admin command.
If set host info is not supported an error will be returned by the device.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_com.c | 8 +++-----
drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +++--
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index edb2e81..b2891f9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2474,11 +2474,9 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
int ret;
- if (!ena_com_check_supported_feature_id(ena_dev,
- ENA_ADMIN_HOST_ATTR_CONFIG)) {
- pr_warn("Set host attribute isn't supported\n");
- return -EPERM;
- }
+ /* Host attribute config is called before ena_com_get_dev_attr_feat
+ * so ena_com can't check if the feature is supported.
+ */
memset(&cmd, 0x0, sizeof(cmd));
admin_queue = &ena_dev->admin_queue;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 44dc298..dd7c74b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2387,6 +2387,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
*/
ena_com_set_admin_polling_mode(ena_dev, true);
+ ena_config_host_info(ena_dev);
+
/* Get Device Attributes*/
rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
if (rc) {
@@ -2411,11 +2413,10 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
- ena_config_host_info(ena_dev);
-
return 0;
err_admin_init:
+ ena_com_delete_host_info(ena_dev);
ena_com_admin_destroy(ena_dev);
err_mmio_read_less:
ena_com_mmio_reg_read_request_destroy(ena_dev);
--
1.9.1
^ permalink raw reply related
* [PATCH net 07/18] net/ena: refactor ena_get_stats64 to be atomic context safe
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
ndo_get_stat64 can be called from atomic context.
However the current implementation sends an admin command to retrieve
the statistics from the device.
This admin commands uses sleep.
Refactor the implementation of ena_get_stats64 to take the
{rx,tx}bytes/cnt from the driver's inner counters
and to take the rx drops counter
from the asynchronous keep alive (heart bit) event.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 8 ++++
drivers/net/ethernet/amazon/ena/ena_netdev.c | 57 +++++++++++++++++-------
drivers/net/ethernet/amazon/ena/ena_netdev.h | 1 +
3 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index c78f0b2..35ae511 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -904,6 +904,14 @@ struct ena_admin_aenq_link_change_desc {
u32 flags;
};
+struct ena_admin_aenq_keep_alive_desc {
+ struct ena_admin_aenq_common_desc aenq_common_desc;
+
+ u32 rx_drops_low;
+
+ u32 rx_drops_high;
+};
+
struct ena_admin_ena_mmio_req_read_less_resp {
u16 req_id;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index e7dda8b..44dc298 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2185,28 +2185,46 @@ static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
struct ena_adapter *adapter = netdev_priv(netdev);
- struct ena_admin_basic_stats ena_stats;
- int rc;
+ struct ena_ring *rx_ring, *tx_ring;
+ unsigned int start;
+ u64 rx_drops;
+ int i;
if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
return NULL;
- rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
- if (rc)
- return NULL;
+ for (i = 0; i < adapter->num_queues; i++) {
+ u64 bytes, packets;
+
+ tx_ring = &adapter->tx_ring[i];
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
+ packets = tx_ring->tx_stats.cnt;
+ bytes = tx_ring->tx_stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
+
+ stats->tx_packets += packets;
+ stats->tx_bytes += bytes;
- stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) |
- ena_stats.tx_bytes_low;
- stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) |
- ena_stats.rx_bytes_low;
+ rx_ring = &adapter->rx_ring[i];
+
+ do {
+ start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
+ packets = rx_ring->rx_stats.cnt;
+ bytes = rx_ring->rx_stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
- stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) |
- ena_stats.rx_pkts_low;
- stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) |
- ena_stats.tx_pkts_low;
+ stats->rx_packets += packets;
+ stats->rx_bytes += bytes;
+ }
+
+ do {
+ start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
+ rx_drops = adapter->dev_stats.rx_drops;
+ } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
- stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) |
- ena_stats.rx_drops_low;
+ stats->rx_dropped = rx_drops;
stats->multicast = 0;
stats->collisions = 0;
@@ -3272,8 +3290,17 @@ static void ena_keep_alive_wd(void *adapter_data,
struct ena_admin_aenq_entry *aenq_e)
{
struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+ struct ena_admin_aenq_keep_alive_desc *desc;
+ u64 rx_drops;
+ desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
adapter->last_keep_alive_jiffies = jiffies;
+
+ rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
+
+ u64_stats_update_begin(&adapter->syncp);
+ adapter->dev_stats.rx_drops = rx_drops;
+ u64_stats_update_end(&adapter->syncp);
}
static void ena_notification(void *adapter_data,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index f8ef1f0..2897fab 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -251,6 +251,7 @@ struct ena_stats_dev {
u64 interface_up;
u64 interface_down;
u64 admin_q_pause;
+ u64 rx_drops;
};
enum ena_flags_t {
--
1.9.1
^ permalink raw reply related
* [PATCH net 05/18] net/ena: add hardware hints capability to the driver
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
The ENA device can update the ena driver about the desire timeouts.
The hardware hints are transmitted as Asynchronous event to the driver.
In case the device does not support this capability, the driver
will use its own defines.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 31 +++++++++
drivers/net/ethernet/amazon/ena/ena_com.c | 41 ++++++++---
drivers/net/ethernet/amazon/ena/ena_com.h | 5 ++
drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 -
drivers/net/ethernet/amazon/ena/ena_netdev.c | 86 +++++++++++++++++++-----
drivers/net/ethernet/amazon/ena/ena_netdev.h | 19 +++++-
drivers/net/ethernet/amazon/ena/ena_regs_defs.h | 2 +
7 files changed, 157 insertions(+), 28 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index a46e749..51b2a92 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -70,6 +70,8 @@ enum ena_admin_aq_feature_id {
ENA_ADMIN_MAX_QUEUES_NUM = 2,
+ ENA_ADMIN_HW_HINTS = 3,
+
ENA_ADMIN_RSS_HASH_FUNCTION = 10,
ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11,
@@ -749,6 +751,31 @@ struct ena_admin_feature_rss_ind_table {
struct ena_admin_rss_ind_table_entry inline_entry;
};
+/* When hint value is 0, driver should use it's own predefined value */
+struct ena_admin_ena_hw_hints {
+ /* value in ms */
+ u16 mmio_read_timeout;
+
+ /* value in ms */
+ u16 driver_watchdog_timeout;
+
+ /* Per packet tx completion timeout. value in ms */
+ u16 missing_tx_completion_timeout;
+
+ u16 missed_tx_completion_count_threshold_to_reset;
+
+ /* value in ms */
+ u16 admin_completion_tx_timeout;
+
+ u16 netdev_wd_timeout;
+
+ u16 max_tx_sgl_size;
+
+ u16 max_rx_sgl_size;
+
+ u16 reserved[8];
+};
+
struct ena_admin_get_feat_cmd {
struct ena_admin_aq_common_desc aq_common_descriptor;
@@ -782,6 +809,8 @@ struct ena_admin_get_feat_resp {
struct ena_admin_feature_rss_ind_table ind_table;
struct ena_admin_feature_intr_moder_desc intr_moderation;
+
+ struct ena_admin_ena_hw_hints hw_hints;
} u;
};
@@ -857,6 +886,8 @@ enum ena_admin_aenq_notification_syndrom {
ENA_ADMIN_SUSPEND = 0,
ENA_ADMIN_RESUME = 1,
+
+ ENA_ADMIN_UPDATE_HINTS = 2,
};
struct ena_admin_aenq_entry {
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 6bd2b9b..366c2c5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -508,15 +508,13 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
struct ena_com_admin_queue *admin_queue)
{
- unsigned long flags;
- u32 start_time;
+ unsigned long flags, timeout;
int ret;
- start_time = ((u32)jiffies_to_usecs(jiffies));
+ timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout);
while (comp_ctx->status == ENA_CMD_SUBMITTED) {
- if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
- ADMIN_CMD_TIMEOUT_US) {
+ if (time_is_before_jiffies(timeout)) {
pr_err("Wait for completion (polling) timeout\n");
/* ENA didn't have any completion */
spin_lock_irqsave(&admin_queue->q_lock, flags);
@@ -560,7 +558,8 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com
int ret;
wait_for_completion_timeout(&comp_ctx->wait_event,
- usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US));
+ usecs_to_jiffies(
+ admin_queue->completion_timeout));
/* In case the command wasn't completed find out the root cause.
* There might be 2 kinds of errors
@@ -600,12 +599,14 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
mmio_read->read_resp;
- u32 mmio_read_reg, ret;
+ u32 mmio_read_reg, timeout, ret;
unsigned long flags;
int i;
might_sleep();
+ timeout = mmio_read->reg_read_to ? : ENA_REG_READ_TIMEOUT;
+
/* If readless is disabled, perform regular read */
if (!mmio_read->readless_supported)
return readl(ena_dev->reg_bar + offset);
@@ -626,14 +627,14 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
- for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) {
+ for (i = 0; i < timeout; i++) {
if (read_resp->req_id == mmio_read->seq_num)
break;
udelay(1);
}
- if (unlikely(i == ENA_REG_READ_TIMEOUT)) {
+ if (unlikely(i == timeout)) {
pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n",
mmio_read->seq_num, offset, read_resp->req_id,
read_resp->reg_off);
@@ -1723,6 +1724,20 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
memcpy(&get_feat_ctx->offload, &get_resp.u.offload,
sizeof(get_resp.u.offload));
+ /* Driver hints isn't mandatory admin command. So in case the
+ * command isn't supported set driver hints to 0
+ */
+ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS);
+
+ if (!rc)
+ memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints,
+ sizeof(get_resp.u.hw_hints));
+ else if (rc == -EPERM)
+ memset(&get_feat_ctx->hw_hints, 0x0,
+ sizeof(get_feat_ctx->hw_hints));
+ else
+ return rc;
+
return 0;
}
@@ -1848,6 +1863,14 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev)
return rc;
}
+ timeout = (cap & ENA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
+ ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+ if (timeout)
+ /* the resolution of timeout reg is 100ms */
+ ena_dev->admin_queue.completion_timeout = timeout * 100000;
+ else
+ ena_dev->admin_queue.completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
return 0;
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 509d7b8..6883ee5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -96,6 +96,8 @@
#define ENA_INTR_MODER_LEVEL_STRIDE 2
#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF
+#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF
+
enum ena_intr_moder_level {
ENA_INTR_MODER_LOWEST = 0,
ENA_INTR_MODER_LOW,
@@ -232,6 +234,7 @@ struct ena_com_admin_queue {
void *q_dmadev;
spinlock_t q_lock; /* spinlock for the admin queue */
struct ena_comp_ctx *comp_ctx;
+ u32 completion_timeout;
u16 q_depth;
struct ena_com_admin_cq cq;
struct ena_com_admin_sq sq;
@@ -266,6 +269,7 @@ struct ena_com_aenq {
struct ena_com_mmio_read {
struct ena_admin_ena_mmio_req_read_less_resp *read_resp;
dma_addr_t read_resp_dma_addr;
+ u32 reg_read_to; /* in us */
u16 seq_num;
bool readless_supported;
/* spin lock to ensure a single outstanding read */
@@ -335,6 +339,7 @@ struct ena_com_dev_get_features_ctx {
struct ena_admin_device_attr_feature_desc dev_attr;
struct ena_admin_feature_aenq_desc aenq;
struct ena_admin_feature_offload_desc offload;
+ struct ena_admin_ena_hw_hints hw_hints;
};
struct ena_com_create_io_ctx {
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 67b2338f..a1fbfc2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -80,7 +80,6 @@ struct ena_stats {
ENA_STAT_TX_ENTRY(tx_poll),
ENA_STAT_TX_ENTRY(doorbells),
ENA_STAT_TX_ENTRY(prepare_ctx_err),
- ENA_STAT_TX_ENTRY(missing_tx_comp),
ENA_STAT_TX_ENTRY(bad_req_id),
};
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 7247abb..e7dda8b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1989,6 +1989,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_info->tx_descs = nb_hw_desc;
tx_info->last_jiffies = jiffies;
+ tx_info->print_once = 0;
tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
tx_ring->ring_size);
@@ -2542,33 +2543,34 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
return;
+ if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
+ return;
+
budget = ENA_MONITORED_TX_QUEUES;
for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
tx_ring = &adapter->tx_ring[i];
+ missed_tx = 0;
+
for (j = 0; j < tx_ring->ring_size; j++) {
tx_buf = &tx_ring->tx_buffer_info[j];
last_jiffies = tx_buf->last_jiffies;
- if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
- netif_notice(adapter, tx_err, adapter->netdev,
- "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
- tx_ring->qid, j);
+ if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + adapter->missing_tx_completion_to))) {
+ if (!tx_buf->print_once)
+ netif_notice(adapter, tx_err, adapter->netdev,
+ "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+ tx_ring->qid, j);
- u64_stats_update_begin(&tx_ring->syncp);
- missed_tx = tx_ring->tx_stats.missing_tx_comp++;
- u64_stats_update_end(&tx_ring->syncp);
+ tx_buf->print_once = 1;
+ missed_tx++;
- /* Clear last jiffies so the lost buffer won't
- * be counted twice.
- */
- tx_buf->last_jiffies = 0;
-
- if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+ if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
netif_err(adapter, tx_err, adapter->netdev,
"The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
- missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+ missed_tx, adapter->missing_tx_completion_threshold);
set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ return;
}
}
}
@@ -2589,8 +2591,11 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter)
if (!adapter->wd_state)
return;
- keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies
- + ENA_DEVICE_KALIVE_TIMEOUT);
+ if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ return;
+
+ keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies +
+ adapter->keep_alive_timeout);
if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
netif_err(adapter, drv, adapter->netdev,
"Keep alive watchdog timeout.\n");
@@ -2613,6 +2618,44 @@ static void check_for_admin_com_state(struct ena_adapter *adapter)
}
}
+static void ena_update_hints(struct ena_adapter *adapter,
+ struct ena_admin_ena_hw_hints *hints)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ if (hints->admin_completion_tx_timeout)
+ adapter->ena_dev->admin_queue.completion_timeout =
+ hints->admin_completion_tx_timeout * 1000;
+
+ if (hints->mmio_read_timeout)
+ /* convert to usec */
+ adapter->ena_dev->mmio_read.reg_read_to =
+ hints->mmio_read_timeout * 1000;
+
+ if (hints->missed_tx_completion_count_threshold_to_reset)
+ adapter->missing_tx_completion_threshold =
+ hints->missed_tx_completion_count_threshold_to_reset;
+
+ if (hints->missing_tx_completion_timeout) {
+ if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
+ else
+ adapter->missing_tx_completion_to =
+ msecs_to_jiffies(hints->missing_tx_completion_timeout);
+ }
+
+ if (hints->netdev_wd_timeout)
+ netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
+
+ if (hints->driver_watchdog_timeout) {
+ if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
+ else
+ adapter->keep_alive_timeout =
+ msecs_to_jiffies(hints->driver_watchdog_timeout);
+ }
+}
+
static void ena_update_host_info(struct ena_admin_host_info *host_info,
struct net_device *netdev)
{
@@ -3024,6 +3067,11 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
adapter->last_keep_alive_jiffies = jiffies;
+ adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
+ adapter->missing_tx_completion_to = TX_TIMEOUT;
+ adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
+
+ ena_update_hints(adapter, &get_feat_ctx.hw_hints);
init_timer(&adapter->timer_service);
adapter->timer_service.expires = round_jiffies(jiffies + HZ);
@@ -3232,6 +3280,7 @@ static void ena_notification(void *adapter_data,
struct ena_admin_aenq_entry *aenq_e)
{
struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+ struct ena_admin_ena_hw_hints *hints;
WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
"Invalid group(%x) expected %x\n",
@@ -3249,6 +3298,11 @@ static void ena_notification(void *adapter_data,
case ENA_ADMIN_RESUME:
queue_work(ena_wq, &adapter->resume_io_task);
break;
+ case ENA_ADMIN_UPDATE_HINTS:
+ hints = (struct ena_admin_ena_hw_hints *)
+ (&aenq_e->inline_data_w4);
+ ena_update_hints(adapter, hints);
+ break;
default:
netif_err(adapter, drv, adapter->netdev,
"Invalid aenq notification link state %d\n",
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 69d7e9e..f8ef1f0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -146,7 +146,18 @@ struct ena_tx_buffer {
u32 tx_descs;
/* num of buffers used by this skb */
u32 num_of_bufs;
- /* Save the last jiffies to detect missing tx packets */
+
+ /* Used for detect missing tx packets to limit the number of prints */
+ u32 print_once;
+ /* Save the last jiffies to detect missing tx packets
+ *
+ * sets to non zero value on ena_start_xmit and set to zero on
+ * napi and timer_Service_routine.
+ *
+ * while this value is not protected by lock,
+ * a given packet is not expected to be handled by ena_start_xmit
+ * and by napi/timer_service at the same time.
+ */
unsigned long last_jiffies;
struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} ____cacheline_aligned;
@@ -170,7 +181,6 @@ struct ena_stats_tx {
u64 napi_comp;
u64 tx_poll;
u64 doorbells;
- u64 missing_tx_comp;
u64 bad_req_id;
};
@@ -269,6 +279,8 @@ struct ena_adapter {
struct msix_entry *msix_entries;
int msix_vecs;
+ u32 missing_tx_completion_threshold;
+
u32 tx_usecs, rx_usecs; /* interrupt moderation */
u32 tx_frames, rx_frames; /* interrupt moderation */
@@ -282,6 +294,9 @@ struct ena_adapter {
u8 mac_addr[ETH_ALEN];
+ unsigned long keep_alive_timeout;
+ unsigned long missing_tx_completion_to;
+
char name[ENA_NAME_MAX_LEN];
unsigned long flags;
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
index 26097a2..c3891c5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -78,6 +78,8 @@
#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
/* aq_caps register */
#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
--
1.9.1
^ permalink raw reply related
* [PATCH net 04/18] net/ena: reduce the severity of ena printouts
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_com.c | 27 +++++++++++++++++----------
drivers/net/ethernet/amazon/ena/ena_netdev.c | 14 +++++++++++---
2 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 3066d9c..6bd2b9b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -784,7 +784,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
int ret;
if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) {
- pr_info("Feature %d isn't supported\n", feature_id);
+ pr_debug("Feature %d isn't supported\n", feature_id);
return -EPERM;
}
@@ -1126,7 +1126,13 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
comp, comp_size);
if (unlikely(IS_ERR(comp_ctx))) {
- pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx));
+ if (comp_ctx == ERR_PTR(-ENODEV))
+ pr_debug("Failed to submit command [%ld]\n",
+ PTR_ERR(comp_ctx));
+ else
+ pr_err("Failed to submit command [%ld]\n",
+ PTR_ERR(comp_ctx));
+
return PTR_ERR(comp_ctx);
}
@@ -1895,7 +1901,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu)
int ret;
if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) {
- pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU);
+ pr_debug("Feature %d isn't supported\n", ENA_ADMIN_MTU);
return -EPERM;
}
@@ -1948,8 +1954,8 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
if (!ena_com_check_supported_feature_id(ena_dev,
ENA_ADMIN_RSS_HASH_FUNCTION)) {
- pr_info("Feature %d isn't supported\n",
- ENA_ADMIN_RSS_HASH_FUNCTION);
+ pr_debug("Feature %d isn't supported\n",
+ ENA_ADMIN_RSS_HASH_FUNCTION);
return -EPERM;
}
@@ -2112,7 +2118,8 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
if (!ena_com_check_supported_feature_id(ena_dev,
ENA_ADMIN_RSS_HASH_INPUT)) {
- pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT);
+ pr_debug("Feature %d isn't supported\n",
+ ENA_ADMIN_RSS_HASH_INPUT);
return -EPERM;
}
@@ -2270,8 +2277,8 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
if (!ena_com_check_supported_feature_id(
ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) {
- pr_info("Feature %d isn't supported\n",
- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+ pr_debug("Feature %d isn't supported\n",
+ ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
return -EPERM;
}
@@ -2542,8 +2549,8 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
if (rc) {
if (rc == -EPERM) {
- pr_info("Feature %d isn't supported\n",
- ENA_ADMIN_INTERRUPT_MODERATION);
+ pr_debug("Feature %d isn't supported\n",
+ ENA_ADMIN_INTERRUPT_MODERATION);
rc = 0;
} else {
pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n",
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index e3acf76..7247abb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -566,6 +566,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
*/
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
+ bool print_once = true;
u32 i;
for (i = 0; i < tx_ring->ring_size; i++) {
@@ -577,9 +578,16 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
if (!tx_info->skb)
continue;
- netdev_notice(tx_ring->netdev,
- "free uncompleted tx skb qid %d idx 0x%x\n",
- tx_ring->qid, i);
+ if (print_once) {
+ netdev_notice(tx_ring->netdev,
+ "free uncompleted tx skb qid %d idx 0x%x\n",
+ tx_ring->qid, i);
+ print_once = false;
+ } else {
+ netdev_dbg(tx_ring->netdev,
+ "free uncompleted tx skb qid %d idx 0x%x\n",
+ tx_ring->qid, i);
+ }
ena_buf = tx_info->bufs;
dma_unmap_single(tx_ring->dev,
--
1.9.1
^ permalink raw reply related
* [PATCH net 03/18] net/ena: use napi_schedule_irqoff when possible
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 8815217..e3acf76 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1181,7 +1181,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
{
struct ena_napi *ena_napi = data;
- napi_schedule(&ena_napi->napi);
+ napi_schedule_irqoff(&ena_napi->napi);
return IRQ_HANDLED;
}
--
1.9.1
^ permalink raw reply related
* [PATCH net 02/18] net/ena: fix queues number calculation
From: Netanel Belgazal @ 2016-11-20 8:45 UTC (permalink / raw)
To: linux-kernel, davem, netdev
Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>
The ENA driver tries to open a queue per vCPU.
To determine how many vCPUs the instance have it uses num_possible_cpus
while it should have use num_online_cpus instead.
Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 33a760e..8815217 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2667,7 +2667,7 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev,
io_sq_num = get_feat_ctx->max_queues.max_sq_num;
}
- io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES);
+ io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
io_queue_num = min_t(int, io_queue_num, io_sq_num);
io_queue_num = min_t(int, io_queue_num,
get_feat_ctx->max_queues.max_cq_num);
--
1.9.1
^ permalink raw reply related
* RE: [PATCH v9 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking
From: Levy, Amir (Jer) @ 2016-11-20 6:30 UTC (permalink / raw)
To: gregkh@linuxfoundation.org
Cc: Simon Guinot, andreas.noever@gmail.com, bhelgaas@google.com,
corbet@lwn.net, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, netdev@vger.kernel.org,
linux-doc@vger.kernel.org, mario_limonciello@dell.com,
thunderbolt-linux, Westerberg, Mika, Winkler, Tomas,
Zhang, Xiong Y, Jamet, Michael, remi.rerolle@seagate.com
In-Reply-To: <20161118100700.GA7126@kroah.com>
On Fri, Nov 18 2016, 12:07 PM, gregkh@linuxfoundation.org wrote:
> On Fri, Nov 18, 2016 at 08:48:36AM +0000, Levy, Amir (Jer) wrote:
> > > BTW, it is quite a shame that the Thunderbolt firmware version can't
> > > be read from Linux.
> > >
> >
> > This is WIP, once this patch will be upstream, we will be able to
> > focus more on aligning Linux with the Thunderbolt features that we have
> for windows.
>
> Why is this patch somehow holding that work back? You aren't just sitting
> around waiting for people to review this and not doing anything else, right?
> Is there some basic building block in these patches that your firmware
> download code is going to rely on?
>
> confused,
>
> greg k-h
All the Thunderbolt SW features (including networking and FW update) depend
on the communication with FW, which is patch 3/8 in the series.
The patch also sets up a generic netlink for user space communication.
So yes, the communication with Thunderbolt FW is a basic building block.
^ permalink raw reply
* RE: [PATCH] net: fec: Detect and recover receive queue hangs
From: Andy Duan @ 2016-11-20 6:18 UTC (permalink / raw)
To: Chris Lesiak
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Jaccon Bastiaansen
In-Reply-To: <b420fcd7-4171-f5c7-9d0c-cd8809e31bbc@licor.com>
From: Chris Lesiak <chris.lesiak@licor.com> Sent: Friday, November 18, 2016 10:37 PM
>To: Andy Duan <fugang.duan@nxp.com>
>Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Jaccon
>Bastiaansen <jaccon.bastiaansen@gmail.com>
>Subject: Re: [PATCH] net: fec: Detect and recover receive queue hangs
>
>On 11/18/2016 12:44 AM, Andy Duan wrote:
>> From: Chris Lesiak <chris.lesiak@licor.com> Sent: Friday, November 18,
>> 2016 5:15 AM
>> >To: Andy Duan <fugang.duan@nxp.com>
>> >Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Jaccon
>> >Bastiaansen <jaccon.bastiaansen@gmail.com>; chris.lesiak@licor.com
>> >Subject: [PATCH] net: fec: Detect and recover receive queue hangs >
>> >This corrects a problem that appears to be similar to ERR006358. But
>> while
>> >ERR006358 is a race when the tx queue transitions from empty to not
>> empty, >this problem is a race when the rx queue transitions from full to
>not full.
>> >
>> >The symptom is a receive queue that is stuck. The ENET_RDAR
>> register will >read 0, indicating that there are no empty receive
>> descriptors in the receive >ring. Since no additional frames can be queued,
>no RXF interrupts occur.
>> >
>> >This problem can be triggered with a 1 Gb link and about 400 Mbps of
>traffic.
>
>I can cause the error by running the following on an imx6q: iperf -s -u And
>sending packets from the other end of a 1 Gbps link:
>iperf -c $IPADDR -u -b40000pps
>
>A few others have seen this problem.
>See: https://community.nxp.com/thread/322882
>
>> >
>> >This patch detects this condition, sets the work_rx bit, and
>> reschedules the >poll method.
>> >
>> >Signed-off-by: Chris Lesiak <chris.lesiak@licor.com>
>> >---
>> > drivers/net/ethernet/freescale/fec_main.c | 31
>> >+++++++++++++++++++++++++++++++ > 1 file changed, 31 insertions(+)
>> > Firstly, how to reproduce the issue, pls list the reproduce steps.
>> Thanks.
>> Secondly, pls check below comments.
>>
>> >diff --git a/drivers/net/ethernet/freescale/fec_main.c
>> >b/drivers/net/ethernet/freescale/fec_main.c
>> >index fea0f33..8a87037 100644
>> >--- a/drivers/net/ethernet/freescale/fec_main.c
>> >+++ b/drivers/net/ethernet/freescale/fec_main.c
>> >@@ -1588,6 +1588,34 @@ fec_enet_interrupt(int irq, void *dev_id)
>> > return ret;
>> > }
>> >
>> >+static inline bool
>> >+fec_enet_recover_rxq(struct fec_enet_private *fep, u16 queue_id) {
>> >+ int work_bit = (queue_id == 0) ? 2 : ((queue_id == 1) ? 0 : 1);
>> >+
>> >+ if (readl(fep->rx_queue[queue_id]->bd.reg_desc_active))
>> If rx ring is really empty in slight throughput cases, rdar is always cleared,
>then there always do napi reschedule.
>
>I think that you are concerned that if rdar is zero due to this hardware
>problem, but the rx ring is actually empty, then fec_enet_rx_queue will
>never do a write to rdar so that it can be non-zero. That will cause napi to
>always be resceduled.
>
>I suppose that might be the case with zero rx traffic, and I was concerned
>that it might be true even when there was rx traffic. I suspected that the
>hardware, seeing that rdar is zero, would never queue another packet, even
>if there were in fact empty descriptors. But it doesn't seem to be the case. It
>does reschedule multiple times, but eventually sees some packets in the rx
>ring and recovers.
>
>I admit that I do not completely understand how that can happen. I did
>confirm that fec_enet_active_rxring is not being called.
>
>Maybe someone with a deeper understanding of the fec than I can provide
>an explanation.
>
The patch needs to hold on for some time (days), I will reserve time to investigate the issue.
Thanks.
>>
>> >+ return false;
>> >+
>> >+ dev_notice_once(&fep->pdev->dev, "Recovered rx queue\n");
>> >+
>> >+ fep->work_rx |= 1 << work_bit;
>> >+
>> >+ return true;
>> >+}
>> >+
>> >+static inline bool fec_enet_recover_rxqs(struct fec_enet_private
>> *fep) >+{
>> >+ unsigned int q;
>> >+ bool ret = false;
>> >+
>> >+ for (q = 0; q < fep->num_rx_queues; q++) {
>> >+ if (fec_enet_recover_rxq(fep, q))
>> >+ ret = true;
>> >+ }
>> >+
>> >+ return ret;
>> >+}
>> >+
>> > static int fec_enet_rx_napi(struct napi_struct *napi, int budget) {
>> > struct net_device *ndev = napi->dev;
>> >@@ -1601,6 +1629,9 @@ static int fec_enet_rx_napi(struct napi_struct
>> *napi, >int budget)
>> > if (pkts < budget) {
>> > napi_complete(napi);
>> > writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
>> >+
>> >+ if (fec_enet_recover_rxqs(fep) && napi_reschedule(napi))
>> >+ writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
>> > }
>> > return pkts;
>> > }
>> >--
>> >2.5.5
>>
>
>
>--
>Chris Lesiak
>Principal Design Engineer, Software
>LI-COR Biosciences
>chris.lesiak@licor.com
>
>Any opinions expressed are those of the author and do not necessarily
>represent those of his employer.
>
^ permalink raw reply
* Re: [PATCH v8 5/6] net: ipv4, ipv6: run cgroup eBPF egress programs
From: Alexei Starovoitov @ 2016-11-20 6:07 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: Daniel Mack, htejun-b10kYP2dOMg, daniel-FeC+5ew28dpmcu3hnIyYJQ,
ast-b10kYP2dOMg, davem-fT/PcQaiUtIeIZ0/mPfg9Q, kafai-b10kYP2dOMg,
fw-HFFVJYpyMKqzQB+pC5nmwQ, harald-H+wXaHxf7aLQT0dZR+AlfA,
netdev-u79uwXL29TY76Z2rM5mHXA, sargun-GaZTRHToo+CzQB+pC5nmwQ,
cgroups-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20161118174405.GA9678@salvia>
On Fri, Nov 18, 2016 at 06:44:05PM +0100, Pablo Neira Ayuso wrote:
> On Fri, Nov 18, 2016 at 09:17:18AM -0800, Alexei Starovoitov wrote:
> > On Fri, Nov 18, 2016 at 01:37:32PM +0100, Pablo Neira Ayuso wrote:
> > > On Thu, Nov 17, 2016 at 07:27:08PM +0100, Daniel Mack wrote:
> > > [...]
> > > > @@ -312,6 +314,12 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
> > > > skb->dev = dev;
> > > > skb->protocol = htons(ETH_P_IP);
> > > >
> > > > + ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
> > > > + if (ret) {
> > > > + kfree_skb(skb);
> > > > + return ret;
> > > > + }
> > > > +
> > > > /*
> > > > * Multicasts are looped back for other local users
> > > > */
> > > > @@ -364,12 +372,19 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
> > > > int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
> > > > {
> > > > struct net_device *dev = skb_dst(skb)->dev;
> > > > + int ret;
> > > >
> > > > IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
> > > >
> > > > skb->dev = dev;
> > > > skb->protocol = htons(ETH_P_IP);
> > > >
> > > > + ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
> > > > + if (ret) {
> > > > + kfree_skb(skb);
> > > > + return ret;
> > > > + }
> > > > +
> > > > return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> > > > net, sk, skb, NULL, dev,
> > > > ip_finish_output,
> > >
> > > Please, place this after the netfilter hook.
> > >
> > > Since this new hook may mangle output packets, any mangling
> > > potentially interfers and breaks conntrack.
> >
> > actually this hook cannot mangle the packets, so no conntrack
> > concerns. Also this was brought up by Lorenzo earlier and consensus
> > was that it's cleaner to leave it in this order.
>
> Not yet probably, but this could be used to implement snat at some
> point, you have potentially the infrastructure to do so in place
> already.
>
> > My reply:
> > http://www.spinics.net/lists/cgroups/msg16675.html
> > and Daniel's:
> > http://www.spinics.net/lists/cgroups/msg16677.html
> > and the rest of that thread.
>
> Please place this afterwards since I don't want to update Netfilter
> documentation to indicate that there is a new spot to debug before
> POSTROUTING that may drop packets. People are used to debugging things
> in a certain way, if packets are dropped after POSTROUTING, then
> netfilter tracing will indicate the packet has successfully left our
> framework and people will notice that packets are dropped somewhere
> else, so they have a clue probably is this new layer.
>
> Actually I remember you mentioned in a previous email that this hook
> can be placed anywhere, and that they don't really need a fixed
> location, if so, then it should not be much of a problem to change
> this.
correct. As I said in the link above I don't mind the order.
We discussed it in private and (my personal interpretation) that
the opinions are 60% to keep it as-is and 40% to go with your
proposal.
It is certainly not a big deal to go one way or the other.
I think only you have a strong opinion about the order whereas
myself and Daneil B and Daniel M are exhuasted enough, so we will
take it either way.
So let me recoup the pro and con and let you decide, since it seems
doing it after nf hook on tx will actually make them much harder to
use together and I think you wouldn't want it in the first place.
Since RX hook is so late in the receive path there is no
practical use case to mangle the packet at this stage.
This set of patches doesn't allow to change the packet either.
After this rx hook the socket can only receive the packet and
messing up l3/l4 headers won't have any effect. Furthermore since
the hooks have to be symmetrical it doesn't make sense to mangle
the packet on TX either. In tcp case the socket is an established
connection, so allowing mangling won't do any good to the remote side.
Hence this cgroup+bpf+inet_rx/tx thingy is only useful for monitoring
of what applications are doing and high level application firewalling.
Now imagine some policy framework that prevents a websever
in a cgroup talk to a database using this facility. If nf hook
runs first on tx, this policy framework won't be usable, since l3/l4
can be mangled, so application enforcement is not possible unless
iptables are disabled. I don't want such "either iptables or cgroup+bpf"
scenario and I don't think you want that either.
At the same time if iptables do traditional nat and firewall
_after_ this tx hook then this policy framework can coexist
with iptables based security. So imo this order of hooks
is a better way forward.
But if you strongly disagree, I'm ok to change it,
though I think long term it will be a loosing proposition
for both frameworks. So your call.
Thanks
^ permalink raw reply
* Re: [PATCH] VSOCK: add loopback to virtio_transport
From: David Miller @ 2016-11-20 3:16 UTC (permalink / raw)
To: john.fastabend; +Cc: stefanha, netdev, cavery, imbrenda, jhansen
In-Reply-To: <58311507.7070707@gmail.com>
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 19 Nov 2016 19:14:15 -0800
> On 16-11-19 07:08 PM, David Miller wrote:
>> From: Stefan Hajnoczi <stefanha@redhat.com>
>> Date: Thu, 17 Nov 2016 15:49:23 +0000
>>
>>> @@ -159,6 +199,10 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
>>> return -ENODEV;
>>> }
>>>
>>> + if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
>>> + return virtio_transport_send_pkt_loopback(vsock, pkt);
>>> + }
>>> +
>>
>> A single-statement basic block should get surrounding curly
>> braces.
>>
>
> Should _not_ get curly braces in case it wasn't obvious ;)
Indeed, thanks for the correction :)
^ permalink raw reply
* Re: [PATCH net 1/1] tipc: eliminate obsolete socket locking policy description
From: David Miller @ 2016-11-20 3:15 UTC (permalink / raw)
To: jon.maloy
Cc: netdev, paul.gortmaker, parthasarathy.bhuvaragan, ying.xue, maloy,
tipc-discussion
In-Reply-To: <1479584827-21772-1-git-send-email-jon.maloy@ericsson.com>
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Sat, 19 Nov 2016 14:47:07 -0500
> The comment block in socket.c describing the locking policy is
> obsolete, and does not reflect current reality. We remove it in this
> commit.
>
> Since the current locking policy is much simpler and follows a
> mainstream approach, we see no need to add a new description.
>
> Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Applied, thanks Jon.
^ permalink raw reply
* Re: [net-next] rtnl: fix the loop index update error in rtnl_dump_ifinfo()
From: David Miller @ 2016-11-20 3:15 UTC (permalink / raw)
To: zhangshengju; +Cc: netdev, dsa
In-Reply-To: <1479569312-15224-1-git-send-email-zhangshengju@cmss.chinamobile.com>
From: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Date: Sat, 19 Nov 2016 23:28:32 +0800
> If the link is filtered out, loop index should also be updated. If not,
> loop index will not be correct.
>
> Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Applied to 'net' and queued up for -stable.
Bug fixes should always be targetted at 'net' not 'net-next'.
^ permalink raw reply
* Re: [PATCH] VSOCK: add loopback to virtio_transport
From: John Fastabend @ 2016-11-20 3:14 UTC (permalink / raw)
To: David Miller, stefanha; +Cc: netdev, cavery, imbrenda, jhansen
In-Reply-To: <20161119.220804.755313313976863187.davem@davemloft.net>
On 16-11-19 07:08 PM, David Miller wrote:
> From: Stefan Hajnoczi <stefanha@redhat.com>
> Date: Thu, 17 Nov 2016 15:49:23 +0000
>
>> @@ -159,6 +199,10 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
>> return -ENODEV;
>> }
>>
>> + if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
>> + return virtio_transport_send_pkt_loopback(vsock, pkt);
>> + }
>> +
>
> A single-statement basic block should get surrounding curly
> braces.
>
Should _not_ get curly braces in case it wasn't obvious ;)
^ permalink raw reply
* Re: [PATCH] net: fix bogus cast in skb_pagelen() and use unsigned variables
From: David Miller @ 2016-11-20 3:12 UTC (permalink / raw)
To: adobriyan; +Cc: netdev
In-Reply-To: <20161119010808.GF1200@avx2>
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 19 Nov 2016 04:08:08 +0300
> 1) cast to "int" is unnecessary:
> u8 will be promoted to int before decrementing,
> small positive numbers fit into "int", so their values won't be changed
> during promotion.
>
> Once everything is int including loop counters, signedness doesn't
> matter: 32-bit operations will stay 32-bit operations.
>
> But! Someone tried to make this loop smart by making everything of
> the same type apparently in an attempt to optimise it.
> Do the optimization, just differently.
> Do the cast where it matters. :^)
>
> 2) frag size is unsigned entity and sum of fragments sizes is also
> unsigned.
>
> Make everything unsigned, leave no MOVSX instruction behind.
...
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Applied to net-next.
^ permalink raw reply
* Re: [PATCH] netlink: smaller nla_attr_minlen table
From: David Miller @ 2016-11-20 3:12 UTC (permalink / raw)
To: adobriyan; +Cc: netdev
In-Reply-To: <20161119005907.GE1200@avx2>
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 19 Nov 2016 03:59:07 +0300
> Length of a netlink attribute may be u16 but lengths of basic attributes
> are much smaller, so small we can save 16 bytes of .rodata and pocket
> change inside .text.
>
> 16-bit is worse on x86-64 than 8-bit because of operand size override prefix.
...
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Applied to net-next.
^ permalink raw reply
* Re: [PATCH] netlink: use "unsigned int" in nla_next()
From: David Miller @ 2016-11-20 3:11 UTC (permalink / raw)
To: adobriyan; +Cc: netdev
In-Reply-To: <20161119005435.GD1200@avx2>
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 19 Nov 2016 03:54:35 +0300
> ->nla_len is unsigned entity (it's length after all) and u16,
> thus it can't overflow when being aligned into int/unsigned int.
>
> (nlmsg_next has the same code, but I didn't yet convince myself
> it is correct to do so).
>
> There is pointer arithmetic in this function and offset being
> unsigned is better:
...
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Applied to net-next.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox