Netdev List
 help / color / mirror / Atom feed
* [PATCH net 07/18] net/ena: refactor ena_get_stats64 to be atomic context safe
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

ndo_get_stats64 can be called from atomic context.
However, the current implementation sends an admin command to retrieve
the statistics from the device.
This admin command can sleep.

Refactor the implementation of ena_get_stats64 to take the
{rx,tx}bytes/cnt from the driver's inner counters
and to take the rx drops counter
from the asynchronous keep alive (heartbeat) event.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h |  8 ++++
 drivers/net/ethernet/amazon/ena/ena_netdev.c     | 57 +++++++++++++++++-------
 drivers/net/ethernet/amazon/ena/ena_netdev.h     |  1 +
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index c78f0b2..35ae511 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -904,6 +904,14 @@ struct ena_admin_aenq_link_change_desc {
 	u32 flags;
 };
 
+struct ena_admin_aenq_keep_alive_desc {
+	struct ena_admin_aenq_common_desc aenq_common_desc;
+
+	u32 rx_drops_low;
+
+	u32 rx_drops_high;
+};
+
 struct ena_admin_ena_mmio_req_read_less_resp {
 	u16 req_id;
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index e7dda8b..44dc298 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2185,28 +2185,46 @@ static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev,
 						 struct rtnl_link_stats64 *stats)
 {
 	struct ena_adapter *adapter = netdev_priv(netdev);
-	struct ena_admin_basic_stats ena_stats;
-	int rc;
+	struct ena_ring *rx_ring, *tx_ring;
+	unsigned int start;
+	u64 rx_drops;
+	int i;
 
 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		return NULL;
 
-	rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
-	if (rc)
-		return NULL;
+	for (i = 0; i < adapter->num_queues; i++) {
+		u64 bytes, packets;
+
+		tx_ring = &adapter->tx_ring[i];
+
+		do {
+			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
+			packets = tx_ring->tx_stats.cnt;
+			bytes = tx_ring->tx_stats.bytes;
+		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
+
+		stats->tx_packets += packets;
+		stats->tx_bytes += bytes;
 
-	stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) |
-		ena_stats.tx_bytes_low;
-	stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) |
-		ena_stats.rx_bytes_low;
+		rx_ring = &adapter->rx_ring[i];
+
+		do {
+			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
+			packets = rx_ring->rx_stats.cnt;
+			bytes = rx_ring->rx_stats.bytes;
+		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
 
-	stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) |
-		ena_stats.rx_pkts_low;
-	stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) |
-		ena_stats.tx_pkts_low;
+		stats->rx_packets += packets;
+		stats->rx_bytes += bytes;
+	}
+
+	do {
+		start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
+		rx_drops = adapter->dev_stats.rx_drops;
+	} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
 
-	stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) |
-		ena_stats.rx_drops_low;
+	stats->rx_dropped = rx_drops;
 
 	stats->multicast = 0;
 	stats->collisions = 0;
@@ -3272,8 +3290,17 @@ static void ena_keep_alive_wd(void *adapter_data,
 			      struct ena_admin_aenq_entry *aenq_e)
 {
 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+	struct ena_admin_aenq_keep_alive_desc *desc;
+	u64 rx_drops;
 
+	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
 	adapter->last_keep_alive_jiffies = jiffies;
+
+	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.rx_drops = rx_drops;
+	u64_stats_update_end(&adapter->syncp);
 }
 
 static void ena_notification(void *adapter_data,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index f8ef1f0..2897fab 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -251,6 +251,7 @@ struct ena_stats_dev {
 	u64 interface_up;
 	u64 interface_down;
 	u64 admin_q_pause;
+	u64 rx_drops;
 };
 
 enum ena_flags_t {
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 09/18] net/ena: change condition for host attribute configuration
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

Move the host info config to be the first admin command that is executed.
This change requires the driver to remove the 'feature check'
from host info configuration flow.
The check is removed since the supported features bitmask field
is retrieved only after calling ENA_ADMIN_DEVICE_ATTRIBUTES admin command.

If set host info is not supported an error will be returned by the device.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_com.c    | 8 +++-----
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index edb2e81..b2891f9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2474,11 +2474,9 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
 
 	int ret;
 
-	if (!ena_com_check_supported_feature_id(ena_dev,
-						ENA_ADMIN_HOST_ATTR_CONFIG)) {
-		pr_warn("Set host attribute isn't supported\n");
-		return -EPERM;
-	}
+	/* Host attribute config is called before ena_com_get_dev_attr_feat
+	 * so ena_com can't check if the feature is supported.
+	 */
 
 	memset(&cmd, 0x0, sizeof(cmd));
 	admin_queue = &ena_dev->admin_queue;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 44dc298..dd7c74b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2387,6 +2387,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
 	 */
 	ena_com_set_admin_polling_mode(ena_dev, true);
 
+	ena_config_host_info(ena_dev);
+
 	/* Get Device Attributes*/
 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
 	if (rc) {
@@ -2411,11 +2413,10 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
 
 	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
 
-	ena_config_host_info(ena_dev);
-
 	return 0;
 
 err_admin_init:
+	ena_com_delete_host_info(ena_dev);
 	ena_com_admin_destroy(ena_dev);
 err_mmio_read_less:
 	ena_com_mmio_reg_read_request_destroy(ena_dev);
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 12/18] net/ena: refactor skb allocation
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

To increase readability, refactor skb allocation into a dedicated function.
This change does not impact the performance since the compiler optimizes
the code and eliminates the if condition.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 46 ++++++++++++++++------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 3bc8f43..b478c61 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -787,6 +787,28 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
 	return tx_pkts;
 }
 
+static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
+{
+	struct sk_buff *skb;
+
+	if (frags)
+		skb = napi_get_frags(rx_ring->napi);
+	else
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						rx_ring->rx_copybreak);
+
+	if (unlikely(!skb)) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.skb_alloc_fail++;
+		u64_stats_update_end(&rx_ring->syncp);
+		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+			  "Failed to allocate skb. frags: %d\n", frags);
+		return NULL;
+	}
+
+	return skb;
+}
+
 static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 				  struct ena_com_rx_buf_info *ena_bufs,
 				  u32 descs,
@@ -795,8 +817,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 	struct sk_buff *skb;
 	struct ena_rx_buffer *rx_info =
 		&rx_ring->rx_buffer_info[*next_to_clean];
-	u32 len;
-	u32 buf = 0;
+	u32 len, buf = 0;
 	void *va;
 
 	len = ena_bufs[0].len;
@@ -815,16 +836,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 	prefetch(va + NET_IP_ALIGN);
 
 	if (len <= rx_ring->rx_copybreak) {
-		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-						rx_ring->rx_copybreak);
-		if (unlikely(!skb)) {
-			u64_stats_update_begin(&rx_ring->syncp);
-			rx_ring->rx_stats.skb_alloc_fail++;
-			u64_stats_update_end(&rx_ring->syncp);
-			netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
-				  "Failed to allocate skb\n");
+		skb = ena_alloc_skb(rx_ring, false);
+		if (unlikely(!skb))
 			return NULL;
-		}
 
 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 			  "rx allocated small packet. len %d. data_len %d\n",
@@ -848,15 +862,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 		return skb;
 	}
 
-	skb = napi_get_frags(rx_ring->napi);
-	if (unlikely(!skb)) {
-		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
-			  "Failed allocating skb\n");
-		u64_stats_update_begin(&rx_ring->syncp);
-		rx_ring->rx_stats.skb_alloc_fail++;
-		u64_stats_update_end(&rx_ring->syncp);
+	skb = ena_alloc_skb(rx_ring, true);
+	if (unlikely(!skb))
 		return NULL;
-	}
 
 	do {
 		dma_unmap_page(rx_ring->dev,
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 17/18] net/ena: fix NULL dereference when removing the driver after device reset failed
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

If for some reason the device stops responding and the device reset fails
to recover the device, the mmio register read data structure will not be
reinitialized.
On driver removal, the driver will also try to reset the device,
but this time the mmio data structure will be NULL.

To solve this issue, perform the device reset in the remove function only if
the device is running.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index ba395aa..12d1dca 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2591,6 +2591,8 @@ static void ena_fw_reset_device(struct work_struct *work)
 err:
 	rtnl_unlock();
 
+	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
 	dev_err(&pdev->dev,
 		"Reset attempt failed. Can not reset the device\n");
 }
@@ -3251,7 +3253,9 @@ static void ena_remove(struct pci_dev *pdev)
 
 	cancel_work_sync(&adapter->resume_io_task);
 
-	ena_com_dev_reset(ena_dev);
+	/* Reset the device only if the device is running. */
+	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+		ena_com_dev_reset(ena_dev);
 
 	ena_free_mgmnt_irq(adapter);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 18/18] net/ena: change driver's default timeouts and increase driver version
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

Driver version was increased to 1.1.2

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_com.c    | 4 ++--
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index b2891f9..9b158f0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -36,9 +36,9 @@
 /*****************************************************************************/
 
 /* Timeout in micro-sec */
-#define ADMIN_CMD_TIMEOUT_US (1000000)
+#define ADMIN_CMD_TIMEOUT_US (3000000)
 
-#define ENA_ASYNC_QUEUE_DEPTH 4
+#define ENA_ASYNC_QUEUE_DEPTH 16
 #define ENA_ADMIN_QUEUE_DEPTH 32
 
 #define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index c081fd3..de1e5ac 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -39,12 +39,13 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
 
 #include "ena_com.h"
 #include "ena_eth_com.h"
 
 #define DRV_MODULE_VER_MAJOR	1
-#define DRV_MODULE_VER_MINOR	0
+#define DRV_MODULE_VER_MINOR	1
 #define DRV_MODULE_VER_SUBMINOR 2
 
 #define DRV_MODULE_NAME		"ena"
@@ -100,7 +101,7 @@
 /* Number of queues to check for missing queues per timer service */
 #define ENA_MONITORED_TX_QUEUES	4
 /* Max timeout packets before device reset */
-#define MAX_NUM_OF_TIMEOUTED_PACKETS 32
+#define MAX_NUM_OF_TIMEOUTED_PACKETS 128
 
 #define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
 
@@ -116,9 +117,9 @@
 #define ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))
 
 /* ENA device should send keep alive msg every 1 sec.
- * We wait for 3 sec just to be on the safe side.
+ * We wait for 6 sec just to be on the safe side.
  */
-#define ENA_DEVICE_KALIVE_TIMEOUT	(3 * HZ)
+#define ENA_DEVICE_KALIVE_TIMEOUT	(6 * HZ)
 
 #define ENA_MMIO_DISABLE_REG_READ	BIT(0)
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 06/18] net/ena: fix ethtool RSS flow configuration
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

ena_flow_data_to_flow_hash and ena_flow_hash_to_flow_type
treat the ena_admin_flow_hash_fields enum as power of two values.

Change the values of ena_admin_flow_hash_fields to be power of two values.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 51b2a92..c78f0b2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -633,22 +633,22 @@ enum ena_admin_flow_hash_proto {
 /* RSS flow hash fields */
 enum ena_admin_flow_hash_fields {
 	/* Ethernet Dest Addr */
-	ENA_ADMIN_RSS_L2_DA	= 0,
+	ENA_ADMIN_RSS_L2_DA	= 0x1,
 
 	/* Ethernet Src Addr */
-	ENA_ADMIN_RSS_L2_SA	= 1,
+	ENA_ADMIN_RSS_L2_SA	= 0x2,
 
 	/* ipv4/6 Dest Addr */
-	ENA_ADMIN_RSS_L3_DA	= 2,
+	ENA_ADMIN_RSS_L3_DA	= 0x4,
 
 	/* ipv4/6 Src Addr */
-	ENA_ADMIN_RSS_L3_SA	= 5,
+	ENA_ADMIN_RSS_L3_SA	= 0x8,
 
 	/* tcp/udp Dest Port */
-	ENA_ADMIN_RSS_L4_DP	= 6,
+	ENA_ADMIN_RSS_L4_DP	= 0x10,
 
 	/* tcp/udp Src Port */
-	ENA_ADMIN_RSS_L4_SP	= 7,
+	ENA_ADMIN_RSS_L4_SP	= 0x20,
 };
 
 struct ena_admin_proto_input {
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 08/18] net/ena: change sizeof() argument to be the type pointer
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

Instead of using:
memset(ptr, 0x0, sizeof(struct ...))
use:
memset(ptr, 0x0, sizeof(*ptr))

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 366c2c5..edb2e81 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -329,7 +329,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 	size_t size;
 	int dev_node = 0;
 
-	memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+	memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
 
 	io_sq->desc_entry_size =
 		(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
@@ -383,7 +383,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
 	size_t size;
 	int prev_node = 0;
 
-	memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+	memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr));
 
 	/* Use the basic completion descriptor for Rx */
 	io_cq->cdesc_entry_size_in_bytes =
@@ -681,7 +681,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
 	u8 direction;
 	int ret;
 
-	memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+	memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
 
 	if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
 		direction = ENA_ADMIN_SQ_DIRECTION_TX;
@@ -963,7 +963,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
 	u8 direction;
 	int ret;
 
-	memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd));
+	memset(&create_cmd, 0x0, sizeof(create_cmd));
 
 	create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ;
 
@@ -1155,7 +1155,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
 	struct ena_admin_acq_create_cq_resp_desc cmd_completion;
 	int ret;
 
-	memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd));
+	memset(&create_cmd, 0x0, sizeof(create_cmd));
 
 	create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ;
 
@@ -1263,7 +1263,7 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
 	struct ena_admin_acq_destroy_cq_resp_desc destroy_resp;
 	int ret;
 
-	memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+	memset(&destroy_cmd, 0x0, sizeof(destroy_cmd));
 
 	destroy_cmd.cq_idx = io_cq->idx;
 	destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ;
@@ -1613,8 +1613,8 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
 	io_sq = &ena_dev->io_sq_queues[ctx->qid];
 	io_cq = &ena_dev->io_cq_queues[ctx->qid];
 
-	memset(io_sq, 0x0, sizeof(struct ena_com_io_sq));
-	memset(io_cq, 0x0, sizeof(struct ena_com_io_cq));
+	memset(io_sq, 0x0, sizeof(*io_sq));
+	memset(io_cq, 0x0, sizeof(*io_cq));
 
 	/* Init CQ */
 	io_cq->q_depth = ctx->queue_size;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 10/18] net/ena: use READ_ONCE to access completion descriptors
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

Completion descriptors are accessed from the driver and from the device.
To avoid reading the old value, use READ_ONCE macro.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_com.h     | 1 +
 drivers/net/ethernet/amazon/ena/ena_eth_com.c | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 6883ee5..f8cdce0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -33,6 +33,7 @@
 #ifndef ENA_COM
 #define ENA_COM
 
+#include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/gfp.h>
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index 539c536..f999305 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -45,7 +45,7 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
 	cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
 			+ (head_masked * io_cq->cdesc_entry_size_in_bytes));
 
-	desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+	desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
 			ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
 
 	if (desc_phase != expected_phase)
@@ -141,7 +141,7 @@ static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
 
 		ena_com_cq_inc_head(io_cq);
 		count++;
-		last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+		last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
 			ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
 	} while (!last);
 
@@ -489,13 +489,13 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
 	 * expected, it mean that the device still didn't update
 	 * this completion.
 	 */
-	cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+	cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
 	if (cdesc_phase != expected_phase)
 		return -EAGAIN;
 
 	ena_com_cq_inc_head(io_cq);
 
-	*req_id = cdesc->req_id;
+	*req_id = READ_ONCE(cdesc->req_id);
 
 	return 0;
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 16/18] net/ena: fix error handling when probe fails
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

When driver fails in probe, it will release all resources, including
adapter.
In case of probe failure, ena_remove should not try to free the adapter
resources.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index bff082a..ba395aa 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3179,6 +3179,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 err_free_region:
 	ena_release_bars(ena_dev, pdev);
 err_free_ena_dev:
+	pci_set_drvdata(pdev, NULL);
 	vfree(ena_dev);
 err_disable_device:
 	pci_disable_device(pdev);
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 01/18] net/ena: remove RFS support from device feature list
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

Remove NETIF_F_NTUPLE from netdev->features.
The ENA device driver does not support RFS acceleration.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index bfeaec5..33a760e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2729,7 +2729,6 @@ static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
 	netdev->features =
 		dev_features |
 		NETIF_F_SG |
-		NETIF_F_NTUPLE |
 		NETIF_F_RXHASH |
 		NETIF_F_HIGHDMA;
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 11/18] net/ena: fix potential access to freed memory during device reset
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

If the ena driver detects that the device does not behave as expected,
it tries to reset the device.
The reset flow calls ena_down, which frees all the resources
the driver allocated, and only then resets the device.

This flow can cause memory corruption if the device still writes
to the driver's memory space.
To overcome this potential race, move the reset before the device
resources are freed.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 56 +++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index dd7c74b..3bc8f43 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -80,14 +80,18 @@ static void ena_tx_timeout(struct net_device *dev)
 {
 	struct ena_adapter *adapter = netdev_priv(dev);
 
+	/* Change the state of the device to trigger reset
+	 * Check that we are not in the middle or a trigger already
+	 */
+
+	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
 	u64_stats_update_begin(&adapter->syncp);
 	adapter->dev_stats.tx_timeout++;
 	u64_stats_update_end(&adapter->syncp);
 
 	netif_err(adapter, tx_err, dev, "Transmit time out\n");
-
-	/* Change the state of the device to trigger reset */
-	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 }
 
 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
@@ -1124,7 +1128,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 
 	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
 
-	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
 		napi_complete_done(napi, 0);
 		return 0;
 	}
@@ -1713,12 +1718,22 @@ static void ena_down(struct ena_adapter *adapter)
 	adapter->dev_stats.interface_down++;
 	u64_stats_update_end(&adapter->syncp);
 
-	/* After this point the napi handler won't enable the tx queue */
-	ena_napi_disable_all(adapter);
 	netif_carrier_off(adapter->netdev);
 	netif_tx_disable(adapter->netdev);
 
+	/* After this point the napi handler won't enable the tx queue */
+	ena_napi_disable_all(adapter);
+
 	/* After destroy the queue there won't be any new interrupts */
+
+	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
+		int rc;
+
+		rc = ena_com_dev_reset(adapter->ena_dev);
+		if (rc)
+			dev_err(&adapter->pdev->dev, "Device reset failed\n");
+	}
+
 	ena_destroy_all_io_queues(adapter);
 
 	ena_disable_io_intr_sync(adapter);
@@ -2081,6 +2096,14 @@ static void ena_netpoll(struct net_device *netdev)
 	struct ena_adapter *adapter = netdev_priv(netdev);
 	int i;
 
+	/* Dont schedule NAPI if the driver is in the middle of reset
+	 * or netdev is down.
+	 */
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) ||
+	    test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
 	for (i = 0; i < adapter->num_queues; i++)
 		napi_schedule(&adapter->ena_napi[i].napi);
 }
@@ -2468,6 +2491,14 @@ static void ena_fw_reset_device(struct work_struct *work)
 	bool dev_up, wd_state;
 	int rc;
 
+	if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		dev_err(&pdev->dev,
+			"device reset schedule while reset bit is off\n");
+		return;
+	}
+
+	netif_carrier_off(netdev);
+
 	del_timer_sync(&adapter->timer_service);
 
 	rtnl_lock();
@@ -2481,12 +2512,6 @@ static void ena_fw_reset_device(struct work_struct *work)
 	 */
 	ena_close(netdev);
 
-	rc = ena_com_dev_reset(ena_dev);
-	if (rc) {
-		dev_err(&pdev->dev, "Device reset failed\n");
-		goto err;
-	}
-
 	ena_free_mgmnt_irq(adapter);
 
 	ena_disable_msix(adapter);
@@ -2499,6 +2524,8 @@ static void ena_fw_reset_device(struct work_struct *work)
 
 	ena_com_mmio_reg_read_request_destroy(ena_dev);
 
+	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
 	/* Finish with the destroy part. Start the init part */
 
 	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
@@ -2562,6 +2589,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		return;
 
+	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+		return;
+
 	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
 		return;
 
@@ -2703,7 +2733,7 @@ static void ena_timer_service(unsigned long data)
 	if (host_info)
 		ena_update_host_info(host_info, adapter->netdev);
 
-	if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
 		netif_err(adapter, drv, adapter->netdev,
 			  "Trigger reset is on\n");
 		ena_dump_stats_to_dmesg(adapter);
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 13/18] net/ena: remove redundant logic in napi callback for busy poll mode
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

sk_busy_loop can call the napi callback a few million times a second.
Each call unmasks the interrupt.
We want to reduce the number of unmasks.

Add an atomic variable that will tell the napi handler if
it was called from irq context or not.
Unmask the interrupt only from irq context.

A scenario where the driver is left with a missed unmask isn't feasible.
When ena_intr_msix_io is called, there are 2 possible cases:
1) Before napi completes and calls napi_complete_done
2) After calling napi_complete_done

In the former case the napi will unmask the interrupt as needed.
In the latter case napi_complete_done will remove napi from the schedule
list so napi will be rescheduled (by ena_intr_msix_io) and the interrupt
will be unmasked as desired in the 2nd napi call.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 46 +++++++++++++++++++---------
 drivers/net/ethernet/amazon/ena/ena_netdev.h |  1 +
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index b478c61..eda5fb5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1145,26 +1145,41 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
 	rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
 
-	if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
+	/* If the device is about to reset or down, avoid unmask
+	 * the interrupt and return 0 so NAPI won't reschedule
+	 */
+	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
+		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
+		napi_complete_done(napi, 0);
+		ret = 0;
+
+	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
 		napi_complete_done(napi, rx_work_done);
 
 		napi_comp_call = 1;
-		/* Tx and Rx share the same interrupt vector */
-		if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
-			ena_adjust_intr_moderation(rx_ring, tx_ring);
-
-		/* Update intr register: rx intr delay, tx intr delay and
-		 * interrupt unmask
+		/* Update numa and unmask the interrupt only when schedule
+		 * from the interrupt context (vs from sk_busy_loop)
 		 */
-		ena_com_update_intr_reg(&intr_reg,
-					rx_ring->smoothed_interval,
-					tx_ring->smoothed_interval,
-					true);
+		if (atomic_cmpxchg(&ena_napi->unmask_interrupt, 1, 0)) {
+			/* Tx and Rx share the same interrupt vector */
+			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+				ena_adjust_intr_moderation(rx_ring, tx_ring);
+
+			/* Update intr register: rx intr delay,
+			 * tx intr delay and interrupt unmask
+			 */
+			ena_com_update_intr_reg(&intr_reg,
+						rx_ring->smoothed_interval,
+						tx_ring->smoothed_interval,
+						true);
+
+			/* It is a shared MSI-X.
+			 * Tx and Rx CQ have pointer to it.
+			 * So we use one of them to reach the intr reg
+			 */
+			ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+		}
 
-		/* It is a shared MSI-X. Tx and Rx CQ have pointer to it.
-		 * So we use one of them to reach the intr reg
-		 */
-		ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
 
 		ena_update_ring_numa_node(tx_ring, rx_ring);
 
@@ -1202,6 +1217,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
 {
 	struct ena_napi *ena_napi = data;
 
+	atomic_set(&ena_napi->unmask_interrupt, 1);
 	napi_schedule_irqoff(&ena_napi->napi);
 
 	return IRQ_HANDLED;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 2897fab..c081fd3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -135,6 +135,7 @@ struct ena_napi {
 	struct napi_struct napi ____cacheline_aligned;
 	struct ena_ring *tx_ring;
 	struct ena_ring *rx_ring;
+	atomic_t unmask_interrupt;
 	u32 qid;
 };
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 15/18] net/ena: remove affinity hint from the driver
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

To allow irqbalance to better distribute the napi handler,
remove the smp affinity hint from the driver.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index eda5fb5..bff082a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1331,8 +1331,6 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
 		  "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
 		  irq->affinity_hint_mask.bits[0], irq->vector);
 
-	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
-
 	return rc;
 }
 
@@ -1362,8 +1360,6 @@ static int ena_request_io_irq(struct ena_adapter *adapter)
 		netif_dbg(adapter, ifup, adapter->netdev,
 			  "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
 			  i, irq->affinity_hint_mask.bits[0], irq->vector);
-
-		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
 	}
 
 	return rc;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 14/18] net/ena: add IPv6 extended protocols to ena_admin_flow_hash_proto
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

We intend to use those fields in the future.

Signed-off-by: Netanel Belgazal <netanel@annapurnalabs.com>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 35ae511..92bba08 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -627,6 +627,12 @@ enum ena_admin_flow_hash_proto {
 
 	ENA_ADMIN_RSS_NOT_IP	= 7,
 
+	/* TCPv6 with extension header */
+	ENA_ADMIN_RSS_TCP6_EX	= 8,
+
+	/* IPv6 with extension header */
+	ENA_ADMIN_RSS_IP6_EX	= 9,
+
 	ENA_ADMIN_RSS_PROTO_NUM	= 16,
 };
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net 00/18] Update ENA driver to version 1.1.2
From: Netanel Belgazal @ 2016-11-20  8:45 UTC (permalink / raw)
  To: linux-kernel, davem, netdev
  Cc: Netanel Belgazal, dwmw, zorik, alex, saeed, msw, aliguori, nafea

Update Amazon's Elastic Network Adapter (ENA) driver version from 1.0.2 to 1.1.2

Netanel Belgazal (18):
  net/ena: remove RFS support from device feature list
  net/ena: fix queues number calculation
  net/ena: use napi_schedule_irqoff when possible
  net/ena: reduce the severity of ena printouts
  net/ena: add hardware hints capability to the driver
  net/ena: fix ethtool RSS flow configuration
  net/ena: refactor ena_get_stats64 to be atomic context safe
  net/ena: change sizeof() argument to be the type pointer
  net/ena: change condition for host attribute configuration
  net/ena: use READ_ONCE to access completion descriptors
  net/ena: fix potential access to freed memory during device reset
  net/ena: refactor skb allocation
  net/ena: remove redundant logic in napi callback for busy poll mode
  net/ena: add IPv6 extended protocols to ena_admin_flow_hash_proto
  net/ena: remove affinity hint from the driver
  net/ena: fix error handling when probe fails
  net/ena: fix NULL dereference when removing the driver after device
    reset faild
  net/ena: change driver's default timeouts and increase driver version

 drivers/net/ethernet/amazon/ena/ena_admin_defs.h |  57 +++-
 drivers/net/ethernet/amazon/ena/ena_com.c        |  96 ++++---
 drivers/net/ethernet/amazon/ena/ena_com.h        |   6 +
 drivers/net/ethernet/amazon/ena/ena_eth_com.c    |   8 +-
 drivers/net/ethernet/amazon/ena/ena_ethtool.c    |   1 -
 drivers/net/ethernet/amazon/ena/ena_netdev.c     | 326 ++++++++++++++++-------
 drivers/net/ethernet/amazon/ena/ena_netdev.h     |  30 ++-
 drivers/net/ethernet/amazon/ena/ena_regs_defs.h  |   2 +
 8 files changed, 384 insertions(+), 142 deletions(-)

-- 
1.9.1

^ permalink raw reply

* QUICK LOAN OFFER
From: Sky Finance @ 2016-11-20  6:10 UTC (permalink / raw)
  To: Recipients

Do You Need A Loan Of Any Kind ? If Yes Email Now For More Info Contact us on: skyfinance001@gmail.com

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

^ permalink raw reply

* Re: [PATCH net 07/18] net/ena: refactor ena_get_stats64 to be atomic context safe
From: kbuild test robot @ 2016-11-20 10:09 UTC (permalink / raw)
  To: Netanel Belgazal
  Cc: kbuild-all, linux-kernel, davem, netdev, Netanel Belgazal, dwmw,
	zorik, alex, saeed, msw, aliguori, nafea
In-Reply-To: <1479631547-29354-8-git-send-email-netanel@annapurnalabs.com>

[-- Attachment #1: Type: text/plain, Size: 11561 bytes --]

Hi Netanel,

[auto build test WARNING on net/master]

url:    https://github.com/0day-ci/linux/commits/Netanel-Belgazal/Update-ENA-driver-to-version-1-1-2/20161120-165649
config: i386-randconfig-x009-201647 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All warnings (new ones prefixed by >>):

   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_napi_enable_all
   Cyclomatic Complexity 1 include/linux/dma-mapping.h:dma_map_single_attrs
   Cyclomatic Complexity 1 include/linux/dynamic_queue_limits.h:dql_queued
   Cyclomatic Complexity 3 include/linux/netdevice.h:netdev_tx_sent_queue
   Cyclomatic Complexity 1 include/linux/netdevice.h:dev_kfree_skb_any
   Cyclomatic Complexity 1 include/linux/netdevice.h:netdev_tx_reset_queue
   Cyclomatic Complexity 6 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_free_tx_bufs
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_free_all_tx_bufs
   Cyclomatic Complexity 3 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_free_rx_page
   Cyclomatic Complexity 3 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_free_rx_bufs
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_free_all_rx_bufs
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_down
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_close
   Cyclomatic Complexity 3 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_update_on_link_change
   Cyclomatic Complexity 2 include/linux/netdevice.h:napi_schedule_irqoff
   Cyclomatic Complexity 1 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_intr_msix_io
   Cyclomatic Complexity 4 include/linux/cpumask.h:cpumask_check
   Cyclomatic Complexity 1 include/linux/cpumask.h:cpumask_set_cpu
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_setup_io_intr
   Cyclomatic Complexity 1 include/linux/interrupt.h:request_irq
   Cyclomatic Complexity 9 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_request_io_irq
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_request_mgmnt_irq
   Cyclomatic Complexity 8 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_setup_tx_resources
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_setup_all_tx_resources
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_setup_rx_resources
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_setup_all_rx_resources
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_create_io_tx_queue
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_create_all_io_tx_queues
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_create_io_rx_queue
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_create_all_io_rx_queues
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_init_napi
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_eth_com.h:ena_com_update_dev_comp_head
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_eth_com.h:ena_com_write_sq_doorbell
   Cyclomatic Complexity 4 include/linux/netdevice.h:netdev_tx_completed_queue
   Cyclomatic Complexity 14 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_clean_tx_irq
   Cyclomatic Complexity 2 include/linux/netdevice.h:netif_tx_unlock
   Cyclomatic Complexity 1 include/linux/skbuff.h:__netdev_alloc_skb_ip_align
   Cyclomatic Complexity 1 include/linux/skbuff.h:netdev_alloc_skb_ip_align
   Cyclomatic Complexity 16 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_rx_skb
   Cyclomatic Complexity 1 include/linux/gfp.h:__alloc_pages
   Cyclomatic Complexity 1 include/linux/gfp.h:__alloc_pages_node
   Cyclomatic Complexity 2 include/linux/gfp.h:alloc_pages_node
   Cyclomatic Complexity 6 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_alloc_rx_page
   Cyclomatic Complexity 8 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_refill_rx_bufs
   Cyclomatic Complexity 11 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_clean_rx_irq
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_refill_all_rx_bufs
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_update_ring_numa_node
   Cyclomatic Complexity 8 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_change_mtu
   Cyclomatic Complexity 2 include/linux/netdevice.h:napi_schedule
   Cyclomatic Complexity 1 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_device_io_resume
   Cyclomatic Complexity 1 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_device_io_suspend
   Cyclomatic Complexity 2 include/linux/seqlock.h:seqcount_lockdep_reader_access
   Cyclomatic Complexity 1 include/linux/seqlock.h:read_seqcount_begin
   Cyclomatic Complexity 1 include/linux/u64_stats_sync.h:__u64_stats_fetch_begin
   Cyclomatic Complexity 1 include/linux/u64_stats_sync.h:u64_stats_fetch_begin_irq
   Cyclomatic Complexity 6 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_get_stats64
   Cyclomatic Complexity 6 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_notification
   Cyclomatic Complexity 2 include/linux/skbuff.h:__skb_linearize
   Cyclomatic Complexity 2 include/linux/skbuff.h:skb_linearize
   Cyclomatic Complexity 5 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_check_and_linearize_skb
   Cyclomatic Complexity 3 include/linux/skbuff.h:sw_tx_timestamp
   Cyclomatic Complexity 1 include/linux/skbuff.h:skb_tx_timestamp
   Cyclomatic Complexity 22 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_start_xmit
   Cyclomatic Complexity 1 arch/x86/include/asm/io.h:ioremap
   Cyclomatic Complexity 4 include/linux/dma-mapping.h:dma_set_mask
   Cyclomatic Complexity 1 include/linux/pci-dma-compat.h:pci_set_dma_mask
   Cyclomatic Complexity 2 include/linux/dma-mapping.h:dma_set_coherent_mask
   Cyclomatic Complexity 1 include/linux/pci-dma-compat.h:pci_set_consistent_dma_mask
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_config_host_info
   Cyclomatic Complexity 12 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_device_init
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_calc_io_queue_num
   Cyclomatic Complexity 69 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_calc_queue_size
   Cyclomatic Complexity 1 include/linux/etherdevice.h:eth_random_addr
   Cyclomatic Complexity 1 include/linux/etherdevice.h:eth_hw_addr_random
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_set_conf_feat_params
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_init_io_rings
   Cyclomatic Complexity 1 include/linux/cpu_rmap.h:alloc_irq_cpu_rmap
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_init_rx_cpu_rmap
   Cyclomatic Complexity 14 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_enable_msix
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_intr_msix_mgmnt
   Cyclomatic Complexity 1 include/linux/cpumask.h:cpumask_first
   Cyclomatic Complexity 1 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_setup_mgmnt_intr
   Cyclomatic Complexity 3 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_enable_msix_and_set_admin_interrupts
   Cyclomatic Complexity 6 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_rss_init_default
   Cyclomatic Complexity 7 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_rss_configure
   Cyclomatic Complexity 3 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_up_complete
   Cyclomatic Complexity 9 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_up
   Cyclomatic Complexity 7 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_fw_reset_device
   Cyclomatic Complexity 6 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_open
   Cyclomatic Complexity 8 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_config_debug_area
   Cyclomatic Complexity 21 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_probe
   Cyclomatic Complexity 2 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_init
   Cyclomatic Complexity 1 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_adjust_intr_moderation
   Cyclomatic Complexity 4 drivers/net/ethernet/amazon/ena/ena_netdev.c:ena_io_poll
   In file included from include/linux/linkage.h:4:0,
                    from include/linux/kernel.h:6,
                    from include/linux/cpumask.h:9,
                    from include/linux/cpu_rmap.h:13,
                    from drivers/net/ethernet/amazon/ena/ena_netdev.c:36:
   drivers/net/ethernet/amazon/ena/ena_netdev.c: In function 'ena_get_stats64':
>> include/linux/compiler.h:231:26: warning: 'rx_ring' may be used uninitialized in this function [-Wmaybe-uninitialized]
     case 4: *(__u32 *)res = *(volatile __u32 *)p; break;  \
                             ^
   drivers/net/ethernet/amazon/ena/ena_netdev.c:2188:19: note: 'rx_ring' was declared here
     struct ena_ring *rx_ring, *tx_ring;
                      ^~~~~~~

vim +/rx_ring +231 include/linux/compiler.h

fe8c8a12 Cesar Eduardo Barros  2013-11-25  215  #ifndef OPTIMIZER_HIDE_VAR
fe8c8a12 Cesar Eduardo Barros  2013-11-25  216  #define OPTIMIZER_HIDE_VAR(var) barrier()
fe8c8a12 Cesar Eduardo Barros  2013-11-25  217  #endif
fe8c8a12 Cesar Eduardo Barros  2013-11-25  218  
6f33d587 Rusty Russell         2012-11-22  219  /* Not-quite-unique ID. */
6f33d587 Rusty Russell         2012-11-22  220  #ifndef __UNIQUE_ID
6f33d587 Rusty Russell         2012-11-22  221  # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
6f33d587 Rusty Russell         2012-11-22  222  #endif
6f33d587 Rusty Russell         2012-11-22  223  
230fa253 Christian Borntraeger 2014-11-25  224  #include <uapi/linux/types.h>
230fa253 Christian Borntraeger 2014-11-25  225  
d976441f Andrey Ryabinin       2015-10-19  226  #define __READ_ONCE_SIZE						\
d976441f Andrey Ryabinin       2015-10-19  227  ({									\
d976441f Andrey Ryabinin       2015-10-19  228  	switch (size) {							\
d976441f Andrey Ryabinin       2015-10-19  229  	case 1: *(__u8 *)res = *(volatile __u8 *)p; break;		\
d976441f Andrey Ryabinin       2015-10-19  230  	case 2: *(__u16 *)res = *(volatile __u16 *)p; break;		\
d976441f Andrey Ryabinin       2015-10-19 @231  	case 4: *(__u32 *)res = *(volatile __u32 *)p; break;		\
d976441f Andrey Ryabinin       2015-10-19  232  	case 8: *(__u64 *)res = *(volatile __u64 *)p; break;		\
d976441f Andrey Ryabinin       2015-10-19  233  	default:							\
d976441f Andrey Ryabinin       2015-10-19  234  		barrier();						\
d976441f Andrey Ryabinin       2015-10-19  235  		__builtin_memcpy((void *)res, (const void *)p, size);	\
d976441f Andrey Ryabinin       2015-10-19  236  		barrier();						\
d976441f Andrey Ryabinin       2015-10-19  237  	}								\
d976441f Andrey Ryabinin       2015-10-19  238  })
d976441f Andrey Ryabinin       2015-10-19  239  

:::::: The code at line 231 was first introduced by commit
:::::: d976441f44bc5d48635d081d277aa76556ffbf8b compiler, atomics, kasan: Provide READ_ONCE_NOCHECK()

:::::: TO: Andrey Ryabinin <aryabinin@virtuozzo.com>
:::::: CC: Ingo Molnar <mingo@kernel.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28075 bytes --]

^ permalink raw reply

* [PATCH net-next 1/1] driver: macvlan: Remove duplicated IFF_UP condition check in macvlan_forward_source_one
From: fgao @ 2016-11-20 11:21 UTC (permalink / raw)
  To: davem, kaber, netdev, gfree.wind

From: Gao Feng <gfree.wind@gmail.com>

The condition check "dev->flags & IFF_UP" is duplicated in
macvlan_forward_source_one, because its caller macvlan_forward_source
has already checked this flag.

Signed-off-by: Gao Feng <gfree.wind@gmail.com>
---
 drivers/net/macvlan.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 13b7e0b..95a5ffc 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -375,9 +375,6 @@ static void macvlan_forward_source_one(struct sk_buff *skb,
 	int ret;
 
 	dev = vlan->dev;
-	if (unlikely(!(dev->flags & IFF_UP)))
-		return;
-
 	nskb = skb_clone(skb, GFP_ATOMIC);
 	if (!nskb)
 		return;
-- 
1.9.1

^ permalink raw reply related

* Re: net/sctp: BUG: KASAN: stack-out-of-bounds in memcmp
From: Xin Long @ 2016-11-20 14:48 UTC (permalink / raw)
  To: Baozeng Ding
  Cc: Vladislav Yasevich, Neil Horman, David Miller, linux-sctp,
	network dev
In-Reply-To: <57cbb177-3e1a-0f27-062b-597db8232888@gmail.com>

On Sat, Nov 12, 2016 at 6:12 PM, Baozeng Ding <sploving1@gmail.com> wrote:
>
>
> On 2016/11/10 13:48, Xin Long wrote:
>> On Sat, Oct 15, 2016 at 4:28 PM, Baozeng Ding <sploving1@gmail.com> wrote:
>>> Hello Xin Long,
>>>
>>> On 2016/10/14 19:13, Xin Long wrote:
>>>> On Sat, Aug 20, 2016 at 3:51 PM, Baozeng Ding <sploving1@gmail.com> wrote:
>>>>> Hello all,
>>>>> The following program triggers  stack-out-of-bounds in memcmp. The kernel version is 4.8.0-rc1+ (on Aug 13 commit 118253a593bd1c57de2d1193df1ccffe1abe745b). Thanks.
>>>> ...
>>>>>
>>>>> #define _GNU_SOURCE
>>>>> #include <unistd.h>
>>>>> #include <stdint.h>
>>>>> #include <sys/socket.h>
>>>>> #include <sys/mman.h>
>>>>> #include <linux/in.h>
>>>>> #include <fcntl.h>
>>>>> #include <string.h>
>>>>> #include <stdio.h>
>>>>>
>>>>> int main()
>>>>> {
>>>>>         int fd;
>>>>>         mmap((void *)0x20000000ul, 0xff2000ul, 0x3ul, 0x32ul, -1, 0x0ul);
>>>>>         fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_SCTP);
>>>>>         memcpy((void*)0x20f82f80, "\x0a\x00\xab\x12\x72\xd4\x19\x9a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x85\xda\x00\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 128);
>>>>>         bind(fd, (struct sockaddr*)0x20f82f80ul, 0x80ul);
>>>>>         *(uint64_t*)0x202e1fc8 = (uint64_t)0x20f77f80;
>>>>>         *(uint32_t*)0x202e1fd0 = (uint32_t)0x80;
>>>>>         *(uint64_t*)0x202e1fd8 = (uint64_t)0x20f7dfe0;
>>>>>         *(uint64_t*)0x202e1fe0 = (uint64_t)0x2;
>>>>>         *(uint64_t*)0x202e1fe8 = (uint64_t)0x20f77000;
>>>>>         *(uint64_t*)0x202e1ff0 = (uint64_t)0x3;
>>>>>         *(uint32_t*)0x202e1ff8 = (uint32_t)0x80;
>>>>>         memcpy((void*)0x20f77f80, "\x0a\x00\xab\x12\xb0\xb3\x20\x7b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xc2\xc2\x0b\xb2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 128);
>>>>>         *(uint64_t*)0x20f7dfe0 = (uint64_t)0x20f77fc5;
>>>>>         *(uint64_t*)0x20f7dfe8 = (uint64_t)0x3b;
>>>>>         *(uint64_t*)0x20f7dff0 = (uint64_t)0x20f77fac;
>>>>>         *(uint64_t*)0x20f7dff8 = (uint64_t)0x54;
>>>>>         memcpy((void*)0x20f77fc5, "\xa5\x7d\xf3\xc4\xfe\xd3\xfd\x44\x63\x00\x8c\x1e\x4c\x2e\x8d\x8d\x9a\x9c\x9c\x9d\x5b\x7c\xe1\x06\xf7\x15\x16\xed\x68\xd1\xfc\xf4\xa4\x3a\xe4\x69\x51\x16\x74\xf4\x1a\xcf\x0e\x99\xc3\xa3\x87\xe7\x81\x6c\x10\x78\x75\x17\x69\x9d\x11\x0c\xc7", 59);
>>>>>         memcpy((void*)0x20f77fac, "\x86\x08\x89\x3c\xf3\x58\xea\xe7\x64\x6a\xfb\xb5\xe8\xdd\x5f\x69\xa5\xd4\xdc\xd9\xe7\x71\x95\x07\x78\x7b\x21\xda\x43\x9c\x62\x4d\xca\x64\xb5\x6e\x96\x55\xe9\x58\x76\x66\x1d\xb9\x7b\xe6\x20\xc1\xa9\xed\x70\xc1\x2b\x7c\x86\x8c\xba\x28\xb3\x2c\xb9\x64\xb7\x84\x65\x0d\x7f\xa6\x98\x6f\x49\xcb\x35\xad\x5a\xdf\x13\x75\x99\x57\x7e\xbb\x38\x89", 84);
>>>>>         *(uint64_t*)0x20f77000 = (uint64_t)0x15;
>>>>>         *(uint32_t*)0x20f77008 = (uint32_t)0x1;
>>>>>         *(uint32_t*)0x20f7700c = (uint32_t)0xfffffffffffffffe;
>>>>>         *(uint8_t*)0x20f77010 = (uint8_t)0xbb;
>>>>>         *(uint8_t*)0x20f77011 = (uint8_t)0x2;
>>>>>         *(uint8_t*)0x20f77012 = (uint8_t)0x5;
>>>>>         *(uint8_t*)0x20f77013 = (uint8_t)0x2;
>>>>>         *(uint8_t*)0x20f77014 = (uint8_t)0x80000000;
>>>>>         *(uint64_t*)0x20f77015 = (uint64_t)0x10;
>>>>>         *(uint32_t*)0x20f7701d = (uint32_t)0xffff;
>>>>>         *(uint32_t*)0x20f77021 = (uint32_t)0x1;
>>>>>         *(uint64_t*)0x20f77025 = (uint64_t)0x13;
>>>>>         *(uint32_t*)0x20f7702d = (uint32_t)0x6;
>>>>>         *(uint32_t*)0x20f77031 = (uint32_t)0xfffffffffffffe00;
>>>>>         *(uint8_t*)0x20f77035 = (uint8_t)0x80000000;
>>>>>         *(uint8_t*)0x20f77036 = (uint8_t)0xfffffffffffffff8;
>>>>>         sendmmsg(fd, (struct mmsghdr *)0x202e1fc8ul, 0x1ul, 0x1ul);
>>>>>         return 0;
>>>>> }
>>>>>
>>>> Hi, Baozeng, I couldn't reproduce this issue with this script,
>>>> even in 118253a593bd1c57de2d1193df1ccffe1abe745b
>>>> do I need to do some extra config for this ?
>>>>
>>> You need config KASAN.
>>> CONFIG_HAVE_ARCH_KASAN=y
>>> CONFIG_KASAN=y
>>> CONFIG_KASAN_INLINE=y
>>> CONFIG_KASAN_SHADOW_OFFSET=0xdffffc0000000000
>>>
>>> I just tested with b67be92feb486f800d80d72c67fd87b47b79b18e (October 12),
>>> it still exists. If you still cannot reproduce it, I will send the .config to you privately. Thanks.
>>>
>>
>> Hi Baozeng, sorry for being so late, but this issue is always on my radar.
>>
>> I still could not reproduce it, even on
>> b67be92feb486f800d80d72c67fd87b47b79b18e in any of
>> git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git
>> git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
>>
>> with:
>> CONFIG_KASAN_SHADOW_OFFSET=0xdffffc0000000000
>> CONFIG_HAVE_ARCH_KASAN=y
>> CONFIG_KASAN=y
>> # CONFIG_KASAN_OUTLINE is not set
>> CONFIG_KASAN_INLINE=y
>> # CONFIG_TEST_KASAN is not set
>> ...
>> attachment is my .config from linux.git
>>
>> I also tried with your .config, but in my box, it could only build 105
>> .ko instead of 2000+. I don't think it works.
>>
> I used qemu to run it:
> qemu-system-x86_64 -m 1024 -net nic -net user,host=10.0.2.10,hostfwd=tcp::16059-:22 -display none -serial stdio -no-reboot -enable-kvm -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet -soundhw all -hda ./wheezy.img -snapshot -kernel ./bzImage -append console=ttyS0 vsyscall=native rodata=n oops=panic panic_on_warn=1 panic=-1 ftrace_dump_on_oops=orig_cpu earlyprintk=serial slub_debug=UZ root=/dev/sda

Couldn't reproduce it in my box.
My qemu start script is almost the same as yours, except that I used an
initramfs. I'm not sure how you generated the .config and booted without
an initramfs. I guess the issue is caused by the lack of some driver in
your kernel.

qemu-system-x86_64 -m 8192 -net nic -net
user,host=10.0.2.10,hostfwd=tcp::16059-:22 -display none -serial stdio
-no-reboot -enable-kvm -numa node,nodeid=0,cpus=0-1 -numa
node,nodeid=1,cpus=2-3 -smp sockets=2,cores=2,threads=1 -usb
-usbdevice mouse -usbdevice tablet \
-hda /home/img/fedora.qcow2 \
-kernel /home/img/vmlinuz-4.9.0-rc5.tests \
-initrd /home/img/initramfs-4.9.0-rc5.tests.img \
-append "console=ttyS0 vsyscall=native rodata=n oops=panic
panic_on_warn=1 panic=-1 ftrace_dump_on_oops=orig_cpu
earlyprintk=serial slub_debug=UZ root=/dev/sda1"

^ permalink raw reply

* Re: [PATCH net 00/18] Update ENA driver to version 1.1.2
From: David Miller @ 2016-11-20 15:24 UTC (permalink / raw)
  To: netanel
  Cc: linux-kernel, netdev, dwmw, zorik, alex, saeed, msw, aliguori,
	nafea
In-Reply-To: <1479631547-29354-1-git-send-email-netanel@annapurnalabs.com>

From: Netanel Belgazal <netanel@annapurnalabs.com>
Date: Sun, 20 Nov 2016 10:45:29 +0200

> Update Amazon's Elastic Network Adapter (ENA) driver version from 1.0.2 to 1.1.2

This is insufficient.

You must explain what this patch series is doing, how it is doing it,
and why it is doing it that way.

This is the message that people will look at to learn what is
contained in this series of patches, and they might be looking for
keywords or explanations as to why a decision was made to add a
feature, turn a feature off, or make some other important high level
change to the driver.

^ permalink raw reply

* Re: [PATCH net-next] mlx4: avoid unnecessary dirtying of critical fields
From: Tariq Toukan @ 2016-11-20 15:14 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: netdev, Tariq Toukan
In-Reply-To: <1479500133.8455.295.camel@edumazet-glaptop3.roam.corp.google.com>

Hi Eric,

Thanks for your patch.

On 18/11/2016 10:15 PM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> While stressing a 40Gbit mlx4 NIC with busy polling, I found false
> sharing in mlx4 driver that can be easily avoided.
>
> This patch brings an additional 7 % performance improvement in UDP_RR
> workload.
>
> 1) If we received no frame during one mlx4_en_process_rx_cq()
>     invocation, no need to call mlx4_cq_set_ci() and/or dirty ring->cons
>
> 2) Do not refill rx buffers if we have plenty of them.
>     This avoids false sharing and allows some bulk/batch optimizations.
>     Page allocator and its locks will thank us.
>
> Finally, mlx4_en_poll_rx_cq() should not return 0 if it determined
> cpu handling NIC IRQ should be changed. We should return budget-1
> instead, to not fool net_rx_action() and its netdev_budget.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Tariq Toukan <tariqt@mellanox.com>
> ---
>   drivers/net/ethernet/mellanox/mlx4/en_rx.c |   51 +++++++++++--------
>   1 file changed, 32 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index 22f08f9ef464..2112494ff43b 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -688,18 +688,23 @@ static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
>   	dev_kfree_skb_any(skb);
>   }
>   
> -static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
> -				     struct mlx4_en_rx_ring *ring)
> +static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
> +				      struct mlx4_en_rx_ring *ring)
>   {
> -	int index = ring->prod & ring->size_mask;
> +	u32 missing = ring->actual_size - (ring->prod - ring->cons);
>   
> -	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
> -		if (mlx4_en_prepare_rx_desc(priv, ring, index,
> +	/* Try to batch allocations, but not too much. */
> +	if (missing < 8)
> +		return false;
> +	do {
> +		if (mlx4_en_prepare_rx_desc(priv, ring,
> +					    ring->prod & ring->size_mask,
>   					    GFP_ATOMIC | __GFP_COLD))
>   			break;
>   		ring->prod++;
> -		index = ring->prod & ring->size_mask;
> -	}
> +	} while (--missing);
> +
> +	return true;
>   }
>   
>   /* When hardware doesn't strip the vlan, we need to calculate the checksum
> @@ -1081,15 +1086,20 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
>   
>   out:
>   	rcu_read_unlock();
> -	if (doorbell_pending)
> -		mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
> -
> -	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
> -	mlx4_cq_set_ci(&cq->mcq);
> -	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
> -	ring->cons = cq->mcq.cons_index;
> -	mlx4_en_refill_rx_buffers(priv, ring);
> -	mlx4_en_update_rx_prod_db(ring);
> +
> +	if (polled) {
> +		if (doorbell_pending)
> +			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
> +
> +		AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
Keep this perf stats update out of the if block.
> +		mlx4_cq_set_ci(&cq->mcq);
> +		wmb(); /* ensure HW sees CQ consumer before we post new buffers */
> +		ring->cons = cq->mcq.cons_index;
> +	}
> +
> +	if (mlx4_en_refill_rx_buffers(priv, ring))
> +		mlx4_en_update_rx_prod_db(ring);
> +
>   	return polled;
>   }
>   
> @@ -1131,10 +1141,13 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
>   			return budget;
>   
>   		/* Current cpu is not according to smp_irq_affinity -
> -		 * probably affinity changed. need to stop this NAPI
> -		 * poll, and restart it on the right CPU
> +		 * probably affinity changed. Need to stop this NAPI
> +		 * poll, and restart it on the right CPU.
> +		 * Try to avoid returning a too small value (like 0),
> +		 * to not fool net_rx_action() and its netdev_budget
>   		 */
> -		done = 0;
> +		if (done)
> +			done--;
>   	}
>   	/* Done for now */
>   	if (napi_complete_done(napi, done))
>
>
It looks good to me, apart from the one comment above.

Regards,
Tariq

^ permalink raw reply

* Re: [PATCH] [v2] net: phy: phy drivers should not set SUPPORTED_[Asym_]Pause
From: Timur Tabi @ 2016-11-20 16:08 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: Timur Tabi, David Miller, jon.mason, netdev, Andrew Lunn
In-Reply-To: <cf18464c-e225-af5d-9251-f2553b93b9ff@gmail.com>

On Mon, Nov 14, 2016 at 12:35 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> if (!(drv->features & (SUPPORTED_Pause | SUPPORTED_AsymPause))
>         phydev->supported |= SUPPORTED_Pause | SUPPORTED_AsymPause;

How about, if either bit is set in drv->features, then assume the phy
driver really knows what it's doing, and just copy those bits to
phydev->supported?

if (drv->features & (SUPPORTED_Pause | SUPPORTED_AsymPause)) {
    phydev->supported &= ~(SUPPORTED_Pause | SUPPORTED_AsymPause);
    phydev->supported |= drv->features & (SUPPORTED_Pause |
SUPPORTED_AsymPause);
} else
    phydev->supported |= SUPPORTED_Pause | SUPPORTED_AsymPause;

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply

* Re: [PATCH net-next] mlx4: avoid unnecessary dirtying of critical fields
From: Eric Dumazet @ 2016-11-20 17:15 UTC (permalink / raw)
  To: Tariq Toukan; +Cc: David Miller, netdev, Tariq Toukan
In-Reply-To: <62f9c9ee-d710-7493-1094-e8ffe8aaa1ca@gmail.com>

On Sun, 2016-11-20 at 17:14 +0200, Tariq Toukan wrote:
> Hi Eric,
> 
> Thanks for your patch.
> 
> On 18/11/2016 10:15 PM, Eric Dumazet wrote:
> > +
> > +		AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
> Keep this perf stats update out of the if block.

This perf stat would be useless then with busy polling.

And this would be the only source of false sharing in the driver.

Not that I particularly care, since AVG_PERF_COUNTER can not be enabled
without modifying / recompiling the driver.

^ permalink raw reply

* [PATCH v2 net-next] mlx4: avoid unnecessary dirtying of critical fields
From: Eric Dumazet @ 2016-11-20 17:24 UTC (permalink / raw)
  To: Tariq Toukan; +Cc: David Miller, netdev, Tariq Toukan
In-Reply-To: <1479662102.8455.361.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <edumazet@google.com>

While stressing a 40Gbit mlx4 NIC with busy polling, I found false
sharing in mlx4 driver that can be easily avoided.

This patch brings an additional 7 % performance improvement in UDP_RR
workload.

1) If we received no frame during one mlx4_en_process_rx_cq()
   invocation, no need to call mlx4_cq_set_ci() and/or dirty ring->cons

2) Do not refill rx buffers if we have plenty of them.
   This avoids false sharing and allows some bulk/batch optimizations.
   Page allocator and its locks will thank us.

Finally, mlx4_en_poll_rx_cq() should not return 0 if it determined
cpu handling NIC IRQ should be changed. We should return budget-1
instead, to not fool net_rx_action() and its netdev_budget.


v2: keep AVG_PERF_COUNTER(... polled) even if polled is 0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c |   47 ++++++++++++-------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 22f08f9ef4645869359783823127c0432fc7a591..6562f78b07f4370b5c1ea2c5e3a4221d7ebaeba8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -688,18 +688,23 @@ static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
 	dev_kfree_skb_any(skb);
 }
 
-static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
-				     struct mlx4_en_rx_ring *ring)
+static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+				      struct mlx4_en_rx_ring *ring)
 {
-	int index = ring->prod & ring->size_mask;
+	u32 missing = ring->actual_size - (ring->prod - ring->cons);
 
-	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
-		if (mlx4_en_prepare_rx_desc(priv, ring, index,
+	/* Try to batch allocations, but not too much. */
+	if (missing < 8)
+		return false;
+	do {
+		if (mlx4_en_prepare_rx_desc(priv, ring,
+					    ring->prod & ring->size_mask,
 					    GFP_ATOMIC | __GFP_COLD))
 			break;
 		ring->prod++;
-		index = ring->prod & ring->size_mask;
-	}
+	} while (--missing);
+
+	return true;
 }
 
 /* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -1081,15 +1086,20 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 out:
 	rcu_read_unlock();
-	if (doorbell_pending)
-		mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
 
+	if (polled) {
+		if (doorbell_pending)
+			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
+
+		mlx4_cq_set_ci(&cq->mcq);
+		wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+		ring->cons = cq->mcq.cons_index;
+	}
 	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
-	mlx4_cq_set_ci(&cq->mcq);
-	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
-	ring->cons = cq->mcq.cons_index;
-	mlx4_en_refill_rx_buffers(priv, ring);
-	mlx4_en_update_rx_prod_db(ring);
+
+	if (mlx4_en_refill_rx_buffers(priv, ring))
+		mlx4_en_update_rx_prod_db(ring);
+
 	return polled;
 }
 
@@ -1131,10 +1141,13 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 			return budget;
 
 		/* Current cpu is not according to smp_irq_affinity -
-		 * probably affinity changed. need to stop this NAPI
-		 * poll, and restart it on the right CPU
+		 * probably affinity changed. Need to stop this NAPI
+		 * poll, and restart it on the right CPU.
+		 * Try to avoid returning a too small value (like 0),
+		 * to not fool net_rx_action() and its netdev_budget
 		 */
-		done = 0;
+		if (done)
+			done--;
 	}
 	/* Done for now */
 	if (napi_complete_done(napi, done))

^ permalink raw reply related

* Re: [PATCH v2 net-next] mlx4: avoid unnecessary dirtying of critical fields
From: Eric Dumazet @ 2016-11-20 17:26 UTC (permalink / raw)
  To: Tariq Toukan; +Cc: David Miller, netdev, Tariq Toukan
In-Reply-To: <1479662676.8455.364.camel@edumazet-glaptop3.roam.corp.google.com>

On Sun, 2016-11-20 at 09:24 -0800, Eric Dumazet wrote:

>  		/* Current cpu is not according to smp_irq_affinity -
> -		 * probably affinity changed. need to stop this NAPI
> -		 * poll, and restart it on the right CPU
> +		 * probably affinity changed. Need to stop this NAPI
> +		 * poll, and restart it on the right CPU.
> +		 * Try to avoid returning a too small value (like 0),
> +		 * to not fool net_rx_action() and its netdev_budget
>  		 */
> -		done = 0;
> +		if (done)
> +			done--;


Note: This could have been a net candidate, but the bug is minor and I
prefer to avoid a merge conflict, since net-next has the additional if
around the napi_complete_done() call.


>  	}
>  	/* Done for now */
>  	if (napi_complete_done(napi, done))
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox