* [net-next 4/7] ixgbevf: add tx counters
From: Aaron Brown @ 2014-01-18 2:30 UTC (permalink / raw)
To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Alexander Duyck,
Aaron Brown
In-Reply-To: <1390012205-21995-1-git-send-email-aaron.f.brown@intel.com>
From: Emil Tantilov <emil.s.tantilov@intel.com>
This patch adds counters for tx_restart_queue and tx_timeout_count.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ethtool.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index b48df78..f68b78c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -77,6 +77,8 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_bytes", IXGBEVF_STAT(stats.vfgotc, stats.base_vfgotc,
stats.saved_reset_vfgotc)},
{"tx_busy", IXGBEVF_ZSTAT(tx_busy)},
+ {"tx_restart_queue", IXGBEVF_ZSTAT(restart_queue)},
+ {"tx_timeout_count", IXGBEVF_ZSTAT(tx_timeout_count)},
{"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc,
stats.saved_reset_vfmprc)},
{"rx_csum_offload_errors", IXGBEVF_ZSTAT(hw_csum_rx_error)},
--
1.8.5.GIT
^ permalink raw reply related
* [net-next 3/7] ixgbevf: remove counters for Tx/Rx checksum offload
From: Aaron Brown @ 2014-01-18 2:30 UTC (permalink / raw)
To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Alexander Duyck,
Aaron Brown
In-Reply-To: <1390012205-21995-1-git-send-email-aaron.f.brown@intel.com>
From: Emil Tantilov <emil.s.tantilov@intel.com>
This patch removes the Tx/Rx counters for checksum offload.
The Tx counter was never updated and the Rx counter is of limited use.
This is in an effort to clean up the counters and make them consistent
with the counters shown by ixgbe.
Also this patch removes some members of the adapter structure that were
never used and shuffles others to reduce the number of holes.
before:
/* size: 1568, cachelines: 25, members: 48 */
/* sum members: 1519, holes: 10, sum holes: 43 */
/* padding: 6 */
/* last cacheline: 32 bytes */
after:
/* size: 1480, cachelines: 24, members: 43 */
/* sum members: 1479, holes: 1, sum holes: 1 */
/* last cacheline: 8 bytes */
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ethtool.c | 2 --
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 23 +++++++++--------------
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ----
3 files changed, 9 insertions(+), 20 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 0769306..b48df78 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -79,9 +79,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_busy", IXGBEVF_ZSTAT(tx_busy)},
{"multicast", IXGBEVF_STAT(stats.vfmprc, stats.base_vfmprc,
stats.saved_reset_vfmprc)},
- {"rx_csum_offload_good", IXGBEVF_ZSTAT(hw_csum_rx_good)},
{"rx_csum_offload_errors", IXGBEVF_ZSTAT(hw_csum_rx_error)},
- {"tx_csum_offload_ctxt", IXGBEVF_ZSTAT(hw_csum_tx_good)},
#ifdef BP_EXTENDED_STATS
{"rx_bp_poll_yield", IXGBEVF_ZSTAT(bp_rx_yields)},
{"rx_bp_cleaned", IXGBEVF_ZSTAT(bp_rx_cleaned)},
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 0642bd2..0068428 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -106,7 +106,6 @@ struct ixgbevf_ring {
};
u64 hw_csum_rx_error;
- u64 hw_csum_rx_good;
u8 __iomem *tail;
u16 reg_idx; /* holds the special value that gets the hardware register
@@ -336,7 +335,6 @@ static inline u16 ixgbevf_desc_unused(struct ixgbevf_ring *ring)
struct ixgbevf_adapter {
struct timer_list watchdog_timer;
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
- u16 bd_number;
struct work_struct reset_task;
struct ixgbevf_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
@@ -349,25 +347,18 @@ struct ixgbevf_adapter {
u32 eims_other;
/* TX */
- struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
int num_tx_queues;
+ struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
u64 restart_queue;
- u64 hw_csum_tx_good;
- u64 lsc_int;
- u64 hw_tso_ctxt;
- u64 hw_tso6_ctxt;
u32 tx_timeout_count;
/* RX */
- struct ixgbevf_ring *rx_ring[MAX_TX_QUEUES]; /* One per active queue */
int num_rx_queues;
+ struct ixgbevf_ring *rx_ring[MAX_TX_QUEUES]; /* One per active queue */
u64 hw_csum_rx_error;
u64 hw_rx_no_dma_resources;
- u64 hw_csum_rx_good;
u64 non_eop_descs;
int num_msix_vectors;
- struct msix_entry *msix_entries;
-
u32 alloc_rx_page_failed;
u32 alloc_rx_buff_failed;
@@ -379,6 +370,8 @@ struct ixgbevf_adapter {
#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 1)
#define IXGBEVF_FLAG_QUEUE_RESET_REQUESTED (u32)(1 << 2)
+ struct msix_entry *msix_entries;
+
/* OS defined structs */
struct net_device *netdev;
struct pci_dev *pdev;
@@ -386,10 +379,12 @@ struct ixgbevf_adapter {
/* structs defined in ixgbe_vf.h */
struct ixgbe_hw hw;
u16 msg_enable;
- struct ixgbevf_hw_stats stats;
+ u16 bd_number;
/* Interrupt Throttle Rate */
u32 eitr_param;
+ struct ixgbevf_hw_stats stats;
+
unsigned long state;
u64 tx_busy;
unsigned int tx_ring_count;
@@ -408,9 +403,9 @@ struct ixgbevf_adapter {
u32 link_speed;
bool link_up;
- struct work_struct watchdog_task;
-
spinlock_t mbx_lock;
+
+ struct work_struct watchdog_task;
};
enum ixbgevf_state_t {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 77ddda6..41b72ed 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -357,7 +357,6 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
/* It must be a TCP or UDP packet with a valid checksum */
skb->ip_summed = CHECKSUM_UNNECESSARY;
- ring->hw_csum_rx_good++;
}
/**
@@ -2263,10 +2262,7 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
for (i = 0; i < adapter->num_rx_queues; i++) {
adapter->hw_csum_rx_error +=
adapter->rx_ring[i]->hw_csum_rx_error;
- adapter->hw_csum_rx_good +=
- adapter->rx_ring[i]->hw_csum_rx_good;
adapter->rx_ring[i]->hw_csum_rx_error = 0;
- adapter->rx_ring[i]->hw_csum_rx_good = 0;
}
}
--
1.8.5.GIT
^ permalink raw reply related
* [net-next 1/7] ixgbevf: make use of the dev pointer in the ixgbevf_ring struct
From: Aaron Brown @ 2014-01-18 2:29 UTC (permalink / raw)
To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Alexander Duyck,
Aaron Brown
In-Reply-To: <1390012205-21995-1-git-send-email-aaron.f.brown@intel.com>
From: Emil Tantilov <emil.s.tantilov@intel.com>
This patch cleans up the code by removing the adapter structure as a
parameter from multiple functions. The adapter structure was previously
being used to access the dev pointer, but this can also be done via the
ixgbevf_ring structure. This way we can drop the adapter as a parameter from
these functions.
This patch also includes small cleanups in some error code paths.
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ethtool.c | 46 ++++++------
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 8 +-
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 91 +++++++++--------------
3 files changed, 61 insertions(+), 84 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 515ba4e..f4e0574 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -305,18 +305,18 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
/* clone ring and setup updated count */
tx_ring[i] = *adapter->tx_ring[i];
tx_ring[i].count = new_tx_count;
- err = ixgbevf_setup_tx_resources(adapter, &tx_ring[i]);
- if (!err)
- continue;
- while (i) {
- i--;
- ixgbevf_free_tx_resources(adapter, &tx_ring[i]);
- }
+ err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ixgbevf_free_tx_resources(&tx_ring[i]);
+ }
- vfree(tx_ring);
- tx_ring = NULL;
+ vfree(tx_ring);
+ tx_ring = NULL;
- goto clear_reset;
+ goto clear_reset;
+ }
}
}
@@ -331,18 +331,18 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
/* clone ring and setup updated count */
rx_ring[i] = *adapter->rx_ring[i];
rx_ring[i].count = new_rx_count;
- err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]);
- if (!err)
- continue;
- while (i) {
- i--;
- ixgbevf_free_rx_resources(adapter, &rx_ring[i]);
- }
+ err = ixgbevf_setup_rx_resources(&rx_ring[i]);
+ if (err) {
+ while (i) {
+ i--;
+ ixgbevf_free_rx_resources(&rx_ring[i]);
+ }
- vfree(rx_ring);
- rx_ring = NULL;
+ vfree(rx_ring);
+ rx_ring = NULL;
- goto clear_reset;
+ goto clear_reset;
+ }
}
}
@@ -352,7 +352,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
/* Tx */
if (tx_ring) {
for (i = 0; i < adapter->num_tx_queues; i++) {
- ixgbevf_free_tx_resources(adapter, adapter->tx_ring[i]);
+ ixgbevf_free_tx_resources(adapter->tx_ring[i]);
*adapter->tx_ring[i] = tx_ring[i];
}
adapter->tx_ring_count = new_tx_count;
@@ -364,7 +364,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
/* Rx */
if (rx_ring) {
for (i = 0; i < adapter->num_rx_queues; i++) {
- ixgbevf_free_rx_resources(adapter, adapter->rx_ring[i]);
+ ixgbevf_free_rx_resources(adapter->rx_ring[i]);
*adapter->rx_ring[i] = rx_ring[i];
}
adapter->rx_ring_count = new_rx_count;
@@ -380,7 +380,7 @@ clear_reset:
/* free Tx resources if Rx error is encountered */
if (tx_ring) {
for (i = 0; i < adapter->num_tx_queues; i++)
- ixgbevf_free_tx_resources(adapter, &tx_ring[i]);
+ ixgbevf_free_tx_resources(&tx_ring[i]);
vfree(tx_ring);
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 0547e40..59a7574 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -420,10 +420,10 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter);
void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);
void ixgbevf_reset(struct ixgbevf_adapter *adapter);
void ixgbevf_set_ethtool_ops(struct net_device *netdev);
-int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *, struct ixgbevf_ring *);
-int ixgbevf_setup_tx_resources(struct ixgbevf_adapter *, struct ixgbevf_ring *);
-void ixgbevf_free_rx_resources(struct ixgbevf_adapter *, struct ixgbevf_ring *);
-void ixgbevf_free_tx_resources(struct ixgbevf_adapter *, struct ixgbevf_ring *);
+int ixgbevf_setup_rx_resources(struct ixgbevf_ring *);
+int ixgbevf_setup_tx_resources(struct ixgbevf_ring *);
+void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
+void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
void ixgbevf_update_stats(struct ixgbevf_adapter *adapter);
int ethtool_ioctl(struct ifreq *ifr);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 6cf4120..ec60db0 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -368,34 +368,31 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
struct ixgbevf_ring *rx_ring,
int cleaned_count)
{
- struct pci_dev *pdev = adapter->pdev;
union ixgbe_adv_rx_desc *rx_desc;
struct ixgbevf_rx_buffer *bi;
unsigned int i = rx_ring->next_to_use;
- bi = &rx_ring->rx_buffer_info[i];
-
while (cleaned_count--) {
rx_desc = IXGBEVF_RX_DESC(rx_ring, i);
+ bi = &rx_ring->rx_buffer_info[i];
if (!bi->skb) {
struct sk_buff *skb;
skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
rx_ring->rx_buf_len);
- if (!skb) {
- adapter->alloc_rx_buff_failed++;
+ if (!skb)
goto no_buffers;
- }
+
bi->skb = skb;
- bi->dma = dma_map_single(&pdev->dev, skb->data,
+ bi->dma = dma_map_single(rx_ring->dev, skb->data,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
- if (dma_mapping_error(&pdev->dev, bi->dma)) {
+ if (dma_mapping_error(rx_ring->dev, bi->dma)) {
dev_kfree_skb(skb);
bi->skb = NULL;
- dev_err(&pdev->dev, "RX DMA map failed\n");
+ dev_err(rx_ring->dev, "Rx DMA map failed\n");
break;
}
}
@@ -404,10 +401,10 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
i++;
if (i == rx_ring->count)
i = 0;
- bi = &rx_ring->rx_buffer_info[i];
}
no_buffers:
+ adapter->alloc_rx_buff_failed++;
if (rx_ring->next_to_use != i)
ixgbevf_release_rx_desc(rx_ring, i);
}
@@ -425,7 +422,6 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
int budget)
{
struct ixgbevf_adapter *adapter = q_vector->adapter;
- struct pci_dev *pdev = adapter->pdev;
union ixgbe_adv_rx_desc *rx_desc, *next_rxd;
struct ixgbevf_rx_buffer *rx_buffer_info, *next_buffer;
struct sk_buff *skb;
@@ -451,7 +447,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
rx_buffer_info->skb = NULL;
if (rx_buffer_info->dma) {
- dma_unmap_single(&pdev->dev, rx_buffer_info->dma,
+ dma_unmap_single(rx_ring->dev, rx_buffer_info->dma,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
rx_buffer_info->dma = 0;
@@ -1633,13 +1629,10 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter)
/**
* ixgbevf_clean_rx_ring - Free Rx Buffers per Queue
- * @adapter: board private structure
* @rx_ring: ring to free buffers from
**/
-static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *rx_ring)
+static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
{
- struct pci_dev *pdev = adapter->pdev;
unsigned long size;
unsigned int i;
@@ -1652,7 +1645,7 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter,
rx_buffer_info = &rx_ring->rx_buffer_info[i];
if (rx_buffer_info->dma) {
- dma_unmap_single(&pdev->dev, rx_buffer_info->dma,
+ dma_unmap_single(rx_ring->dev, rx_buffer_info->dma,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
rx_buffer_info->dma = 0;
@@ -1677,11 +1670,9 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter,
/**
* ixgbevf_clean_tx_ring - Free Tx Buffers
- * @adapter: board private structure
* @tx_ring: ring to be cleaned
**/
-static void ixgbevf_clean_tx_ring(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *tx_ring)
+static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
{
struct ixgbevf_tx_buffer *tx_buffer_info;
unsigned long size;
@@ -1711,7 +1702,7 @@ static void ixgbevf_clean_all_rx_rings(struct ixgbevf_adapter *adapter)
int i;
for (i = 0; i < adapter->num_rx_queues; i++)
- ixgbevf_clean_rx_ring(adapter, adapter->rx_ring[i]);
+ ixgbevf_clean_rx_ring(adapter->rx_ring[i]);
}
/**
@@ -1723,7 +1714,7 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter)
int i;
for (i = 0; i < adapter->num_tx_queues; i++)
- ixgbevf_clean_tx_ring(adapter, adapter->tx_ring[i]);
+ ixgbevf_clean_tx_ring(adapter->tx_ring[i]);
}
void ixgbevf_down(struct ixgbevf_adapter *adapter)
@@ -2426,17 +2417,13 @@ pf_has_reset:
/**
* ixgbevf_free_tx_resources - Free Tx Resources per Queue
- * @adapter: board private structure
* @tx_ring: Tx descriptor ring for a specific queue
*
* Free all transmit software resources
**/
-void ixgbevf_free_tx_resources(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *tx_ring)
+void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring)
{
- struct pci_dev *pdev = adapter->pdev;
-
- ixgbevf_clean_tx_ring(adapter, tx_ring);
+ ixgbevf_clean_tx_ring(tx_ring);
vfree(tx_ring->tx_buffer_info);
tx_ring->tx_buffer_info = NULL;
@@ -2445,7 +2432,7 @@ void ixgbevf_free_tx_resources(struct ixgbevf_adapter *adapter,
if (!tx_ring->desc)
return;
- dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
+ dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc,
tx_ring->dma);
tx_ring->desc = NULL;
@@ -2463,20 +2450,17 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter)
for (i = 0; i < adapter->num_tx_queues; i++)
if (adapter->tx_ring[i]->desc)
- ixgbevf_free_tx_resources(adapter, adapter->tx_ring[i]);
+ ixgbevf_free_tx_resources(adapter->tx_ring[i]);
}
/**
* ixgbevf_setup_tx_resources - allocate Tx resources (Descriptors)
- * @adapter: board private structure
* @tx_ring: tx descriptor ring (for a specific queue) to setup
*
* Return 0 on success, negative on failure
**/
-int ixgbevf_setup_tx_resources(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *tx_ring)
+int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
{
- struct pci_dev *pdev = adapter->pdev;
int size;
size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
@@ -2488,7 +2472,7 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_adapter *adapter,
tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
tx_ring->size = ALIGN(tx_ring->size, 4096);
- tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
+ tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->size,
&tx_ring->dma, GFP_KERNEL);
if (!tx_ring->desc)
goto err;
@@ -2518,7 +2502,7 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
int i, err = 0;
for (i = 0; i < adapter->num_tx_queues; i++) {
- err = ixgbevf_setup_tx_resources(adapter, adapter->tx_ring[i]);
+ err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]);
if (!err)
continue;
hw_dbg(&adapter->hw,
@@ -2531,37 +2515,34 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
/**
* ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors)
- * @adapter: board private structure
* @rx_ring: rx descriptor ring (for a specific queue) to setup
*
* Returns 0 on success, negative on failure
**/
-int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *rx_ring)
+int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
{
- struct pci_dev *pdev = adapter->pdev;
int size;
size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
rx_ring->rx_buffer_info = vzalloc(size);
if (!rx_ring->rx_buffer_info)
- goto alloc_failed;
+ goto err;
/* Round up to nearest 4K */
rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
- rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
+ rx_ring->desc = dma_alloc_coherent(rx_ring->dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
- if (!rx_ring->desc) {
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
- goto alloc_failed;
- }
+ if (!rx_ring->desc)
+ goto err;
return 0;
-alloc_failed:
+err:
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n");
return -ENOMEM;
}
@@ -2580,7 +2561,7 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
int i, err = 0;
for (i = 0; i < adapter->num_rx_queues; i++) {
- err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]);
+ err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]);
if (!err)
continue;
hw_dbg(&adapter->hw,
@@ -2592,22 +2573,18 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
/**
* ixgbevf_free_rx_resources - Free Rx Resources
- * @adapter: board private structure
* @rx_ring: ring to clean the resources from
*
* Free all receive software resources
**/
-void ixgbevf_free_rx_resources(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *rx_ring)
+void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring)
{
- struct pci_dev *pdev = adapter->pdev;
-
- ixgbevf_clean_rx_ring(adapter, rx_ring);
+ ixgbevf_clean_rx_ring(rx_ring);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
- dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
+ dma_free_coherent(rx_ring->dev, rx_ring->size, rx_ring->desc,
rx_ring->dma);
rx_ring->desc = NULL;
@@ -2625,7 +2602,7 @@ static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter)
for (i = 0; i < adapter->num_rx_queues; i++)
if (adapter->rx_ring[i]->desc)
- ixgbevf_free_rx_resources(adapter, adapter->rx_ring[i]);
+ ixgbevf_free_rx_resources(adapter->rx_ring[i]);
}
/**
--
1.8.5.GIT
^ permalink raw reply related
* [net-next 2/7] ixgbevf: move ring specific stats into ring specific structure
From: Aaron Brown @ 2014-01-18 2:30 UTC (permalink / raw)
To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Alexander Duyck,
Aaron Brown
In-Reply-To: <1390012205-21995-1-git-send-email-aaron.f.brown@intel.com>
From: Emil Tantilov <emil.s.tantilov@intel.com>
This patch moves hot-path specific statistics into the ring structure.
This allows us to drop the adapter structure in some functions and should
help with performance.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ethtool.c | 12 +++---
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 44 +++++++++++++++-----
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 50 +++++++++++------------
3 files changed, 62 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index f4e0574..0769306 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -411,15 +411,15 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev,
tx_yields = 0, tx_cleaned = 0, tx_missed = 0;
for (i = 0; i < adapter->num_rx_queues; i++) {
- rx_yields += adapter->rx_ring[i]->bp_yields;
- rx_cleaned += adapter->rx_ring[i]->bp_cleaned;
- rx_yields += adapter->rx_ring[i]->bp_yields;
+ rx_yields += adapter->rx_ring[i]->stats.yields;
+ rx_cleaned += adapter->rx_ring[i]->stats.cleaned;
+ rx_yields += adapter->rx_ring[i]->stats.yields;
}
for (i = 0; i < adapter->num_tx_queues; i++) {
- tx_yields += adapter->tx_ring[i]->bp_yields;
- tx_cleaned += adapter->tx_ring[i]->bp_cleaned;
- tx_yields += adapter->tx_ring[i]->bp_yields;
+ tx_yields += adapter->tx_ring[i]->stats.yields;
+ tx_cleaned += adapter->tx_ring[i]->stats.cleaned;
+ tx_yields += adapter->tx_ring[i]->stats.yields;
}
adapter->bp_rx_yields = rx_yields;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 59a7574..0642bd2 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -59,6 +59,29 @@ struct ixgbevf_rx_buffer {
dma_addr_t dma;
};
+struct ixgbevf_stats {
+ u64 packets;
+ u64 bytes;
+#ifdef BP_EXTENDED_STATS
+ u64 yields;
+ u64 misses;
+ u64 cleaned;
+#endif
+};
+
+struct ixgbevf_tx_queue_stats {
+ u64 restart_queue;
+ u64 tx_busy;
+ u64 tx_done_old;
+};
+
+struct ixgbevf_rx_queue_stats {
+ u64 non_eop_descs;
+ u64 alloc_rx_page_failed;
+ u64 alloc_rx_buff_failed;
+ u64 csum_err;
+};
+
struct ixgbevf_ring {
struct ixgbevf_ring *next;
struct net_device *netdev;
@@ -70,22 +93,20 @@ struct ixgbevf_ring {
unsigned int next_to_use;
unsigned int next_to_clean;
- int queue_index; /* needed for multiqueue queue management */
union {
struct ixgbevf_tx_buffer *tx_buffer_info;
struct ixgbevf_rx_buffer *rx_buffer_info;
};
- u64 total_bytes;
- u64 total_packets;
- struct u64_stats_sync syncp;
+ struct ixgbevf_stats stats;
+ struct u64_stats_sync syncp;
+ union {
+ struct ixgbevf_tx_queue_stats tx_stats;
+ struct ixgbevf_rx_queue_stats rx_stats;
+ };
+
u64 hw_csum_rx_error;
u64 hw_csum_rx_good;
-#ifdef BP_EXTENDED_STATS
- u64 bp_yields;
- u64 bp_misses;
- u64 bp_cleaned;
-#endif
u8 __iomem *tail;
u16 reg_idx; /* holds the special value that gets the hardware register
@@ -93,6 +114,7 @@ struct ixgbevf_ring {
* for DCB and RSS modes */
u16 rx_buf_len;
+ int queue_index; /* needed for multiqueue queue management */
};
/* How many Rx Buffers do we bundle into one write to the hardware ? */
@@ -186,7 +208,7 @@ static inline bool ixgbevf_qv_lock_napi(struct ixgbevf_q_vector *q_vector)
q_vector->state |= IXGBEVF_QV_STATE_NAPI_YIELD;
rc = false;
#ifdef BP_EXTENDED_STATS
- q_vector->tx.ring->bp_yields++;
+ q_vector->tx.ring->stats.yields++;
#endif
} else {
/* we don't care if someone yielded */
@@ -221,7 +243,7 @@ static inline bool ixgbevf_qv_lock_poll(struct ixgbevf_q_vector *q_vector)
q_vector->state |= IXGBEVF_QV_STATE_POLL_YIELD;
rc = false;
#ifdef BP_EXTENDED_STATS
- q_vector->rx.ring->bp_yields++;
+ q_vector->rx.ring->stats.yields++;
#endif
} else {
/* preserve yield marks */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index ec60db0..77ddda6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -268,8 +268,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
}
u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->total_bytes += total_bytes;
- tx_ring->total_packets += total_packets;
+ tx_ring->stats.bytes += total_bytes;
+ tx_ring->stats.packets += total_packets;
u64_stats_update_end(&tx_ring->syncp);
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;
@@ -343,7 +343,7 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
/* if IP and error */
if ((status_err & IXGBE_RXD_STAT_IPCS) &&
(status_err & IXGBE_RXDADV_ERR_IPE)) {
- ring->hw_csum_rx_error++;
+ ring->rx_stats.csum_err++;
return;
}
@@ -351,7 +351,7 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
return;
if (status_err & IXGBE_RXDADV_ERR_TCPE) {
- ring->hw_csum_rx_error++;
+ ring->rx_stats.csum_err++;
return;
}
@@ -362,10 +362,9 @@ static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
/**
* ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split
- * @adapter: address of board private structure
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on
**/
-static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *rx_ring,
+static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
int cleaned_count)
{
union ixgbe_adv_rx_desc *rx_desc;
@@ -404,7 +403,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
}
no_buffers:
- adapter->alloc_rx_buff_failed++;
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
if (rx_ring->next_to_use != i)
ixgbevf_release_rx_desc(rx_ring, i);
}
@@ -421,7 +420,6 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct ixgbevf_ring *rx_ring,
int budget)
{
- struct ixgbevf_adapter *adapter = q_vector->adapter;
union ixgbe_adv_rx_desc *rx_desc, *next_rxd;
struct ixgbevf_rx_buffer *rx_buffer_info, *next_buffer;
struct sk_buff *skb;
@@ -467,7 +465,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
if (!(staterr & IXGBE_RXD_STAT_EOP)) {
skb->next = next_buffer->skb;
IXGBE_CB(skb->next)->prev = skb;
- adapter->non_eop_descs++;
+ rx_ring->rx_stats.non_eop_descs++;
goto next_desc;
}
@@ -499,7 +497,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
* source pruning.
*/
if ((skb->pkt_type & (PACKET_BROADCAST | PACKET_MULTICAST)) &&
- ether_addr_equal(adapter->netdev->dev_addr,
+ ether_addr_equal(rx_ring->netdev->dev_addr,
eth_hdr(skb)->h_source)) {
dev_kfree_skb_irq(skb);
goto next_desc;
@@ -512,8 +510,7 @@ next_desc:
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
- ixgbevf_alloc_rx_buffers(adapter, rx_ring,
- cleaned_count);
+ ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
@@ -528,11 +525,11 @@ next_desc:
cleaned_count = ixgbevf_desc_unused(rx_ring);
if (cleaned_count)
- ixgbevf_alloc_rx_buffers(adapter, rx_ring, cleaned_count);
+ ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->total_packets += total_rx_packets;
- rx_ring->total_bytes += total_rx_bytes;
+ rx_ring->stats.packets += total_rx_packets;
+ rx_ring->stats.bytes += total_rx_bytes;
u64_stats_update_end(&rx_ring->syncp);
q_vector->rx.total_packets += total_rx_packets;
q_vector->rx.total_bytes += total_rx_bytes;
@@ -637,9 +634,9 @@ static int ixgbevf_busy_poll_recv(struct napi_struct *napi)
found = ixgbevf_clean_rx_irq(q_vector, ring, 4);
#ifdef BP_EXTENDED_STATS
if (found)
- ring->bp_cleaned += found;
+ ring->stats.cleaned += found;
else
- ring->bp_misses++;
+ ring->stats.misses++;
#endif
if (found)
break;
@@ -1313,7 +1310,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
ixgbevf_rx_desc_queue_enable(adapter, ring);
- ixgbevf_alloc_rx_buffers(adapter, ring, ixgbevf_desc_unused(ring));
+ ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
}
/**
@@ -3048,8 +3045,6 @@ static void ixgbevf_tx_queue(struct ixgbevf_ring *tx_ring, int tx_flags,
static int __ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
{
- struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
-
netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
/* Herbert's original patch had:
* smp_mb__after_netif_stop_queue();
@@ -3063,7 +3058,8 @@ static int __ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
/* A reprieve! - use start_queue because it doesn't call schedule */
netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
- ++adapter->restart_queue;
+ ++tx_ring->tx_stats.restart_queue;
+
return 0;
}
@@ -3108,7 +3104,7 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
count += skb_shinfo(skb)->nr_frags;
#endif
if (ixgbevf_maybe_stop_tx(tx_ring, count + 3)) {
- adapter->tx_busy++;
+ tx_ring->tx_stats.tx_busy++;
return NETDEV_TX_BUSY;
}
@@ -3308,8 +3304,8 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
ring = adapter->rx_ring[i];
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
- bytes = ring->total_bytes;
- packets = ring->total_packets;
+ bytes = ring->stats.bytes;
+ packets = ring->stats.packets;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
stats->rx_bytes += bytes;
stats->rx_packets += packets;
@@ -3319,8 +3315,8 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
ring = adapter->tx_ring[i];
do {
start = u64_stats_fetch_begin_bh(&ring->syncp);
- bytes = ring->total_bytes;
- packets = ring->total_packets;
+ bytes = ring->stats.bytes;
+ packets = ring->stats.packets;
} while (u64_stats_fetch_retry_bh(&ring->syncp, start));
stats->tx_bytes += bytes;
stats->tx_packets += packets;
--
1.8.5.GIT
^ permalink raw reply related
* [net-next 0/7] Intel Wired LAN Driver Updates
From: Aaron Brown @ 2014-01-18 2:29 UTC (permalink / raw)
To: davem; +Cc: Aaron Brown, netdev, gospo, sassmann
This series contains updates from Emil to ixgbevf.
He cleans up the code by removing the adapter structure as a
parameter from multiple functions in favor of using the ixgbevf_ring
structure and moves hot-path specific statistics into the ring
structure for anticipated performance gains.
He also removes the Tx/Rx counters for checksum offload and adds
counters for tx_restart_queue and tx_timeout_count.
Next he makes it so that the first tx_buffer structure acts as a
central storage location for most of the skb info we are about to
transmit, then takes advantage of the dma buffer always being
present in the first descriptor and mapped as single allowing a
call to dma_unmap_single which alleviates the need to check for
DMA mapping in ixgbevf_clean_tx_irq().
Finally he merges the ixgbevf_tx_map call and the ixgbevf_tx_queue
call into a single function.
Emil Tantilov (7):
ixgbevf: make use of the dev pointer in the ixgbevf_ring struct
ixgbevf: move ring specific stats into ring specific structure
ixgbevf: remove counters for Tx/Rx checksum offload
ixgbevf: add tx counters
ixgbevf: make the first tx_buffer a repository for most of the skb
info
ixgbevf: redo dma mapping using the tx buffer info
ixgbevf: merge ixgbevf_tx_map and ixgbevf_tx_queue into a single
function
drivers/net/ethernet/intel/ixgbevf/defines.h | 1 +
drivers/net/ethernet/intel/ixgbevf/ethtool.c | 62 +--
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 90 ++--
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 620 +++++++++++-----------
4 files changed, 403 insertions(+), 370 deletions(-)
--
1.8.5.GIT
^ permalink raw reply
* [PATCH net-next] ipcomp: Convert struct xt_ipcomp spis into 16bits
From: Fan Du @ 2014-01-18 2:16 UTC (permalink / raw)
To: pablo; +Cc: steffen.klassert, davem, netdev, netfilter-devel
sparse warnings: (new ones prefixed by >>)
>> >> net/netfilter/xt_ipcomp.c:63:26: sparse: restricted __be16 degrades to integer
>> >> net/netfilter/xt_ipcomp.c:63:26: sparse: cast to restricted __be32
Fix this by using a 16-bit SPI, as the IPComp CPI is only 16 bits wide.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
include/uapi/linux/netfilter/xt_ipcomp.h | 2 +-
net/netfilter/xt_ipcomp.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/netfilter/xt_ipcomp.h b/include/uapi/linux/netfilter/xt_ipcomp.h
index 45c7e40..ca82ebb 100644
--- a/include/uapi/linux/netfilter/xt_ipcomp.h
+++ b/include/uapi/linux/netfilter/xt_ipcomp.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
struct xt_ipcomp {
- __u32 spis[2]; /* Security Parameter Index */
+ __u16 spis[2]; /* Security Parameter Index */
__u8 invflags; /* Inverse flags */
__u8 hdrres; /* Test of the Reserved Filed */
};
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index a4c7561..5542cb2 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -29,7 +29,7 @@ MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
/* Returns 1 if the spi is matched by the range, 0 otherwise */
static inline bool
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+spi_match(u_int16_t min, u_int16_t max, u_int16_t spi, bool invert)
{
bool r;
pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
@@ -60,7 +60,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
- ntohl(chdr->cpi << 16),
+ ntohl(chdr->cpi),
!!(compinfo->invflags & XT_IPCOMP_INV_SPI));
}
--
1.7.9.5
^ permalink raw reply related
* Re: [PATCH v7 net-next 2/4] sh_eth: Add support for r7s72100
From: David Miller @ 2014-01-18 2:14 UTC (permalink / raw)
To: horms+renesas
Cc: netdev, linux-sh, linux-arm-kernel, magnus.damm, sergei.shtylyov
In-Reply-To: <1389918150-19058-3-git-send-email-horms+renesas@verge.net.au>
From: Simon Horman <horms+renesas@verge.net.au>
Date: Fri, 17 Jan 2014 09:22:28 +0900
> The r7s72100 SoC includes a fast ethernet controller.
>
> Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
> Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Applied.
^ permalink raw reply
* Re: [PATCH v7 net-next 1/4] sh_eth: Use bool as return type of sh_eth_is_gether()
From: David Miller @ 2014-01-18 2:14 UTC (permalink / raw)
To: horms+renesas
Cc: netdev, linux-sh, linux-arm-kernel, magnus.damm, sergei.shtylyov
In-Reply-To: <1389918150-19058-2-git-send-email-horms+renesas@verge.net.au>
From: Simon Horman <horms+renesas@verge.net.au>
Date: Fri, 17 Jan 2014 09:22:27 +0900
> Return a boolean from sh_eth_is_gether() and refactor it as a one-liner.
>
> Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Applied.
^ permalink raw reply
* Re: [PATCH v2] ipv6: send Change Status Report after DAD is completed
From: David Miller @ 2014-01-18 2:13 UTC (permalink / raw)
To: fbl; +Cc: netdev, yoshfuji, hannes
In-Reply-To: <1389907679-15346-1-git-send-email-fbl@redhat.com>
From: Flavio Leitner <fbl@redhat.com>
Date: Thu, 16 Jan 2014 19:27:59 -0200
> RFC 3810 defines two types of messages for multicast
> listeners. The "Current State Report" message, as the name
> implies, refreshes the *current* state to the querier.
> Since the querier sends Query messages periodically, there
> is no need to retransmit the report.
>
> On the other hand, any change should be reported immediately
> using "State Change Report" messages. Since such a report is
> triggered by a change and can be lost to packet loss, the RFC
> states that it should be retransmitted [RobVar] times to make
> sure routers receive it in a timely manner.
>
> Currently, we are sending "Current State Reports" after
> DAD is completed. Before that, we send messages using
> unspecified address (::) which should be silently discarded
> by routers.
>
> This patch changes to send "State Change Report" messages
> after DAD is completed fixing the behavior to be RFC compliant
> and also to pass TAHI IPv6 testsuite.
>
> Signed-off-by: Flavio Leitner <fbl@redhat.com>
Applied.
^ permalink raw reply
* Re: [PATCH] parisc: fix SO_MAX_PACING_RATE typo
From: David Miller @ 2014-01-18 2:11 UTC (permalink / raw)
To: eric.dumazet; +Cc: dborkman, msekleta, netdev, mtk.manpages
In-Reply-To: <1389899712.31367.419.camel@edumazet-glaptop2.roam.corp.google.com>
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 16 Jan 2014 11:15:12 -0800
> From: Eric Dumazet <edumazet@google.com>
>
> The SO_MAX_PACING_RATE definition on parisc has a typo.
> It's not too late to fix it, before 3.13 is official.
>
> Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net] ipv6: simplify detection of first operational link-local address on interface
From: David Miller @ 2014-01-18 2:10 UTC (permalink / raw)
To: hannes; +Cc: jiri, netdev, fbl
In-Reply-To: <20140116191304.GC17529@order.stressinduktion.org>
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 16 Jan 2014 20:13:04 +0100
> In commit 1ec047eb4751e3 ("ipv6: introduce per-interface counter for
> dad-completed ipv6 addresses") I built the detection of the first
> operational link-local address much too complex. Additionally, this code
> now has a race condition.
>
> Replace it with a much simpler variant, which just scans the address
> list when duplicate address detection completes, to check if this is
> the first valid link local address and send RS and MLD reports then.
>
> Fixes: 1ec047eb4751e3 ("ipv6: introduce per-interface counter for dad-completed ipv6 addresses")
> Reported-by: Jiri Pirko <jiri@resnulli.us>
> Cc: Flavio Leitner <fbl@redhat.com>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Applied and queued up for -stable, thanks.
^ permalink raw reply
* Re: [PATCH net v2] tcp: metrics: Avoid duplicate entries with the same destination-IP
From: David Miller @ 2014-01-18 2:08 UTC (permalink / raw)
To: christoph.paasch; +Cc: netdev, eric.dumazet
In-Reply-To: <1389898881-11393-1-git-send-email-christoph.paasch@uclouvain.be>
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Thu, 16 Jan 2014 20:01:21 +0100
> Because the tcp-metrics is an RCU-list, it may be that two
> soft-interrupts are inside __tcp_get_metrics() for the same
> destination-IP at the same time. If this destination-IP is not yet part of
> the tcp-metrics, both soft-interrupts will end up in tcpm_new and create
> a new entry for this IP.
> So, we will have two tcp-metrics with the same destination-IP in the list.
>
> This patch calls __tcp_get_metrics() twice: first without holding the
> lock, then while holding the lock. The second call is there to confirm
> that the entry has not been added by another soft-irq while waiting for
> the spin-lock.
>
> Fixes: 51c5d0c4b169b (tcp: Maintain dynamic metrics in local cache.)
> Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
> ---
>
> v2: As requested by Eric D.: Check the cache twice. Once without holding the lock,
> and then again while holding the lock. That way we avoid taking the lock
> needlessly.
Looks great, applied and queued up for -stable, thanks.
^ permalink raw reply
* Re: [PATCH net-next 2/2] qlcnic: remove unused code
From: David Miller @ 2014-01-18 2:04 UTC (permalink / raw)
To: stephen
Cc: himanshu.madhani, rajesh.borundia, shahed.shaikh,
jitendra.kalsaria, sony.chacko, sucheta.chakraborty, linux-driver,
netdev
In-Reply-To: <20140116103221.41637f8f@nehalam.linuxnetplumber.net>
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 16 Jan 2014 10:32:21 -0800
> Remove function qlcnic_enable_eswitch which was defined
> but never used in current code.
>
> Compile tested only.
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Applied.
^ permalink raw reply
* Re: [PATCH net-next 1/2] qlcnic: make local functions static
From: David Miller @ 2014-01-18 2:04 UTC (permalink / raw)
To: stephen
Cc: himanshu.madhani, rajesh.borundia, shahed.shaikh,
jitendra.kalsaria, sony.chacko, sucheta.chakraborty, linux-driver,
netdev
In-Reply-To: <20140116103127.121855af@nehalam.linuxnetplumber.net>
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 16 Jan 2014 10:31:27 -0800
> Functions only used in one file should be static.
> Found by running make namespacecheck
>
> Compile tested only.
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Applied.
^ permalink raw reply
* Re: [PATCH net-next] ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT
From: David Miller @ 2014-01-18 1:57 UTC (permalink / raw)
To: florent.fourcot; +Cc: netdev
In-Reply-To: <1389889282-2562-1-git-send-email-florent.fourcot@enst-bretagne.fr>
From: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Date: Thu, 16 Jan 2014 17:21:22 +0100
> This patch is following the commit b903d324bee262 (ipv6: tcp: fix TCLASS
> value in ACK messages sent from TIME_WAIT).
>
> For the same reason as for tclass, we have to store the flow label in the
> inet_timewait_sock to provide consistency of flow label on the last ACK.
>
> Signed-off-by: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Applied, thank you.
^ permalink raw reply
* [PATCHv4 net-next 2/2] flowcache: Bring net/core/flow.c under IPsec maintain scope
From: Fan Du @ 2014-01-18 1:55 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010128-15835-1-git-send-email-fan.du@windriver.com>
The flow cache is mainly manipulated from IPsec, so list net/core/flow.c
under the IPsec maintainers.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index b358a3f..492e6c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5921,6 +5921,7 @@ L: netdev@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
S: Maintained
+F: net/core/flow.c
F: net/xfrm/
F: net/key/
F: net/ipv4/xfrm*
--
1.7.9.5
^ permalink raw reply related
* [PATCHv4 net-next 1/2] flowcache: Make flow cache name space aware
From: Fan Du @ 2014-01-18 1:55 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010128-15835-1-git-send-email-fan.du@windriver.com>
Inserting an entry into the flow cache, or flushing the flow cache, should
be done on a per-netns basis. In the original implementation, a flush
triggered by a fat netns crammed with flow entries also throws away the
entries of a slim netns that holds only a few.
Since the flow cache is tightly coupled with IPsec, it is easiest to put
the flow cache global parameters into the xfrm part of the netns. Bumping
the flow cache genid and flushing the flow cache are made per-net as
well.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
include/net/flow.h | 5 +-
include/net/flowcache.h | 25 ++++++++
include/net/netns/xfrm.h | 11 ++++
net/core/flow.c | 127 ++++++++++++++++++---------------------
net/xfrm/xfrm_policy.c | 7 ++-
security/selinux/include/xfrm.h | 5 +-
6 files changed, 103 insertions(+), 77 deletions(-)
create mode 100644 include/net/flowcache.h
diff --git a/include/net/flow.h b/include/net/flow.h
index d23e7fa..bee3741 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -218,9 +218,10 @@ struct flow_cache_object *flow_cache_lookup(struct net *net,
const struct flowi *key, u16 family,
u8 dir, flow_resolve_t resolver,
void *ctx);
+int flow_cache_init(struct net *net);
-void flow_cache_flush(void);
-void flow_cache_flush_deferred(void);
+void flow_cache_flush(struct net *net);
+void flow_cache_flush_deferred(struct net *net);
extern atomic_t flow_cache_genid;
#endif
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
new file mode 100644
index 0000000..c8f665e
--- /dev/null
+++ b/include/net/flowcache.h
@@ -0,0 +1,25 @@
+#ifndef _NET_FLOWCACHE_H
+#define _NET_FLOWCACHE_H
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+
+struct flow_cache_percpu {
+ struct hlist_head *hash_table;
+ int hash_count;
+ u32 hash_rnd;
+ int hash_rnd_recalc;
+ struct tasklet_struct flush_tasklet;
+};
+
+struct flow_cache {
+ u32 hash_shift;
+ struct flow_cache_percpu __percpu *percpu;
+ struct notifier_block hotcpu_notifier;
+ int low_watermark;
+ int high_watermark;
+ struct timer_list rnd_timer;
+};
+#endif /* _NET_FLOWCACHE_H */
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 1006a26..52d0086 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,6 +6,7 @@
#include <linux/workqueue.h>
#include <linux/xfrm.h>
#include <net/dst_ops.h>
+#include <net/flowcache.h>
struct ctl_table_header;
@@ -61,6 +62,16 @@ struct netns_xfrm {
spinlock_t xfrm_policy_sk_bundle_lock;
rwlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
+
+ /* flow cache part */
+ struct flow_cache flow_cache_global;
+ struct kmem_cache *flow_cachep;
+ atomic_t flow_cache_genid;
+ struct list_head flow_cache_gc_list;
+ spinlock_t flow_cache_gc_lock;
+ struct work_struct flow_cache_gc_work;
+ struct work_struct flow_cache_flush_work;
+ struct mutex flow_flush_sem;
};
#endif
diff --git a/net/core/flow.c b/net/core/flow.c
index dfa602c..344a184 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -24,6 +24,7 @@
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>
+#include <net/net_namespace.h>
struct flow_cache_entry {
union {
@@ -38,37 +39,12 @@ struct flow_cache_entry {
struct flow_cache_object *object;
};
-struct flow_cache_percpu {
- struct hlist_head *hash_table;
- int hash_count;
- u32 hash_rnd;
- int hash_rnd_recalc;
- struct tasklet_struct flush_tasklet;
-};
-
struct flow_flush_info {
struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
-struct flow_cache {
- u32 hash_shift;
- struct flow_cache_percpu __percpu *percpu;
- struct notifier_block hotcpu_notifier;
- int low_watermark;
- int high_watermark;
- struct timer_list rnd_timer;
-};
-
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-EXPORT_SYMBOL(flow_cache_genid);
-static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep __read_mostly;
-
-static DEFINE_SPINLOCK(flow_cache_gc_lock);
-static LIST_HEAD(flow_cache_gc_list);
-
#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
@@ -84,46 +60,50 @@ static void flow_cache_new_hashrnd(unsigned long arg)
add_timer(&fc->rnd_timer);
}
-static int flow_entry_valid(struct flow_cache_entry *fle)
+static int flow_entry_valid(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
- if (atomic_read(&flow_cache_genid) != fle->genid)
+ if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
return 0;
if (fle->object && !fle->object->ops->check(fle->object))
return 0;
return 1;
}
-static void flow_entry_kill(struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
if (fle->object)
fle->object->ops->delete(fle->object);
- kmem_cache_free(flow_cachep, fle);
+ kmem_cache_free(xfrm->flow_cachep, fle);
}
static void flow_cache_gc_task(struct work_struct *work)
{
struct list_head gc_list;
struct flow_cache_entry *fce, *n;
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
INIT_LIST_HEAD(&gc_list);
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail_init(&flow_cache_gc_list, &gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
- flow_entry_kill(fce);
+ flow_entry_kill(fce, xfrm);
}
-static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list)
+ int deleted, struct list_head *gc_list,
+ struct netns_xfrm *xfrm)
{
if (deleted) {
fcp->hash_count -= deleted;
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail(gc_list, &flow_cache_gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
- schedule_work(&flow_cache_gc_work);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
+ schedule_work(&xfrm->flow_cache_gc_work);
}
}
@@ -135,6 +115,8 @@ static void __flow_cache_shrink(struct flow_cache *fc,
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
int saved = 0;
@@ -142,7 +124,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (saved < shrink_to &&
- flow_entry_valid(fle)) {
+ flow_entry_valid(fle, xfrm)) {
saved++;
} else {
deleted++;
@@ -152,7 +134,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
}
static void flow_cache_shrink(struct flow_cache *fc,
@@ -208,7 +190,7 @@ struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
- struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
@@ -248,7 +230,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fcp->hash_count > fc->high_watermark)
flow_cache_shrink(fc, fcp);
- fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+ fle = kmem_cache_alloc(net->xfrm.flow_cachep, GFP_ATOMIC);
if (fle) {
fle->net = net;
fle->family = family;
@@ -258,7 +240,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
}
- } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
flo = fle->object;
if (!flo)
goto ret_object;
@@ -279,7 +261,7 @@ nocache:
}
flo = resolver(net, key, family, dir, flo, ctx);
if (fle) {
- fle->genid = atomic_read(&flow_cache_genid);
+ fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
if (!IS_ERR(flo))
fle->object = flo;
else
@@ -303,12 +285,14 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
- if (flow_entry_valid(fle))
+ if (flow_entry_valid(fle, xfrm))
continue;
deleted++;
@@ -317,7 +301,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
@@ -351,10 +335,9 @@ static void flow_cache_flush_per_cpu(void *data)
tasklet_schedule(tasklet);
}
-void flow_cache_flush(void)
+void flow_cache_flush(struct net *net)
{
struct flow_flush_info info;
- static DEFINE_MUTEX(flow_flush_sem);
cpumask_var_t mask;
int i, self;
@@ -365,8 +348,8 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
- mutex_lock(&flow_flush_sem);
- info.cache = &flow_cache_global;
+ mutex_lock(&net->xfrm.flow_flush_sem);
+ info.cache = &net->xfrm.flow_cache_global;
for_each_online_cpu(i)
if (!flow_cache_percpu_empty(info.cache, i))
cpumask_set_cpu(i, mask);
@@ -386,21 +369,23 @@ void flow_cache_flush(void)
wait_for_completion(&info.completion);
done:
- mutex_unlock(&flow_flush_sem);
+ mutex_unlock(&net->xfrm.flow_flush_sem);
put_online_cpus();
free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
{
- flow_cache_flush();
-}
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
+ struct net *net = container_of(xfrm, struct net, xfrm);
-static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+ flow_cache_flush(net);
+}
-void flow_cache_flush_deferred(void)
+void flow_cache_flush_deferred(struct net *net)
{
- schedule_work(&flow_cache_flush_work);
+ schedule_work(&net->xfrm.flow_cache_flush_work);
}
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
@@ -425,7 +410,8 @@ static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
- struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+ struct flow_cache *fc = container_of(nfb, struct flow_cache,
+ hotcpu_notifier);
int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
@@ -444,9 +430,20 @@ static int flow_cache_cpu(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static int __init flow_cache_init(struct flow_cache *fc)
+int flow_cache_init(struct net *net)
{
int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
+
+ /* Initialize per-net flow cache global variables here */
+ net->xfrm.flow_cachep = kmem_cache_create("flow_cache",
+ sizeof(struct flow_cache_entry),
+ 0, SLAB_PANIC, NULL);
+ spin_lock_init(&net->xfrm.flow_cache_gc_lock);
+ INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
+ INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
+ INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
+ mutex_init(&net->xfrm.flow_flush_sem);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
@@ -484,14 +481,4 @@ err:
return -ENOMEM;
}
-
-static int __init flow_cache_init_global(void)
-{
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC, NULL);
-
- return flow_cache_init(&flow_cache_global);
-}
-
-module_init(flow_cache_init_global);
+EXPORT_SYMBOL(flow_cache_init);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0177082..9d60590 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -661,7 +661,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
- atomic_inc(&flow_cache_genid);
+ atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET)
@@ -2567,14 +2567,14 @@ static void __xfrm_garbage_collect(struct net *net)
void xfrm_garbage_collect(struct net *net)
{
- flow_cache_flush();
+ flow_cache_flush(net);
__xfrm_garbage_collect(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
- flow_cache_flush_deferred();
+ flow_cache_flush_deferred(net);
__xfrm_garbage_collect(net);
}
@@ -2947,6 +2947,7 @@ static int __net_init xfrm_net_init(struct net *net)
spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
+ flow_cache_init(net);
return 0;
out_sysctl:
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 48c3cc9..dfe3fda 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -45,10 +45,11 @@ static inline void selinux_xfrm_notify_policyload(void)
{
struct net *net;
- atomic_inc(&flow_cache_genid);
rtnl_lock();
- for_each_net(net)
+ for_each_net(net) {
+ atomic_inc(&net->xfrm.flow_cache_genid);
rt_genid_bump_all(net);
+ }
rtnl_unlock();
}
#else
--
1.7.9.5
^ permalink raw reply related
* [PATCHv4 net-next 0/2] Make flow cache name space aware
From: Fan Du @ 2014-01-18 1:55 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
Hi,
This patch set makes the flow cache operate on a per-netns basis when
inserting flow cache entries or flushing the flow cache. The motivation
is not overwhelming but reasonable: in the original implementation a
flush has global effect, so as collateral damage a netns holding only a
few flow cache entries loses them as well.
So this patch set makes the flow cache run in per-net scope. Operations
in different netns no longer interfere with each other, and a flush only
affects the netns it was meant for.
v2:
- Pick up newly created file include/net/flowcache.h missed in v1.
v3:
- I made a mistake by splitting one unified patch into several
patches, which broke bisection. Merge them back into a single
patch, as advised by Cong and Sabrina.
v4:
- Fix selinux xfrm compiling issue.
Fan Du (2):
flowcache: Make flow cache name space aware
flowcache: Bring net/core/flow.c under IPsec maintain scope
MAINTAINERS | 1 +
include/net/flow.h | 5 +-
include/net/flowcache.h | 25 ++++++++
include/net/netns/xfrm.h | 11 ++++
net/core/flow.c | 127 ++++++++++++++++++---------------------
net/xfrm/xfrm_policy.c | 7 ++-
security/selinux/include/xfrm.h | 5 +-
7 files changed, 104 insertions(+), 77 deletions(-)
create mode 100644 include/net/flowcache.h
--
1.7.9.5
^ permalink raw reply
* [PATCHv5 net-next 6/6] xfrm: Don't prohibit AH from using ESN feature
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010068-15715-1-git-send-email-fan.du@windriver.com>
Allow AH in the check performed when a user tries to use ESN through the
netlink key manager, since ESP and AH are the only protocols that support
the ESN feature according to the RFC.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
net/xfrm/xfrm_user.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3348566..9a80aa9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -142,7 +142,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (!rt)
return 0;
- if (p->id.proto != IPPROTO_ESP)
+ /* As only ESP and AH support ESN feature. */
+ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
return -EINVAL;
if (p->replay_window != 0)
--
1.7.9.5
^ permalink raw reply related
* [PATCHv5 net-next 5/6] {IPv6,xfrm} Add ESN support for AH ingress part
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010068-15715-1-git-send-email-fan.du@windriver.com>
This patch adds ESN support to the AH input stage by attaching the upper
32 bits of the sequence number right after the packet payload, as specified
by RFC 4302. The ICV then covers the upper 32 bits of the sequence number
as well when a packet comes in.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
net/ipv6/ah6.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 8929812..6c5f094 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -530,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
@@ -566,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
- work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, hdr_len);
- icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+ auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
+ seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -588,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
+
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
--
1.7.9.5
^ permalink raw reply related
* [PATCHv5 net-next 4/6] {IPv6,xfrm} Add ESN support for AH egress part
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010068-15715-1-git-send-email-fan.du@windriver.com>
This patch adds ESN support to the AH output stage by attaching the upper
32 bits of the sequence number right after the packet payload, as specified
by RFC 4302. The ICV then covers the upper 32 bits of the sequence number
as well when a packet goes out.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
net/ipv6/ah6.c | 26 +++++++++++++++++++++-----
1 file changed, 21 insertions(+), 5 deletions(-)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 81e496a..8929812 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
+ iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
+ extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
- icv = ah_tmp_icv(ahash, iph_ext, extlen);
+ seqhi = (__be32 *)((char *)iph_ext + extlen);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
--
1.7.9.5
^ permalink raw reply related
* [PATCHv5 net-next 3/6] {IPv4,xfrm} Add ESN support for AH ingress part
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010068-15715-1-git-send-email-fan.du@windriver.com>
This patch adds ESN support to the AH input stage by attaching the upper
32 bits of the sequence number right after the packet payload, as specified
by RFC 4302. The ICV then covers the upper 32 bits of the sequence number
as well when a packet comes in.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
net/ipv4/ah4.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index c6accac..54b965d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -309,6 +309,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
int err = -ENOMEM;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(*ah)))
goto out;
@@ -349,14 +353,22 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
iph = ip_hdr(skb);
ihl = ip_hdrlen(skb);
- work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
+
+ work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl +
+ ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
- auth_data = ah_tmp_auth(work_iph, ihl);
+ seqhi = (__be32 *)((char *)work_iph + ihl);
+ auth_data = ah_tmp_auth(seqhi, seqhi_len);
icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memcpy(work_iph, iph, ihl);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
@@ -375,10 +387,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, ihl);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
--
1.7.9.5
^ permalink raw reply related
* [PATCHv5 net-next 2/6] {IPv4,xfrm} Add ESN support for AH egress part
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010068-15715-1-git-send-email-fan.du@windriver.com>
This patch adds ESN support for the AH output stage by attaching the upper
32 bits of the sequence number right after the packet payload, as specified
by RFC 4302. The ICV value will then guard the upper 32 bits of the sequence
number as well when a packet goes out.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
net/ipv4/ah4.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7179026..c6accac 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -155,6 +155,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph, *top_iph;
struct ip_auth_hdr *ah;
struct ah_data *ahp;
+ int seqhi_len = 0;
+ __be32 *seqhi;
+ int sglists = 0;
+ struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
@@ -167,14 +171,19 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah = ip_auth_hdr(skb);
ihl = ip_hdrlen(skb);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ sglists = 1;
+ seqhi_len = sizeof(*seqhi);
+ }
err = -ENOMEM;
- iph = ah_alloc_tmp(ahash, nfrags, ihl);
+ iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len);
if (!iph)
goto out;
-
- icv = ah_tmp_icv(ahash, iph, ihl);
+ seqhi = (__be32 *)((char *)iph + ihl);
+ icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
+ seqhisg = sg + nfrags;
memset(ah->auth_data, 0, ahp->icv_trunc_len);
@@ -210,10 +219,15 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ sg_init_table(sg, nfrags + sglists);
+ skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- ahash_request_set_crypt(req, sg, icv, skb->len);
+ if (x->props.flags & XFRM_STATE_ESN) {
+ /* Attach seqhi sg right after packet payload */
+ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ sg_set_buf(seqhisg, seqhi, seqhi_len);
+ }
+ ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_output_done, skb);
AH_SKB_CB(skb)->tmp = iph;
--
1.7.9.5
^ permalink raw reply related
* [PATCHv5 net-next 1/6] skbuff: Introduce skb_to_sgvec_nomark to map skb without mark new end
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
In-Reply-To: <1390010068-15715-1-git-send-email-fan.du@windriver.com>
Compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the given
sglist, without marking the sg which contains the last skb data as the end. So
the caller can manipulate the sg list at will when padding new data after the
first call, without calling sg_unmark_end to expand the sg list.
Signed-off-by: Fan Du <fan.du@windriver.com>
---
include/linux/skbuff.h | 2 ++
net/core/skbuff.c | 26 ++++++++++++++++++++++++++
2 files changed, 28 insertions(+)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f689e6..4fd28cc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -691,6 +691,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
unsigned int headroom);
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
int newtailroom, gfp_t priority);
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+ int offset, int len);
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
int len);
int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 15057d2..19387c7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3309,6 +3309,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps skb to given
+ * sglist without marking the sg which contains last skb data as the end.
+ * So the caller can manipulate sg list at will when padding new data after
+ * the first call without calling sg_unmark_end to expand sg list.
+ *
+ * Scenario to use skb_to_sgvec_nomark:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec_nomark(payload1)
+ * 3. skb_to_sgvec_nomark(payload2)
+ *
+ * This is equivalent to:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec(payload1)
+ * 3. sg_unmark_end
+ * 4. skb_to_sgvec(payload2)
+ *
+ * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
+ * is preferable.
+ */
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+ int offset, int len)
+{
+ return __skb_to_sgvec(skb, sg, offset, len);
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
+
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int nsg = __skb_to_sgvec(skb, sg, offset, len);
--
1.7.9.5
^ permalink raw reply related
* [PATCHv5 net-next 0/6] xfrm: Add ESN support for AH
From: Fan Du @ 2014-01-18 1:54 UTC (permalink / raw)
To: steffen.klassert; +Cc: davem, netdev
Hi,
This is the initial Extended Sequence Number support for AH on IPv4/6.
The rationale comes entirely from RFC 4302, which states:
3.3.3.2.2. Implicit Packet Padding and ESN
If the ESN option is elected for an SA, then the high-order 32 bits
of the ESN must be included in the ICV computation. For purposes of
ICV computation, these bits are appended (implicitly) immediately
after the end of the payload, and before any implicit packet padding.
So we attach the high-order 32 bits as a scatterlist entry right after the
packet payload to compute the ICV value.
Test:
I added a knob in iproute2/ip/xfrm_state.c to enable ESN when setting up an SA,
which makes it possible to test with-ESN and without-ESN scenarios. Both
cases work OK with ping using packet sizes (-s) from the default up to 32768.
v2:
- Patch3/5 and Patch4/5 add IPv6 part as requested by Steffen.
- Patch5/5 restrict ESN feature only to ESP and AH.
v3:
- Fix double parens spotted by Sergei, and thanks for reporting.
v4:
- Incorporate feedback from Steffen by simplifying the code flow.
- Add Patch1/6 to introduce skb_to_sgvec_nomark
- Patch2/6 removes the redundant inclusion of crypto/scatterwalk.h
v5:
- Before calling the type 'input' method, XFRM_SKB_CB(skb)->seq.input.hi
has already been set in network byte order in xfrm_input, so there is no
need to change the byte order again in the type 'input' method. So fix
Patch3/6 and Patch5/6.
Fan Du (6):
skbuff: Introduce skb_to_sgvec_nomark to map skb without mark new end
{IPv4,xfrm} Add ESN support for AH egress part
{IPv4,xfrm} Add ESN support for AH ingress part
{IPv6,xfrm} Add ESN support for AH egress part
{IPv6,xfrm} Add ESN support for AH ingress part
xfrm: Don't prohibit AH from using ESN feature
include/linux/skbuff.h | 2 ++
net/core/skbuff.c | 26 ++++++++++++++++++++++
net/ipv4/ah4.c | 53 +++++++++++++++++++++++++++++++++++----------
net/ipv6/ah6.c | 56 ++++++++++++++++++++++++++++++++++++++----------
net/xfrm/xfrm_user.c | 3 ++-
5 files changed, 117 insertions(+), 23 deletions(-)
--
1.7.9.5
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox