* [iwl next-queue PATCH 01/10] ixgbe: Drop support for macvlan specific unicast lists
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
@ 2018-04-03 21:15 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 02/10] macvlan: Rename fwd_priv to accel_priv and add accessor function Alexander Duyck
` (8 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:15 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
Drop the code for handling macvlan specific unicast lists. It isn't needed
since we don't take any efforts to maintain it when we bring the interface
up and it takes the slow path anyway.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 31 -------------------------
1 file changed, 31 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index afadba9..4d2c0ed 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4892,36 +4892,6 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
return -ENOMEM;
}
-/**
- * ixgbe_write_uc_addr_list - write unicast addresses to RAR table
- * @netdev: network interface device structure
- * @vfn: pool to associate with unicast addresses
- *
- * Writes unicast address list to the RAR table.
- * Returns: -ENOMEM on failure/insufficient address space
- * 0 on no addresses written
- * X on writing X addresses to the RAR table
- **/
-static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn)
-{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
- int count = 0;
-
- /* return ENOMEM indicating insufficient memory for addresses */
- if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn))
- return -ENOMEM;
-
- if (!netdev_uc_empty(netdev)) {
- struct netdev_hw_addr *ha;
- netdev_for_each_uc_addr(ha, netdev) {
- ixgbe_del_mac_filter(adapter, ha->addr, vfn);
- ixgbe_add_mac_filter(adapter, ha->addr, vfn);
- count++;
- }
- }
- return count;
-}
-
static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -5320,7 +5290,6 @@ static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool,
vmolr |= IXGBE_VMOLR_ROMPE;
hw->mac.ops.update_mc_addr_list(hw, dev);
}
- ixgbe_write_uc_addr_list(adapter->netdev, pool);
IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
}
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 02/10] macvlan: Rename fwd_priv to accel_priv and add accessor function
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
2018-04-03 21:15 ` [iwl next-queue PATCH 01/10] ixgbe: Drop support for macvlan specific unicast lists Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-04 16:53 ` [Intel-wired-lan] " Shannon Nelson
2018-04-03 21:16 ` [iwl next-queue PATCH 03/10] macvlan: Use software path for offloaded local, broadcast, and multicast traffic Alexander Duyck
` (7 subsequent siblings)
9 siblings, 1 reply; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
This change renames the fwd_priv member to accel_priv as this more
accurately reflects the actual purpose of this value. In addition I am
adding an accessor which will allow us to further abstract this in the
future if needed.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 +++-----
drivers/net/macvlan.c | 31 +++++++++++++++----------
include/linux/if_macvlan.h | 8 ++++++
3 files changed, 30 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4d2c0ed..c08e6b5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5390,10 +5390,9 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev,
static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
{
if (netif_is_macvlan(upper)) {
- struct macvlan_dev *dfwd = netdev_priv(upper);
- struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+ struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
- if (dfwd->fwd_priv)
+ if (vadapter)
ixgbe_fwd_ring_up(upper, vadapter);
}
@@ -8967,13 +8966,12 @@ struct upper_walk_data {
static int get_macvlan_queue(struct net_device *upper, void *_data)
{
if (netif_is_macvlan(upper)) {
- struct macvlan_dev *dfwd = netdev_priv(upper);
- struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+ struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
struct upper_walk_data *data = _data;
struct ixgbe_adapter *adapter = data->adapter;
int ifindex = data->ifindex;
- if (vadapter && vadapter->netdev->ifindex == ifindex) {
+ if (vadapter && upper->ifindex == ifindex) {
data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
data->action = data->queue;
return 1;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 725f4b4..7ddc94f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -559,9 +559,9 @@ static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
if (unlikely(netpoll_tx_running(dev)))
return macvlan_netpoll_send_skb(vlan, skb);
- if (vlan->fwd_priv) {
+ if (vlan->accel_priv) {
skb->dev = vlan->lowerdev;
- ret = dev_queue_xmit_accel(skb, vlan->fwd_priv);
+ ret = dev_queue_xmit_accel(skb, vlan->accel_priv);
} else {
ret = macvlan_queue_xmit(skb, dev);
}
@@ -613,16 +613,23 @@ static int macvlan_open(struct net_device *dev)
goto hash_add;
}
+ err = -EBUSY;
+ if (macvlan_addr_busy(vlan->port, dev->dev_addr))
+ goto out;
+
+ /* Attempt to populate accel_priv which is used to offload the L2
+ * forwarding requests for unicast packets.
+ */
if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
- vlan->fwd_priv =
+ vlan->accel_priv =
lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
/* If we get a NULL pointer back, or if we get an error
* then we should just fall through to the non accelerated path
*/
- if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
- vlan->fwd_priv = NULL;
- } else
+ if (IS_ERR_OR_NULL(vlan->accel_priv))
+ vlan->accel_priv = NULL;
+ else
return 0;
}
@@ -655,10 +662,10 @@ static int macvlan_open(struct net_device *dev)
del_unicast:
dev_uc_del(lowerdev, dev->dev_addr);
out:
- if (vlan->fwd_priv) {
+ if (vlan->accel_priv) {
lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
- vlan->fwd_priv);
- vlan->fwd_priv = NULL;
+ vlan->accel_priv);
+ vlan->accel_priv = NULL;
}
return err;
}
@@ -668,10 +675,10 @@ static int macvlan_stop(struct net_device *dev)
struct macvlan_dev *vlan = netdev_priv(dev);
struct net_device *lowerdev = vlan->lowerdev;
- if (vlan->fwd_priv) {
+ if (vlan->accel_priv) {
lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
- vlan->fwd_priv);
- vlan->fwd_priv = NULL;
+ vlan->accel_priv);
+ vlan->accel_priv = NULL;
return 0;
}
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 4cb7aee..c5106ce 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -21,7 +21,7 @@ struct macvlan_dev {
struct hlist_node hlist;
struct macvlan_port *port;
struct net_device *lowerdev;
- void *fwd_priv;
+ void *accel_priv;
struct vlan_pcpu_stats __percpu *pcpu_stats;
DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
@@ -86,4 +86,10 @@ extern void macvlan_count_rx(const struct macvlan_dev *vlan,
}
#endif
+static inline void *macvlan_accel_priv(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->accel_priv;
+}
#endif /* _LINUX_IF_MACVLAN_H */
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Intel-wired-lan] [iwl next-queue PATCH 02/10] macvlan: Rename fwd_priv to accel_priv and add accessor function
2018-04-03 21:16 ` [iwl next-queue PATCH 02/10] macvlan: Rename fwd_priv to accel_priv and add accessor function Alexander Duyck
@ 2018-04-04 16:53 ` Shannon Nelson
2018-04-04 17:33 ` Alexander Duyck
0 siblings, 1 reply; 15+ messages in thread
From: Shannon Nelson @ 2018-04-04 16:53 UTC (permalink / raw)
To: Alexander Duyck, intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
On 4/3/2018 2:16 PM, Alexander Duyck wrote:
[...]
>
> +static inline void *macvlan_accel_priv(struct net_device *dev)
> +{
> + struct macvlan_dev *macvlan = netdev_priv(dev);
> +
> + return macvlan->accel_priv;
Perhaps a check for (macvlan == NULL) before using it?
sln
> +}
> #endif /* _LINUX_IF_MACVLAN_H */
>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
>
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [Intel-wired-lan] [iwl next-queue PATCH 02/10] macvlan: Rename fwd_priv to accel_priv and add accessor function
2018-04-04 16:53 ` [Intel-wired-lan] " Shannon Nelson
@ 2018-04-04 17:33 ` Alexander Duyck
0 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-04 17:33 UTC (permalink / raw)
To: Shannon Nelson; +Cc: Alexander Duyck, intel-wired-lan, Jeff Kirsher, Netdev
On Wed, Apr 4, 2018 at 9:53 AM, Shannon Nelson
<shannon.nelson@oracle.com> wrote:
> On 4/3/2018 2:16 PM, Alexander Duyck wrote:
>
> [...]
>>
>> +static inline void *macvlan_accel_priv(struct net_device *dev)
>> +{
>> + struct macvlan_dev *macvlan = netdev_priv(dev);
>> +
>> + return macvlan->accel_priv;
>
>
> Perhaps a check for (macvlan == NULL) before using it?
> sln
>
>
The macvlan pointer cannot be NULL.The netdev_priv() function adds an
offset to the dev pointer. So I would have to be checking for a NULL
netdev. If the netdev was NULL then there was probably no point in
calling this function in the first place.
- Alex
^ permalink raw reply [flat|nested] 15+ messages in thread
* [iwl next-queue PATCH 03/10] macvlan: Use software path for offloaded local, broadcast, and multicast traffic
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
2018-04-03 21:15 ` [iwl next-queue PATCH 01/10] ixgbe: Drop support for macvlan specific unicast lists Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 02/10] macvlan: Rename fwd_priv to accel_priv and add accessor function Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-04 16:53 ` [Intel-wired-lan] " Shannon Nelson
2018-04-03 21:16 ` [iwl next-queue PATCH 04/10] ixgbe/fm10k: Drop tracking stats for macvlan broadcast/multicast Alexander Duyck
` (6 subsequent siblings)
9 siblings, 1 reply; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
This change makes it so that we use a software path for packets that are
going to be locally switched between two macvlan interfaces on the same
device. In addition we resort to software replication of broadcast and
multicast packets instead of offloading that to hardware.
The general idea is that using the device for east/west traffic local to
the system is extremely inefficient. We can only support up to whatever the
PCIe limit is for any given device so this caps us at somewhere around 20G
for devices supported by ixgbe. This is compounded even further when you
take broadcast and multicast into account as a single 10G port can come to
a crawl as a packet is replicated up to 60+ times in some cases. In order
to get away from that I am implementing changes so that we handle
broadcast/multicast replication and east/west local traffic all in
software.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 4 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 39 +++++--------------
drivers/net/macvlan.c | 47 ++++++++++-------------
3 files changed, 33 insertions(+), 57 deletions(-)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 4579349..ee645ba 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1254,7 +1254,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
glort = l2_accel->dglort + 1 + i;
hw->mac.ops.update_xcast_mode(hw, glort,
- FM10K_XCAST_MODE_MULTI);
+ FM10K_XCAST_MODE_NONE);
fm10k_queue_mac_request(interface, glort,
sdev->dev_addr,
hw->mac.default_vid, true);
@@ -1515,7 +1515,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
if (fm10k_host_mbx_ready(interface)) {
hw->mac.ops.update_xcast_mode(hw, glort,
- FM10K_XCAST_MODE_MULTI);
+ FM10K_XCAST_MODE_NONE);
fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
hw->mac.default_vid, true);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c08e6b5..6172c1b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4211,7 +4211,8 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- u32 reg_offset, vf_shift;
+ u16 pool = adapter->num_rx_pools;
+ u32 reg_offset, vf_shift, vmolr;
u32 gcr_ext, vmdctl;
int i;
@@ -4225,6 +4226,13 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
vmdctl |= IXGBE_VT_CTL_REPLEN;
IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
+ /* accept untagged packets until a vlan tag is
+ * specifically set for the VMDQ queue/pool
+ */
+ vmolr = IXGBE_VMOLR_AUPE;
+ while (pool--)
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
+
vf_shift = VMDQ_P(0) % 32;
reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0;
@@ -5271,28 +5279,6 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
spin_unlock(&adapter->fdir_perfect_lock);
}
-static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool,
- struct ixgbe_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- u32 vmolr;
-
- /* No unicast promiscuous support for VMDQ devices. */
- vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool));
- vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE);
-
- /* clear the affected bit */
- vmolr &= ~IXGBE_VMOLR_MPE;
-
- if (dev->flags & IFF_ALLMULTI) {
- vmolr |= IXGBE_VMOLR_MPE;
- } else {
- vmolr |= IXGBE_VMOLR_ROMPE;
- hw->mac.ops.update_mc_addr_list(hw, dev);
- }
- IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
-}
-
/**
* ixgbe_clean_rx_ring - Free Rx Buffers per Queue
* @rx_ring: ring to free buffers from
@@ -5376,10 +5362,8 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev,
*/
err = ixgbe_add_mac_filter(adapter, vdev->dev_addr,
VMDQ_P(accel->pool));
- if (err >= 0) {
- ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter);
+ if (err >= 0)
return 0;
- }
for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
adapter->rx_ring[baseq + i]->netdev = NULL;
@@ -9816,9 +9800,6 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr,
VMDQ_P(accel->pool));
- /* disable ability to receive packets for this pool */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(accel->pool), 0);
-
/* Allow remaining Rx packets to get flushed out of the
* Rx FIFO before we drop the netdev for the ring.
*/
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 7ddc94f..adde8fc 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -514,6 +514,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
const struct macvlan_dev *vlan = netdev_priv(dev);
const struct macvlan_port *port = vlan->port;
const struct macvlan_dev *dest;
+ void *accel_priv = NULL;
if (vlan->mode == MACVLAN_MODE_BRIDGE) {
const struct ethhdr *eth = (void *)skb->data;
@@ -533,9 +534,14 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
+ /* For packets that are non-multicast and not bridged we will pass
+ * the necessary information so that the lowerdev can distinguish
+ * the source of the packets via the accel_priv value.
+ */
+ accel_priv = vlan->accel_priv;
xmit_world:
skb->dev = vlan->lowerdev;
- return dev_queue_xmit(skb);
+ return dev_queue_xmit_accel(skb, accel_priv);
}
static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
@@ -552,19 +558,14 @@ static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, str
static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ struct macvlan_dev *vlan = netdev_priv(dev);
unsigned int len = skb->len;
int ret;
- struct macvlan_dev *vlan = netdev_priv(dev);
if (unlikely(netpoll_tx_running(dev)))
return macvlan_netpoll_send_skb(vlan, skb);
- if (vlan->accel_priv) {
- skb->dev = vlan->lowerdev;
- ret = dev_queue_xmit_accel(skb, vlan->accel_priv);
- } else {
- ret = macvlan_queue_xmit(skb, dev);
- }
+ ret = macvlan_queue_xmit(skb, dev);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
struct vlan_pcpu_stats *pcpu_stats;
@@ -620,26 +621,20 @@ static int macvlan_open(struct net_device *dev)
/* Attempt to populate accel_priv which is used to offload the L2
* forwarding requests for unicast packets.
*/
- if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
+ if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD)
vlan->accel_priv =
lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
- /* If we get a NULL pointer back, or if we get an error
- * then we should just fall through to the non accelerated path
- */
- if (IS_ERR_OR_NULL(vlan->accel_priv))
- vlan->accel_priv = NULL;
- else
- return 0;
+ /* If earlier attempt to offload failed, or accel_priv is not
+ * populated we must add the unicast address to the lower device.
+ */
+ if (IS_ERR_OR_NULL(vlan->accel_priv)) {
+ vlan->accel_priv = NULL;
+ err = dev_uc_add(lowerdev, dev->dev_addr);
+ if (err < 0)
+ goto out;
}
- err = -EBUSY;
- if (macvlan_addr_busy(vlan->port, dev->dev_addr))
- goto out;
-
- err = dev_uc_add(lowerdev, dev->dev_addr);
- if (err < 0)
- goto out;
if (dev->flags & IFF_ALLMULTI) {
err = dev_set_allmulti(lowerdev, 1);
if (err < 0)
@@ -660,13 +655,14 @@ static int macvlan_open(struct net_device *dev)
if (dev->flags & IFF_ALLMULTI)
dev_set_allmulti(lowerdev, -1);
del_unicast:
- dev_uc_del(lowerdev, dev->dev_addr);
-out:
if (vlan->accel_priv) {
lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
vlan->accel_priv);
vlan->accel_priv = NULL;
+ } else {
+ dev_uc_del(lowerdev, dev->dev_addr);
}
+out:
return err;
}
@@ -679,7 +675,6 @@ static int macvlan_stop(struct net_device *dev)
lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev,
vlan->accel_priv);
vlan->accel_priv = NULL;
- return 0;
}
dev_uc_unsync(lowerdev, dev);
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Intel-wired-lan] [iwl next-queue PATCH 03/10] macvlan: Use software path for offloaded local, broadcast, and multicast traffic
2018-04-03 21:16 ` [iwl next-queue PATCH 03/10] macvlan: Use software path for offloaded local, broadcast, and multicast traffic Alexander Duyck
@ 2018-04-04 16:53 ` Shannon Nelson
2018-04-04 17:02 ` Alexander Duyck
0 siblings, 1 reply; 15+ messages in thread
From: Shannon Nelson @ 2018-04-04 16:53 UTC (permalink / raw)
To: Alexander Duyck, intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
On 4/3/2018 2:16 PM, Alexander Duyck wrote:
> This change makes it so that we use a software path for packets that are
> going to be locally switched between two macvlan interfaces on the same
> device. In addition we resort to software replication of broadcast and
> multicast packets instead of offloading that to hardware.
>
> The general idea is that using the device for east/west traffic local to
> the system is extremely inefficient. We can only support up to whatever the
> PCIe limit is for any given device so this caps us at somewhere around 20G
> for devices supported by ixgbe. This is compounded even further when you
> take broadcast and multicast into account as a single 10G port can come to
> a crawl as a packet is replicated up to 60+ times in some cases. In order
> to get away from that I am implementing changes so that we handle
> broadcast/multicast replication and east/west local traffic all in
> software.
>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> ---
[...]
>
> @@ -4225,6 +4226,13 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
> vmdctl |= IXGBE_VT_CTL_REPLEN;
> IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
>
> + /* accept untagged packets until a vlan tag is
> + * specifically set for the VMDQ queue/pool
> + */
> + vmolr = IXGBE_VMOLR_AUPE;
> + while (pool--)
> + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
> +
It took me a bit to figure out what untagged packets have to do with
macvlan config. You might add to the comment that "no multicast or
broadcast is configured for these queues".
The driver part of this might be separated into a follow-on patch to
make it clearer that this is done as a consequence of the macvlan.c
changes. Or just roll them into your 4/10 patch.
sln
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Intel-wired-lan] [iwl next-queue PATCH 03/10] macvlan: Use software path for offloaded local, broadcast, and multicast traffic
2018-04-04 16:53 ` [Intel-wired-lan] " Shannon Nelson
@ 2018-04-04 17:02 ` Alexander Duyck
0 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-04 17:02 UTC (permalink / raw)
To: Shannon Nelson; +Cc: Alexander Duyck, intel-wired-lan, Jeff Kirsher, Netdev
On Wed, Apr 4, 2018 at 9:53 AM, Shannon Nelson
<shannon.nelson@oracle.com> wrote:
> On 4/3/2018 2:16 PM, Alexander Duyck wrote:
>>
>> This change makes it so that we use a software path for packets that are
>> going to be locally switched between two macvlan interfaces on the same
>> device. In addition we resort to software replication of broadcast and
>> multicast packets instead of offloading that to hardware.
>>
>> The general idea is that using the device for east/west traffic local to
>> the system is extremely inefficient. We can only support up to whatever
>> the
>> PCIe limit is for any given device so this caps us at somewhere around 20G
>> for devices supported by ixgbe. This is compounded even further when you
>> take broadcast and multicast into account as a single 10G port can come to
>> a crawl as a packet is replicated up to 60+ times in some cases. In order
>> to get away from that I am implementing changes so that we handle
>> broadcast/multicast replication and east/west local traffic all in
>> software.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
>> ---
>
>
> [...]
>
>> @@ -4225,6 +4226,13 @@ static void ixgbe_configure_virtualization(struct
>> ixgbe_adapter *adapter)
>> vmdctl |= IXGBE_VT_CTL_REPLEN;
>> IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
>> + /* accept untagged packets until a vlan tag is
>> + * specifically set for the VMDQ queue/pool
>> + */
>> + vmolr = IXGBE_VMOLR_AUPE;
>> + while (pool--)
>> + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
>> +
>
>
> It took me a bit to figure out what untagged packets have to do with macvlan
> config. You might add to the comment that "no multicast or broadcast is
> configured for these queues".
Yeah. I can probably address that in the next round of patches.
Specifically here all we are accepting are untagged unicast packets.
We cannot perform the offload through a VLAN since it would require
passing the VLAN along with the L2 MAC address. It is something I plan
to address in the near future so that macvlan interfaces stacked on
top of a VLAN can still be offloaded.
> The driver part of this might be separated into a follow-on patch to make it
> clearer that this is done as a consequence of the macvlan.c changes. Or
> just roll them into your 4/10 patch.
>
> sln
Actually part of the reason for keeping this all as one thing is
because if I split the macvlan bits from the driver bits then things
are broken until both patches are applied.
I had actually split patch 4/10 out from this patch since it is one of
the few things that could safely be pulled out in order to try and
reduce the overhead for this patch.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [iwl next-queue PATCH 04/10] ixgbe/fm10k: Drop tracking stats for macvlan broadcast/multicast
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (2 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 03/10] macvlan: Use software path for offloaded local, broadcast, and multicast traffic Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 05/10] macvlan: macvlan_count_rx shouldn't be static inline AND extern Alexander Duyck
` (5 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
Drop dead code now that we shouldn't be receiving broadcast or multicast
frames on the queues associated to the macvlan netdev.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/fm10k/fm10k_main.c | 7 +++----
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++----
2 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index df86070..c51d61f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -445,15 +445,14 @@ static void fm10k_type_trans(struct fm10k_ring *rx_ring,
l2_accel = NULL;
}
- skb->protocol = eth_type_trans(skb, dev);
-
/* Record Rx queue, or update macvlan statistics */
if (!l2_accel)
skb_record_rx_queue(skb, rx_ring->queue_index);
else
macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
- (skb->pkt_type == PACKET_BROADCAST) ||
- (skb->pkt_type == PACKET_MULTICAST));
+ false);
+
+ skb->protocol = eth_type_trans(skb, dev);
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6172c1b..2c5100e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1768,15 +1768,14 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
- skb->protocol = eth_type_trans(skb, dev);
-
/* record Rx queue, or update MACVLAN statistics */
if (netif_is_ixgbe(dev))
skb_record_rx_queue(skb, rx_ring->queue_index);
else
macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
- (skb->pkt_type == PACKET_BROADCAST) ||
- (skb->pkt_type == PACKET_MULTICAST));
+ false);
+
+ skb->protocol = eth_type_trans(skb, dev);
}
static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 05/10] macvlan: macvlan_count_rx shouldn't be static inline AND extern
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (3 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 04/10] ixgbe/fm10k: Drop tracking stats for macvlan broadcast/multicast Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 06/10] macvlan: Add function to test for destination filtering support Alexander Duyck
` (4 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
It doesn't make sense to define macvlan_count_rx as a static inline and
then add a forward declaration after that as an extern. I am dropping the
extern declaration since it seems like it is something that likely got
missed when the function was made an inline.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
include/linux/if_macvlan.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index c5106ce..80089d6 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -61,10 +61,6 @@ extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack);
-extern void macvlan_count_rx(const struct macvlan_dev *vlan,
- unsigned int len, bool success,
- bool multicast);
-
extern void macvlan_dellink(struct net_device *dev, struct list_head *head);
extern int macvlan_link_register(struct rtnl_link_ops *ops);
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 06/10] macvlan: Add function to test for destination filtering support
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (4 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 05/10] macvlan: macvlan_count_rx shouldn't be static inline AND extern Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 07/10] macvlan: Provide function for interfaces to release HW offload Alexander Duyck
` (3 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
This patch adds a function indicating if a given macvlan can fully supports
destination filtering, especially as it relates to unicast traffic. For
those macvlan interfaces that do not support destination filtering such
passthru or source mode filtering we should not be enabling offload
support.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
include/linux/if_macvlan.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 80089d6..0221390 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -88,4 +88,13 @@ static inline void *macvlan_accel_priv(struct net_device *dev)
return macvlan->accel_priv;
}
+
+static inline bool macvlan_supports_dest_filter(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ return macvlan->mode == MACVLAN_MODE_PRIVATE ||
+ macvlan->mode == MACVLAN_MODE_VEPA ||
+ macvlan->mode == MACVLAN_MODE_BRIDGE;
+}
#endif /* _LINUX_IF_MACVLAN_H */
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 07/10] macvlan: Provide function for interfaces to release HW offload
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (5 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 06/10] macvlan: Add function to test for destination filtering support Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 08/10] ixgbe/fm10k: Only support macvlan offload for types that support destination filtering Alexander Duyck
` (2 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
This patch provides a basic function to allow a lower device to disable
macvlan offload if it was previously enabled on a given macvlan. The idea
here is to allow for recovery from failure should the lowerdev run out of
resources.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
include/linux/if_macvlan.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 0221390..2e55e4c 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -97,4 +97,12 @@ static inline bool macvlan_supports_dest_filter(struct net_device *dev)
macvlan->mode == MACVLAN_MODE_VEPA ||
macvlan->mode == MACVLAN_MODE_BRIDGE;
}
+
+static inline int macvlan_release_l2fw_offload(struct net_device *dev)
+{
+ struct macvlan_dev *macvlan = netdev_priv(dev);
+
+ macvlan->accel_priv = NULL;
+ return dev_uc_add(macvlan->lowerdev, dev->dev_addr);
+}
#endif /* _LINUX_IF_MACVLAN_H */
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 08/10] ixgbe/fm10k: Only support macvlan offload for types that support destination filtering
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (6 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 07/10] macvlan: Provide function for interfaces to release HW offload Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 09/10] ixgbe: Drop real_adapter from l2 fwd acceleration structure Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 10/10] ixgbe: Avoid performing unnecessary resets for macvlan offload Alexander Duyck
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
Both the ixgbe and fm10k drivers support destination filtering.
Instead of adding a ton of complexity to support either source or passthru
mode we can instead just avoid offloading them for now. Doing this we avoid
leaking packets into interfaces that aren't meant to receive them.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 8 ++++++++
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++++++
2 files changed, 15 insertions(+)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index ee645ba..26e7497 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -22,6 +22,7 @@
#include "fm10k.h"
#include <linux/vmalloc.h>
#include <net/udp_tunnel.h>
+#include <linux/if_macvlan.h>
/**
* fm10k_setup_tx_resources - allocate Tx resources (Descriptors)
@@ -1449,6 +1450,13 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
int size = 0, i;
u16 glort;
+ /* The hardware supported by fm10k only filters on the destination MAC
+ * address. In order to avoid issues we only support offloading modes
+ * where the hardware can actually provide the functionality.
+ */
+ if (!macvlan_supports_dest_filter(sdev))
+ return ERR_PTR(-EMEDIUMTYPE);
+
/* allocate l2 accel structure if it is not available */
if (!l2_accel) {
/* verify there is enough free GLORTs to support l2_accel */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2c5100e..7c0f310 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9744,6 +9744,13 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
unsigned int limit;
int pool, err;
+ /* The hardware supported by ixgbe only filters on the destination MAC
+ * address. In order to avoid issues we only support offloading modes
+ * where the hardware can actually provide the functionality.
+ */
+ if (!macvlan_supports_dest_filter(vdev))
+ return ERR_PTR(-EMEDIUMTYPE);
+
/* Hardware has a limited number of available pools. Each VF, and the
* PF require a pool. Check to ensure we don't attempt to use more
* then the available number of pools.
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 09/10] ixgbe: Drop real_adapter from l2 fwd acceleration structure
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (7 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 08/10] ixgbe/fm10k: Only support macvlan offload for types that support destination filtering Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
2018-04-03 21:16 ` [iwl next-queue PATCH 10/10] ixgbe: Avoid performing unnecessary resets for macvlan offload Alexander Duyck
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
This patch drops the real_adapter member from the fwd_adapter structure.
The general idea behind the change is that the real_adapter is carrying
unnecessary data since we could always just grab the adapter structure
from netdev_priv(macvlan->lowerdev) if we really needed to get at it.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 -
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 28 ++++++++++++++-----------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 4f08c71..238137c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -306,7 +306,6 @@ enum ixgbe_ring_state_t {
struct ixgbe_fwd_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
struct net_device *netdev;
- struct ixgbe_adapter *real_adapter;
unsigned int tx_base_queue;
unsigned int rx_base_queue;
int pool;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 7c0f310..56772d6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5330,10 +5330,10 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
rx_ring->next_to_use = 0;
}
-static int ixgbe_fwd_ring_up(struct net_device *vdev,
+static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
struct ixgbe_fwd_adapter *accel)
{
- struct ixgbe_adapter *adapter = accel->real_adapter;
+ struct net_device *vdev = accel->netdev;
int i, baseq, err;
if (!test_bit(accel->pool, adapter->fwd_bitmask))
@@ -5370,14 +5370,19 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev,
return err;
}
-static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
+static int ixgbe_macvlan_up(struct net_device *vdev, void *data)
{
- if (netif_is_macvlan(upper)) {
- struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
+ struct ixgbe_adapter *adapter = data;
+ struct ixgbe_fwd_adapter *accel;
- if (vadapter)
- ixgbe_fwd_ring_up(upper, vadapter);
- }
+ if (!netif_is_macvlan(vdev))
+ return 0;
+
+ accel = macvlan_accel_priv(vdev);
+ if (!accel)
+ return 0;
+
+ ixgbe_fwd_ring_up(adapter, accel);
return 0;
}
@@ -5385,7 +5390,7 @@ static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
{
netdev_walk_all_upper_dev_rcu(adapter->netdev,
- ixgbe_upper_dev_walk, NULL);
+ ixgbe_macvlan_up, adapter);
}
static void ixgbe_configure(struct ixgbe_adapter *adapter)
@@ -9776,13 +9781,12 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
fwd_adapter->pool = pool;
- fwd_adapter->real_adapter = adapter;
/* Force reinit of ring allocation with VMDQ enabled */
err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
if (!err && netif_running(pdev))
- err = ixgbe_fwd_ring_up(vdev, fwd_adapter);
+ err = ixgbe_fwd_ring_up(adapter, fwd_adapter);
if (!err)
return fwd_adapter;
@@ -9798,7 +9802,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
{
struct ixgbe_fwd_adapter *accel = priv;
- struct ixgbe_adapter *adapter = accel->real_adapter;
+ struct ixgbe_adapter *adapter = netdev_priv(pdev);
unsigned int rxbase = accel->rx_base_queue;
unsigned int limit, i;
^ permalink raw reply related [flat|nested] 15+ messages in thread* [iwl next-queue PATCH 10/10] ixgbe: Avoid performing unnecessary resets for macvlan offload
2018-04-03 21:15 [iwl next-queue PATCH 00/10] Clean-up macvlan offloading Alexander Duyck
` (8 preceding siblings ...)
2018-04-03 21:16 ` [iwl next-queue PATCH 09/10] ixgbe: Drop real_adapter from l2 fwd acceleration structure Alexander Duyck
@ 2018-04-03 21:16 ` Alexander Duyck
9 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2018-04-03 21:16 UTC (permalink / raw)
To: intel-wired-lan, jeffrey.t.kirsher; +Cc: netdev
The original implementation for macvlan offload has us performing a full
port reset every time we added a new macvlan. This shouldn't be necessary
and can be avoided with a few behavior changes.
This patches updates the logic for the queues so that we have essentially 3
possible configurations for macvlan offload. They consist of 15 macvlans
with 4 queues per macvlan, 31 macvlans with 2 queues per macvlan, and 63
macvlans with 1 queue per macvlan. As macvlans are added you will encounter
up to 3 total resets if you add all the way up to 63, and after that the
device will stay in the mode supporting up to 63 macvlans until the L2FW
flag is cleared.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 192 +++++++++++++++++-------
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 5 -
2 files changed, 135 insertions(+), 62 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 56772d6..01c95bf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5336,15 +5336,11 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
struct net_device *vdev = accel->netdev;
int i, baseq, err;
- if (!test_bit(accel->pool, adapter->fwd_bitmask))
- return 0;
-
baseq = accel->pool * adapter->num_rx_queues_per_pool;
netdev_dbg(vdev, "pool %i:%i queues %i:%i\n",
accel->pool, adapter->num_rx_pools,
baseq, baseq + adapter->num_rx_queues_per_pool);
- accel->netdev = vdev;
accel->rx_base_queue = baseq;
accel->tx_base_queue = baseq;
@@ -5364,9 +5360,17 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
if (err >= 0)
return 0;
+ /* if we cannot add the MAC rule then disable the offload */
+ macvlan_release_l2fw_offload(vdev);
+
for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
adapter->rx_ring[baseq + i]->netdev = NULL;
+ netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
+
+ clear_bit(accel->pool, adapter->fwd_bitmask);
+ kfree(accel);
+
return err;
}
@@ -8783,6 +8787,49 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter)
}
#endif /* CONFIG_IXGBE_DCB */
+static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
+{
+ struct ixgbe_adapter *adapter = data;
+ struct ixgbe_fwd_adapter *accel;
+ int pool;
+
+ /* we only care about macvlans... */
+ if (!netif_is_macvlan(vdev))
+ return 0;
+
+ /* that have hardware offload enabled... */
+ accel = macvlan_accel_priv(vdev);
+ if (!accel)
+ return 0;
+
+ /* If we can relocate to a different bit do so */
+ pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+ if (pool < adapter->num_rx_pools) {
+ set_bit(pool, adapter->fwd_bitmask);
+ accel->pool = pool;
+ return 0;
+ }
+
+ /* if we cannot find a free pool then disable the offload */
+ netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
+ macvlan_release_l2fw_offload(vdev);
+ kfree(accel);
+
+ return 0;
+}
+
+static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+ /* flush any stale bits out of the fwd bitmask */
+ bitmap_clear(adapter->fwd_bitmask, 1, 63);
+
+ /* walk through upper devices reassigning pools */
+ netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool,
+ adapter);
+}
+
/**
* ixgbe_setup_tc - configure net_device for multiple traffic classes
*
@@ -8850,6 +8897,8 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
#endif /* CONFIG_IXGBE_DCB */
ixgbe_init_interrupt_scheme(adapter);
+ ixgbe_defrag_macvlan_pools(dev);
+
if (netif_running(dev))
return ixgbe_open(dev);
@@ -9399,6 +9448,22 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
return features;
}
+static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter)
+{
+ int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+ num_online_cpus());
+
+ /* go back to full RSS if we're not running SR-IOV */
+ if (!adapter->ring_feature[RING_F_VMDQ].offset)
+ adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED |
+ IXGBE_FLAG_SRIOV_ENABLED);
+
+ adapter->ring_feature[RING_F_RSS].limit = rss;
+ adapter->ring_feature[RING_F_VMDQ].limit = 1;
+
+ ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs);
+}
+
static int ixgbe_set_features(struct net_device *netdev,
netdev_features_t features)
{
@@ -9479,7 +9544,9 @@ static int ixgbe_set_features(struct net_device *netdev,
}
}
- if (need_reset)
+ if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1)
+ ixgbe_reset_l2fw_offload(adapter);
+ else if (need_reset)
ixgbe_do_reset(netdev);
else if (changed & (NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER))
@@ -9742,11 +9809,9 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
{
- struct ixgbe_fwd_adapter *fwd_adapter = NULL;
struct ixgbe_adapter *adapter = netdev_priv(pdev);
- int used_pools = adapter->num_vfs + adapter->num_rx_pools;
+ struct ixgbe_fwd_adapter *accel;
int tcs = adapter->hw_tcs ? : 1;
- unsigned int limit;
int pool, err;
/* The hardware supported by ixgbe only filters on the destination MAC
@@ -9756,47 +9821,73 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
if (!macvlan_supports_dest_filter(vdev))
return ERR_PTR(-EMEDIUMTYPE);
- /* Hardware has a limited number of available pools. Each VF, and the
- * PF require a pool. Check to ensure we don't attempt to use more
- * then the available number of pools.
- */
- if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
- return ERR_PTR(-EINVAL);
+ pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+ if (pool == adapter->num_rx_pools) {
+ u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
+ u16 reserved_pools;
+
+ if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+ adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
+ adapter->num_rx_pools > IXGBE_MAX_MACVLANS)
+ return ERR_PTR(-EBUSY);
+
+ /* Hardware has a limited number of available pools. Each VF,
+ * and the PF require a pool. Check to ensure we don't
+ * attempt to use more then the available number of pools.
+ */
+ if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
+ return ERR_PTR(-EBUSY);
- if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
- adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
- (adapter->num_rx_pools > IXGBE_MAX_MACVLANS))
- return ERR_PTR(-EBUSY);
+ /* Enable VMDq flag so device will be set in VM mode */
+ adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED |
+ IXGBE_FLAG_SRIOV_ENABLED;
- fwd_adapter = kzalloc(sizeof(*fwd_adapter), GFP_KERNEL);
- if (!fwd_adapter)
- return ERR_PTR(-ENOMEM);
+ /* Try to reserve as many queues per pool as possible,
+ * we start with the configurations that support 4 queues
+ * per pools, followed by 2, and then by just 1 per pool.
+ */
+ if (used_pools < 32 && adapter->num_rx_pools < 16)
+ reserved_pools = min_t(u16,
+ 32 - used_pools,
+ 16 - adapter->num_rx_pools);
+ else if (adapter->num_rx_pools < 32)
+ reserved_pools = min_t(u16,
+ 64 - used_pools,
+ 32 - adapter->num_rx_pools);
+ else
+ reserved_pools = 64 - used_pools;
- pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
- set_bit(pool, adapter->fwd_bitmask);
- limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools + 1);
- /* Enable VMDq flag so device will be set in VM mode */
- adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED;
- adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
+ if (!reserved_pools)
+ return ERR_PTR(-EBUSY);
- fwd_adapter->pool = pool;
+ adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools;
- /* Force reinit of ring allocation with VMDQ enabled */
- err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+ /* Force reinit of ring allocation with VMDQ enabled */
+ err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+ if (err)
+ return ERR_PTR(err);
- if (!err && netif_running(pdev))
- err = ixgbe_fwd_ring_up(adapter, fwd_adapter);
+ if (pool >= adapter->num_rx_pools)
+ return ERR_PTR(-ENOMEM);
+ }
- if (!err)
- return fwd_adapter;
+ accel = kzalloc(sizeof(*accel), GFP_KERNEL);
+ if (!accel)
+ return ERR_PTR(-ENOMEM);
+
+ set_bit(pool, adapter->fwd_bitmask);
+ accel->pool = pool;
+ accel->netdev = vdev;
- /* unwind counter and free adapter struct */
- netdev_info(pdev,
- "%s: dfwd hardware acceleration failed\n", vdev->name);
- clear_bit(pool, adapter->fwd_bitmask);
- kfree(fwd_adapter);
- return ERR_PTR(err);
+ if (!netif_running(pdev))
+ return accel;
+
+ err = ixgbe_fwd_ring_up(adapter, accel);
+ if (err)
+ return ERR_PTR(err);
+
+ return accel;
}
static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
@@ -9804,7 +9895,7 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
struct ixgbe_fwd_adapter *accel = priv;
struct ixgbe_adapter *adapter = netdev_priv(pdev);
unsigned int rxbase = accel->rx_base_queue;
- unsigned int limit, i;
+ unsigned int i;
/* delete unicast filter associated with offloaded interface */
ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr,
@@ -9828,25 +9919,6 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
}
clear_bit(accel->pool, adapter->fwd_bitmask);
- limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
- adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
-
- /* go back to full RSS if we're done with our VMQs */
- if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
- int rss = min_t(int, ixgbe_max_rss_indices(adapter),
- num_online_cpus());
-
- adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
- adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
- adapter->ring_feature[RING_F_RSS].limit = rss;
- }
-
- ixgbe_setup_tc(pdev, adapter->hw_tcs);
- netdev_dbg(pdev, "pool %i:%i queues %i:%i\n",
- accel->pool, adapter->num_rx_pools,
- accel->rx_base_queue,
- accel->rx_base_queue +
- adapter->num_rx_queues_per_pool);
kfree(accel);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 008aa07..bfc4171 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -266,7 +266,7 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
#endif
/* Disable VMDq flag so device will be set in VM mode */
- if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+ if (bitmap_weight(adapter->fwd_bitmask, adapter->num_rx_pools) == 1) {
adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
rss = min_t(int, ixgbe_max_rss_indices(adapter),
@@ -312,7 +312,8 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
* other values out of range.
*/
num_tc = adapter->hw_tcs;
- num_rx_pools = adapter->num_rx_pools;
+ num_rx_pools = bitmap_weight(adapter->fwd_bitmask,
+ adapter->num_rx_pools);
limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC :
(num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC;
^ permalink raw reply related [flat|nested] 15+ messages in thread