* [PATCH v7 16/17] net/i40e: flush ethertype filters
From: Beilei Xing @ 2017-01-06 5:27 UTC (permalink / raw)
To: jingjing.wu, helin.zhang; +Cc: dev
In-Reply-To: <1483680439-82227-1-git-send-email-beilei.xing@intel.com>
This patch adds i40e_flow_flush_ethertype_filter
function to flush all ethertype filters, including
filters in SW and HW.
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
---
drivers/net/i40e/i40e_flow.c | 41 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c
index e425fe8..dc5e655 100644
--- a/drivers/net/i40e/i40e_flow.c
+++ b/drivers/net/i40e/i40e_flow.c
@@ -123,6 +123,7 @@ static int i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf,
static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
struct i40e_tunnel_filter *filter);
static int i40e_flow_flush_fdir_filter(struct i40e_pf *pf);
+static int i40e_flow_flush_ethertype_filter(struct i40e_pf *pf);
const struct rte_flow_ops i40e_flow_ops = {
.validate = i40e_flow_validate,
@@ -1732,10 +1733,20 @@ i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
int ret;
ret = i40e_flow_flush_fdir_filter(pf);
- if (ret)
+ if (ret) {
rte_flow_error_set(error, -ret,
RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
"Failed to flush FDIR flows.");
+ return -rte_errno;
+ }
+
+ ret = i40e_flow_flush_ethertype_filter(pf);
+ if (ret) {
+ rte_flow_error_set(error, -ret,
+ RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "Failed to ethertype flush flows.");
+ return -rte_errno;
+ }
return ret;
}
@@ -1771,3 +1782,31 @@ i40e_flow_flush_fdir_filter(struct i40e_pf *pf)
return ret;
}
+
+/* Flush all ethertype filters */
+static int
+i40e_flow_flush_ethertype_filter(struct i40e_pf *pf)
+{
+ struct i40e_ethertype_filter_list
+ *ethertype_list = &pf->ethertype.ethertype_list;
+ struct i40e_ethertype_filter *filter;
+ struct rte_flow *flow;
+ void *temp;
+ int ret = 0;
+
+ while ((filter = TAILQ_FIRST(ethertype_list))) {
+ ret = i40e_flow_destroy_ethertype_filter(pf, filter);
+ if (ret)
+ return ret;
+ }
+
+ /* Delete ethertype flows in flow list. */
+ TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) {
+ if (flow->filter_type == RTE_ETH_FILTER_ETHERTYPE) {
+ TAILQ_REMOVE(&pf->flow_list, flow, node);
+ rte_free(flow);
+ }
+ }
+
+ return ret;
+}
--
2.5.5
^ permalink raw reply related
* [PATCH v7 17/17] net/i40e: flush tunnel filters
From: Beilei Xing @ 2017-01-06 5:27 UTC (permalink / raw)
To: jingjing.wu, helin.zhang; +Cc: dev
In-Reply-To: <1483680439-82227-1-git-send-email-beilei.xing@intel.com>
This patch adds i40e_flow_flush_tunnel_filter
function to flush all tunnel filters, including
filters in SW and HW.
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
---
drivers/net/i40e/i40e_flow.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c
index dc5e655..76bb332 100644
--- a/drivers/net/i40e/i40e_flow.c
+++ b/drivers/net/i40e/i40e_flow.c
@@ -124,6 +124,7 @@ static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
struct i40e_tunnel_filter *filter);
static int i40e_flow_flush_fdir_filter(struct i40e_pf *pf);
static int i40e_flow_flush_ethertype_filter(struct i40e_pf *pf);
+static int i40e_flow_flush_tunnel_filter(struct i40e_pf *pf);
const struct rte_flow_ops i40e_flow_ops = {
.validate = i40e_flow_validate,
@@ -1748,6 +1749,14 @@ i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
return -rte_errno;
}
+ ret = i40e_flow_flush_tunnel_filter(pf);
+ if (ret) {
+ rte_flow_error_set(error, -ret,
+ RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "Failed to flush tunnel flows.");
+ return -rte_errno;
+ }
+
return ret;
}
@@ -1810,3 +1819,31 @@ i40e_flow_flush_ethertype_filter(struct i40e_pf *pf)
return ret;
}
+
+/* Flush all tunnel filters */
+static int
+i40e_flow_flush_tunnel_filter(struct i40e_pf *pf)
+{
+ struct i40e_tunnel_filter_list
+ *tunnel_list = &pf->tunnel.tunnel_list;
+ struct i40e_tunnel_filter *filter;
+ struct rte_flow *flow;
+ void *temp;
+ int ret = 0;
+
+ while ((filter = TAILQ_FIRST(tunnel_list))) {
+ ret = i40e_flow_destroy_tunnel_filter(pf, filter);
+ if (ret)
+ return ret;
+ }
+
+ /* Delete tunnel flows in flow list. */
+ TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) {
+ if (flow->filter_type == RTE_ETH_FILTER_TUNNEL) {
+ TAILQ_REMOVE(&pf->flow_list, flow, node);
+ rte_free(flow);
+ }
+ }
+
+ return ret;
+}
--
2.5.5
^ permalink raw reply related
* Re: [PATCH v5 00/12] Introducing EAL Bus-Device-Driver Model
From: Shreyansh Jain @ 2017-01-06 6:27 UTC (permalink / raw)
To: Thomas Monjalon; +Cc: david.marchand, dev
In-Reply-To: <8998736.nqQJbzs4a4@xps13>
On Wednesday 04 January 2017 03:52 AM, Thomas Monjalon wrote:
> 2016-12-26 18:53, Shreyansh Jain:
>> Link to v1: [10]
>> Link to v2: [11]
>> Link to v3: [13]
>> Link to v4: [14]
>>
>> :: Introduction ::
>>
>> DPDK has been inherently a PCI inclined framework. Because of this, the
>> design of device tree (or list) within DPDK is also PCI inclined. A
>> non-PCI device doesn't have a way of being expressed without using hooks
>> started from EAL to PMD.
>
> It is a very important work to make DPDK growing.
>
> I am sorry to not have done a lot of public comments before today.
> I have sent some thoughts about moving some things from generic objects to
> specialized ones. I think they are not so much big changes in your work
> and I hope we could converge to something in the git tree really soon.
Thanks a lot for spending time on this.
I will reply to all your emails, with updated code if possible,
by the end of today (6 Jan).
>
> Thanks Shreyansh.
>
> PS: reviews from others are more than welcome!
>
Yes, please. This change impacts the framework very widely (even though
the number of lines changed is small). More reviews would really help.
^ permalink raw reply
* Re: [PATCH v7 18/27] app/testpmd: use VFD APIs on i40e
From: Lu, Wenzhuo @ 2017-01-06 7:23 UTC (permalink / raw)
To: Wu, Jingjing, dev@dpdk.org; +Cc: Chen, Jing D, Iremonger, Bernard
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC3DCB@SHSMSX103.ccr.corp.intel.com>
Hi Jingjing,
> -----Original Message-----
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 9:16 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Lu, Wenzhuo; Chen, Jing D; Iremonger, Bernard
> Subject: RE: [dpdk-dev] [PATCH v7 18/27] app/testpmd: use VFD APIs on i40e
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Chen, Jing D
> > <jing.d.chen@intel.com>; Iremonger, Bernard
> > <bernard.iremonger@intel.com>
> > Subject: [dpdk-dev] [PATCH v7 18/27] app/testpmd: use VFD APIs on i40e
> >
> > The new VF Daemon (VFD) APIs is implemented on i40e. Change testpmd
> > code to use them, including VF MAC anti-spoofing, VF VLAN
> > anti-spoofing, TX loopback, VF VLAN strip, VF VLAN insert.
> >
> > Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> > Signed-off-by: Chen Jing D(Mark) <jing.d.chen@intel.com>
> > Signed-off-by: Bernard Iremonger <bernard.iremonger@intel.com>
> > ---
> > app/test-pmd/Makefile | 3 +
> > app/test-pmd/cmdline.c | 154
> +++++++++++++++++++++++++++++++++++++++-
> > ---------
> > 2 files changed, 126 insertions(+), 31 deletions(-)
> >
> > diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile index
> > 891b85a..66bd38a 100644
> > --- a/app/test-pmd/Makefile
> > +++ b/app/test-pmd/Makefile
> > @@ -58,7 +58,10 @@ SRCS-y += csumonly.c SRCS-y += icmpecho.c
> > SRCS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ieee1588fwd.c
> >
> > +ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y)
> > _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
> > +_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e endif
> >
> > CFLAGS_cmdline.o := -D_GNU_SOURCE
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > ed84d7a..9a44b4f 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -90,6 +90,9 @@
> > #ifdef RTE_LIBRTE_IXGBE_PMD
> > #include <rte_pmd_ixgbe.h>
> > #endif
> > +#ifdef RTE_LIBRTE_I40E_PMD
> > +#include <rte_pmd_i40e.h>
> > +#endif
> > #include "testpmd.h"
> >
> > static struct cmdline *testpmd_cl;
> > @@ -262,19 +265,19 @@ static void cmd_help_long_parsed(void
> > *parsed_result,
> > "set portlist (x[,y]*)\n"
> > " Set the list of forwarding ports.\n\n"
> >
> > -#ifdef RTE_LIBRTE_IXGBE_PMD
>
> How about use
> #if defined(RTE_LIBRTE_IXGBE_PMD) || defined (RTE_LIBRTE_I40E_PMD) but
> not remove it, because this command only works for ixgbe and i40e pmd.
This is on purpose. Just as with the regular CLIs, we show these commands whether or not the NIC supports them. We will check for support when processing the commands.
>
> > "set tx loopback (port_id) (on|off)\n"
> > " Enable or disable tx loopback.\n\n"
> >
> > +#ifdef RTE_LIBRTE_IXGBE_PMD
> > "set all queues drop (port_id) (on|off)\n"
> > " Set drop enable bit for all queues.\n\n"
> >
> > "set vf split drop (port_id) (vf_id) (on|off)\n"
> > " Set split drop enable bit for a VF from the PF.\n\n"
> > +#endif
> >
> > "set vf mac antispoof (port_id) (vf_id) (on|off).\n"
> > " Set MAC antispoof for a VF from the PF.\n\n"
> > -#endif
> >
^ permalink raw reply
* Re: [PATCH v7 19/27] app/testpmd: use unicast promiscuous mode on i40e
From: Lu, Wenzhuo @ 2017-01-06 7:25 UTC (permalink / raw)
To: Wu, Jingjing, dev@dpdk.org
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC3DFB@SHSMSX103.ccr.corp.intel.com>
Hi Jingjing,
> -----Original Message-----
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 9:22 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Lu, Wenzhuo
> Subject: RE: [dpdk-dev] [PATCH v7 19/27] app/testpmd: use unicast
> promiscuous mode on i40e
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>
> > Subject: [dpdk-dev] [PATCH v7 19/27] app/testpmd: use unicast
> > promiscuous mode on i40e
> >
> > Add testpmd CLI to set VF unicast promiscuous mode on i40e.
> >
> > Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> > ---
> > app/test-pmd/cmdline.c | 93 +++++++++++++++++++++++++++++
> > doc/guides/testpmd_app_ug/testpmd_funcs.rst | 9 +++
> > 2 files changed, 102 insertions(+)
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > 9a44b4f..affe9d1 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -400,6 +400,9 @@ static void cmd_help_long_parsed(void
> *parsed_result,
> > "set allmulti (port_id|all) (on|off)\n"
> > " Set the allmulti mode on port_id, or all.\n\n"
> >
> > + "set vf promisc (port_id) (vf_id) (on|off)\n"
> > + " Set unicast promiscuous mode for a VF from the
> > PF.\n\n"
> > +
> > "set flow_ctrl rx (on|off) tx (on|off) (high_water)"
> > " (low_water) (pause_time) (send_xon)
> mac_ctrl_frame_fwd"
> > " (on|off) autoneg (on|off) (port_id)\n"
> > @@ -11559,6 +11562,95 @@ struct cmd_set_vf_mac_addr_result {
> > },
> > };
> >
> > +/* VF unicast promiscuous mode configuration */
> > +
> > +/* Common result structure for VF unicast promiscuous mode */ struct
> > +cmd_vf_promisc_result {
> > + cmdline_fixed_string_t set;
> > + cmdline_fixed_string_t vf;
> > + cmdline_fixed_string_t promisc;
> > + uint8_t port_id;
> > + uint32_t vf_id;
> > + cmdline_fixed_string_t on_off;
> > +};
> > +
> > +/* Common CLI fields for VF unicast promiscuous mode enable disable
> > +*/ cmdline_parse_token_string_t cmd_vf_promisc_set =
> > + TOKEN_STRING_INITIALIZER
> > + (struct cmd_vf_promisc_result,
> > + set, "set");
> > +cmdline_parse_token_string_t cmd_vf_promisc_vf =
> > + TOKEN_STRING_INITIALIZER
> > + (struct cmd_vf_promisc_result,
> > + vf, "vf");
> > +cmdline_parse_token_string_t cmd_vf_promisc_promisc =
> > + TOKEN_STRING_INITIALIZER
> > + (struct cmd_vf_promisc_result,
> > + promisc, "promisc");
> > +cmdline_parse_token_num_t cmd_vf_promisc_port_id =
> > + TOKEN_NUM_INITIALIZER
> > + (struct cmd_vf_promisc_result,
> > + port_id, UINT8);
> > +cmdline_parse_token_num_t cmd_vf_promisc_vf_id =
> > + TOKEN_NUM_INITIALIZER
> > + (struct cmd_vf_promisc_result,
> > + vf_id, UINT32);
> > +cmdline_parse_token_string_t cmd_vf_promisc_on_off =
> > + TOKEN_STRING_INITIALIZER
> > + (struct cmd_vf_promisc_result,
> > + on_off, "on#off");
> > +
> > +static void
> > +cmd_set_vf_promisc_parsed(
> > + void *parsed_result,
> > + __attribute__((unused)) struct cmdline *cl,
> > + __attribute__((unused)) void *data)
> > +{
> > + struct cmd_vf_promisc_result *res = parsed_result;
> > + int ret = -ENOTSUP;
> > +
> > + __rte_unused int is_on = (strcmp(res->on_off, "on") == 0) ? 1 : 0;
> > +
> > + if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> > + return;
> > +
> > +#ifdef RTE_LIBRTE_I40E_PMD
> > + ret = rte_pmd_i40e_set_vf_unicast_promisc(res->port_id,
> > + res->vf_id, is_on);
> > +#endif
> > +
>
> It's better to wrap the command by +#ifdef RTE_LIBRTE_I40E_PMD #endif Or
> at least, need to check if the port is handled i40e pmd.
Yes, I'll add the check.
^ permalink raw reply
* Re: [PATCH v7 25/27] net/i40e: set/clear VF stats from PF
From: Lu, Wenzhuo @ 2017-01-06 8:01 UTC (permalink / raw)
To: Wu, Jingjing, dev@dpdk.org; +Cc: Zhang, Qi Z
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC3E3A@SHSMSX103.ccr.corp.intel.com>
Hi Jingjing, Qi,
> -----Original Message-----
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 9:25 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Zhang, Qi Z
> Subject: RE: [dpdk-dev] [PATCH v7 25/27] net/i40e: set/clear VF stats from PF
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>
> > Subject: [dpdk-dev] [PATCH v7 25/27] net/i40e: set/clear VF stats from
> > PF
> >
> > From: Qi Zhang <qi.z.zhang@intel.com>
> >
> > This patch add support to get/clear VF statistics from PF side.
> > Two APIs are added:
> > rte_pmd_i40e_get_vf_stats.
> > rte_pmd_i40e_reset_vf_stats.
> >
> > Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
> > ---
> > drivers/net/i40e/i40e_ethdev.c | 81
> > +++++++++++++++++++++++++++++++
> > drivers/net/i40e/rte_pmd_i40e.h | 41 ++++++++++++++++
> > drivers/net/i40e/rte_pmd_i40e_version.map | 2 +
> > 3 files changed, 124 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 47e03d6..be45cfa 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -10480,3 +10480,84 @@ int rte_pmd_i40e_set_vf_vlan_filter(uint8_t
> > port, uint16_t vlan_id,
> >
> > return ret;
> > }
> > +
> > +int
> > +rte_pmd_i40e_get_vf_stats(uint8_t port,
> > + uint16_t vf_id,
> > + struct rte_eth_stats *stats)
> > +{
> > + struct rte_eth_dev *dev;
> > + struct rte_eth_dev_info dev_info;
> > + struct i40e_pf *pf;
> > + struct i40e_vsi *vsi;
> > + int ret = 0;
> > +
> > + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > + dev = &rte_eth_devices[port];
> > + rte_eth_dev_info_get(port, &dev_info);
> > +
> > + if (vf_id >= dev_info.max_vfs)
> > + return -EINVAL;
> > +
> > + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > +
> > + if (vf_id > pf->vf_num - 1 || !pf->vfs) {
> > + PMD_DRV_LOG(ERR, "Invalid argument.");
> > + return -EINVAL;
> > + }
> > +
> > + vsi = pf->vfs[vf_id].vsi;
> > + if (!vsi)
> > + return -EINVAL;
> > +
> > + i40e_update_vsi_stats(vsi);
> > +
> > + stats->ipackets = vsi->eth_stats.rx_unicast +
> > + vsi->eth_stats.rx_multicast +
> > + vsi->eth_stats.rx_broadcast;
> > + stats->opackets = vsi->eth_stats.tx_unicast +
> > + vsi->eth_stats.tx_multicast +
> > + vsi->eth_stats.tx_broadcast;
> > + stats->ibytes = vsi->eth_stats.rx_bytes;
> > + stats->obytes = vsi->eth_stats.tx_bytes;
> > + stats->ierrors = vsi->eth_stats.rx_discards;
> > + stats->oerrors = vsi->eth_stats.tx_errors +
> > +vsi->eth_stats.tx_discards;
> > +
> > + return ret;
>
> It looks ret is not changed in this func at all.
>
> > +}
> > +
> > +int
> > +rte_pmd_i40e_reset_vf_stats(uint8_t port,
> > + uint16_t vf_id)
> > +{
> > + struct rte_eth_dev *dev;
> > + struct rte_eth_dev_info dev_info;
> > + struct i40e_pf *pf;
> > + struct i40e_vsi *vsi;
> > + int ret = 0;
> > +
> > + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > + dev = &rte_eth_devices[port];
> > + rte_eth_dev_info_get(port, &dev_info);
> > +
> > + if (vf_id >= dev_info.max_vfs)
> > + return -EINVAL;
> > +
> > + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > +
> > + if (vf_id > pf->vf_num - 1 || !pf->vfs) {
> > + PMD_DRV_LOG(ERR, "Invalid argument.");
> > + return -EINVAL;
> > + }
> > +
> > + vsi = pf->vfs[vf_id].vsi;
> > + if (!vsi)
> > + return -EINVAL;
> > +
> > + vsi->offset_loaded = false;
> > + i40e_update_vsi_stats(vsi);
> > +
> > + return ret;
> Same comment as above.
I'll change it to 'return 0'. The same as above.
^ permalink raw reply
* [PATCH v2 1/5] net/qede: fix scatter-gather issue
From: Rasesh Mody @ 2017-01-06 8:16 UTC (permalink / raw)
To: ferruh.yigit; +Cc: Harish Patil, dev, stable, Dept-EngDPDKDev
In-Reply-To: <1483172217-30186-1-git-send-email-rasesh.mody@cavium.com>
From: Harish Patil <harish.patil@qlogic.com>
- Make qede_process_sg_pkts() inline and add unlikely check
- Fix mbuf segment chaining logic in qede_process_sg_pkts()
- Change qede_encode_sg_bd() to return total segments required
- Fix first TX buffer descriptor's length
- Replace repetitive code with a macro
Fixes: bec0228816c0 ("net/qede: support scatter gather")
Signed-off-by: Harish Patil <harish.patil@qlogic.com>
---
drivers/net/qede/qede_rxtx.c | 139 ++++++++++++++++++++----------------------
drivers/net/qede/qede_rxtx.h | 4 --
2 files changed, 65 insertions(+), 78 deletions(-)
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 814d384..ecff5bc 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -810,39 +810,28 @@ static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
return RTE_PTYPE_UNKNOWN;
}
-
-int qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
- int num_segs, uint16_t pkt_len)
+static inline int
+qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
+ uint8_t num_segs, uint16_t pkt_len)
{
struct qede_rx_queue *rxq = p_rxq;
struct qede_dev *qdev = rxq->qdev;
struct ecore_dev *edev = &qdev->edev;
- uint16_t sw_rx_index, cur_size;
-
register struct rte_mbuf *seg1 = NULL;
register struct rte_mbuf *seg2 = NULL;
+ uint16_t sw_rx_index;
+ uint16_t cur_size;
seg1 = rx_mb;
while (num_segs) {
- cur_size = pkt_len > rxq->rx_buf_size ?
- rxq->rx_buf_size : pkt_len;
- if (!cur_size) {
- PMD_RX_LOG(DEBUG, rxq,
- "SG packet, len and num BD mismatch\n");
+ cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
+ pkt_len;
+ if (unlikely(!cur_size)) {
+ PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
+ " left for mapping jumbo\n", num_segs);
qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
return -EINVAL;
}
-
- if (qede_alloc_rx_buffer(rxq)) {
- uint8_t index;
-
- PMD_RX_LOG(DEBUG, rxq, "Buffer allocation failed\n");
- index = rxq->port_id;
- rte_eth_devices[index].data->rx_mbuf_alloc_failed++;
- rxq->rx_alloc_errors++;
- return -ENOMEM;
- }
-
sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
qede_rx_bd_ring_consume(rxq);
@@ -852,16 +841,9 @@ int qede_process_sg_pkts(void *p_rxq, struct rte_mbuf *rx_mb,
seg1 = seg1->next;
num_segs--;
rxq->rx_segs++;
- continue;
}
- seg1 = NULL;
-
- if (pkt_len)
- PMD_RX_LOG(DEBUG, rxq,
- "Mapped all BDs of jumbo, but still have %d bytes\n",
- pkt_len);
- return ECORE_SUCCESS;
+ return 0;
}
uint16_t
@@ -878,11 +860,16 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
register struct rte_mbuf *rx_mb = NULL;
register struct rte_mbuf *seg1 = NULL;
enum eth_rx_cqe_type cqe_type;
- uint16_t len, pad, preload_idx, pkt_len, parse_flag;
- uint8_t csum_flag, num_segs;
+ uint16_t pkt_len; /* Sum of all BD segments */
+ uint16_t len; /* Length of first BD */
+ uint8_t num_segs = 1;
+ uint16_t pad;
+ uint16_t preload_idx;
+ uint8_t csum_flag;
+ uint16_t parse_flag;
enum rss_hash_type htype;
uint8_t tunn_parse_flag;
- int ret;
+ uint8_t j;
hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
@@ -915,6 +902,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
fp_cqe = &cqe->fast_path_regular;
len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
+ pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
pad = fp_cqe->placement_offset;
assert((len + pad) <= rx_mb->buf_len);
@@ -979,25 +967,29 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rxq->rx_alloc_errors++;
break;
}
-
qede_rx_bd_ring_consume(rxq);
-
if (fp_cqe->bd_num > 1) {
- pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
+ PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
+ " len on first: %04x Total Len: %04x\n",
+ fp_cqe->bd_num, len, pkt_len);
num_segs = fp_cqe->bd_num - 1;
-
- rxq->rx_segs++;
-
- pkt_len -= len;
seg1 = rx_mb;
- ret = qede_process_sg_pkts(p_rxq, seg1, num_segs,
- pkt_len);
- if (ret != ECORE_SUCCESS) {
- qede_recycle_rx_bd_ring(rxq, qdev,
- fp_cqe->bd_num);
+ if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
+ pkt_len - len))
goto next_cqe;
+ for (j = 0; j < num_segs; j++) {
+ if (qede_alloc_rx_buffer(rxq)) {
+ PMD_RX_LOG(ERR, rxq,
+ "Buffer allocation failed\n");
+ rte_eth_devices[rxq->port_id].
+ data->rx_mbuf_alloc_failed++;
+ rxq->rx_alloc_errors++;
+ break;
+ }
+ rxq->rx_segs++;
}
}
+ rxq->rx_segs++; /* for the first segment */
/* Prefetch next mbuf while processing current one. */
preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
@@ -1007,7 +999,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rx_mb->data_off = pad + RTE_PKTMBUF_HEADROOM;
rx_mb->nb_segs = fp_cqe->bd_num;
rx_mb->data_len = len;
- rx_mb->pkt_len = fp_cqe->pkt_len;
+ rx_mb->pkt_len = pkt_len;
rx_mb->port = rxq->port_id;
htype = (uint8_t)GET_FIELD(fp_cqe->bitfields,
@@ -1114,17 +1106,16 @@ qede_process_tx_compl(struct ecore_dev *edev, struct qede_tx_queue *txq)
}
/* Populate scatter gather buffer descriptor fields */
-static inline uint16_t qede_encode_sg_bd(struct qede_tx_queue *p_txq,
- struct rte_mbuf *m_seg,
- uint16_t count,
- struct eth_tx_1st_bd *bd1)
+static inline uint8_t
+qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
+ struct eth_tx_1st_bd *bd1)
{
struct qede_tx_queue *txq = p_txq;
struct eth_tx_2nd_bd *bd2 = NULL;
struct eth_tx_3rd_bd *bd3 = NULL;
struct eth_tx_bd *tx_bd = NULL;
- uint16_t nb_segs = count;
dma_addr_t mapping;
+ uint8_t nb_segs = 1; /* min one segment per packet */
/* Check for scattered buffers */
while (m_seg) {
@@ -1133,28 +1124,27 @@ static inline uint16_t qede_encode_sg_bd(struct qede_tx_queue *p_txq,
ecore_chain_produce(&txq->tx_pbl);
memset(bd2, 0, sizeof(*bd2));
mapping = rte_mbuf_data_dma_addr(m_seg);
- bd2->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
- bd2->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
- bd2->nbytes = rte_cpu_to_le_16(m_seg->data_len);
+ QEDE_BD_SET_ADDR_LEN(bd2, mapping, m_seg->data_len);
+ PMD_TX_LOG(DEBUG, txq, "BD2 len %04x\n",
+ m_seg->data_len);
} else if (nb_segs == 2) {
bd3 = (struct eth_tx_3rd_bd *)
ecore_chain_produce(&txq->tx_pbl);
memset(bd3, 0, sizeof(*bd3));
mapping = rte_mbuf_data_dma_addr(m_seg);
- bd3->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
- bd3->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
- bd3->nbytes = rte_cpu_to_le_16(m_seg->data_len);
+ QEDE_BD_SET_ADDR_LEN(bd3, mapping, m_seg->data_len);
+ PMD_TX_LOG(DEBUG, txq, "BD3 len %04x\n",
+ m_seg->data_len);
} else {
tx_bd = (struct eth_tx_bd *)
ecore_chain_produce(&txq->tx_pbl);
memset(tx_bd, 0, sizeof(*tx_bd));
mapping = rte_mbuf_data_dma_addr(m_seg);
- tx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
- tx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
- tx_bd->nbytes = rte_cpu_to_le_16(m_seg->data_len);
+ QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
+ PMD_TX_LOG(DEBUG, txq, "BD len %04x\n",
+ m_seg->data_len);
}
nb_segs++;
- bd1->data.nbds = nb_segs;
m_seg = m_seg->next;
}
@@ -1170,13 +1160,14 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
struct ecore_dev *edev = &qdev->edev;
struct qede_fastpath *fp;
struct eth_tx_1st_bd *bd1;
+ struct rte_mbuf *mbuf;
struct rte_mbuf *m_seg = NULL;
uint16_t nb_tx_pkts;
- uint16_t nb_pkt_sent = 0;
uint16_t bd_prod;
uint16_t idx;
uint16_t tx_count;
- uint16_t nb_segs = 0;
+ uint16_t nb_frags;
+ uint16_t nb_pkt_sent = 0;
fp = &qdev->fp_array[QEDE_RSS_COUNT(qdev) + txq->queue_id];
@@ -1198,19 +1189,19 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
while (nb_tx_pkts--) {
/* Fill the entry in the SW ring and the BDs in the FW ring */
idx = TX_PROD(txq);
- struct rte_mbuf *mbuf = *tx_pkts++;
-
+ mbuf = *tx_pkts++;
txq->sw_tx_ring[idx].mbuf = mbuf;
bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
- /* Zero init struct fields */
- bd1->data.bd_flags.bitfields = 0;
- bd1->data.bitfields = 0;
-
bd1->data.bd_flags.bitfields =
1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
+ /* FW 8.10.x specific change */
+ bd1->data.bitfields =
+ (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
+ << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
/* Map MBUF linear data for DMA and set in the first BD */
QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
- mbuf->pkt_len);
+ mbuf->data_len);
+ PMD_TX_LOG(INFO, txq, "BD1 len %04x\n", mbuf->data_len);
if (RTE_ETH_IS_TUNNEL_PKT(mbuf->packet_type)) {
PMD_TX_LOG(INFO, txq, "Tx tunnel packet\n");
@@ -1267,18 +1258,18 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
/* Handle fragmented MBUF */
m_seg = mbuf->next;
- nb_segs++;
- bd1->data.nbds = nb_segs;
/* Encode scatter gather buffer descriptors if required */
- nb_segs = qede_encode_sg_bd(txq, m_seg, nb_segs, bd1);
- txq->nb_tx_avail = txq->nb_tx_avail - nb_segs;
- nb_segs = 0;
+ nb_frags = qede_encode_sg_bd(txq, m_seg, bd1);
+ bd1->data.nbds = nb_frags;
+ txq->nb_tx_avail -= nb_frags;
txq->sw_tx_prod++;
rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
bd_prod =
rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
nb_pkt_sent++;
txq->xmit_pkts++;
+ PMD_TX_LOG(INFO, txq, "nbds = %d pkt_len = %04x\n",
+ bd1->data.nbds, mbuf->pkt_len);
}
/* Write value of prod idx into bd_prod */
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index 2a8645a..a95b4ab 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -41,10 +41,6 @@
(bd)->addr.hi = rte_cpu_to_le_32(U64_HI(maddr)); \
(bd)->addr.lo = rte_cpu_to_le_32(U64_LO(maddr)); \
(bd)->nbytes = rte_cpu_to_le_16(len); \
- /* FW 8.10.x specific change */ \
- (bd)->data.bitfields = ((len) & \
- ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) \
- << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT; \
} while (0)
#define CQE_HAS_VLAN(flags) \
--
1.7.10.3
^ permalink raw reply related
* [PATCH v2 2/5] net/qede: fix minimum buffer size and scatter Rx check
From: Rasesh Mody @ 2017-01-06 8:16 UTC (permalink / raw)
To: ferruh.yigit; +Cc: Harish Patil, dev, stable, Dept-EngDPDKDev
In-Reply-To: <1483172217-30186-1-git-send-email-rasesh.mody@cavium.com>
From: Harish Patil <harish.patil@qlogic.com>
- Fix minimum RX buffer size to 1024B
- Force-enable scatter/gather mode if the given RX buffer size is less than the MTU
- Adjust RX buffer size to cache-line size with overhead included
Fixes: bec0228816c0 ("net/qede: support scatter gather")
Fixes: 2ea6f76aff40 ("qede: add core driver")
Signed-off-by: Harish Patil <harish.patil@qlogic.com>
---
drivers/net/qede/qede_ethdev.c | 3 +--
drivers/net/qede/qede_rxtx.c | 47 +++++++++++++++++-----------------------
drivers/net/qede/qede_rxtx.h | 11 ++++++++--
3 files changed, 30 insertions(+), 31 deletions(-)
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index b7886f4..0b40d1b 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -969,8 +969,7 @@ qede_dev_info_get(struct rte_eth_dev *eth_dev,
PMD_INIT_FUNC_TRACE(edev);
dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
- dev_info->min_rx_bufsize = (uint32_t)(ETHER_MIN_MTU +
- QEDE_ETH_OVERHEAD);
+ dev_info->min_rx_bufsize = (uint32_t)QEDE_MIN_RX_BUFF_SIZE;
dev_info->max_rx_pktlen = (uint32_t)ETH_TX_MAX_NON_LSO_PKT_LEN;
dev_info->rx_desc_lim = qede_rx_desc_lim;
dev_info->tx_desc_lim = qede_tx_desc_lim;
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index ecff5bc..aebe8cb 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -89,11 +89,11 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
{
struct qede_dev *qdev = dev->data->dev_private;
struct ecore_dev *edev = &qdev->edev;
- struct rte_eth_dev_data *eth_data = dev->data;
+ struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
struct qede_rx_queue *rxq;
- uint16_t pkt_len = (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ uint16_t max_rx_pkt_len;
+ uint16_t bufsz;
size_t size;
- uint16_t data_size;
int rc;
int i;
@@ -127,34 +127,27 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
rxq->nb_rx_desc = nb_desc;
rxq->queue_id = queue_idx;
rxq->port_id = dev->data->port_id;
-
- /* Sanity check */
- data_size = (uint16_t)rte_pktmbuf_data_room_size(mp) -
- RTE_PKTMBUF_HEADROOM;
-
- if (pkt_len > data_size && !dev->data->scattered_rx) {
- DP_ERR(edev, "MTU %u should not exceed dataroom %u\n",
- pkt_len, data_size);
- rte_free(rxq);
- return -EINVAL;
+ max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;
+ qdev->mtu = max_rx_pkt_len;
+
+ /* Fix up RX buffer size */
+ bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
+ if ((rxmode->enable_scatter) ||
+ (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
+ if (!dev->data->scattered_rx) {
+ DP_INFO(edev, "Forcing scatter-gather mode\n");
+ dev->data->scattered_rx = 1;
+ }
}
-
if (dev->data->scattered_rx)
- rxq->rx_buf_size = data_size;
+ rxq->rx_buf_size = bufsz + QEDE_ETH_OVERHEAD;
else
- rxq->rx_buf_size = pkt_len + QEDE_ETH_OVERHEAD;
-
- qdev->mtu = pkt_len;
+ rxq->rx_buf_size = qdev->mtu + QEDE_ETH_OVERHEAD;
+ /* Align to cache-line size if needed */
+ rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size);
- DP_INFO(edev, "MTU = %u ; RX buffer = %u\n",
- qdev->mtu, rxq->rx_buf_size);
-
- if (pkt_len > ETHER_MAX_LEN) {
- dev->data->dev_conf.rxmode.jumbo_frame = 1;
- DP_NOTICE(edev, false, "jumbo frame enabled\n");
- } else {
- dev->data->dev_conf.rxmode.jumbo_frame = 0;
- }
+ DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
+ qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
/* Allocate the parallel driver ring for Rx buffers */
size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h
index a95b4ab..9a393e9 100644
--- a/drivers/net/qede/qede_rxtx.h
+++ b/drivers/net/qede/qede_rxtx.h
@@ -51,14 +51,21 @@
((flags) & (PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_MASK \
<< PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_SHIFT))
+#define QEDE_MIN_RX_BUFF_SIZE (1024)
+#define QEDE_VLAN_TAG_SIZE (4)
+#define QEDE_LLC_SNAP_HDR_LEN (8)
+
/* Max supported alignment is 256 (8 shift)
* minimal alignment shift 6 is optimal for 57xxx HW performance
*/
#define QEDE_L1_CACHE_SHIFT 6
#define QEDE_RX_ALIGN_SHIFT (RTE_MAX(6, RTE_MIN(8, QEDE_L1_CACHE_SHIFT)))
#define QEDE_FW_RX_ALIGN_END (1UL << QEDE_RX_ALIGN_SHIFT)
-
-#define QEDE_ETH_OVERHEAD (ETHER_HDR_LEN + 8 + 8 + QEDE_FW_RX_ALIGN_END)
+#define QEDE_CEIL_TO_CACHE_LINE_SIZE(n) (((n) + (QEDE_FW_RX_ALIGN_END - 1)) & \
+ ~(QEDE_FW_RX_ALIGN_END - 1))
+/* Note: QEDE_LLC_SNAP_HDR_LEN is optional */
+#define QEDE_ETH_OVERHEAD ((ETHER_HDR_LEN) + ((2 * QEDE_VLAN_TAG_SIZE)) \
+ + (QEDE_LLC_SNAP_HDR_LEN))
#define QEDE_RSS_OFFLOAD_ALL (ETH_RSS_IPV4 |\
ETH_RSS_NONFRAG_IPV4_TCP |\
--
1.7.10.3
^ permalink raw reply related
* [PATCH v2 3/5] net/qede: fix PF fastpath status block index
From: Rasesh Mody @ 2017-01-06 8:16 UTC (permalink / raw)
To: ferruh.yigit; +Cc: Harish Patil, dev, stable, Dept-EngDPDKDev
In-Reply-To: <1483172217-30186-1-git-send-email-rasesh.mody@cavium.com>
From: Harish Patil <harish.patil@qlogic.com>
Allocate double the number of fastpath status block index
since the PF RX/TX queues are not sharing the status block.
This is an interim solution till other parts of the code
are modified to handle the same.
Fixes: f1e4b6c0acee ("net/qede: fix status block index for VF queues")
Signed-off-by: Harish Patil <harish.patil@qlogic.com>
---
drivers/net/qede/qede_rxtx.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index aebe8cb..f20881c 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -431,13 +431,15 @@ int qede_alloc_fp_resc(struct qede_dev *qdev)
struct ecore_dev *edev = &qdev->edev;
struct qede_fastpath *fp;
uint32_t num_sbs;
- int rc, i;
+ uint16_t i;
+ uint16_t sb_idx;
+ int rc;
if (IS_VF(edev))
ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
else
- num_sbs = (ecore_cxt_get_proto_cid_count
- (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL)) / 2;
+ num_sbs = ecore_cxt_get_proto_cid_count
+ (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL);
if (num_sbs == 0) {
DP_ERR(edev, "No status blocks available\n");
@@ -455,7 +457,11 @@ int qede_alloc_fp_resc(struct qede_dev *qdev)
for (i = 0; i < QEDE_QUEUE_CNT(qdev); i++) {
fp = &qdev->fp_array[i];
- if (qede_alloc_mem_sb(qdev, fp->sb_info, i % num_sbs)) {
+ if (IS_VF(edev))
+ sb_idx = i % num_sbs;
+ else
+ sb_idx = i;
+ if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) {
qede_free_fp_arrays(qdev);
return -ENOMEM;
}
--
1.7.10.3
^ permalink raw reply related
* [PATCH v2 4/5] net/qede: fix per queue stats/xstats
From: Rasesh Mody @ 2017-01-06 8:16 UTC (permalink / raw)
To: ferruh.yigit; +Cc: Rasesh Mody, dev, stable, Dept-EngDPDKDev
In-Reply-To: <1483172217-30186-1-git-send-email-rasesh.mody@cavium.com>
From: Rasesh Mody <Rasesh.Mody@cavium.com>
If the number of rxq/txq is different from
RTE_ETHDEV_QUEUE_STAT_CNTRS, limit per queue
stats/xstats to minimum of the two.
Fixes: 7634c5f91569 ("net/qede: add queue statistics")
Signed-off-by: Rasesh Mody <Rasesh.Mody@cavium.com>
---
drivers/net/qede/qede_ethdev.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 0b40d1b..6d90c46 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -1162,6 +1162,7 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct rte_eth_stats *eth_stats)
struct ecore_dev *edev = &qdev->edev;
struct ecore_eth_stats stats;
unsigned int i = 0, j = 0, qid;
+ unsigned int rxq_stat_cntrs, txq_stat_cntrs;
struct qede_tx_queue *txq;
qdev->ops->get_vport_stats(edev, &stats);
@@ -1195,6 +1196,17 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct rte_eth_stats *eth_stats)
eth_stats->oerrors = stats.tx_err_drop_pkts;
/* Queue stats */
+ rxq_stat_cntrs = RTE_MIN(QEDE_RSS_COUNT(qdev),
+ RTE_ETHDEV_QUEUE_STAT_CNTRS);
+ txq_stat_cntrs = RTE_MIN(QEDE_TSS_COUNT(qdev),
+ RTE_ETHDEV_QUEUE_STAT_CNTRS);
+ if ((rxq_stat_cntrs != QEDE_RSS_COUNT(qdev)) ||
+ (txq_stat_cntrs != QEDE_TSS_COUNT(qdev)))
+ DP_VERBOSE(edev, ECORE_MSG_DEBUG,
+ "Not all the queue stats will be displayed. Set"
+ " RTE_ETHDEV_QUEUE_STAT_CNTRS config param"
+ " appropriately and retry.\n");
+
for (qid = 0; qid < QEDE_QUEUE_CNT(qdev); qid++) {
if (qdev->fp_array[qid].type & QEDE_FASTPATH_RX) {
eth_stats->q_ipackets[i] =
@@ -1213,7 +1225,11 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct rte_eth_stats *eth_stats)
rx_alloc_errors));
i++;
}
+ if (i == rxq_stat_cntrs)
+ break;
+ }
+ for (qid = 0; qid < QEDE_QUEUE_CNT(qdev); qid++) {
if (qdev->fp_array[qid].type & QEDE_FASTPATH_TX) {
txq = qdev->fp_array[(qid)].txqs[0];
eth_stats->q_opackets[j] =
@@ -1223,13 +1239,17 @@ qede_get_stats(struct rte_eth_dev *eth_dev, struct rte_eth_stats *eth_stats)
xmit_pkts)));
j++;
}
+ if (j == txq_stat_cntrs)
+ break;
}
}
static unsigned
qede_get_xstats_count(struct qede_dev *qdev) {
return RTE_DIM(qede_xstats_strings) +
- (RTE_DIM(qede_rxq_xstats_strings) * QEDE_RSS_COUNT(qdev));
+ (RTE_DIM(qede_rxq_xstats_strings) *
+ RTE_MIN(QEDE_RSS_COUNT(qdev),
+ RTE_ETHDEV_QUEUE_STAT_CNTRS));
}
static int
@@ -1239,6 +1259,7 @@ qede_get_xstats_names(__rte_unused struct rte_eth_dev *dev,
struct qede_dev *qdev = dev->data->dev_private;
const unsigned int stat_cnt = qede_get_xstats_count(qdev);
unsigned int i, qid, stat_idx = 0;
+ unsigned int rxq_stat_cntrs;
if (xstats_names != NULL) {
for (i = 0; i < RTE_DIM(qede_xstats_strings); i++) {
@@ -1249,7 +1270,9 @@ qede_get_xstats_names(__rte_unused struct rte_eth_dev *dev,
stat_idx++;
}
- for (qid = 0; qid < QEDE_RSS_COUNT(qdev); qid++) {
+ rxq_stat_cntrs = RTE_MIN(QEDE_RSS_COUNT(qdev),
+ RTE_ETHDEV_QUEUE_STAT_CNTRS);
+ for (qid = 0; qid < rxq_stat_cntrs; qid++) {
for (i = 0; i < RTE_DIM(qede_rxq_xstats_strings); i++) {
snprintf(xstats_names[stat_idx].name,
sizeof(xstats_names[stat_idx].name),
@@ -1273,6 +1296,7 @@ qede_get_xstats(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
struct ecore_eth_stats stats;
const unsigned int num = qede_get_xstats_count(qdev);
unsigned int i, qid, stat_idx = 0;
+ unsigned int rxq_stat_cntrs;
if (n < num)
return num;
@@ -1285,7 +1309,9 @@ qede_get_xstats(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
stat_idx++;
}
- for (qid = 0; qid < QEDE_QUEUE_CNT(qdev); qid++) {
+ rxq_stat_cntrs = RTE_MIN(QEDE_RSS_COUNT(qdev),
+ RTE_ETHDEV_QUEUE_STAT_CNTRS);
+ for (qid = 0; qid < rxq_stat_cntrs; qid++) {
if (qdev->fp_array[qid].type & QEDE_FASTPATH_RX) {
for (i = 0; i < RTE_DIM(qede_rxq_xstats_strings); i++) {
xstats[stat_idx].value = *(uint64_t *)(
--
1.7.10.3
^ permalink raw reply related
* [PATCH v2 5/5] net/qede: convert few DP_NOTICE and DP_INFO to DP_ERR
From: Rasesh Mody @ 2017-01-06 8:16 UTC (permalink / raw)
To: ferruh.yigit; +Cc: Rasesh Mody, dev, stable, Dept-EngDPDKDev
In-Reply-To: <1483172217-30186-1-git-send-email-rasesh.mody@cavium.com>
From: Rasesh Mody <Rasesh.Mody@cavium.com>
Signed-off-by: Rasesh Mody <Rasesh.Mody@cavium.com>
---
drivers/net/qede/base/ecore_mcp.c | 2 +-
drivers/net/qede/qede_ethdev.c | 11 +++++------
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index bb13828..f634d98 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -931,7 +931,7 @@ static void ecore_mcp_send_protocol_stats(struct ecore_hwfn *p_hwfn,
hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
break;
default:
- DP_NOTICE(p_hwfn, false, "Invalid protocol type %d\n", type);
+ DP_INFO(p_hwfn, "Invalid protocol type %d\n", type);
return;
}
diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c
index 6d90c46..c67fbb6 100644
--- a/drivers/net/qede/qede_ethdev.c
+++ b/drivers/net/qede/qede_ethdev.c
@@ -650,7 +650,7 @@ static void qede_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
qede_vlan_filter_set(eth_dev, 0, 1);
} else {
if (qdev->configured_vlans > 1) { /* Excluding VLAN0 */
- DP_NOTICE(edev, false,
+ DP_ERR(edev,
" Please remove existing VLAN filters"
" before disabling VLAN filtering\n");
/* Signal app that VLAN filtering is still
@@ -684,7 +684,7 @@ static int qede_vlan_filter_set(struct rte_eth_dev *eth_dev,
if (on) {
if (qdev->configured_vlans == dev_info->num_vlan_filters) {
- DP_INFO(edev, "Reached max VLAN filter limit"
+ DP_ERR(edev, "Reached max VLAN filter limit"
" enabling accept_any_vlan\n");
qede_config_accept_any_vlan(qdev, true);
return 0;
@@ -849,14 +849,13 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
if (edev->num_hwfns > 1) {
if (eth_dev->data->nb_rx_queues < 2 ||
eth_dev->data->nb_tx_queues < 2) {
- DP_NOTICE(edev, false,
- "100G mode needs min. 2 RX/TX queues\n");
+ DP_ERR(edev, "100G mode needs min. 2 RX/TX queues\n");
return -EINVAL;
}
if ((eth_dev->data->nb_rx_queues % 2 != 0) ||
(eth_dev->data->nb_tx_queues % 2 != 0)) {
- DP_NOTICE(edev, false,
+ DP_ERR(edev,
"100G mode needs even no. of RX/TX queues\n");
return -EINVAL;
}
@@ -867,7 +866,7 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
eth_dev->data->scattered_rx = 1;
if (rxmode->enable_lro == 1) {
- DP_INFO(edev, "LRO is not supported\n");
+ DP_ERR(edev, "LRO is not supported\n");
return -EINVAL;
}
--
1.7.10.3
^ permalink raw reply related
* Re: [PATCH v7 14/27] net/i40e: set VF VLAN insertion from PF
From: Lu, Wenzhuo @ 2017-01-06 8:20 UTC (permalink / raw)
To: Wu, Jingjing, dev@dpdk.org; +Cc: Iremonger, Bernard
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC3CCD@SHSMSX103.ccr.corp.intel.com>
Hi Jingjing, Bernard,
> -----Original Message-----
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 8:33 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Iremonger, Bernard
> Subject: RE: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion from
> PF
>
>
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Tuesday, January 3, 2017 2:55 PM
> > To: dev@dpdk.org
> > Cc: Iremonger, Bernard <bernard.iremonger@intel.com>
> > Subject: [dpdk-dev] [PATCH v7 14/27] net/i40e: set VF VLAN insertion
> > from PF
> >
> > From: Bernard Iremonger <bernard.iremonger@intel.com>
> >
> > Support inserting VF VLAN id from PF.
> > User can call the API on PF to insert a VLAN id to a specific VF.
> >
> > Signed-off-by: Bernard Iremonger <bernard.iremonger@intel.com>
> > ---
> > drivers/net/i40e/i40e_ethdev.c | 56
> > +++++++++++++++++++++++++++++++
> > drivers/net/i40e/rte_pmd_i40e.h | 19 +++++++++++
> > drivers/net/i40e/rte_pmd_i40e_version.map | 1 +
> > 3 files changed, 76 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 7ab1c93..31c387d 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -10266,3 +10266,59 @@ static void i40e_set_default_mac_addr(struct
> > rte_eth_dev *dev,
> > else
> > return -EINVAL;
> > }
> > +
> > +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id,
> > + uint16_t vlan_id)
> > +{
> > + struct rte_eth_dev *dev;
> > + struct rte_eth_dev_info dev_info;
> > + struct i40e_pf *pf;
> > + struct i40e_hw *hw;
> > + struct i40e_vsi *vsi;
> > + struct i40e_vsi_context ctxt;
> > + int ret;
> > +
> > + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);
> > +
> > + dev = &rte_eth_devices[port];
> > + rte_eth_dev_info_get(port, &dev_info);
>
> It looks like dev_info is not used in this function.
I'll delete it.
>
> > + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> > + hw = I40E_PF_TO_HW(pf);
> > +
> > + /**
> > + * return -ENODEV if SRIOV not enabled, VF number not configured
> > + * or no queue assigned.
> > + */
> > + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 ||
> > + pf->vf_nb_qps == 0)
> > + return -ENODEV;
> > +
> > + if (vf_id >= pf->vf_num || !pf->vfs)
> > + return -EINVAL;
> > +
> > + if (vlan_id > ETHER_MAX_VLAN_ID)
> > + return -EINVAL;
> > +
> > + vsi = pf->vfs[vf_id].vsi;
> > + if (!vsi)
> > + return -EINVAL;
> > +
> > + vsi->info.valid_sections =
> > cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
> > + vsi->info.pvid = vlan_id;
> > + if (vlan_id > 0)
> > + vsi->info.port_vlan_flags |=
> I40E_AQ_VSI_PVLAN_INSERT_PVID;
> > + else
> > + vsi->info.port_vlan_flags &=
> > ~I40E_AQ_VSI_PVLAN_INSERT_PVID;
> So, Pvid is used here for insert. Does it have any relationship with the vlan anti-
> spoof patch?
> If so, it's better to consider how to deal with that.
It's vlan insertion not filtering. So I think not related.
>
> Thanks
> Jingjing
^ permalink raw reply
* Re: [PATCH v3] net/mlx5: add support for ConnectX-5 NICs
From: Adrien Mazarguil @ 2017-01-06 8:50 UTC (permalink / raw)
To: Yongseok Koh; +Cc: ferruh.yigit, dev
In-Reply-To: <20170106004931.2015-1-yskoh@mellanox.com>
On Thu, Jan 05, 2017 at 04:49:31PM -0800, Yongseok Koh wrote:
> Add PCI device ID for ConnectX-5 and enable multi-packet send for PF and VF
> along with changing documentation and release note.
>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Thanks!
--
Adrien Mazarguil
6WIND
^ permalink raw reply
* Re: [PATCH v7 03/27] net/i40e: set VF MAC anti-spoofing from PF
From: Lu, Wenzhuo @ 2017-01-06 8:54 UTC (permalink / raw)
To: Wu, Jingjing, dev@dpdk.org
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC3CB8@SHSMSX103.ccr.corp.intel.com>
Hi Jingjing,
> -----Original Message-----
> From: Wu, Jingjing
> Sent: Friday, January 6, 2017 8:33 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Cc: Lu, Wenzhuo
> Subject: RE: [dpdk-dev] [PATCH v7 03/27] net/i40e: set VF MAC anti-spoofing
> from PF
>
> > +
> > + vsi->info.valid_sections =
> > cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
> > + if (on)
> > + vsi->info.sec_flags |=
> > I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
> > + else
> > + vsi->info.sec_flags &=
> > ~I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK;
> > +
> > + memset(&ctxt, 0, sizeof(ctxt));
> > + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
> > + ctxt.seid = vsi->seid;
> > +
> > + hw = I40E_VSI_TO_HW(vsi);
> > + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
> > + if (ret != I40E_SUCCESS)
> > + PMD_DRV_LOG(ERR, "Failed to update VSI params");
>
> If fails, will you revert the info in vsi struct?
No, it will not. It just leverages the existing behavior. I think this has an upside: the user should not retry again and again if the call does not succeed.
>
> > +
> > + return ret;
>
> Please return eth dev lib error code, but not I40E_XXX
Yes, will change it.
^ permalink raw reply
* Re: [PATCH v3] net/mlx5: add support for ConnectX-5 NICs
From: Thomas Monjalon @ 2017-01-06 9:09 UTC (permalink / raw)
To: Adrien Mazarguil, Yongseok Koh; +Cc: dev, ferruh.yigit
In-Reply-To: <20170106085042.GU12822@6wind.com>
2017-01-06 09:50, Adrien Mazarguil:
> On Thu, Jan 05, 2017 at 04:49:31PM -0800, Yongseok Koh wrote:
> > Add PCI device ID for ConnectX-5 and enable multi-packet send for PF and VF
> > along with changing documentation and release note.
> >
> > Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
>
> Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
You'll need to update the website:
http://dpdk.org/browse/tools/dpdk-web/tree/doc/nics.html#n83
^ permalink raw reply
* Re: XL710 with i40e driver drops packets on RX even on a small rates.
From: Martin Weiser @ 2017-01-06 9:17 UTC (permalink / raw)
To: dev, Ilya Maximets; +Cc: Helin Zhang, Jingjing Wu
In-Reply-To: <dd503e9e-209a-22f0-710f-bc90b52c1b46@allegro-packets.com>
Hello,
just to let you know we were finally able to resolve the issue. It seems
that the affected boards had a firmware issue with PCIe x8 v3.
When we forced the PCI slots to run at x8 v2 the issue disappeared for
Test 1 and Test 2. Test 3 still produced missed packets but probably due
to the reduced PCIe x8 v2 bandwidth.
We then found out that there exists a BIOS/firmware update for these
boards which was issued by Supermicro in November ... unfortunately
there are no changenotes whatsoever.
But lo and behold this update seems to include a fix for exactly this
issue since now the XL710 is working as expected with PCIe x8 v3.
Best regards,
Martin
On 04.01.17 13:33, Martin Weiser wrote:
> Hello,
>
> I have performed some more thorough testing on 3 different machines to
> illustrate the strange results with XL710.
> Please note that all 3 systems were able to forward the traffic of Test
> 1 and Test 2 without packet loss when a 82599ES NIC was installed in the
> same PCI slot as the XL710 in the tests below.
>
> Here is the test setup and the test results:
>
>
> ## Test traffic
>
> In all tests the t-rex traffic generator was used to generate traffic on
> a XL710 card with the following parameters:
>
> ### Test 1
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 25 --flip
>
> This resulted in a 60 second run with ~1.21 Gbps traffic on each of the
> two interfaces with ~100000 packets per
> second on each interface.
>
> ### Test 2
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 100 --flip
>
> This resulted in a 60 second run with ~4.85 Gbps traffic on each of the
> two interfaces with ~400000 packets per
> second on each interface.
>
> ### Test 3
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 400 --flip
>
> This resulted in a 60 second run with ~19.43 Gbps traffic on each of the
> two interfaces with ~1600000 packets per
> second on each interface.
>
>
>
> ## DPDK
>
> On all systems a vanilla DPDK v16.11 testpmd was used with the following
> parameters (PCI IDs differed between systems):
>
> ./build/app/testpmd -l 1,2 -w 0000:06:00.0 -w 0000:06:00.1 -- -i
>
>
>
> ## System 1
>
> * Board: Supermicro X10SDV-TP8F
> * CPU:
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 8
> On-line CPU(s) list: 0-7
> Thread(s) per core: 2
> Core(s) per socket: 4
> Socket(s): 1
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 86
> Model name: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz
> Stepping: 3
> CPU MHz: 800.250
> CPU max MHz: 2200.0000
> CPU min MHz: 800.0000
> BogoMIPS: 4399.58
> Virtualization: VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 256K
> L3 cache: 6144K
> NUMA node0 CPU(s): 0-7
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
> tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
> rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid
> dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx
> f16c rdrand lahf_lm abm 3dnowprefetch epb intel_pt tpr_shadow vnmi
> flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms
> invpcid rtm cqm rdseed adx smap xsaveopt cqm_llc cqm_occup_llc
> cqm_mbm_total cqm_mbm_local dtherm arat pln pts
> * Memory channels: 2
> * Memory: 2 * 8192 MB DDR4 @ 2133 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002505
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters: isolcpus=1,2,3,5,6,7 default_hugepagesz=1G
> hugepagesz=1G hugepages=1
>
> ### Test 1
>
> Mostly no packet loss. Sometimes ~10 packets missed of ~600000 on each
> interface when testpmd was not started in
> interactive mode.
>
> ### Test 2
>
> 100-300 packets of ~24000000 missed on each interface.
>
> ### Test 3
>
> 4000-5000 packets of ~96000000 missed on each interface.
>
>
>
> ## System 2
>
> * Board: Supermicro X10SDV-7TP8F
> * CPU:
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 32
> On-line CPU(s) list: 0-31
> Thread(s) per core: 2
> Core(s) per socket: 16
> Socket(s): 1
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 86
> Model name: 06/56
> Stepping: 4
> CPU MHz: 1429.527
> CPU max MHz: 2300.0000
> CPU min MHz: 800.0000
> BogoMIPS: 3400.37
> Virtualization: VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 256K
> L3 cache: 24576K
> NUMA node0 CPU(s): 0-31
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
> tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
> rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid
> dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx
> f16c rdrand lahf_lm abm 3dnowprefetch epb intel_pt tpr_shadow vnmi
> flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms
> invpcid rtm cqm rdseed adx smap xsaveopt cqm_llc cqm_occup_llc
> cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts
> * Memory channels: 2
> * Memory: 4 * 16384 MB DDR4 @ 2133 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002505
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters:
> isolcpus=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
> default_hugepagesz=1G hugepagesz=1G hugepages=1
>
> ### Test 1
>
> Mostly no packet loss of ~600000.
>
> ### Test 2
>
> 400000-500000 packets of ~24000000 missed on each interface.
>
> ### Test 3
>
> 1200000-1400000 packets of ~96000000 missed on each interface.
>
>
>
> ## System 3
>
> * Board: Supermicro X9SRW-F
> * CPU:
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 12
> On-line CPU(s) list: 0-11
> Thread(s) per core: 2
> Core(s) per socket: 6
> Socket(s): 1
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 62
> Model name: Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz
> Stepping: 4
> CPU MHz: 1200.253
> CPU max MHz: 3900.0000
> CPU min MHz: 1200.0000
> BogoMIPS: 7000.29
> Virtualization: VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 256K
> L3 cache: 12288K
> NUMA node0 CPU(s): 0-11
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
> tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
> rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca
> sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand
> lahf_lm epb tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms
> xsaveopt dtherm arat pln pts
> * Memory channels: 4
> * Memory: 4 * 8192 MB DDR3 @ 1600 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002537
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters: default_hugepagesz=1G hugepagesz=1G hugepages=1
> isolcpus=1-5,7-11
>
> ### Test 1
>
> No packets lost.
>
> ### Test 2
>
> No packets lost.
>
> ### Test 3
>
> No packets lost.
>
>
>
> Best regards,
> Martin
>
>
>
> On 03.01.17 13:18, Martin Weiser wrote:
>> Hello,
>>
>> we are also seeing this issue on one of our test systems while it does
>> not occur on other test systems with the same DPDK version (we tested
>> 16.11 and current master).
>>
>> The system that we can reproduce this issue on also has a X552 ixgbe NIC
>> which can forward the exact same traffic using the same testpmd
>> parameters without a problem.
>> Even if we install a 82599ES ixgbe NIC in the same PCI slot that the
>> XL710 was in the 82599ES can forward the traffic without any drops.
>>
>> Like in the issue reported by Ilya all packet drops occur on the testpmd
>> side and are accounted as 'imissed'. Increasing the number of rx
>> descriptors only helps a little at low packet rates.
>>
>> Drops start occurring at pretty low packet rates like 100000 packets per
>> second.
>>
>> Any suggestions would be greatly appreciated.
>>
>> Best regards,
>> Martin
>>
>>
>>
>> On 22.08.16 14:06, Ilya Maximets wrote:
>>> Hello, All.
>>>
>>> I've faced a really bad situation with packet drops at small
>>> packet rates (~45 Kpps) while using XL710 NIC with i40e DPDK driver.
>>>
>>> The issue was found while testing PHY-VM-PHY scenario with OVS and
>>> confirmed on PHY-PHY scenario with testpmd.
>>>
>>> DPDK version 16.07 was used in all cases.
>>> XL710 firmware-version: f5.0.40043 a1.5 n5.04 e2505
>>>
>>> Test description (PHY-PHY):
>>>
>>> * Following cmdline was used:
>>>
>>> # n_desc=2048
>>> # ./testpmd -c 0xf -n 2 --socket-mem=8192,0 -w 0000:05:00.0 -v \
>>> -- --burst=32 --txd=${n_desc} --rxd=${n_desc} \
>>> --rxq=1 --txq=1 --nb-cores=1 \
>>> --eth-peer=0,a0:00:00:00:00:00 --forward-mode=mac
>>>
>>> * DPDK-Pktgen application was used as a traffic generator.
>>> Single flow generated.
>>>
>>> Results:
>>>
>>> * Packet size: 128B, rate: 90% of 10Gbps (~7.5 Mpps):
>>>
>>> On the generator's side:
>>>
>>> Total counts:
>>> Tx : 759034368 packets
>>> Rx : 759033239 packets
>>> Lost : 1129 packets
>>>
>>> Average rates:
>>> Tx : 7590344 pps
>>> Rx : 7590332 pps
>>> Lost : 11 pps
>>>
>>> All of these dropped packets are RX-dropped on testpmd's side:
>>>
>>> +++++++++++++++ Accumulated forward statistics for all ports+++++++++++++++
>>> RX-packets: 759033239 RX-dropped: 1129 RX-total: 759034368
>>> TX-packets: 759033239 TX-dropped: 0 TX-total: 759033239
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>
>>> At the same time 10G NIC with IXGBE driver works perfectly
>>> without any packet drops in the same scenario.
>>>
>>> Much worse situation with PHY-VM-PHY scenario with OVS:
>>>
>>> * testpmd application used inside guest to forward incoming packets.
>>> (almost same cmdline as for PHY-PHY)
>>>
>>> * For packet size 256 B on rate 1% of 10Gbps (~45 Kpps):
>>>
>>> Total counts:
>>> Tx : 1358112 packets
>>> Rx : 1357990 packets
>>> Lost : 122 packets
>>>
>>> Average rates:
>>> Tx : 45270 pps
>>> Rx : 45266 pps
>>> Lost : 4 pps
>>>
>>> All of this 122 dropped packets can be found in rx_dropped counter:
>>>
>>> # ovs-vsctl get interface dpdk0 statistics:rx_dropped
>>> 122
>>>
>>> And again, no issues with IXGBE on the exactly same scenario.
>>>
>>>
>>> Results of my investigation:
>>>
>>> * I found that all of these packets are 'imissed'. This means that rx
>>> descriptor ring was overflowed.
>>>
>>> * I've modified i40e driver to check the real number of free descriptors
>>> that was not still filled by the NIC and found that HW fills
>>> rx descriptors with uneven rate. Looks like it fills them using
>>> a huge batches.
>>>
>>> * So, root cause of packet drops with XL710 is somehow uneven rate of
>>> filling of the hw rx descriptors by the NIC. This leads to exhausting
>>> of rx descriptors and packet drops by the hardware. 10G IXGBE NIC works
>>> more smoothly and driver is able to refill hw ring with rx descriptors
>>> in time.
>>>
>>> * The issue becomes worse with OVS because of much bigger latencies
>>> between 'rte_eth_rx_burst()' calls.
>>>
>>> The easiest solution for this problem is to increase number of RX descriptors.
>>> Increasing up to 4096 eliminates packet drops but decreases the performance a lot:
>>>
>>> For OVS PHY-VM-PHY scenario by 10%
>>> For OVS PHY-PHY scenario by 20%
>>> For testpmd PHY-PHY scenario by 17% (22.1 Mpps --> 18.2 Mpps for 64B packets)
>>>
>>> As a result we have a trade-off between zero drop rate on small packet rates and
>>> the higher maximum performance that is very sad.
>>>
>>> Using of 16B descriptors doesn't really help with performance.
>>> Upgrading the firmware from version 4.4 to 5.04 didn't help with drops.
>>>
>>> Any thoughts? Can anyone reproduce this?
>>>
>>> Best regards, Ilya Maximets.
^ permalink raw reply
* Re: XL710 with i40e driver drops packets on RX even on a small rates.
From: Zhang, Helin @ 2017-01-06 9:45 UTC (permalink / raw)
To: Martin Weiser, dev@dpdk.org, Ilya Maximets; +Cc: Wu, Jingjing
In-Reply-To: <0c3ac3b2-17b5-6a4c-1cac-cc197dc4ed0f@allegro-packets.com>
Very good to know that!
Congratulations!
/Helin
-----Original Message-----
From: Martin Weiser [mailto:martin.weiser@allegro-packets.com]
Sent: Friday, January 6, 2017 5:17 PM
To: dev@dpdk.org; Ilya Maximets <i.maximets@samsung.com>
Cc: Zhang, Helin <helin.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
Subject: Re: [dpdk-dev] XL710 with i40e driver drops packets on RX even on a small rates.
Hello,
just to let you know we were finally able to resolve the issue. It seems that the affected boards had a firmware issue with PCIe x8 v3.
When we forced the PCI slots to run at x8 v2 the issue disappeared for Test 1 and Test 2. Test 3 still produced missed packets but probably due to the reduced PCIe x8 v2 bandwidth.
We then found out that there exists a BIOS/firmware update for these boards which was issued by Supermicro in November ... unfortunately there are no changenotes whatsoever.
But lo and behold this update seems to include a fix for exactly this issue since now the XL710 is working as expected with PCIe x8 v3.
Best regards,
Martin
On 04.01.17 13:33, Martin Weiser wrote:
> Hello,
>
> I have performed some more thorough testing on 3 different machines to
> illustrate the strange results with XL710.
> Please note that all 3 systems were able to forward the traffic of
> Test
> 1 and Test 2 without packet loss when a 82599ES NIC was installed in
> the same PCI slot as the XL710 in the tests below.
>
> Here is the test setup and the test results:
>
>
> ## Test traffic
>
> In all tests the t-rex traffic generator was used to generate traffic
> on a XL710 card with the following parameters:
>
> ### Test 1
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 25 --flip
>
> This resulted in a 60 second run with ~1.21 Gbps traffic on each of
> the two interfaces with ~100000 packets per second on each interface.
>
> ### Test 2
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 100 --flip
>
> This resulted in a 60 second run with ~4.85 Gbps traffic on each of
> the two interfaces with ~400000 packets per second on each interface.
>
> ### Test 3
>
> ./t-rex-64 -f cap2/imix_1518.yaml -c 4 -d 60 -m 400 --flip
>
> This resulted in a 60 second run with ~19.43 Gbps traffic on each of
> the two interfaces with ~1600000 packets per second on each interface.
>
>
>
> ## DPDK
>
> On all systems a vanilla DPDK v16.11 testpmd was used with the
> following parameters (PCI IDs differed between systems):
>
> ./build/app/testpmd -l 1,2 -w 0000:06:00.0 -w 0000:06:00.1 -- -i
>
>
>
> ## System 1
>
> * Board: Supermicro X10SDV-TP8F
> * CPU:
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 8
> On-line CPU(s) list: 0-7
> Thread(s) per core: 2
> Core(s) per socket: 4
> Socket(s): 1
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 86
> Model name: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz
> Stepping: 3
> CPU MHz: 800.250
> CPU max MHz: 2200.0000
> CPU min MHz: 800.0000
> BogoMIPS: 4399.58
> Virtualization: VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 256K
> L3 cache: 6144K
> NUMA node0 CPU(s): 0-7
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss
> ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs
> bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni
> pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm
> pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes
> xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb intel_pt
> tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle
> avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap xsaveopt cqm_llc
> cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm arat pln pts
> * Memory channels: 2
> * Memory: 2 * 8192 MB DDR4 @ 2133 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002505
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters: isolcpus=1,2,3,5,6,7 default_hugepagesz=1G
> hugepagesz=1G hugepages=1
>
> ### Test 1
>
> Mostly no packet loss. Sometimes ~10 packets missed of ~600000 on each
> interface when testpmd was not started in interactive mode.
>
> ### Test 2
>
> 100-300 packets of ~24000000 missed on each interface.
>
> ### Test 3
>
> 4000-5000 packets of ~96000000 missed on each interface.
>
>
>
> ## System 2
>
> * Board: Supermicro X10SDV-7TP8F
> * CPU:
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 32
> On-line CPU(s) list: 0-31
> Thread(s) per core: 2
> Core(s) per socket: 16
> Socket(s): 1
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 86
> Model name: 06/56
> Stepping: 4
> CPU MHz: 1429.527
> CPU max MHz: 2300.0000
> CPU min MHz: 800.0000
> BogoMIPS: 3400.37
> Virtualization: VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 256K
> L3 cache: 24576K
> NUMA node0 CPU(s): 0-31
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss
> ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs
> bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni
> pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm
> pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes
> xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb intel_pt
> tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle
> avx2 smep bmi2 erms invpcid rtm cqm rdseed adx smap xsaveopt cqm_llc
> cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts
> * Memory channels: 2
> * Memory: 4 * 16384 MB DDR4 @ 2133 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002505
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters:
> isolcpus=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,2
> 5,26,27,28,29,30,31 default_hugepagesz=1G hugepagesz=1G hugepages=1
>
> ### Test 1
>
> Mostly no packet loss of ~600000.
>
> ### Test 2
>
> 400000-500000 packets of ~24000000 missed on each interface.
>
> ### Test 3
>
> 1200000-1400000 packets of ~96000000 missed on each interface.
>
>
>
> ## System 3
>
> * Board: Supermicro X9SRW-F
> * CPU:
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 12
> On-line CPU(s) list: 0-11
> Thread(s) per core: 2
> Core(s) per socket: 6
> Socket(s): 1
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 62
> Model name: Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz
> Stepping: 4
> CPU MHz: 1200.253
> CPU max MHz: 3900.0000
> CPU min MHz: 1200.0000
> BogoMIPS: 7000.29
> Virtualization: VT-x
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 256K
> L3 cache: 12288K
> NUMA node0 CPU(s): 0-11
> Flags: fpu vme de pse tsc msr pae mce cx8 apic
> sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss
> ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs
> bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni
> pclmulqdq
> dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca
> sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c
> rdrand lahf_lm epb tpr_shadow vnmi flexpriority ept vpid fsgsbase smep
> erms xsaveopt dtherm arat pln pts
> * Memory channels: 4
> * Memory: 4 * 8192 MB DDR3 @ 1600 MHz
> * NIC firmware: FW 5.0 API 1.5 NVM 05.00.04 eetrack 80002537
> * i40e version: 1.4.25-k
> * OS: Ubuntu 16.04.1 LTS
> * Kernel: 4.4.0-57-generic
> * Kernel parameters: default_hugepagesz=1G hugepagesz=1G hugepages=1
> isolcpus=1-5,7-11
>
> ### Test 1
>
> No packets lost.
>
> ### Test 2
>
> No packets lost.
>
> ### Test 3
>
> No packets lost.
>
>
>
> Best regards,
> Martin
>
>
>
> On 03.01.17 13:18, Martin Weiser wrote:
>> Hello,
>>
>> we are also seeing this issue on one of our test systems while it
>> does not occur on other test systems with the same DPDK version (we
>> tested
>> 16.11 and current master).
>>
>> The system that we can reproduce this issue on also has a X552 ixgbe
>> NIC which can forward the exact same traffic using the same testpmd
>> parameters without a problem.
>> Even if we install a 82599ES ixgbe NIC in the same PCI slot that the
>> XL710 was in the 82599ES can forward the traffic without any drops.
>>
>> Like in the issue reported by Ilya all packet drops occur on the
>> testpmd side and are accounted as 'imissed'. Increasing the number of
>> rx descriptors only helps a little at low packet rates.
>>
>> Drops start occurring at pretty low packet rates like 100000 packets
>> per second.
>>
>> Any suggestions would be greatly appreciated.
>>
>> Best regards,
>> Martin
>>
>>
>>
>> On 22.08.16 14:06, Ilya Maximets wrote:
>>> Hello, All.
>>>
>>> I've faced with a really bad situation with packet drops on a small
>>> packet rates (~45 Kpps) while using XL710 NIC with i40e DPDK driver.
>>>
>>> The issue was found while testing PHY-VM-PHY scenario with OVS and
>>> confirmed on PHY-PHY scenario with testpmd.
>>>
>>> DPDK version 16.07 was used in all cases.
>>> XL710 firmware-version: f5.0.40043 a1.5 n5.04 e2505
>>>
>>> Test description (PHY-PHY):
>>>
>>> * Following cmdline was used:
>>>
>>> # n_desc=2048
>>> # ./testpmd -c 0xf -n 2 --socket-mem=8192,0 -w 0000:05:00.0 -v \
>>> -- --burst=32 --txd=${n_desc} --rxd=${n_desc} \
>>> --rxq=1 --txq=1 --nb-cores=1 \
>>> --eth-peer=0,a0:00:00:00:00:00 --forward-mode=mac
>>>
>>> * DPDK-Pktgen application was used as a traffic generator.
>>> Single flow generated.
>>>
>>> Results:
>>>
>>> * Packet size: 128B, rate: 90% of 10Gbps (~7.5 Mpps):
>>>
>>> On the generator's side:
>>>
>>> Total counts:
>>> Tx : 759034368 packets
>>> Rx : 759033239 packets
>>> Lost : 1129 packets
>>>
>>> Average rates:
>>> Tx : 7590344 pps
>>> Rx : 7590332 pps
>>> Lost : 11 pps
>>>
>>> All of this dropped packets are RX-dropped on testpmd's side:
>>>
>>> +++++++++++++++ Accumulated forward statistics for all ports+++++++++++++++
>>> RX-packets: 759033239 RX-dropped: 1129 RX-total: 759034368
>>> TX-packets: 759033239 TX-dropped: 0 TX-total: 759033239
>>>
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> +++++++
>>>
>>> At the same time 10G NIC with IXGBE driver works perfectly
>>> without any packet drops in the same scenario.
>>>
>>> Much worse situation with PHY-VM-PHY scenario with OVS:
>>>
>>> * testpmd application used inside guest to forward incoming packets.
>>> (almost same cmdline as for PHY-PHY)
>>>
>>> * For packet size 256 B on rate 1% of 10Gbps (~45 Kpps):
>>>
>>> Total counts:
>>> Tx : 1358112 packets
>>> Rx : 1357990 packets
>>> Lost : 122 packets
>>>
>>> Average rates:
>>> Tx : 45270 pps
>>> Rx : 45266 pps
>>> Lost : 4 pps
>>>
>>> All of this 122 dropped packets can be found in rx_dropped counter:
>>>
>>> # ovs-vsctl get interface dpdk0 statistics:rx_dropped
>>> 122
>>>
>>> And again, no issues with IXGBE on the exactly same scenario.
>>>
>>>
>>> Results of my investigation:
>>>
>>> * I found that all of this packets are 'imissed'. This means that rx
>>> descriptor ring was overflowed.
>>>
>>> * I've modified i40e driver to check the real number of free descriptors
>>> that was not still filled by the NIC and found that HW fills
>>> rx descriptors with uneven rate. Looks like it fills them using
>>> a huge batches.
>>>
>>> * So, root cause of packet drops with XL710 is somehow uneven rate of
>>> filling of the hw rx descriptors by the NIC. This leads to exhausting
>>> of rx descriptors and packet drops by the hardware. 10G IXGBE NIC works
>>> more smoothly and driver is able to refill hw ring with rx descriptors
>>> in time.
>>>
>>> * The issue becomes worse with OVS because of much bigger latencies
>>> between 'rte_eth_rx_burst()' calls.
>>>
>>> The easiest solution for this problem is to increase number of RX descriptors.
>>> Increasing up to 4096 eliminates packet drops but decreases the performance a lot:
>>>
>>> For OVS PHY-VM-PHY scenario by 10%
>>> For OVS PHY-PHY scenario by 20%
>>> For tespmd PHY-PHY scenario by 17% (22.1 Mpps --> 18.2 Mpps for 64B
>>> packets)
>>>
>>> As a result we have a trade-off between zero drop rate on small
>>> packet rates and the higher maximum performance that is very sad.
>>>
>>> Using of 16B descriptors doesn't really help with performance.
>>> Upgrading the firmware from version 4.4 to 5.04 didn't help with drops.
>>>
>>> Any thoughts? Can anyone reproduce this?
>>>
>>> Best regards, Ilya Maximets.
^ permalink raw reply
* Re: [PATCH v5 00/20] Decouple ethdev from PCI device
From: Andrew Rybchenko @ 2017-01-06 10:07 UTC (permalink / raw)
To: Ferruh Yigit, Thomas Monjalon, Jan Blunck
Cc: dev, shreyansh.jain, david.marchand, stephen, Alejandro Lucero
In-Reply-To: <2be3d632-b1eb-d7be-47e3-c6e9211fef33@intel.com>
On 01/03/2017 03:20 PM, Ferruh Yigit wrote:
> On 12/25/2016 10:33 PM, Thomas Monjalon wrote:
>> 2016-12-23 16:57, Jan Blunck:
>>> This repost addresses the review comments of Thomas Monjalon to completely
>>> remove the ethdev helper to further decrease the coupling of the ethdev and
>>> the eal layers. This required me to squash together all patches using the
>>> rte_eth_dev_to_pci() helper into "Decouple from PCI device" patch. As
>>> discussed privately I'll keep the PCI information in rte_eth_dev_info
>>> untouched.
>> Applied with some trivial fixes, thanks
>>
> I rebased these changes into next-net tree. And need to update some sfc
> and nfp patches [1] there.
>
> Andrew, Alejandro,
>
> Can you please review your driver in the latest next-net tree?
>
> Thanks,
> ferruh
>
> [1]
> nfp:
> net/nfp: add Rx interrupts
>
> sfc:
> net/sfc: support link status change interrupt
> net/sfc: interrupts support sufficient for event queue init
> net/sfc: implement driver operation to init device on attach
> net/sfc: libefx-based PMD stub sufficient to build and init
Ferruh,
thanks that you care about it. I've reviewed and tested it. It looks and
works fine for me.
Andrew.
^ permalink raw reply
* [PATCH v3 0/6] net/virtio: fix several multiple process issues
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1482922962-21036-1-git-send-email-yuanhan.liu@linux.intel.com>
v3: - fixed several comments from Thomas regarding to eth_dev
- updated the release note and nic features matrix
v2: - fixed virtio 1.0 multiple process support
- fixed the case when few virtio net devices are managed by DPDK
while few others are handled by Linux kernel.
This patch series fixes a few crash issues regarding the multiple process
model. In my limited fuzzy testing, it now works for both virtio 0.95 and
1.0, as well as for the case where some virtio-net devices are managed by
the kernel while some others are managed by DPDK.
---
Maintaining the multiple process support is not an easy task -- you
have to be very mindful while coding -- what kind of stuff should
and should not be in shared memory. Otherwise, it's very likely the
multiple process model will be broken.
A typical example is the ops pointer, a pointer to a set of function
pointers. Normally, it's a pointer stored in a read-only data section
of the application:
static const struct virtio_pci_ops legacy_ops = {
...,
}
The pointer, of course, may vary in different process space. If,
however, we store the pointer into shared memory, we could only
have one value for it. Setting it from process A and accessing
it from process B would likely lead to an illegal memory access.
As a result, crash happens.
The fix is to keep those addresses locally, in a new struct,
virtio_hw_internal. That way, each process maintains its own
version of the pointer (from its own process space). Thus,
everything would work as expected.
---
Yuanhan Liu (6):
ethdev: fix port data mismatched in multiple process model
net/virtio: fix wrong Rx/Tx method for secondary process
net/virtio: store PCI operators pointer locally
net/virtio: store IO port info locally
net/virtio: fix multiple process support
net/virtio: remove dead structure field
doc/guides/nics/features/virtio.ini | 1 +
doc/guides/rel_notes/release_17_02.rst | 5 ++
drivers/net/virtio/virtio_ethdev.c | 69 +++++++++++++++++++++++++---
drivers/net/virtio/virtio_pci.c | 81 +++++++++++++++++----------------
drivers/net/virtio/virtio_pci.h | 25 ++++++++--
drivers/net/virtio/virtio_user_ethdev.c | 5 +-
drivers/net/virtio/virtqueue.h | 2 +-
lib/librte_ether/rte_ethdev.c | 77 +++++++++++++++++++++++++++----
8 files changed, 204 insertions(+), 61 deletions(-)
--
1.9.0
^ permalink raw reply
* [PATCH v3 3/6] net/virtio: store PCI operators pointer locally
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1483697780-12088-1-git-send-email-yuanhan.liu@linux.intel.com>
We used to store the vtpci_ops at virtio_hw structure. The struct,
however, is stored in shared memory. That means only one value is
allowed. For the multiple process model, however, the address of
vtpci_ops should be different among different processes.
Take the virtio PMD as an example: the vtpci_ops is set by the primary
process, based on its own process space. If we access that address
from the secondary process, that would be an illegal memory access.
A crash then might happen.
To make the multiple process model work, we need to store the vtpci_ops
in local memory rather than in shared memory. This is what the patch
does: a local virtio_hw_internal array of size RTE_MAX_ETHPORTS is
allocated. This new structure is used to store all this kind of
info in non-shared memory. Currently, we have:
- vtpci_ops
- rte_pci_ioport
- virtio pci mapped memory, such as common_cfg.
The latter two will be done in coming patches. Later patches will also
set them correctly for the secondary process, so that the multiple process
model can work.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/virtio/virtio_ethdev.c | 9 ++++++---
drivers/net/virtio/virtio_pci.c | 26 +++++++++++++-------------
drivers/net/virtio/virtio_pci.h | 17 ++++++++++++++++-
drivers/net/virtio/virtio_user_ethdev.c | 3 ++-
drivers/net/virtio/virtqueue.h | 2 +-
5 files changed, 38 insertions(+), 19 deletions(-)
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ef37ad1..5567aa2 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -152,6 +152,8 @@ struct rte_virtio_xstats_name_off {
#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
sizeof(rte_virtio_txq_stat_strings[0]))
+struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
+
static int
virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
int *dlen, int pkt_num)
@@ -360,7 +362,7 @@ struct rte_virtio_xstats_name_off {
* Read the virtqueue size from the Queue Size field
* Always power of 2 and if 0 virtqueue does not exist
*/
- vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx);
+ vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
if (vq_size == 0) {
PMD_INIT_LOG(ERR, "virtqueue does not exist");
@@ -519,7 +521,7 @@ struct rte_virtio_xstats_name_off {
}
}
- if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
+ if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
PMD_INIT_LOG(ERR, "setup_queue failed");
return -EINVAL;
}
@@ -1114,7 +1116,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
req_features);
/* Read device(host) feature bits */
- host_features = hw->vtpci_ops->get_features(hw);
+ host_features = VTPCI_OPS(hw)->get_features(hw);
PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
host_features);
@@ -1322,6 +1324,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
return -ENOMEM;
}
+ hw->port_id = eth_dev->data->port_id;
pci_dev = eth_dev->pci_dev;
if (pci_dev) {
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 9b47165..b1f2e18 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -537,14 +537,14 @@
vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
void *dst, int length)
{
- hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length);
+ VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
}
void
vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
const void *src, int length)
{
- hw->vtpci_ops->write_dev_cfg(hw, offset, src, length);
+ VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
}
uint64_t
@@ -557,7 +557,7 @@
* host all support.
*/
features = host_features & hw->guest_features;
- hw->vtpci_ops->set_features(hw, features);
+ VTPCI_OPS(hw)->set_features(hw, features);
return features;
}
@@ -565,9 +565,9 @@
void
vtpci_reset(struct virtio_hw *hw)
{
- hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+ VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
/* flush status write */
- hw->vtpci_ops->get_status(hw);
+ VTPCI_OPS(hw)->get_status(hw);
}
void
@@ -580,21 +580,21 @@
vtpci_set_status(struct virtio_hw *hw, uint8_t status)
{
if (status != VIRTIO_CONFIG_STATUS_RESET)
- status |= hw->vtpci_ops->get_status(hw);
+ status |= VTPCI_OPS(hw)->get_status(hw);
- hw->vtpci_ops->set_status(hw, status);
+ VTPCI_OPS(hw)->set_status(hw, status);
}
uint8_t
vtpci_get_status(struct virtio_hw *hw)
{
- return hw->vtpci_ops->get_status(hw);
+ return VTPCI_OPS(hw)->get_status(hw);
}
uint8_t
vtpci_isr(struct virtio_hw *hw)
{
- return hw->vtpci_ops->get_isr(hw);
+ return VTPCI_OPS(hw)->get_isr(hw);
}
@@ -602,7 +602,7 @@
uint16_t
vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
{
- return hw->vtpci_ops->set_config_irq(hw, vec);
+ return VTPCI_OPS(hw)->set_config_irq(hw, vec);
}
static void *
@@ -736,8 +736,8 @@
*/
if (virtio_read_caps(dev, hw) == 0) {
PMD_INIT_LOG(INFO, "modern virtio pci detected.");
- hw->vtpci_ops = &modern_ops;
- hw->modern = 1;
+ virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
+ hw->modern = 1;
*dev_flags |= RTE_ETH_DEV_INTR_LSC;
return 0;
}
@@ -755,7 +755,7 @@
return -1;
}
- hw->vtpci_ops = &legacy_ops;
+ virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
hw->use_msix = legacy_virtio_has_msix(&dev->addr);
hw->modern = 0;
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index de271bf..268bb82 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -254,6 +254,7 @@ struct virtio_hw {
uint8_t use_msix;
uint8_t modern;
uint8_t use_simple_rxtx;
+ uint8_t port_id;
uint8_t mac_addr[ETHER_ADDR_LEN];
uint32_t notify_off_multiplier;
uint8_t *isr;
@@ -261,12 +262,26 @@ struct virtio_hw {
struct rte_pci_device *dev;
struct virtio_pci_common_cfg *common_cfg;
struct virtio_net_config *dev_cfg;
- const struct virtio_pci_ops *vtpci_ops;
void *virtio_user_dev;
struct virtqueue **vqs;
};
+
+/*
+ * While virtio_hw is stored in shared memory, this structure stores
+ * some infos that may vary in the multiple process model locally.
+ * For example, the vtpci_ops pointer.
+ */
+struct virtio_hw_internal {
+ const struct virtio_pci_ops *vtpci_ops;
+};
+
+#define VTPCI_OPS(hw) (virtio_hw_internal[(hw)->port_id].vtpci_ops)
+
+extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
+
+
/*
* This structure is just a reference to read
* net device specific config space; it just a chodu structure
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 406beea..7d2a9d9 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -301,7 +301,8 @@
return NULL;
}
- hw->vtpci_ops = &virtio_user_ops;
+ hw->port_id = data->port_id;
+ virtio_hw_internal[hw->port_id].vtpci_ops = &virtio_user_ops;
hw->use_msix = 0;
hw->modern = 0;
hw->use_simple_rxtx = 0;
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..b1070e0 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -330,7 +330,7 @@ struct virtio_tx_region {
* For virtio on IA, the notificaiton is through io port operation
* which is a serialization instruction itself.
*/
- vq->hw->vtpci_ops->notify_queue(vq->hw, vq);
+ VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
}
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
--
1.9.0
^ permalink raw reply related
* [PATCH v3 4/6] net/virtio: store IO port info locally
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1483697780-12088-1-git-send-email-yuanhan.liu@linux.intel.com>
Like vtpci_ops, the rte_pci_ioport has to be stored in local memory. This
is basically because the rte_pci_device field is allocated from process
local memory, not from shared memory.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/virtio/virtio_pci.c | 49 ++++++++++++++++++++++-------------------
drivers/net/virtio/virtio_pci.h | 3 ++-
2 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index b1f2e18..d1e9c05 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -92,17 +92,17 @@
while (length > 0) {
if (length >= 4) {
size = 4;
- rte_eal_pci_ioport_read(&hw->io, dst, size,
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
VIRTIO_PCI_CONFIG(hw) + offset);
*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
} else if (length >= 2) {
size = 2;
- rte_eal_pci_ioport_read(&hw->io, dst, size,
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
VIRTIO_PCI_CONFIG(hw) + offset);
*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
} else {
size = 1;
- rte_eal_pci_ioport_read(&hw->io, dst, size,
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
VIRTIO_PCI_CONFIG(hw) + offset);
}
@@ -111,7 +111,7 @@
length -= size;
}
#else
- rte_eal_pci_ioport_read(&hw->io, dst, length,
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length,
VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
@@ -131,16 +131,16 @@
if (length >= 4) {
size = 4;
tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
- rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
VIRTIO_PCI_CONFIG(hw) + offset);
} else if (length >= 2) {
size = 2;
tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
- rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
VIRTIO_PCI_CONFIG(hw) + offset);
} else {
size = 1;
- rte_eal_pci_ioport_write(&hw->io, src, size,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size,
VIRTIO_PCI_CONFIG(hw) + offset);
}
@@ -149,7 +149,7 @@
length -= size;
}
#else
- rte_eal_pci_ioport_write(&hw->io, src, length,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length,
VIRTIO_PCI_CONFIG(hw) + offset);
#endif
}
@@ -159,7 +159,7 @@
{
uint32_t dst;
- rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES);
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
return dst;
}
@@ -171,7 +171,7 @@
"only 32 bit features are allowed for legacy virtio!");
return;
}
- rte_eal_pci_ioport_write(&hw->io, &features, 4,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4,
VIRTIO_PCI_GUEST_FEATURES);
}
@@ -180,14 +180,14 @@
{
uint8_t dst;
- rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS);
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
return dst;
}
static void
legacy_set_status(struct virtio_hw *hw, uint8_t status)
{
- rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS);
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
}
static void
@@ -201,7 +201,7 @@
{
uint8_t dst;
- rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR);
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
return dst;
}
@@ -211,8 +211,10 @@
{
uint16_t dst;
- rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
- rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
+ VIRTIO_MSI_CONFIG_VECTOR);
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2,
+ VIRTIO_MSI_CONFIG_VECTOR);
return dst;
}
@@ -221,8 +223,9 @@
{
uint16_t dst;
- rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
- rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM);
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2,
+ VIRTIO_PCI_QUEUE_SEL);
+ rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
return dst;
}
@@ -234,10 +237,10 @@
if (!check_vq_phys_addr_ok(vq))
return -1;
- rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
VIRTIO_PCI_QUEUE_SEL);
src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
- rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
return 0;
}
@@ -247,15 +250,15 @@
{
uint32_t src = 0;
- rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
VIRTIO_PCI_QUEUE_SEL);
- rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
}
static void
legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
- rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+ rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
VIRTIO_PCI_QUEUE_NOTIFY);
}
@@ -289,7 +292,7 @@
legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
struct virtio_hw *hw, uint32_t *dev_flags)
{
- if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0)
+ if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
return -1;
if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN)
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 268bb82..6b9aecf 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -245,7 +245,6 @@ struct virtio_pci_ops {
struct virtio_hw {
struct virtnet_ctl *cvq;
- struct rte_pci_ioport io;
uint64_t req_guest_features;
uint64_t guest_features;
uint32_t max_queue_pairs;
@@ -275,9 +274,11 @@ struct virtio_hw {
*/
struct virtio_hw_internal {
const struct virtio_pci_ops *vtpci_ops;
+ struct rte_pci_ioport io;
};
#define VTPCI_OPS(hw) (virtio_hw_internal[(hw)->port_id].vtpci_ops)
+#define VTPCI_IO(hw) (&virtio_hw_internal[(hw)->port_id].io)
extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
--
1.9.0
^ permalink raw reply related
* [PATCH v3 6/6] net/virtio: remove dead structure field
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1483697780-12088-1-git-send-email-yuanhan.liu@linux.intel.com>
Actually, virtio_hw->dev has never been used since it was
introduced. Remove it.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/virtio/virtio_pci.c | 2 --
drivers/net/virtio/virtio_pci.h | 1 -
2 files changed, 3 deletions(-)
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index f5754e5..fbdb5b7 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -730,8 +730,6 @@
vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
uint32_t *dev_flags)
{
- hw->dev = dev;
-
/*
* Try if we can succeed reading virtio pci caps, which exists
* only on modern pci device. If failed, we fallback to legacy
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 511a1c8..4235bef 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -258,7 +258,6 @@ struct virtio_hw {
uint32_t notify_off_multiplier;
uint8_t *isr;
uint16_t *notify_base;
- struct rte_pci_device *dev;
struct virtio_pci_common_cfg *common_cfg;
struct virtio_net_config *dev_cfg;
void *virtio_user_dev;
--
1.9.0
^ permalink raw reply related
* [PATCH v3 1/6] ethdev: fix port data mismatched in multiple process model
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu, stable, Thomas Monjalon, Bruce Richardson,
Ferruh Yigit
In-Reply-To: <1483697780-12088-1-git-send-email-yuanhan.liu@linux.intel.com>
Assume we have two virtio ports, 00:03.0 and 00:04.0. The first one is
managed by the kernel driver, while the latter one is managed by DPDK.
Now we start the primary process. 00:03.0 will be skipped by DPDK virtio
PMD driver (since it's being used by the kernel). 00:04.0 would be
successfully initiated by DPDK virtio PMD (if nothing abnormal happens).
After that, we would get a port id 0, and all the related info needed
by virtio (virtio_hw) is stored at rte_eth_dev_data[0].
Then we start the secondary process. As usual, 00:03.0 will be firstly
probed. It firstly tries to get a local eth_dev structure for it (by
rte_eth_dev_allocate):
port_id = rte_eth_dev_find_free_port();
...
eth_dev = &rte_eth_devices[port_id];
eth_dev->data = &rte_eth_dev_data[port_id];
...
return eth_dev;
Since it's a first PCI device, port_id will be 0. eth_dev->data would
then point to rte_eth_dev_data[0]. And here things start going wrong,
as rte_eth_dev_data[0] actually stores the virtio_hw for 00:04.0.
That said, in the secondary process, DPDK will continue to drive PCI
device 00:03.0 (despite the fact that it is managed by the kernel), with
the info from PCI device 00:04.0. Which is wrong.
The fix is to attach the port already registered by the primary process:
iterate the rte_eth_dev_data[], and get the port id whose PCI ID matches
the current PCI device.
This would let us maintain same port ID for the same PCI device, keeping
the chance of referencing to wrong data minimal.
Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org
Cc: Thomas Monjalon <thomas.monjalon@6wind.com>
Cc: Bruce Richardson <bruce.richardson@intel.com>
Cc: Ferruh Yigit <ferruh.yigit@intel.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v3: - do not move rte_eth_dev_data_alloc to pci_probe
- rename eth_dev_attach to eth_dev_attach_secondary
- introduce eth_dev_init() for common eth_dev struct initiation
- move comment block inside the "if" block
---
lib/librte_ether/rte_ethdev.c | 77 ++++++++++++++++++++++++++++++++++++++-----
1 file changed, 68 insertions(+), 9 deletions(-)
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index fde8112..c3e65f1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -189,6 +189,21 @@ struct rte_eth_dev *
return RTE_MAX_ETHPORTS;
}
+static void
+eth_dev_init(struct rte_eth_dev *eth_dev, uint8_t port_id, const char *name)
+{
+ eth_dev->data = &rte_eth_dev_data[port_id];
+ eth_dev->attached = DEV_ATTACHED;
+ eth_dev_last_created_port = port_id;
+ nb_ports++;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ snprintf(eth_dev->data->name, sizeof(eth_dev->data->name),
+ "%s", name);
+ eth_dev->data->port_id = port_id;
+ }
+}
+
struct rte_eth_dev *
rte_eth_dev_allocate(const char *name)
{
@@ -211,12 +226,41 @@ struct rte_eth_dev *
}
eth_dev = &rte_eth_devices[port_id];
- eth_dev->data = &rte_eth_dev_data[port_id];
- snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
- eth_dev->data->port_id = port_id;
- eth_dev->attached = DEV_ATTACHED;
- eth_dev_last_created_port = port_id;
- nb_ports++;
+ eth_dev_init(eth_dev, port_id, name);
+
+ return eth_dev;
+}
+
+/*
+ * Attach to a port already registered by the primary process, which
+ * makes sure that the same device would have the same port id both
+ * in the primary and secondary process.
+ */
+static struct rte_eth_dev *
+eth_dev_attach_secondary(const char *name)
+{
+ uint8_t i;
+ struct rte_eth_dev *eth_dev;
+
+ if (rte_eth_dev_data == NULL)
+ rte_eth_dev_data_alloc();
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (strcmp(rte_eth_dev_data[i].name, name) == 0)
+ break;
+ }
+ if (i == RTE_MAX_ETHPORTS) {
+ RTE_PMD_DEBUG_TRACE(
+ "device %s is not driven by the primary process\n",
+ name);
+ return NULL;
+ }
+
+ eth_dev = &rte_eth_devices[i];
+ eth_dev_init(eth_dev, i, NULL);
+ /* eth_dev->data is only set by eth_dev_init(); check it afterwards. */
+ RTE_ASSERT(eth_dev->data->port_id == i);
+
return eth_dev;
}
@@ -246,9 +290,24 @@ struct rte_eth_dev *
rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
sizeof(ethdev_name));
- eth_dev = rte_eth_dev_allocate(ethdev_name);
- if (eth_dev == NULL)
- return -ENOMEM;
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ eth_dev = rte_eth_dev_allocate(ethdev_name);
+ if (eth_dev == NULL)
+ return -ENOMEM;
+ } else {
+ eth_dev = eth_dev_attach_secondary(ethdev_name);
+ if (eth_dev == NULL) {
+ /*
+ * if we failed to attach a device, it means
+ * the device is skipped, due to some errors.
+ * Take virtio-net device as example, it could
+ * due to the device is managed by virtio-net
+ * kernel driver. For such case, we return a
+ * positive value, to let EAL skip it as well.
+ */
+ return 1;
+ }
+ }
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
--
1.9.0
^ permalink raw reply related
* [PATCH v3 2/6] net/virtio: fix wrong Rx/Tx method for secondary process
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu, stable
In-Reply-To: <1483697780-12088-1-git-send-email-yuanhan.liu@linux.intel.com>
If the primary enables the vector Rx/Tx path, the current code would
let the secondary always choose the non vector Rx/Tx path. This results
in a Rx/Tx method mismatch between primary and secondary process. Weird
errors then may happen, something like:
PMD: virtio_xmit_pkts() tx: virtqueue_enqueue error: -14
Fix it by choosing the correct Rx/Tx callbacks for the secondary process.
That is, use vector path if it's given.
Fixes: 8d8393fb1861 ("virtio: pick simple Rx/Tx")
Cc: stable@dpdk.org
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v2: fix a checkpatch warning: use {} consistently
---
drivers/net/virtio/virtio_ethdev.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 079fd6c..ef37ad1 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1304,7 +1304,12 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
- rx_func_get(eth_dev);
+ if (hw->use_simple_rxtx) {
+ eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
+ eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
+ } else {
+ rx_func_get(eth_dev);
+ }
return 0;
}
--
1.9.0
^ permalink raw reply related
* [PATCH v3 5/6] net/virtio: fix multiple process support
From: Yuanhan Liu @ 2017-01-06 10:16 UTC (permalink / raw)
To: dev; +Cc: Yuanhan Liu, stable, Juho Snellman, Yaron Illouz
In-Reply-To: <1483697780-12088-1-git-send-email-yuanhan.liu@linux.intel.com>
The introduction of virtio 1.0 support brought yet another set of ops.
Unfortunately it is not handled correctly, which breaks multiple process
support. The issue is that data/function pointers may differ between
processes, while the old code set them only once (in the primary process
only). That said, the function pointer the secondary process sees actually
belongs to the primary process space. Accessing it is likely to crash.
Kudos to the last patches, we are now able to maintain the info that may
vary among different processes locally, meaning every process can have its
own copy of each of them, with the correct value set. And this is what
this patch does:
- remap the PCI (IO port for legacy device and memory map for modern
device)
- set vtpci_ops correctly
After that, multiple process support works like a charm. (At least, it
passed my fuzzy test.)
Fixes: b8f04520ad71 ("virtio: use PCI ioport API")
Fixes: d5bbeefca826 ("virtio: introduce PCI implementation structure")
Fixes: 6ba1f63b5ab0 ("virtio: support specification 1.0")
Cc: stable@dpdk.org
Cc: Juho Snellman <jsnell@iki.fi>
Cc: Yaron Illouz <yaroni@radcom.com>
Reported-by: Juho Snellman <jsnell@iki.fi>
Reported-by: Yaron Illouz <yaroni@radcom.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
v3: - update release note and nic matrix
v2: - fixed PCI remap, so that virtio 1.0 also works
---
doc/guides/nics/features/virtio.ini | 1 +
doc/guides/rel_notes/release_17_02.rst | 5 ++++
drivers/net/virtio/virtio_ethdev.c | 53 +++++++++++++++++++++++++++++++--
drivers/net/virtio/virtio_pci.c | 4 +--
drivers/net/virtio/virtio_pci.h | 4 +++
drivers/net/virtio/virtio_user_ethdev.c | 2 +-
6 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/doc/guides/nics/features/virtio.ini b/doc/guides/nics/features/virtio.ini
index 41830c1..f5de291 100644
--- a/doc/guides/nics/features/virtio.ini
+++ b/doc/guides/nics/features/virtio.ini
@@ -22,3 +22,4 @@ ARMv8 = Y
x86-32 = Y
x86-64 = Y
Usage doc = Y
+Multiprocess aware = Y
diff --git a/doc/guides/rel_notes/release_17_02.rst b/doc/guides/rel_notes/release_17_02.rst
index 3b65038..a4260de 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -54,6 +54,11 @@ Resolved Issues
Also, make sure to start the actual text at the margin.
=========================================================
+ * **fixed virtio multiple process support.**
+
+ Fixed a few regressions introduced in recent releases that break the virtio
+ multiple process support.
+
EAL
~~~
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 5567aa2..19d4348 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1289,6 +1289,49 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
}
/*
+ * Remap the PCI device again (IO port map for legacy device and
+ * memory map for modern device), so that the secondary process
+ * could have the PCI initiated correctly.
+ */
+static int
+virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+	if (!hw->modern) {
+		/* Legacy device: map its IO port region. */
+		if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
+			return -1;
+		return 0;
+	}
+
+	/*
+	 * Modern device: map the PCI memory resources. The PCI config
+	 * space is not parsed again, because rte_eal_pci_map_device()
+	 * returns an error unless the secondary process gets the same
+	 * mapped addresses as the primary one.
+	 *
+	 * The cap pointers (dev_cfg, common_cfg, etc.) parsed by the
+	 * primary process and kept in shared memory are simply reused.
+	 */
+	if (rte_eal_pci_map_device(pci_dev) != 0) {
+		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+virtio_set_vtpci_ops(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+	/* A NULL pci_dev selects the virtio-user ops. */
+	if (!pci_dev) {
+		VTPCI_OPS(hw) = &virtio_user_ops;
+		return;
+	}
+	VTPCI_OPS(hw) = hw->modern ? &modern_ops : &legacy_ops;
+}
+
+/*
* This function is based on probe() function in virtio_pci.c
* It returns 0 on success.
*/
@@ -1296,7 +1339,7 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
{
struct virtio_hw *hw = eth_dev->data->dev_private;
- struct rte_pci_device *pci_dev;
+ struct rte_pci_device *pci_dev = eth_dev->pci_dev;
uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
int ret;
@@ -1306,6 +1349,13 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ if (pci_dev) {
+ ret = virtio_remap_pci(pci_dev, hw);
+ if (ret)
+ return ret;
+ }
+
+ virtio_set_vtpci_ops(pci_dev, hw);
if (hw->use_simple_rxtx) {
eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
@@ -1325,7 +1375,6 @@ static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
}
hw->port_id = eth_dev->data->port_id;
- pci_dev = eth_dev->pci_dev;
if (pci_dev) {
ret = vtpci_init(pci_dev, hw, &dev_flags);
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index d1e9c05..f5754e5 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -303,7 +303,7 @@
return 0;
}
-static const struct virtio_pci_ops legacy_ops = {
+const struct virtio_pci_ops legacy_ops = {
.read_dev_cfg = legacy_read_dev_config,
.write_dev_cfg = legacy_write_dev_config,
.reset = legacy_reset,
@@ -519,7 +519,7 @@
io_write16(1, vq->notify_addr);
}
-static const struct virtio_pci_ops modern_ops = {
+const struct virtio_pci_ops modern_ops = {
.read_dev_cfg = modern_read_dev_config,
.write_dev_cfg = modern_write_dev_config,
.reset = modern_reset,
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 6b9aecf..511a1c8 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -333,4 +333,8 @@ int vtpci_init(struct rte_pci_device *, struct virtio_hw *,
uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
+extern const struct virtio_pci_ops legacy_ops;
+extern const struct virtio_pci_ops modern_ops;
+extern const struct virtio_pci_ops virtio_user_ops;
+
#endif /* _VIRTIO_PCI_H_ */
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 7d2a9d9..3563952 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -212,7 +212,7 @@
strerror(errno));
}
-static const struct virtio_pci_ops virtio_user_ops = {
+const struct virtio_pci_ops virtio_user_ops = {
.read_dev_cfg = virtio_user_read_dev_config,
.write_dev_cfg = virtio_user_write_dev_config,
.reset = virtio_user_reset,
--
1.9.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox