Re: [PATCH net-next 1/2] net_sched/mqprio: add support for different pgroup types

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: John Fastabend <john.r.fastabend@intel.com>
To: Amir Vadai <amirv@mellanox.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org, Oren Duer <oren@mellanox.com>,
	Liran Liss <liranl@mellanox.com>
Subject: Re: [PATCH net-next 1/2] net_sched/mqprio: add support for different pgroup types
Date: Tue, 08 May 2012 23:36:07 -0700	[thread overview]
Message-ID: <4FAA1057.6020709@intel.com> (raw)
In-Reply-To: <1336287910-12010-2-git-send-email-amirv@mellanox.com>

On 5/6/2012 12:05 AM, Amir Vadai wrote:
> Currently, HW based QoS mechanisms use the framework and means introduced in
> commits 4f57c087d "net: implement mechanism for HW based QOS" and b8970f0bfc
> "net_sched: implement a root container qdisc sch_mqprio".
> 
> The approach present in these patches is strongly orientated to the extended
> transmission selection (ETS) algorithm traffic classes (TC).
> 

I would argue with the "strongly orientated" part, per our other thread [0/2].

> This patch enhances the current scheme to allow for these mechanisms to be used
> also with hardware who has queues per UP - user priority (Linux has well
> established mechanisms to set UP for both tagged and untagged traffic).
> 

Also, per the other thread, I think this is only needed if you have many
different egress map configurations on VLANs.

> Now, __skb_tx_hash() will direct a flow to a tx ring from a range of tx rings.
> This range is defined by the admin through the mqprio scheduler for the
> specific HW. For TC based queues, the range is by TC number and for UP based
> queues, the range is by UP.
> 
> Signed-off-by: Amir Vadai <amirv@mellanox.com>
> ---
>  include/linux/netdevice.h |   27 +++++++++++++++++++++++++++
>  include/linux/pkt_sched.h |    3 ++-
>  net/core/dev.c            |   12 +++++++++---
>  net/sched/sch_mqprio.c    |   11 +++++++++--
>  4 files changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 7f377fb..ecdd953 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -835,6 +835,9 @@ struct netdev_fcoe_hbainfo {
>   * 	is always called from the stack with the rtnl lock held and netif tx
>   * 	queues stopped. This allows the netdevice to perform queue management
>   * 	safely.
> + * int (*ndo_set_pg_type)(struct net_device *dev, u8 pg_type)
> + *	Called to setup 'tc' type. According to this type, traffic is
> + *	distributed across tx rings. If not set, ETS TC is in use.
>   *
>   *	Fiber Channel over Ethernet (FCoE) offload functions.
>   * int (*ndo_fcoe_enable)(struct net_device *dev);
> @@ -973,6 +976,8 @@ struct net_device_ops {
>  	int			(*ndo_get_vf_port)(struct net_device *dev,
>  						   int vf, struct sk_buff *skb);
>  	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
> +	int			(*ndo_set_pg_type)(struct net_device *dev,
> +						   u8 pg_type);

Expand ndo_setup_tc() to take either another parameter 'pg_type' or just
start passing in the entire tc_mqprio_qopt; that way we get the number of
queues as well. I would prefer passing tc_mqprio_qopt to adding more parameters.

This avoids adding another ndo op.


>  #if IS_ENABLED(CONFIG_FCOE)
>  	int			(*ndo_fcoe_enable)(struct net_device *dev);
>  	int			(*ndo_fcoe_disable)(struct net_device *dev);
> @@ -1307,6 +1312,11 @@ struct net_device {
>  	/* Data Center Bridging netlink ops */
>  	const struct dcbnl_rtnl_ops *dcbnl_ops;
>  #endif
> +	enum {
> +		PGROUP_TC,
> +		PGROUP_UP,
> +		PGROUP_MAX,
> +	} pg_type:8;
>  	u8 num_tc;
>  	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
>  	u8 prio_tc_map[TC_BITMASK + 1];
> @@ -1329,6 +1339,23 @@ struct net_device {
>  #define	NETDEV_ALIGN		32
>  
>  static inline
> +int netdev_get_pg_type(const struct net_device *dev)
> +{
> +	return dev->pg_type;
> +}
> +
> +static inline
> +int netdev_set_pg_type(struct net_device *dev, u8 pg_type)
> +{
> +	if (pg_type >= PGROUP_MAX)
> +		return -EINVAL;
> +
> +	dev->pg_type = pg_type;
> +
> +	return 0;
> +}
> +
> +static inline
>  int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
>  {
>  	return dev->prio_tc_map[prio & TC_BITMASK];
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index ffe975c..1ae7d3c 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -596,7 +596,8 @@ struct tc_drr_stats {
>  struct tc_mqprio_qopt {
>  	__u8	num_tc;
>  	__u8	prio_tc_map[TC_QOPT_BITMASK + 1];
> -	__u8	hw;
> +	__u8	hw;	/* bit 0: hw owned, bits 1-7: hw queuing type.
> +			 * valid types: 0 - ETS TC, 1 - UP */
>  	__u16	count[TC_QOPT_MAX_QUEUE];
>  	__u16	offset[TC_QOPT_MAX_QUEUE];
>  };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 09024fd..72ac4bf 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2325,9 +2325,15 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
>  	}
>  
>  	if (dev->num_tc) {
> -		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
> -		qoffset = dev->tc_to_txq[tc].offset;
> -		qcount = dev->tc_to_txq[tc].count;
> +		u8 pgroup;
> +
> +		if (dev->pg_type == PGROUP_TC || !vlan_tx_tag_present(skb))
> +			pgroup = netdev_get_prio_tc_map(dev, skb->priority);
> +		else
> +			pgroup = (vlan_tx_tag_get(skb) >> 13);
> +
> +		qoffset = dev->tc_to_txq[pgroup].offset;
> +		qcount = dev->tc_to_txq[pgroup].count;
>  	}
>  
>  	if (skb->sk && skb->sk->sk_hash)
> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
> index d1831ca..2149cbb 100644
> --- a/net/sched/sch_mqprio.c
> +++ b/net/sched/sch_mqprio.c
> @@ -134,11 +134,18 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
>  		priv->qdiscs[i] = qdisc;
>  	}
>  
> +	if (dev->netdev_ops->ndo_set_pg_type)
> +		err = dev->netdev_ops->ndo_set_pg_type(dev, qopt->hw >> 1);
> +	else
> +		err = netdev_set_pg_type(dev, PGROUP_TC);

Software should still be allowed to set PGROUP_UP even though hardware may
not support it.

> +	if (err)
> +		goto err;
> +
>  	/* If the mqprio options indicate that hardware should own
>  	 * the queue mapping then run ndo_setup_tc otherwise use the
>  	 * supplied and verified mapping
>  	 */
> -	if (qopt->hw) {
> +	if (qopt->hw & 1) {
>  		priv->hw_owned = 1;
>  		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
>  		if (err)

next prev parent reply	other threads:[~2012-05-09  6:36 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-06  7:05 [PATCH net-next 0/2] extend sch_mqprio to distribute traffic not only by ETS TC Amir Vadai
2012-05-06  7:05 ` [PATCH net-next 1/2] net_sched/mqprio: add support for different pgroup types Amir Vadai
2012-05-09  6:36   ` John Fastabend [this message]
2012-05-06  7:05 ` [PATCH net-next 2/2] net/mlx4_en: num cores tx rings for every UP Amir Vadai
2012-05-08  0:54 ` [PATCH net-next 0/2] extend sch_mqprio to distribute traffic not only by ETS TC John Fastabend
2012-05-08 13:56   ` Amir Vadai
2012-05-09  6:22     ` John Fastabend
2012-05-14 19:24       ` Amir Vadai
2012-05-15 16:44         ` John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4FAA1057.6020709@intel.com \
    --to=john.r.fastabend@intel.com \
    --cc=amirv@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=liranl@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=oren@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).