* [PATCH 1/1] net: ti: netcp: use setup_timer
From: Allen Pais @ 2017-09-21 13:02 UTC (permalink / raw)
To: linux-kernel; +Cc: w-kwok2, m-karicheri2, netdev, Allen Pais
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
drivers/net/ethernet/ti/netcp_ethss.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 28cb38a..4ad8216 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
}
spin_unlock_bh(&gbe_dev->hw_stats_lock);
- init_timer(&gbe_dev->timer);
- gbe_dev->timer.data = (unsigned long)gbe_dev;
- gbe_dev->timer.function = netcp_ethss_timer;
+ setup_timer(&gbe_dev->timer, netcp_ethss_timer,
+ (unsigned long)gbe_dev);
gbe_dev->timer.expires = jiffies + GBE_TIMER_INTERVAL;
add_timer(&gbe_dev->timer);
*inst_priv = gbe_dev;
--
2.7.4
^ permalink raw reply related
* [PATCH 1/1] net: usb: catc: use setup_timer() helper
From: Allen Pais @ 2017-09-21 12:54 UTC (permalink / raw)
To: linux-kernel; +Cc: linux-usb, netdev, Allen Pais
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
drivers/net/usb/catc.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index dbc9031..aeb62e1 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -805,9 +805,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
spin_lock_init(&catc->tx_lock);
spin_lock_init(&catc->ctrl_lock);
- init_timer(&catc->timer);
- catc->timer.data = (long) catc;
- catc->timer.function = catc_stats_timer;
+ setup_timer(&catc->timer, catc_stats_timer, (long)catc);
catc->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL);
catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
--
2.7.4
^ permalink raw reply related
* [PATCH 1/1] net: wan : hdlc: use setup_timer() helper
From: Allen Pais @ 2017-09-21 12:47 UTC (permalink / raw)
To: linux-kernel; +Cc: khc, netdev, Allen Pais
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
drivers/net/wan/hdlc_fr.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 78596e4..425a47f 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1003,11 +1003,10 @@ static void fr_start(struct net_device *dev)
state(hdlc)->n391cnt = 0;
state(hdlc)->txseq = state(hdlc)->rxseq = 0;
- init_timer(&state(hdlc)->timer);
+ setup_timer(&state(hdlc)->timer, fr_timer,
+ (unsigned long)dev);
/* First poll after 1 s */
state(hdlc)->timer.expires = jiffies + HZ;
- state(hdlc)->timer.function = fr_timer;
- state(hdlc)->timer.data = (unsigned long)dev;
add_timer(&state(hdlc)->timer);
} else
fr_set_link_state(1, dev);
--
2.7.4
^ permalink raw reply related
* Re: [PATCH net-next 2/3] net: ethernet: socionext: add AVE ethernet driver
From: Kunihiko Hayashi @ 2017-09-21 12:27 UTC (permalink / raw)
To: Florian Fainelli, netdev
Cc: David S. Miller, Andrew Lunn, Rob Herring, Mark Rutland,
linux-arm-kernel, linux-kernel, devicetree, Masahiro Yamada,
Masami Hiramatsu, Jassi Brar
In-Reply-To: <20170911155555.6719.4A936039@socionext.com>
On Mon, 11 Sep 2017 15:55:56 +0900 <hayashi.kunihiko@socionext.com> wrote:
> > > +static int ave_set_rxdesc(struct net_device *ndev, int entry)
> > > +{
> > > + struct ave_private *priv = netdev_priv(ndev);
> > > + struct sk_buff *skb;
> > > + unsigned long align;
> > > + dma_addr_t paddr;
> > > + void *buffptr;
> > > + int ret = 0;
> > > +
> > > + skb = priv->rx.desc[entry].skbs;
> > > + if (!skb) {
> > > + skb = netdev_alloc_skb_ip_align(ndev,
> > > + AVE_MAX_ETHFRAME + NET_SKB_PAD);
> > > + if (!skb) {
> > > + netdev_err(ndev, "can't allocate skb for Rx\n");
> > > + return -ENOMEM;
> > > + }
> > > + }
> > > +
> > > + /* set disable to cmdsts */
> > > + ave_wdesc(ndev, AVE_DESCID_RX, entry, 0, AVE_STS_INTR | AVE_STS_OWN);
> > > +
> > > + /* align skb data for cache size */
> > > + align = (unsigned long)skb_tail_pointer(skb) & (NET_SKB_PAD - 1);
> > > + align = NET_SKB_PAD - align;
> > > + skb_reserve(skb, align);
> > > + buffptr = (void *)skb_tail_pointer(skb);
> >
> > Are you positive you need this? Because by default, the networking stack
> > will align to the maximum between your L1 cache line size and 64 bytes,
> > which should be a pretty good alignment guarantee.
>
> Now if L1 cache line size is 128,
> the skb buffer is also aligned to 128, isn't it?
> So this code doesn't make sense.
Although the above cache-alignment operation isn't necessary,
we should add the address adjustment because of the restriction of the hardware
specification.
netdev_alloc_skb_ip_align() allocates a cache-aligned buffer
and adds 2 bytes to skb->data via skb_reserve(skb, NET_IP_ALIGN).
Then skb->data points to "aligned address + 2 bytes".
When we call dma_map_single() with skb->data, it might return the aligned address,
and there might not be 2 bytes of space.
On the other hand, according to the hardware specification,
the Rx buffer address set in the descriptor is assumed to satisfy the following:
- the Rx address is 4-byte aligned,
- the Rx address begins with 2 bytes of headroom; data will be placed starting at (buffer+2).
Therefore, to make headroom in front of the address returned from ave_dma_map(),
I think that the buffer address should be adjusted like this:
skb = netdev_alloc_skb_ip_align(ndev, AVE_MAX_ETHFRAME);
paddr = ave_dma_map(ndev, &priv->rx.desc[entry],
skb->data - NET_IP_ALIGN,
AVE_MAX_ETHFRAME + NET_IP_ALIGN, DMA_FROM_DEVICE);
ave_wdesc_addr(ndev, AVE_DESCID_RX, entry, 4, paddr);
I'll apply this code in the next patch.
BTW, since the Tx buffer address doesn't have any restrictions, an adjustment
like this isn't necessary for Tx.
> > > +
> > > + /* enable clock */
> > > + priv->clk = devm_clk_get(dev, NULL);
> > > + if (IS_ERR(priv->clk))
> > > + priv->clk = NULL;
> > > + clk_prepare_enable(priv->clk);
> >
> > Same here with the clock, the block is clocked, so it can consume some
> > amount of power, just do the necessary HW initialization with the clock
> > enabled, then defer until ndo_open() before turning it back on.
There are a number of functions that need the clock enabled and the "block reset"
operation performed, like mdiobus_register(), phy_connect(), and so on.
I tried to move such functions to ndo_open() to defer enabling the clock until ndo_open().
However, the driver didn't work because of hardware restrictions.
I think it's hard to change this sequence.
---
Best Regards,
Kunihiko Hayashi
^ permalink raw reply
* Re: [patch net-next 02/12] ipmr: Add reference count to MFC entries
From: Nikolay Aleksandrov @ 2017-09-21 11:56 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-3-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Next commits will introduce MFC notifications through the atomic
> fib_notification chain, thus allowing modules to be aware of MFC entries.
>
> Due to the fact that modules may need to hold a reference to an MFC entry,
> add reference count to MFC entries to prevent them from being freed while
> these modules use them.
>
> The reference counting is done only on resolved MFC entries currently.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> include/linux/mroute.h | 20 ++++++++++++++++++++
> net/ipv4/ipmr.c | 8 +++++---
> 2 files changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
> index d7f6333..2f88e3d 100644
> --- a/include/linux/mroute.h
> +++ b/include/linux/mroute.h
> @@ -138,6 +138,7 @@ struct mfc_cache {
> unsigned long wrong_if;
> unsigned long lastuse;
> unsigned char ttls[MAXVIFS];
> + refcount_t refcount;
There's a struct comment above that has a short description for each entry.
I know it seems redundant, but could you please add one for this?
> } res;
> } mfc_un;
> struct list_head list;
> @@ -148,4 +149,23 @@ struct rtmsg;
> int ipmr_get_route(struct net *net, struct sk_buff *skb,
> __be32 saddr, __be32 daddr,
> struct rtmsg *rtm, u32 portid);
> +
> +#ifdef CONFIG_IP_MROUTE
> +void ipmr_cache_free(struct mfc_cache *mfc_cache);
> +#else
> +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
> +{
> +}
> +#endif
> +
> +static inline void ipmr_cache_put(struct mfc_cache *c)
> +{
> + if (refcount_dec_and_test(&c->mfc_un.res.refcount))
> + ipmr_cache_free(c);
> +}
> +static inline void ipmr_cache_hold(struct mfc_cache *c)
> +{
> + refcount_inc(&c->mfc_un.res.refcount);
> +}
> +
> #endif
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index c9b3e6e..86dc5f9 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -652,10 +652,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
> kmem_cache_free(mrt_cachep, c);
> }
>
> -static inline void ipmr_cache_free(struct mfc_cache *c)
> +void ipmr_cache_free(struct mfc_cache *c)
> {
> call_rcu(&c->rcu, ipmr_cache_free_rcu);
> }
> +EXPORT_SYMBOL(ipmr_cache_free);
>
> /* Destroy an unresolved cache entry, killing queued skbs
> * and reporting error to netlink readers.
> @@ -949,6 +950,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
> if (c) {
> c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
> c->mfc_un.res.minvif = MAXVIFS;
> + refcount_set(&c->mfc_un.res.refcount, 1);
> }
> return c;
> }
> @@ -1162,7 +1164,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
> rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> list_del_rcu(&c->list);
> mroute_netlink_event(mrt, c, RTM_DELROUTE);
> - ipmr_cache_free(c);
> + ipmr_cache_put(c);
>
> return 0;
> }
> @@ -1264,7 +1266,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
> rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> list_del_rcu(&c->list);
> mroute_netlink_event(mrt, c, RTM_DELROUTE);
> - ipmr_cache_free(c);
> + ipmr_cache_put(c);
> }
>
> if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
>
^ permalink raw reply
* Re: [patch net-next 04/12] ipmr: Send FIB notifications on MFC and VIF entries
From: Nikolay Aleksandrov @ 2017-09-21 11:48 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-5-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Use the newly introduced notification chain to send events upon VIF and MFC
> addition and deletion. The MFC notifications are sent only on resolved MFC
> entries, as unresolved cannot be offloaded.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> net/ipv4/ipmr.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 53 insertions(+)
>
LGTM, I only wish we could consolidate all of these call_ipmr_mfc_entry_notifiers()
calls inside mroute_netlink_event() but it will need an additional argument for the
ADD vs REPLACE cases. Anyway,
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9d331a74..7891d95 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -627,6 +627,27 @@ static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
> return call_fib_notifier(nb, net, event_type, &info.info);
> }
>
> +static int call_ipmr_vif_entry_notifiers(struct net *net,
> + enum fib_event_type event_type,
> + struct vif_device *vif,
> + vifi_t vif_index, u32 tb_id)
> +{
> + struct vif_entry_notifier_info info = {
> + .info = {
> + .family = RTNL_FAMILY_IPMR,
> + .net = net,
> + },
> + .dev = vif->dev,
> + .vif_index = vif_index,
> + .vif_flags = vif->flags,
> + .tb_id = tb_id,
> + };
> +
> + ASSERT_RTNL();
> + net->ipv4.ipmr_seq++;
> + return call_fib_notifiers(net, event_type, &info.info);
> +}
> +
> static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
> struct net *net,
> enum fib_event_type event_type,
> @@ -644,6 +665,24 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
> return call_fib_notifier(nb, net, event_type, &info.info);
> }
>
> +static int call_ipmr_mfc_entry_notifiers(struct net *net,
> + enum fib_event_type event_type,
> + struct mfc_cache *mfc, u32 tb_id)
> +{
> + struct mfc_entry_notifier_info info = {
> + .info = {
> + .family = RTNL_FAMILY_IPMR,
> + .net = net,
> + },
> + .mfc = mfc,
> + .tb_id = tb_id
> + };
> +
> + ASSERT_RTNL();
> + net->ipv4.ipmr_seq++;
> + return call_fib_notifiers(net, event_type, &info.info);
> +}
> +
> /**
> * vif_delete - Delete a VIF entry
> * @notify: Set to 1, if the caller is a notifier_call
> @@ -651,6 +690,7 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
> static int vif_delete(struct mr_table *mrt, int vifi, int notify,
> struct list_head *head)
> {
> + struct net *net = read_pnet(&mrt->net);
> struct vif_device *v;
> struct net_device *dev;
> struct in_device *in_dev;
> @@ -660,6 +700,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
>
> v = &mrt->vif_table[vifi];
>
> + if (VIF_EXISTS(mrt, vifi))
> + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
> + mrt->id);
> +
> write_lock_bh(&mrt_lock);
> dev = v->dev;
> v->dev = NULL;
> @@ -909,6 +953,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
> if (vifi+1 > mrt->maxvif)
> mrt->maxvif = vifi+1;
> write_unlock_bh(&mrt_lock);
> + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
> return 0;
> }
>
> @@ -1209,6 +1254,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
>
> static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
> {
> + struct net *net = read_pnet(&mrt->net);
> struct mfc_cache *c;
>
> /* The entries are added/deleted only under RTNL */
> @@ -1220,6 +1266,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
> return -ENOENT;
> rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> list_del_rcu(&c->list);
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
> mroute_netlink_event(mrt, c, RTM_DELROUTE);
> ipmr_cache_put(c);
>
> @@ -1248,6 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> if (!mrtsock)
> c->mfc_flags |= MFC_STATIC;
> write_unlock_bh(&mrt_lock);
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
> + mrt->id);
> mroute_netlink_event(mrt, c, RTM_NEWROUTE);
> return 0;
> }
> @@ -1297,6 +1346,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> ipmr_cache_resolve(net, mrt, uc, c);
> ipmr_cache_free(uc);
> }
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
> mroute_netlink_event(mrt, c, RTM_NEWROUTE);
> return 0;
> }
> @@ -1304,6 +1354,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> /* Close the multicast socket, and clear the vif tables etc */
> static void mroute_clean_tables(struct mr_table *mrt, bool all)
> {
> + struct net *net = read_pnet(&mrt->net);
> struct mfc_cache *c, *tmp;
> LIST_HEAD(list);
> int i;
> @@ -1322,6 +1373,8 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
> continue;
> rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> list_del_rcu(&c->list);
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
> + mrt->id);
> mroute_netlink_event(mrt, c, RTM_DELROUTE);
> ipmr_cache_put(c);
> }
>
^ permalink raw reply
* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 11:31 UTC (permalink / raw)
To: Eric Dumazet
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <1505991826.29839.124.camel@edumazet-glaptop3.roam.corp.google.com>
W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
> OK we have two problems here
>
> 1) We need to unify skb_dst_force() ( for net tree )
>
> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
> lower device. This will considerably help your performance.
>
>
> For 1), this is what I had in mind, can you try it ?
>
> Thanks a lot !
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
> {
> if (dst)
> - atomic_inc(&dst->__refcnt);
> + dst_hold(dst);
> return dst;
> }
>
> @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
> __skb_dst_copy(nskb, oskb->_skb_refdst);
> }
>
> -/**
> - * skb_dst_force - makes sure skb dst is refcounted
> - * @skb: buffer
> - *
> - * If dst is not yet refcounted, let's do it
> - */
> -static inline void skb_dst_force(struct sk_buff *skb)
> -{
> - if (skb_dst_is_noref(skb)) {
> - WARN_ON(!rcu_read_lock_held());
> - skb->_skb_refdst &= ~SKB_DST_NOREF;
> - dst_clone(skb_dst(skb));
> - }
> -}
> -
> /**
> * dst_hold_safe - Take a reference on a dst if possible
> * @dst: pointer to dst entry
> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct sk_buff *skb)
> }
> }
>
> +/**
> + * skb_dst_force - makes sure skb dst is refcounted
> + * @skb: buffer
> + *
> + * If dst is not yet refcounted, let's do it
> + */
> +static inline void skb_dst_force(struct sk_buff *skb)
> +{
> + if (skb_dst_is_noref(skb)) {
> + struct dst_entry *dst = skb_dst(skb);
> +
> + WARN_ON(!rcu_read_lock_held());
> + if (!dst_hold_safe(dst))
> + dst = NULL;
> + skb->_skb_refdst = (unsigned long)dst;
> + }
> +}
>
> /**
> * __skb_tunnel_rx - prepare skb for rx reinsert
>
>
Patch applied - so far no problems - and no warnings in dmesg
^ permalink raw reply
* Re: [patch net-next 05/12] net: ipmr: Add MFC offload indication
From: Nikolay Aleksandrov @ 2017-09-21 11:27 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-6-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Allow drivers, registered to the fib notification chain indicate whether a
> multicast MFC route is offloaded or not, similarly to unicast routes. The
> indication of whether a route is offloaded is done using the mfc_flags
> field on an mfc_cache struct, and the information is sent to the userspace
> via the RTNetlink interface only.
>
> Currently, MFC routes are either offloaded or not, thus there is no need to
> add per-VIF offload indication.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> include/linux/mroute.h | 1 +
> net/ipv4/ipmr.c | 3 +++
> 2 files changed, 4 insertions(+)
>
> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
> index ba5a976..9b64587 100644
> --- a/include/linux/mroute.h
> +++ b/include/linux/mroute.h
> @@ -93,6 +93,7 @@ struct mr_table {
> */
> enum {
> MFC_STATIC = BIT(0),
> + MFC_OFFLOAD = BIT(1),
> };
It may be redundant, but above this enum is a comment with flag descriptions;
since you're adding a new flag, could you also please add a short
description above?
>
> struct mfc_cache_cmp_arg {
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 7891d95..6804885 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -2268,6 +2268,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
> nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
> return -EMSGSIZE;
>
> + if (c->mfc_flags & MFC_OFFLOAD)
> + rtm->rtm_flags |= RTNH_F_OFFLOAD;
> +
> if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
> return -EMSGSIZE;
>
>
^ permalink raw reply
* [PATCH] net: phy: Fix truncation of large IRQ numbers in phy_attached_print()
From: Geert Uytterhoeven @ 2017-09-21 11:27 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David S . Miller
Cc: Romain Perier, netdev, linux-kernel, Geert Uytterhoeven
Given NR_IRQS is 2048 on sparc64, and even 32784 on alpha, 3 digits is
not enough to represent interrupt numbers on all architectures. Hence
PHY interrupt numbers may be truncated during printing.
Increase the buffer size from 4 to 8 bytes to fix this.
Fixes: 5e369aefdce4818c ("net: stmmac: Delete dead code for MDIO registration")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
drivers/net/phy/phy_device.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8cf0c5901f95870f..67f25ac29025c539 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -879,7 +879,7 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
{
const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
char *irq_str;
- char irq_num[4];
+ char irq_num[8];
switch(phydev->irq) {
case PHY_POLL:
--
2.7.4
^ permalink raw reply related
* [PATCH net-next 10/10] net: hns3: Add mqprio support when interacting with network stack
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
When using tc qdisc to configure the DCB parameters, dcb_ops->setup_tc
is used to tell the hclge_dcb module to do the setup.
When using lldptool to configure the DCB parameters, the hclge_dcb module
calls client_ops->setup_tc to tell the network stack which queues
and priorities are used for a specific tc.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 135 +++++++++++++++++----
1 file changed, 111 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 11dab26..31fcda4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -196,6 +196,32 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector)
tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
}
+static int hns3_nic_set_real_num_queue(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ unsigned int queue_size = kinfo->rss_size * kinfo->num_tc;
+ int ret;
+
+ ret = netif_set_real_num_tx_queues(netdev, queue_size);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_tx_queues fail, ret=%d!\n",
+ ret);
+ return ret;
+ }
+
+ ret = netif_set_real_num_rx_queues(netdev, queue_size);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -232,26 +258,13 @@ static int hns3_nic_net_up(struct net_device *netdev)
static int hns3_nic_net_open(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
int ret;
netif_carrier_off(netdev);
- ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps);
- if (ret) {
- netdev_err(netdev,
- "netif_set_real_num_tx_queues fail, ret=%d!\n",
- ret);
- return ret;
- }
-
- ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps);
- if (ret) {
- netdev_err(netdev,
- "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ ret = hns3_nic_set_real_num_queue(netdev);
+ if (ret)
return ret;
- }
ret = hns3_nic_net_up(netdev);
if (ret) {
@@ -1193,32 +1206,40 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
}
}
-static int hns3_setup_tc(struct net_device *netdev, u8 tc)
+static int hns3_setup_tc(struct net_device *netdev, u8 tc, u8 *prio_tc)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = priv->ae_handle;
struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ bool if_running = netif_running(netdev);
unsigned int i;
int ret;
if (tc > HNAE3_MAX_TC)
return -EINVAL;
- if (kinfo->num_tc == tc)
- return 0;
-
if (!netdev)
return -EINVAL;
- if (!tc) {
+ if (if_running) {
+ (void)hns3_nic_net_stop(netdev);
+ msleep(100);
+ }
+
+ ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
+ kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
+ if (ret)
+ goto err_out;
+
+ if (tc <= 1) {
netdev_reset_tc(netdev);
- return 0;
+ goto out;
}
/* Set num_tc for netdev */
ret = netdev_set_num_tc(netdev, tc);
if (ret)
- return ret;
+ goto err_out;
/* Set per TC queues for the VSI */
for (i = 0; i < HNAE3_MAX_TC; i++) {
@@ -1229,7 +1250,14 @@ static int hns3_setup_tc(struct net_device *netdev, u8 tc)
kinfo->tc_info[i].tqp_offset);
}
- return 0;
+out:
+ ret = hns3_nic_set_real_num_queue(netdev);
+
+err_out:
+ if (if_running)
+ (void)hns3_nic_net_open(netdev);
+
+ return ret;
}
static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -1240,7 +1268,7 @@ static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
if (type != TC_SETUP_MQPRIO)
return -EOPNOTSUPP;
- return hns3_setup_tc(dev, mqprio->num_tc);
+ return hns3_setup_tc(dev, mqprio->num_tc, mqprio->prio_tc_map);
}
static int hns3_vlan_rx_add_vid(struct net_device *netdev,
@@ -2848,10 +2876,69 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
}
}
+static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct net_device *ndev = kinfo->netdev;
+ bool if_running = netif_running(ndev);
+ int ret;
+ u8 i;
+
+ if (tc > HNAE3_MAX_TC)
+ return -EINVAL;
+
+ if (!ndev)
+ return -ENODEV;
+
+ ret = netdev_set_num_tc(ndev, tc);
+ if (ret)
+ return ret;
+
+ if (if_running) {
+ (void)hns3_nic_net_stop(ndev);
+ msleep(100);
+ }
+
+ ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ?
+ kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP;
+ if (ret)
+ goto err_out;
+
+ if (tc <= 1) {
+ netdev_reset_tc(ndev);
+ goto out;
+ }
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ struct hnae3_tc_info *tc_info = &kinfo->tc_info[i];
+
+ if (tc_info->enable)
+ netdev_set_tc_queue(ndev,
+ tc_info->tc,
+ tc_info->tqp_count,
+ tc_info->tqp_offset);
+ }
+
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+ netdev_set_prio_tc_map(ndev, i,
+ kinfo->prio_tc[i]);
+ }
+
+out:
+ ret = hns3_nic_set_real_num_queue(ndev);
+
+err_out:
+ if (if_running)
+ (void)hns3_nic_net_open(ndev);
+
+ return ret;
+}
+
const struct hnae3_client_ops client_ops = {
.init_instance = hns3_client_init,
.uninit_instance = hns3_client_uninit,
.link_status_change = hns3_link_status_change,
+ .setup_tc = hns3_client_setup_tc,
};
/* hns3_init_module - Driver registration routine
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 09/10] net: hns3: Setting for fc_mode and dcb enable flag in TM module
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
Now that the DCB feature is supported, fc_mode and the DCB enable flag
must be set according to the DCB parameters.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 34 +++++++++++++++++++---
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index e158e66..cc5efda 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -487,7 +487,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
hdev->tm_info.prio_tc[i] =
(i >= hdev->tm_info.num_tc) ? 0 : i;
- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+ /* DCB is enabled if we have more than 1 TC */
+ if (hdev->tm_info.num_tc > 1)
+ hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+ else
+ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
}
static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
@@ -513,6 +517,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
}
}
+static void hclge_pfc_info_init(struct hclge_dev *hdev)
+{
+ if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) {
+ if (hdev->fc_mode_last_time == HCLGE_FC_PFC)
+ dev_warn(&hdev->pdev->dev,
+ "DCB is disable, but last mode is FC_PFC\n");
+
+ hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
+ } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
+ /* fc_mode_last_time record the last fc_mode when
+ * DCB is enabled, so that fc_mode can be set to
+ * the correct value when DCB is disabled.
+ */
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ hdev->tm_info.fc_mode = HCLGE_FC_PFC;
+ }
+}
+
static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
{
if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
@@ -525,8 +547,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
hclge_tm_vport_info_update(hdev);
- hdev->tm_info.fc_mode = HCLGE_FC_NONE;
- hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ hclge_pfc_info_init(hdev);
return 0;
}
@@ -1159,8 +1180,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev)
int hclge_tm_schd_init(struct hclge_dev *hdev)
{
- int ret = hclge_tm_schd_info_init(hdev);
+ int ret;
+
+ /* fc_mode is HCLGE_FC_FULL on reset */
+ hdev->tm_info.fc_mode = HCLGE_FC_FULL;
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ ret = hclge_tm_schd_info_init(hdev);
if (ret)
return ret;
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 08/10] net: hns3: Add dcb netlink interface for the support of DCB feature
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds the dcb netlink interface by calling the interface from
the hclge_dcb module.
This patch also updates the Makefile in order to build the hns3_dcbnl module.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +
.../ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c | 106 +++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 7 ++
4 files changed, 117 insertions(+)
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 7023dc87..d2b20d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -11,3 +11,5 @@ hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
obj-$(CONFIG_HNS3_ENET) += hns3.o
hns3-objs = hns3_enet.o hns3_ethtool.o
+
+hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
new file mode 100644
index 0000000..9832172
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+static
+int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_getets)
+ return h->kinfo.dcb_ops->ieee_getets(h, ets);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_setets)
+ return h->kinfo.dcb_ops->ieee_setets(h, ets);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_getpfc)
+ return h->kinfo.dcb_ops->ieee_getpfc(h, pfc);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_setpfc)
+ return h->kinfo.dcb_ops->ieee_setpfc(h, pfc);
+
+ return -EOPNOTSUPP;
+}
+
+/* DCBX configuration */
+static u8 hns3_dcbnl_getdcbx(struct net_device *ndev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->getdcbx)
+ return h->kinfo.dcb_ops->getdcbx(h);
+
+ return 0;
+}
+
+/* return 0 if successful, otherwise fail */
+static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->setdcbx)
+ return h->kinfo.dcb_ops->setdcbx(h, mode);
+
+ return 1;
+}
+
+static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = {
+ .ieee_getets = hns3_dcbnl_ieee_getets,
+ .ieee_setets = hns3_dcbnl_ieee_setets,
+ .ieee_getpfc = hns3_dcbnl_ieee_getpfc,
+ .ieee_setpfc = hns3_dcbnl_ieee_setpfc,
+ .getdcbx = hns3_dcbnl_getdcbx,
+ .setdcbx = hns3_dcbnl_setdcbx,
+};
+
+/* hns3_dcbnl_setup - DCBNL setup
+ * @handle: the corresponding vport handle
+ * Set up DCBNL
+ */
+void hns3_dcbnl_setup(struct hnae3_handle *handle)
+{
+ struct net_device *dev = handle->kinfo.netdev;
+
+ if (!handle->kinfo.dcb_ops)
+ return;
+
+ dev->dcbnl_ops = &hns3_dcbnl_ops;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 35369e1..11dab26 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -2790,6 +2790,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_reg_netdev_fail;
}
+ hns3_dcbnl_setup(handle);
+
/* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */
netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
index 7e87461..481eada 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -590,4 +590,11 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
void hns3_ethtool_set_ops(struct net_device *netdev);
int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+
+#ifdef CONFIG_HNS3_DCB
+void hns3_dcbnl_setup(struct hnae3_handle *handle);
+#else
+static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {}
+#endif
+
#endif
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 07/10] net: hns3: Add hclge_dcb module for the support of DCB feature
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
The hclge_dcb module calls the interfaces from hclge_main/tm
and provides the interface for the dcb netlink interface.
This patch also updates the Makefiles required to build the DCB
supported code in the HNS3 Ethernet driver and updates the existing
Kconfig file in the hisilicon folder.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
drivers/net/ethernet/hisilicon/Kconfig | 9 +
drivers/net/ethernet/hisilicon/hns3/hnae3.h | 20 ++
.../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 327 +++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 21 ++
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 25 +-
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 +
7 files changed, 401 insertions(+), 6 deletions(-)
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 91c7bdb..9d7cb03 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -103,4 +103,13 @@ config HNS3_ENET
family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
devices and their associated operations.
+config HNS3_DCB
+ bool "Hisilicon HNS3 Data Center Bridge Support"
+ default n
+ depends on HNS3 && HNS3_HCLGE && DCB
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver.
+
+ If unsure, say N.
+
endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 1a01cad..5a6fa53 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -28,6 +28,7 @@
*/
#include <linux/acpi.h>
+#include <linux/dcbnl.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -131,6 +132,7 @@ struct hnae3_client_ops {
int (*init_instance)(struct hnae3_handle *handle);
void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
void (*link_status_change)(struct hnae3_handle *handle, bool state);
+ int (*setup_tc)(struct hnae3_handle *handle, u8 tc);
};
#define HNAE3_CLIENT_NAME_LENGTH 16
@@ -363,6 +365,23 @@ struct hnae3_ae_ops {
u16 vlan, u8 qos, __be16 proto);
};
+struct hnae3_dcb_ops {
+ /* IEEE 802.1Qaz std */
+ int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *);
+ int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *);
+ int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *);
+ int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *);
+
+ /* DCBX configuration */
+ u8 (*getdcbx)(struct hnae3_handle *);
+ u8 (*setdcbx)(struct hnae3_handle *, u8);
+
+ /* TC setup */
+ int (*setup_tc)(struct hnae3_handle *, u8, u8 *);
+
+ int (*map_update)(struct hnae3_handle *);
+};
+
struct hnae3_ae_algo {
const struct hnae3_ae_ops *ops;
struct list_head node;
@@ -394,6 +413,7 @@ struct hnae3_knic_private_info {
u16 num_tqps; /* total number of TQPs in this handle */
struct hnae3_queue **tqp; /* array base of all TQPs in this instance */
+ const struct hnae3_dcb_ops *dcb_ops;
};
struct hnae3_roce_private_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 162e8a42..7023dc87 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -7,5 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_HNS3_HCLGE) += hclge.o
hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
+hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
+
obj-$(CONFIG_HNS3_ENET) += hns3.o
hns3-objs = hns3_enet.o hns3_ethtool.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
new file mode 100644
index 0000000..178333b
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hclge_main.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+#define BW_PERCENT 100
+
+static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev,
+ struct ieee_ets *ets)
+{
+ u8 i;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ hdev->tm_info.tc_info[i].tc_sch_mode =
+ HCLGE_SCH_MODE_SP;
+ hdev->tm_info.pg_info[0].tc_dwrr[i] = 0;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ hdev->tm_info.tc_info[i].tc_sch_mode =
+ HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.pg_info[0].tc_dwrr[i] =
+ ets->tc_tx_bw[i];
+ break;
+ default:
+ /* Hardware only supports SP (strict priority)
+ * or ETS (enhanced transmission selection)
+ * algorithms, if we receive some other value
+ * from dcbnl, then throw an error.
+ */
+ return -EINVAL;
+ }
+ }
+
+ return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc);
+}
+
+static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
+ struct ieee_ets *ets)
+{
+ u32 i;
+
+ memset(ets, 0, sizeof(*ets));
+ ets->willing = 1;
+ ets->ets_cap = hdev->tc_max;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
+ ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+
+ if (hdev->tm_info.tc_info[i].tc_sch_mode ==
+ HCLGE_SCH_MODE_SP)
+ ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT;
+ else
+ ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ }
+}
+
+/* IEEE std */
+static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ hclge_tm_info_to_ieee_ets(hdev, ets);
+
+ return 0;
+}
+
+static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+ u8 *tc, bool *changed)
+{
+ u32 total_ets_bw = 0;
+ u8 max_tc = 0;
+ u8 i;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ if (ets->prio_tc[i] >= hdev->tc_max ||
+ i >= hdev->tc_max)
+ return -EINVAL;
+
+ if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
+ *changed = true;
+
+ if (ets->prio_tc[i] > max_tc)
+ max_tc = ets->prio_tc[i];
+
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+ HCLGE_SCH_MODE_SP)
+ *changed = true;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+ HCLGE_SCH_MODE_DWRR)
+ *changed = true;
+
+ total_ets_bw += ets->tc_tx_bw[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (total_ets_bw != BW_PERCENT)
+ return -EINVAL;
+
+ *tc = max_tc + 1;
+ if (*tc != hdev->tm_info.num_tc)
+ *changed = true;
+
+ return 0;
+}
+
+static int hclge_map_update(struct hnae3_handle *h)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_tm_map_cfg(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_schd_mode_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_pause_setup_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_rss_init_hw(hdev);
+}
+
+static int hclge_client_setup_tc(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_client *client;
+ struct hnae3_handle *handle;
+ int ret;
+ u32 i;
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ handle = &vport[i].nic;
+ client = handle->client;
+
+ if (!client || !client->ops || !client->ops->setup_tc)
+ continue;
+
+ ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ bool map_changed = false;
+ u8 num_tc = 0;
+ int ret;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
+ if (ret)
+ return ret;
+
+ hclge_tm_schd_info_update(hdev, num_tc);
+
+ ret = hclge_ieee_ets_to_tm_info(hdev, ets);
+ if (ret)
+ return ret;
+
+ if (map_changed) {
+ ret = hclge_client_setup_tc(hdev);
+ if (ret)
+ return ret;
+ }
+
+ return hclge_tm_dwrr_cfg(hdev);
+}
+
+static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ u8 i, j, pfc_map, *prio_tc;
+
+ memset(pfc, 0, sizeof(*pfc));
+ pfc->pfc_cap = hdev->pfc_max;
+ prio_tc = hdev->tm_info.prio_tc;
+ pfc_map = hdev->tm_info.hw_pfc_map;
+
+ /* Pfc setting is based on TC */
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+ if ((prio_tc[j] == i) && (pfc_map & BIT(i)))
+ pfc->pfc_en |= BIT(j);
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ u8 i, j, pfc_map, *prio_tc;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ prio_tc = hdev->tm_info.prio_tc;
+ pfc_map = 0;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+ if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) {
+ pfc_map |= BIT(i);
+ break;
+ }
+ }
+ }
+
+ if (pfc_map == hdev->tm_info.hw_pfc_map)
+ return 0;
+
+ hdev->tm_info.hw_pfc_map = pfc_map;
+
+ return hclge_pause_setup_hw(hdev);
+}
+
+/* DCBX configuration */
+static u8 hclge_getdcbx(struct hnae3_handle *h)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ return hdev->dcbx_cap;
+}
+
+static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ /* No support for LLD_MANAGED modes or CEE */
+ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+ (mode & DCB_CAP_DCBX_VER_CEE) ||
+ !(mode & DCB_CAP_DCBX_HOST))
+ return 1;
+
+ hdev->dcbx_cap = mode;
+
+ return 0;
+}
+
+static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ if (tc > hdev->tc_max) {
+ dev_err(&hdev->pdev->dev,
+ "setup tc failed, tc(%u) > tc_max(%u)\n",
+ tc, hdev->tc_max);
+ return -EINVAL;
+ }
+
+ hclge_tm_schd_info_update(hdev, tc);
+
+ ret = hclge_tm_prio_tc_info_update(hdev, prio_tc);
+ if (ret)
+ return ret;
+
+ return hclge_tm_init_hw(hdev);
+}
+
+static const struct hnae3_dcb_ops hns3_dcb_ops = {
+ .ieee_getets = hclge_ieee_getets,
+ .ieee_setets = hclge_ieee_setets,
+ .ieee_getpfc = hclge_ieee_getpfc,
+ .ieee_setpfc = hclge_ieee_setpfc,
+ .getdcbx = hclge_getdcbx,
+ .setdcbx = hclge_setdcbx,
+ .setup_tc = hclge_setup_tc,
+ .map_update = hclge_map_update,
+};
+
+void hclge_dcb_ops_set(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_knic_private_info *kinfo;
+
+ /* Hdev does not support DCB or vport is
+ * not a pf, then dcb_ops is not set.
+ */
+ if (!hnae3_dev_dcb_supported(hdev) ||
+ vport->vport_id != 0)
+ return;
+
+ kinfo = &vport->nic.kinfo;
+ kinfo->dcb_ops = &hns3_dcb_ops;
+ hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
new file mode 100644
index 0000000..7d808ee
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_DCB_H__
+#define __HCLGE_DCB_H__
+
+#include "hclge_main.h"
+
+#ifdef CONFIG_HNS3_DCB
+void hclge_dcb_ops_set(struct hclge_dev *hdev);
+#else
+static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {}
+#endif
+
+#endif /* __HCLGE_DCB_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 49a11d5..28bd118 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -19,6 +19,7 @@
#include <linux/platform_device.h>
#include "hclge_cmd.h"
+#include "hclge_dcb.h"
#include "hclge_main.h"
#include "hclge_mdio.h"
#include "hclge_tm.h"
@@ -1057,7 +1058,7 @@ static int hclge_configure(struct hclge_dev *hdev)
hdev->hw.mac.phy_addr = cfg.phy_addr;
hdev->num_desc = cfg.tqp_desc_num;
hdev->tm_info.num_pg = 1;
- hdev->tm_info.num_tc = cfg.tc_num;
+ hdev->tc_max = cfg.tc_num;
hdev->tm_info.hw_pfc_map = 0;
ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
@@ -1066,15 +1067,25 @@ static int hclge_configure(struct hclge_dev *hdev)
return ret;
}
- if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) ||
- (hdev->tm_info.num_tc < 1)) {
+ if ((hdev->tc_max > HNAE3_MAX_TC) ||
+ (hdev->tc_max < 1)) {
dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
- hdev->tm_info.num_tc);
- hdev->tm_info.num_tc = 1;
+ hdev->tc_max);
+ hdev->tc_max = 1;
}
+ /* Dev does not support DCB */
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ hdev->tc_max = 1;
+ hdev->pfc_max = 0;
+ } else {
+ hdev->pfc_max = hdev->tc_max;
+ }
+
+ hdev->tm_info.num_tc = hdev->tc_max;
+
/* Currently not support uncontiuous tc */
- for (i = 0; i < cfg.tc_num; i++)
+ for (i = 0; i < hdev->tm_info.num_tc; i++)
hnae_set_bit(hdev->hw_tc_map, i, 1);
if (!hdev->num_vmdq_vport && !hdev->num_req_vfs)
@@ -4242,6 +4253,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ hclge_dcb_ops_set(hdev);
+
setup_timer(&hdev->service_timer, hclge_service_timer,
(unsigned long)hdev);
INIT_WORK(&hdev->service_task, hclge_service_task);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 394b587..7c66c00 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -421,8 +421,11 @@ struct hclge_dev {
#define HCLGE_FLAG_TC_BASE_SCH_MODE 1
#define HCLGE_FLAG_VNET_BASE_SCH_MODE 2
u8 tx_sch_mode;
+ u8 tc_max;
+ u8 pfc_max;
u8 default_up;
+ u8 dcbx_cap;
struct hclge_tm_info tm_info;
u16 num_msi;
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 06/10] net: hns3: Add some interface for the support of DCB feature
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds some interfaces and exports some interfaces from
hclge_tm and hclge_main to support the upcoming DCB feature.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 ++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 48 ++++++++++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 6 +++
4 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c27b460..49a11d5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -30,7 +30,6 @@
#define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
#define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
-static int hclge_rss_init_hw(struct hclge_dev *hdev);
static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
enum hclge_mta_dmac_sel_type mta_mac_sel,
bool enable);
@@ -2660,7 +2659,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
return hdev->rss_size_max;
}
-static int hclge_rss_init_hw(struct hclge_dev *hdev)
+int hclge_rss_init_hw(struct hclge_dev *hdev)
{
const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
struct hclge_vport *vport = hdev->vport;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 4fc36f0..394b587 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -515,4 +515,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue)
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
bool is_kill, u16 vlan, u8 qos, __be16 proto);
+
+int hclge_buffer_alloc(struct hclge_dev *hdev);
+int hclge_rss_init_hw(struct hclge_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 2bc7d63c..e158e66 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -884,10 +884,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tm_map_cfg(struct hclge_dev *hdev)
+int hclge_tm_map_cfg(struct hclge_dev *hdev)
{
int ret;
+ ret = hclge_up_to_tc_map(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_tm_pg_to_pri_map(hdev);
if (ret)
return ret;
@@ -995,7 +999,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
{
int ret;
@@ -1093,7 +1097,45 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
return ret;
}
- return hclge_up_to_tc_map(hdev);
+ return 0;
+}
+
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_knic_private_info *kinfo;
+ u32 i, k;
+
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+ if (prio_tc[i] >= hdev->tm_info.num_tc)
+ return -EINVAL;
+ hdev->tm_info.prio_tc[i] = prio_tc[i];
+
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ kinfo = &vport[k].nic.kinfo;
+ kinfo->prio_tc[i] = prio_tc[i];
+ }
+ }
+ return 0;
+}
+
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
+{
+ u8 i, bit_map = 0;
+
+ hdev->tm_info.num_tc = num_tc;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++)
+ bit_map |= BIT(i);
+
+ if (!bit_map) {
+ bit_map = 1;
+ hdev->tm_info.num_tc = 1;
+ }
+
+ hdev->hw_tc_map = bit_map;
+
+ hclge_tm_schd_info_init(hdev);
}
int hclge_tm_init_hw(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 19a01e4..bf59961 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -112,4 +112,10 @@ struct hclge_port_shapping_cmd {
int hclge_tm_schd_init(struct hclge_dev *hdev);
int hclge_pause_setup_hw(struct hclge_dev *hdev);
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev);
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
+int hclge_tm_map_cfg(struct hclge_dev *hdev);
+int hclge_tm_init_hw(struct hclge_dev *hdev);
#endif
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 05/10] net: hns3: Add tc-based TM support for sriov enabled port
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
When sriov is enabled and TM is in tc-based mode, the vf's TM
parameters are not set in the TM initialization process.
This patch adds tc-based TM support for sriov-enabled ports
using the information in the vport struct.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 49 ++++++++++++++--------
1 file changed, 31 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 33090d0..2bc7d63c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -389,13 +389,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id)
+static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode)
{
struct hclge_desc desc;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false);
- if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+ if (mode == HCLGE_SCH_MODE_DWRR)
desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
else
desc.data[1] = 0;
@@ -639,17 +639,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
int ret;
- u32 i;
+ u32 i, k;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
/* Cfg qs -> pri mapping, one by one mapping */
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
- ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i);
- if (ret)
- return ret;
- }
+ for (k = 0; k < hdev->num_alloc_vport; k++)
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_tm_qs_to_pri_map_cfg(
+ hdev, vport[k].qs_offset + i, i);
+ if (ret)
+ return ret;
+ }
} else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
- int k;
/* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */
for (k = 0; k < hdev->num_alloc_vport; k++)
for (i = 0; i < HNAE3_MAX_TC; i++) {
@@ -798,10 +799,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev)
static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
{
+ struct hclge_vport *vport = hdev->vport;
struct hclge_pg_info *pg_info;
u8 dwrr;
int ret;
- u32 i;
+ u32 i, k;
for (i = 0; i < hdev->tm_info.num_tc; i++) {
pg_info =
@@ -812,9 +814,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
if (ret)
return ret;
- ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr);
- if (ret)
- return ret;
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ ret = hclge_tm_qs_weight_cfg(
+ hdev, vport[k].qs_offset + i,
+ vport[k].dwrr);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -945,7 +951,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
return ret;
for (i = 0; i < kinfo->num_tc; i++) {
- ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i);
+ u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode;
+
+ ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i,
+ sch_mode);
if (ret)
return ret;
}
@@ -957,7 +966,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
int ret;
- u8 i;
+ u8 i, k;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
for (i = 0; i < hdev->tm_info.num_tc; i++) {
@@ -965,9 +974,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
if (ret)
return ret;
- ret = hclge_tm_qs_schd_mode_cfg(hdev, i);
- if (ret)
- return ret;
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ ret = hclge_tm_qs_schd_mode_cfg(
+ hdev, vport[k].qs_offset + i,
+ HCLGE_SCH_MODE_DWRR);
+ if (ret)
+ return ret;
+ }
}
} else {
for (i = 0; i < hdev->num_alloc_vport; i++) {
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 04/10] net: hns3: Add support for port shaper setting in TM module
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds a tm_port_shaper cmd and sets the port shaper
to HCLGE_ETHER_MAX_RATE in the TM initialization process.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 33 ++++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 4 +++
2 files changed, 37 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 0b4b5d9..33090d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -301,6 +301,35 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_port_shapping_cmd *shap_cfg_cmd;
+ struct hclge_desc desc;
+ u8 ir_u, ir_b, ir_s;
+ int ret;
+
+ ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE,
+ HCLGE_SHAPER_LVL_PORT,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false);
+ shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
+
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para,
+ BS_B, HCLGE_SHAPER_BS_U_DEF);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para,
+ BS_S, HCLGE_SHAPER_BS_S_DEF);
+ shap_cfg_cmd->port_shapping_para =
+ cpu_to_le32(shap_cfg_cmd->port_shapping_para);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
enum hclge_shap_bucket bucket, u8 pri_id,
u8 ir_b, u8 ir_u, u8 ir_s,
@@ -864,6 +893,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev)
{
int ret;
+ ret = hclge_tm_port_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_tm_pg_shaper_cfg(hdev);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 8ecd83c..19a01e4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -99,6 +99,10 @@ struct hclge_pfc_en_cmd {
u8 pri_en_bitmap;
};
+struct hclge_port_shapping_cmd {
+ __le32 port_shapping_para;
+};
+
#define hclge_tm_set_field(dest, string, val) \
hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 03/10] net: hns3: Add support for PFC setting in TM module
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds a pfc_pause_en cmd, and uses it to configure
the PFC option according to fc_mode in hdev->tm_info.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 68 ++++++++++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 5 ++
2 files changed, 68 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 73a75d7..0b4b5d9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+ u8 pfc_bitmap)
+{
+ struct hclge_desc desc;
+ struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false);
+
+ pfc->tx_rx_en_bitmap = tx_rx_bitmap;
+ pfc->pri_en_bitmap = pfc_bitmap;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
{
u8 tc;
@@ -969,20 +983,64 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
return hclge_tm_schd_mode_hw(hdev);
}
+static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
+{
+ u8 enable_bitmap = 0;
+
+ if (hdev->tm_info.fc_mode == HCLGE_FC_PFC)
+ enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK |
+ HCLGE_RX_MAC_PAUSE_EN_MSK;
+
+ return hclge_pfc_pause_en_cfg(hdev, enable_bitmap,
+ hdev->tm_info.hw_pfc_map);
+}
+
+static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+{
+ bool tx_en, rx_en;
+
+ switch (hdev->tm_info.fc_mode) {
+ case HCLGE_FC_NONE:
+ tx_en = false;
+ rx_en = false;
+ break;
+ case HCLGE_FC_RX_PAUSE:
+ tx_en = false;
+ rx_en = true;
+ break;
+ case HCLGE_FC_TX_PAUSE:
+ tx_en = true;
+ rx_en = false;
+ break;
+ case HCLGE_FC_FULL:
+ tx_en = true;
+ rx_en = true;
+ break;
+ default:
+ tx_en = true;
+ rx_en = true;
+ }
+
+ return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
+}
+
int hclge_pause_setup_hw(struct hclge_dev *hdev)
{
- bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC;
int ret;
u8 i;
- ret = hclge_mac_pause_en_cfg(hdev, en, en);
- if (ret)
- return ret;
+ if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+ return hclge_mac_pause_setup_hw(hdev);
- /* Only DCB-supported dev supports qset back pressure setting */
+ /* Only DCB-supported dev supports qset back pressure and pfc cmd */
if (!hnae3_dev_dcb_supported(hdev))
return 0;
+ /* When MAC is GE Mode, hdev does not support pfc setting */
+ ret = hclge_pfc_setup_hw(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
+
for (i = 0; i < hdev->tm_info.num_tc; i++) {
ret = hclge_tm_qs_bp_cfg(hdev, i);
if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 85158b0..8ecd83c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -94,6 +94,11 @@ struct hclge_bp_to_qs_map_cmd {
u32 rsvd1;
};
+struct hclge_pfc_en_cmd {
+ u8 tx_rx_en_bitmap;
+ u8 pri_en_bitmap;
+};
+
#define hclge_tm_set_field(dest, string, val) \
hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 02/10] net: hns3: Add support for dynamically buffer reallocation
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
Currently, buffer allocation can only happen at init. When
doing buffer reallocation after init, care must be taken
with the memory which priv_buf points to.
This patch fixes it by using dynamically allocated temporary
memory. Because we only do buffer reallocation at init or
when setting up the DCB parameters, and priv_buf is only
used during the buffer allocation process, it is ok to use
dynamically allocated temporary memory.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 5 +
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 150 +++++++++++----------
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 -
3 files changed, 87 insertions(+), 70 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index a81c6cb..6b6d28e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -322,6 +322,11 @@ struct hclge_shared_buf {
u32 buf_size;
};
+struct hclge_pkt_buf_alloc {
+ struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM];
+ struct hclge_shared_buf s_buf;
+};
+
#define HCLGE_RX_COM_WL_EN_B 15
struct hclge_rx_com_wl_buf {
__le16 high_wl;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dfe0fd2..c27b460 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1324,7 +1324,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
return 0;
}
-static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
+static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
/* TX buffer size is unit by 128 byte */
#define HCLGE_BUF_SIZE_UNIT_SHIFT 7
@@ -1340,7 +1341,7 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
for (i = 0; i < HCLGE_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
buf_size = priv->tx_buf_size;
req->tx_pkt_buff[i] =
cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
@@ -1357,9 +1358,10 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tx_buffer_alloc(struct hclge_dev *hdev)
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- int ret = hclge_cmd_alloc_tx_buff(hdev);
+ int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -1392,13 +1394,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
}
/* Get the number of pfc enabled TCs, which have private buffer */
-static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
int i, cnt = 0;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if ((hdev->tm_info.hw_pfc_map & BIT(i)) &&
priv->enable)
cnt++;
@@ -1408,13 +1411,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
}
/* Get the number of pfc disabled TCs, which have private buffer */
-static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
int i, cnt = 0;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
!(hdev->tm_info.hw_pfc_map & BIT(i)) &&
priv->enable)
@@ -1424,33 +1428,35 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
return cnt;
}
-static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
+static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 rx_priv = 0;
int i;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (priv->enable)
rx_priv += priv->buf_size;
}
return rx_priv;
}
-static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev)
+static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 tx_buf = 0, i;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
tx_buf += priv->tx_buf_size;
}
return tx_buf;
}
-static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
+static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc,
+ u32 rx_all)
{
u32 shared_buf_min, shared_buf_tc, shared_std;
int tc_num, pfc_enable_num;
@@ -1471,30 +1477,31 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
hdev->mps;
shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
- rx_priv = hclge_get_rx_priv_buff_alloced(hdev);
+ rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc);
if (rx_all <= rx_priv + shared_std)
return false;
shared_buf = rx_all - rx_priv;
- hdev->s_buf.buf_size = shared_buf;
- hdev->s_buf.self.high = shared_buf;
- hdev->s_buf.self.low = 2 * hdev->mps;
+ buf_alloc->s_buf.buf_size = shared_buf;
+ buf_alloc->s_buf.self.high = shared_buf;
+ buf_alloc->s_buf.self.low = 2 * hdev->mps;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
if ((hdev->hw_tc_map & BIT(i)) &&
(hdev->tm_info.hw_pfc_map & BIT(i))) {
- hdev->s_buf.tc_thrd[i].low = hdev->mps;
- hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps;
} else {
- hdev->s_buf.tc_thrd[i].low = 0;
- hdev->s_buf.tc_thrd[i].high = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = 0;
+ buf_alloc->s_buf.tc_thrd[i].high = hdev->mps;
}
}
return true;
}
-static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
+static int hclge_tx_buffer_calc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 i, total_size;
@@ -1503,7 +1510,7 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
/* alloc tx buffer for all enabled tc */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (total_size < HCLGE_DEFAULT_TX_BUF)
return -ENOMEM;
@@ -1521,22 +1528,24 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
* @hdev: pointer to struct hclge_dev
+ * @buf_alloc: pointer to buffer calculation data
* @return: 0: calculate sucessful, negative: fail
*/
-int hclge_rx_buffer_calc(struct hclge_dev *hdev)
+int hclge_rx_buffer_calc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
u32 rx_all = hdev->pkt_buf_size;
int no_pfc_priv_num, pfc_priv_num;
struct hclge_priv_buf *priv;
int i;
- rx_all -= hclge_get_tx_buff_alloced(hdev);
+ rx_all -= hclge_get_tx_buff_alloced(buf_alloc);
/* When DCB is not supported, rx private
* buffer is not allocated.
*/
if (!hnae3_dev_dcb_supported(hdev)) {
- if (!hclge_is_rx_buf_ok(hdev, rx_all))
+ if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return -ENOMEM;
return 0;
@@ -1544,7 +1553,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
/* step 1, try to alloc private buffer for all enabled tc */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i)) {
priv->enable = 1;
if (hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1565,14 +1574,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
}
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 2, try to decrease the buffer size of
* no pfc TC's private buffer
*/
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
priv->enable = 0;
priv->wl.low = 0;
@@ -1595,18 +1604,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
}
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 3, try to reduce the number of pfc disabled TCs,
* which have private buffer
*/
/* get the total no pfc enable TC number, which have private buffer */
- no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev);
+ no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc);
/* let the last to be cleared first */
for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
!(hdev->tm_info.hw_pfc_map & BIT(i))) {
@@ -1618,22 +1627,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
no_pfc_priv_num--;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
no_pfc_priv_num == 0)
break;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 4, try to reduce the number of pfc enabled TCs
* which have private buffer.
*/
- pfc_priv_num = hclge_get_pfc_priv_num(hdev);
+ pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc);
/* let the last to be cleared first */
for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1645,17 +1654,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
pfc_priv_num--;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
pfc_priv_num == 0)
break;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
return -ENOMEM;
}
-static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
+static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_rx_priv_buff *req;
struct hclge_desc desc;
@@ -1667,7 +1677,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
/* Alloc private buffer TCs */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- struct hclge_priv_buf *priv = &hdev->priv_buf[i];
+ struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
req->buf_num[i] =
cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S);
@@ -1676,7 +1686,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
}
req->shared_buf =
- cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+ cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
(1 << HCLGE_TC0_PRI_BUF_EN_B));
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1691,7 +1701,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
-static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
+static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_rx_priv_wl_buf *req;
struct hclge_priv_buf *priv;
@@ -1711,7 +1722,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
- priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j];
+ u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j;
+
+ priv = &buf_alloc->priv_buf[idx];
req->tc_wl[j].high =
cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
req->tc_wl[j].high |=
@@ -1736,9 +1749,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
return 0;
}
-static int hclge_common_thrd_config(struct hclge_dev *hdev)
+static int hclge_common_thrd_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_shared_buf *s_buf = &hdev->s_buf;
+ struct hclge_shared_buf *s_buf = &buf_alloc->s_buf;
struct hclge_rx_com_thrd *req;
struct hclge_desc desc[2];
struct hclge_tc_thrd *tc;
@@ -1782,9 +1796,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev)
return 0;
}
-static int hclge_common_wl_config(struct hclge_dev *hdev)
+static int hclge_common_wl_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_shared_buf *buf = &hdev->s_buf;
+ struct hclge_shared_buf *buf = &buf_alloc->s_buf;
struct hclge_rx_com_wl *req;
struct hclge_desc desc;
int ret;
@@ -1814,69 +1829,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev)
int hclge_buffer_alloc(struct hclge_dev *hdev)
{
+ struct hclge_pkt_buf_alloc *pkt_buf;
int ret;
- hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
- sizeof(struct hclge_priv_buf),
- GFP_KERNEL | __GFP_ZERO);
- if (!hdev->priv_buf)
+ pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL);
+ if (!pkt_buf)
return -ENOMEM;
- ret = hclge_tx_buffer_calc(hdev);
+ ret = hclge_tx_buffer_calc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc tx buffer size for all TCs %d\n", ret);
- return ret;
+ goto out;
}
- ret = hclge_tx_buffer_alloc(hdev);
+ ret = hclge_tx_buffer_alloc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not alloc tx buffers %d\n", ret);
- return ret;
+ goto out;
}
- ret = hclge_rx_buffer_calc(hdev);
+ ret = hclge_rx_buffer_calc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc rx priv buffer size for all TCs %d\n",
ret);
- return ret;
+ goto out;
}
- ret = hclge_rx_priv_buf_alloc(hdev);
+ ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n",
ret);
- return ret;
+ goto out;
}
if (hnae3_dev_dcb_supported(hdev)) {
- ret = hclge_rx_priv_wl_config(hdev);
+ ret = hclge_rx_priv_wl_config(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not configure rx private waterline %d\n",
ret);
- return ret;
+ goto out;
}
- ret = hclge_common_thrd_config(hdev);
+ ret = hclge_common_thrd_config(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not configure common threshold %d\n",
ret);
- return ret;
+ goto out;
}
}
- ret = hclge_common_wl_config(hdev);
- if (ret) {
+ ret = hclge_common_wl_config(hdev, pkt_buf);
+ if (ret)
dev_err(&hdev->pdev->dev,
"could not configure common waterline %d\n", ret);
- return ret;
- }
- return 0;
+out:
+ kfree(pkt_buf);
+ return ret;
}
static int hclge_init_roce_base_info(struct hclge_vport *vport)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9fcfd93..4fc36f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -463,8 +463,6 @@ struct hclge_dev {
u32 pkt_buf_size; /* Total pf buf size for tx/rx */
u32 mps; /* Max packet size */
- struct hclge_priv_buf *priv_buf;
- struct hclge_shared_buf s_buf;
enum hclge_mta_dmac_sel_type mta_mac_sel_type;
bool enable_mta; /* Mutilcast filter enable */
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 01/10] net: hns3: Support for dynamically assigning tx buffer to TC
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds support for dynamically assigning tx buffer to
a TC when the TC is enabled.
This saves buffer for the rx direction to avoid packet loss.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 1 +
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 69 ++++++++++++++++++----
2 files changed, 60 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 758cf39..a81c6cb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -311,6 +311,7 @@ struct hclge_tc_thrd {
struct hclge_priv_buf {
struct hclge_waterline wl; /* Waterline for low and high*/
u32 buf_size; /* TC private buffer size */
+ u32 tx_buf_size;
u32 enable; /* Enable TC private buffer or not */
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d27618b..dfe0fd2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1324,23 +1324,28 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
return 0;
}
-static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
+static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
{
/* TX buffer size is unit by 128 byte */
#define HCLGE_BUF_SIZE_UNIT_SHIFT 7
#define HCLGE_BUF_SIZE_UPDATE_EN_MSK BIT(15)
struct hclge_tx_buff_alloc *req;
+ struct hclge_priv_buf *priv;
struct hclge_desc desc;
+ u32 buf_size;
int ret;
u8 i;
req = (struct hclge_tx_buff_alloc *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
- for (i = 0; i < HCLGE_TC_NUM; i++)
+ for (i = 0; i < HCLGE_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ buf_size = priv->tx_buf_size;
req->tx_pkt_buff[i] =
cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
HCLGE_BUF_SIZE_UPDATE_EN_MSK);
+ }
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -1352,9 +1357,9 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
return 0;
}
-static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size)
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev)
{
- int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size);
+ int ret = hclge_cmd_alloc_tx_buff(hdev);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -1433,6 +1438,18 @@ static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
return rx_priv;
}
+static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ u32 tx_buf = 0, i;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ tx_buf += priv->tx_buf_size;
+ }
+ return tx_buf;
+}
+
static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
{
u32 shared_buf_min, shared_buf_tc, shared_std;
@@ -1477,18 +1494,44 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
return true;
}
+static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ u32 i, total_size;
+
+ total_size = hdev->pkt_buf_size;
+
+ /* alloc tx buffer for all enabled tc */
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+
+ if (total_size < HCLGE_DEFAULT_TX_BUF)
+ return -ENOMEM;
+
+ if (hdev->hw_tc_map & BIT(i))
+ priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+ else
+ priv->tx_buf_size = 0;
+
+ total_size -= priv->tx_buf_size;
+ }
+
+ return 0;
+}
+
/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
* @hdev: pointer to struct hclge_dev
- * @tx_size: the allocated tx buffer for all TCs
* @return: 0: calculate sucessful, negative: fail
*/
-int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
+int hclge_rx_buffer_calc(struct hclge_dev *hdev)
{
- u32 rx_all = hdev->pkt_buf_size - tx_size;
+ u32 rx_all = hdev->pkt_buf_size;
int no_pfc_priv_num, pfc_priv_num;
struct hclge_priv_buf *priv;
int i;
+ rx_all -= hclge_get_tx_buff_alloced(hdev);
+
/* When DCB is not supported, rx private
* buffer is not allocated.
*/
@@ -1771,7 +1814,6 @@ static int hclge_common_wl_config(struct hclge_dev *hdev)
int hclge_buffer_alloc(struct hclge_dev *hdev)
{
- u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF;
int ret;
hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
@@ -1780,14 +1822,21 @@ int hclge_buffer_alloc(struct hclge_dev *hdev)
if (!hdev->priv_buf)
return -ENOMEM;
- ret = hclge_tx_buffer_alloc(hdev, tx_buf_size);
+ ret = hclge_tx_buffer_calc(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not calc tx buffer size for all TCs %d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_tx_buffer_alloc(hdev);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not alloc tx buffers %d\n", ret);
return ret;
}
- ret = hclge_rx_buffer_calc(hdev, tx_buf_size);
+ ret = hclge_rx_buffer_calc(hdev);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc rx priv buffer size for all TCs %d\n",
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 00/10] Add support for DCB feature in hns3 driver
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
The patchset contains some enhancements related to DCB before
adding support for the DCB feature.
This patchset depends on the following patchset:
https://patchwork.ozlabs.org/cover/815646/
https://patchwork.ozlabs.org/cover/816145/
High Level Architecture:
[ tc qdisc ] [ lldpad ]
| |
| |
| |
[ hns3_enet ] [ hns3_dcbnl ]
\ /
\ /
\ /
[ hclge_dcb ]
/ \
/ \
/ \
[ hclge_main ] [ hclge_tm ]
The current patch-set supports the following functionality:
1. Use of tc qdisc to configure the tc num and prio_tc_map.
2. Use of lldptool to configure the tc schedule mode, tc
bandwidth(if schedule mode is ETS), prio_tc_map and
PFC parameter.
Yunsheng Lin (10):
net: hns3: Support for dynamically assigning tx buffer to TC
net: hns3: Add support for dynamically buffer reallocation
net: hns3: Add support for PFC setting in TM module
net: hns3: Add support for port shaper setting in TM module
net: hns3: Add tc-based TM support for sriov enabled port
net: hns3: Add some interface for the support of DCB feature
net: hns3: Add hclge_dcb module for the support of DCB feature
net: hns3: Add dcb netlink interface for the support of DCB feature
net: hns3: Setting for fc_mode and dcb enable flag in TM module
net: hns3: Add mqprio support when interacting with network stack
drivers/net/ethernet/hisilicon/Kconfig | 9 +
drivers/net/ethernet/hisilicon/hns3/hnae3.h | 20 ++
.../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 4 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 6 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 327 +++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 21 ++
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 219 +++++++++-----
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 8 +-
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 232 +++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 15 +
.../ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c | 106 +++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 137 +++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 7 +
13 files changed, 983 insertions(+), 128 deletions(-)
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
--
1.9.1
^ permalink raw reply
* Re: [PATCH net 2/4] net:ethernet:aquantia: Fix Tx queue hangups
From: Yunsheng Lin @ 2017-09-21 11:19 UTC (permalink / raw)
To: Igor Russkikh, David S . Miller
Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
Simon Edelhaus
In-Reply-To: <cef3863edd8d504d7406f781c97260c52f21e156.1505915085.git.igor.russkikh@aquantia.com>
Hi, Igor
On 2017/9/21 18:53, Igor Russkikh wrote:
> Driver did a poor job in managing its Tx queues: Sometimes it could stop
> tx queues due to link down condition in aq_nic_xmit - but never waked up
> them. That led to Tx path total suspend.
> This patch fixes this and improves generic queue management:
> - introduces queue restart counter
> - uses generic netif_ interface to disable and enable tx path
> - refactors link up/down condition and introduces dmesg log event when
> link changes.
> - introduces new constant for minimum descriptors count required for queue
> wakeup
>
> Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
> Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
> ---
> drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 4 ++
> drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 91 +++++++++++-------------
> drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 -
> drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 26 +++++++
> drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 4 ++
> drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 8 +--
> 6 files changed, 76 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> index 2149864..0fdaaa6 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> @@ -51,6 +51,10 @@
>
> #define AQ_CFG_SKB_FRAGS_MAX 32U
>
> +/* Number of descriptors available in one ring to resume this ring queue
> + */
> +#define AQ_CFG_RESTART_DESC_THRES (AQ_CFG_SKB_FRAGS_MAX * 2)
> +
> #define AQ_CFG_NAPI_WEIGHT 64U
>
> #define AQ_CFG_MULTICAST_ADDRESS_MAX 32U
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> index f281392..24f573c 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> @@ -119,6 +119,35 @@ int aq_nic_cfg_start(struct aq_nic_s *self)
> return 0;
> }
>
> +static int aq_nic_update_link_status(struct aq_nic_s *self)
> +{
> + int err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
> +
> + if (err < 0)
> + return -1;
why not just return err?
> +
> + if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps)
> + pr_info("%s: link change old %d new %d\n",
> + AQ_CFG_DRV_NAME, self->link_status.mbps,
> + self->aq_hw->aq_link_status.mbps);
You have ndev in struct aq_nic_s *self, why not use netdev_*?
> +
> + self->link_status = self->aq_hw->aq_link_status;
> + if (!netif_carrier_ok(self->ndev) && self->link_status.mbps) {
> + aq_utils_obj_set(&self->header.flags,
> + AQ_NIC_FLAG_STARTED);
> + aq_utils_obj_clear(&self->header.flags,
> + AQ_NIC_LINK_DOWN);
> + netif_carrier_on(self->ndev);
> + netif_tx_wake_all_queues(self->ndev);
> + }
> + if (netif_carrier_ok(self->ndev) && !self->link_status.mbps) {
> + netif_carrier_off(self->ndev);
> + netif_tx_disable(self->ndev);
> + aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN);
> + }
> + return 0;
> +}
> +
> static void aq_nic_service_timer_cb(unsigned long param)
> {
> struct aq_nic_s *self = (struct aq_nic_s *)param;
> @@ -131,26 +160,13 @@ static void aq_nic_service_timer_cb(unsigned long param)
> if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY))
> goto err_exit;
>
> - err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
> - if (err < 0)
> + err = aq_nic_update_link_status(self);
> + if (err)
> goto err_exit;
>
> - self->link_status = self->aq_hw->aq_link_status;
> -
> self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw,
> self->aq_nic_cfg.is_interrupt_moderation);
>
> - if (self->link_status.mbps) {
> - aq_utils_obj_set(&self->header.flags,
> - AQ_NIC_FLAG_STARTED);
> - aq_utils_obj_clear(&self->header.flags,
> - AQ_NIC_LINK_DOWN);
> - netif_carrier_on(self->ndev);
> - } else {
> - netif_carrier_off(self->ndev);
> - aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN);
> - }
> -
> memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
> memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
> for (i = AQ_DIMOF(self->aq_vec); i--;) {
> @@ -240,7 +256,6 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
> int aq_nic_ndev_register(struct aq_nic_s *self)
> {
> int err = 0;
> - unsigned int i = 0U;
>
> if (!self->ndev) {
> err = -EINVAL;
> @@ -262,8 +277,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
>
> netif_carrier_off(self->ndev);
>
> - for (i = AQ_CFG_VECS_MAX; i--;)
> - aq_nic_ndev_queue_stop(self, i);
> + netif_tx_disable(self->ndev);
>
> err = register_netdev(self->ndev);
> if (err < 0)
> @@ -319,12 +333,8 @@ struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev)
> err = -EINVAL;
> goto err_exit;
> }
> - if (netif_running(ndev)) {
> - unsigned int i;
> -
> - for (i = AQ_CFG_VECS_MAX; i--;)
> - netif_stop_subqueue(ndev, i);
> - }
> + if (netif_running(ndev))
> + netif_tx_disable(ndev);
>
> for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs;
> self->aq_vecs++) {
> @@ -384,16 +394,6 @@ int aq_nic_init(struct aq_nic_s *self)
> return err;
> }
>
> -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx)
> -{
> - netif_start_subqueue(self->ndev, idx);
> -}
> -
> -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx)
> -{
> - netif_stop_subqueue(self->ndev, idx);
> -}
> -
> int aq_nic_start(struct aq_nic_s *self)
> {
> struct aq_vec_s *aq_vec = NULL;
> @@ -452,10 +452,6 @@ int aq_nic_start(struct aq_nic_s *self)
> goto err_exit;
> }
>
> - for (i = 0U, aq_vec = self->aq_vec[0];
> - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
> - aq_nic_ndev_queue_start(self, i);
> -
> err = netif_set_real_num_tx_queues(self->ndev, self->aq_vecs);
> if (err < 0)
> goto err_exit;
> @@ -464,6 +460,8 @@ int aq_nic_start(struct aq_nic_s *self)
> if (err < 0)
> goto err_exit;
>
> + netif_tx_start_all_queues(self->ndev);
> +
> err_exit:
> return err;
> }
> @@ -603,7 +601,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
> unsigned int tc = 0U;
> int err = NETDEV_TX_OK;
> - bool is_nic_in_bad_state;
>
> frags = skb_shinfo(skb)->nr_frags + 1;
>
> @@ -614,13 +611,10 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> goto err_exit;
> }
>
> - is_nic_in_bad_state = aq_utils_obj_test(&self->header.flags,
> - AQ_NIC_FLAGS_IS_NOT_TX_READY) ||
> - (aq_ring_avail_dx(ring) <
> - AQ_CFG_SKB_FRAGS_MAX);
> + aq_ring_update_queue_state(ring);
>
> - if (is_nic_in_bad_state) {
> - aq_nic_ndev_queue_stop(self, ring->idx);
> + /* Above status update may stop the queue. Check this. */
> + if (__netif_subqueue_stopped(self->ndev, ring->idx)) {
> err = NETDEV_TX_BUSY;
> goto err_exit;
> }
> @@ -632,9 +626,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> ring,
> frags);
> if (err >= 0) {
> - if (aq_ring_avail_dx(ring) < AQ_CFG_SKB_FRAGS_MAX + 1)
> - aq_nic_ndev_queue_stop(self, ring->idx);
> -
> ++ring->stats.tx.packets;
> ring->stats.tx.bytes += skb->len;
> }
> @@ -906,9 +897,7 @@ int aq_nic_stop(struct aq_nic_s *self)
> struct aq_vec_s *aq_vec = NULL;
> unsigned int i = 0U;
>
> - for (i = 0U, aq_vec = self->aq_vec[0];
> - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
> - aq_nic_ndev_queue_stop(self, i);
> + netif_tx_disable(self->ndev);
>
> del_timer_sync(&self->service_timer);
>
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> index 7fc2a5e..0ddd556 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> @@ -83,8 +83,6 @@ struct net_device *aq_nic_get_ndev(struct aq_nic_s *self);
> int aq_nic_init(struct aq_nic_s *self);
> int aq_nic_cfg_start(struct aq_nic_s *self);
> int aq_nic_ndev_register(struct aq_nic_s *self);
> -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx);
> -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx);
> void aq_nic_ndev_free(struct aq_nic_s *self);
> int aq_nic_start(struct aq_nic_s *self);
> int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb);
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> index 4eee199..02f79b0 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> @@ -104,6 +104,32 @@ int aq_ring_init(struct aq_ring_s *self)
> return 0;
> }
>
> +void aq_ring_update_queue_state(struct aq_ring_s *ring)
> +{
> + if (aq_ring_avail_dx(ring) <= AQ_CFG_SKB_FRAGS_MAX)
> + aq_ring_queue_stop(ring);
> + else if (aq_ring_avail_dx(ring) > AQ_CFG_RESTART_DESC_THRES)
> + aq_ring_queue_wake(ring);
> +}
> +
> +void aq_ring_queue_wake(struct aq_ring_s *ring)
> +{
> + struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
> +
> + if (__netif_subqueue_stopped(ndev, ring->idx)) {
> + netif_wake_subqueue(ndev, ring->idx);
> + ring->stats.tx.queue_restarts++;
> + }
> +}
> +
> +void aq_ring_queue_stop(struct aq_ring_s *ring)
> +{
> + struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
> +
> + if (!__netif_subqueue_stopped(ndev, ring->idx))
> + netif_stop_subqueue(ndev, ring->idx);
> +}
> +
> void aq_ring_tx_clean(struct aq_ring_s *self)
> {
> struct device *dev = aq_nic_get_dev(self->aq_nic);
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> index 782176c..24523b5 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> @@ -94,6 +94,7 @@ struct aq_ring_stats_tx_s {
> u64 errors;
> u64 packets;
> u64 bytes;
> + u64 queue_restarts;
> };
>
> union aq_ring_stats_s {
> @@ -147,6 +148,9 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self,
> int aq_ring_init(struct aq_ring_s *self);
> void aq_ring_rx_deinit(struct aq_ring_s *self);
> void aq_ring_free(struct aq_ring_s *self);
> +void aq_ring_update_queue_state(struct aq_ring_s *ring);
> +void aq_ring_queue_wake(struct aq_ring_s *ring);
> +void aq_ring_queue_stop(struct aq_ring_s *ring);
> void aq_ring_tx_clean(struct aq_ring_s *self);
> int aq_ring_rx_clean(struct aq_ring_s *self,
> struct napi_struct *napi,
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> index ebf5880..305ff8f 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> @@ -59,12 +59,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
> if (ring[AQ_VEC_TX_ID].sw_head !=
> ring[AQ_VEC_TX_ID].hw_head) {
> aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
> -
> - if (aq_ring_avail_dx(&ring[AQ_VEC_TX_ID]) >
> - AQ_CFG_SKB_FRAGS_MAX) {
> - aq_nic_ndev_queue_start(self->aq_nic,
> - ring[AQ_VEC_TX_ID].idx);
> - }
> + aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]);
> was_tx_cleaned = true;
> }
>
> @@ -364,6 +359,7 @@ void aq_vec_add_stats(struct aq_vec_s *self,
> stats_tx->packets += tx->packets;
> stats_tx->bytes += tx->bytes;
> stats_tx->errors += tx->errors;
> + stats_tx->queue_restarts += tx->queue_restarts;
> }
> }
>
>
^ permalink raw reply
* Re: [patch net-next 03/12] ipmr: Add FIB notification access functions
From: Nikolay Aleksandrov @ 2017-09-21 11:19 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-4-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Make the ipmr module register as a FIB notifier. To do that, implement both
> the ipmr_seq_read and ipmr_dump ops.
>
> The ipmr_seq_read op returns a sequence counter that is incremented on
> every notification related operation done by the ipmr. To implement that,
> add a sequence counter in the netns_ipv4 struct and increment it whenever a
> new MFC route or VIF are added or deleted. The sequence operations are
> protected by the RTNL lock.
>
> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
> and sends notifications about them. The entries dump is done under RCU.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> include/linux/mroute.h | 15 ++++++
> include/net/netns/ipv4.h | 3 ++
> net/ipv4/ipmr.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 151 insertions(+), 2 deletions(-)
>
[snip]
> +
> +static int ipmr_dump(struct net *net, struct notifier_block *nb)
> +{
> + struct mr_table *mrt;
> + int err;
> +
> + err = ipmr_rules_dump(net, nb);
> + if (err)
> + return err;
> +
> + ipmr_for_each_table(mrt, net) {
> + struct vif_device *v = &mrt->vif_table[0];
> + struct mfc_cache *mfc;
> + int vifi;
> +
> + /* Notifiy on table VIF entries */
> + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
> + if (!v->dev)
> + continue;
> +
> + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
> + v, vifi, mrt->id);
> + }
The VIF table is protected by mrt_lock (an rwlock). With only RCU held here,
you are not guaranteed that v->dev stays set: it can become NULL after the
check above. For details, see vif_delete() in net/ipv4/ipmr.c. You need to
hold at least mrt_lock for reading.
> +
> + /* Notify on table MFC entries */
> + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
> + call_ipmr_mfc_entry_notifier(nb, net,
> + FIB_EVENT_ENTRY_ADD, mfc,
> + mrt->id);
> + }
> +
> + return 0;
> +}
> +
> +static const struct fib_notifier_ops ipmr_notifier_ops_template = {
> + .family = RTNL_FAMILY_IPMR,
> + .fib_seq_read = ipmr_seq_read,
> + .fib_dump = ipmr_dump,
> + .owner = THIS_MODULE,
> +};
> +
> +int __net_init ipmr_notifier_init(struct net *net)
> +{
> + struct fib_notifier_ops *ops;
> +
> + net->ipv4.ipmr_seq = 0;
> +
> + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
> + if (IS_ERR(ops))
> + return PTR_ERR(ops);
> + net->ipv4.ipmr_notifier_ops = ops;
> +
> + return 0;
> +}
> +
> +static void __net_exit ipmr_notifier_exit(struct net *net)
> +{
> + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
> + net->ipv4.ipmr_notifier_ops = NULL;
> +}
> +
> /* Setup for IP multicast routing */
> static int __net_init ipmr_net_init(struct net *net)
> {
> int err;
>
> + err = ipmr_notifier_init(net);
> + if (err)
> + goto ipmr_notifier_fail;
> +
> err = ipmr_rules_init(net);
> if (err < 0)
> - goto fail;
> + goto ipmr_rules_fail;
>
> #ifdef CONFIG_PROC_FS
> err = -ENOMEM;
> @@ -3074,7 +3202,9 @@ static int __net_init ipmr_net_init(struct net *net)
> proc_vif_fail:
> ipmr_rules_exit(net);
> #endif
> -fail:
> +ipmr_rules_fail:
> + ipmr_notifier_exit(net);
> +ipmr_notifier_fail:
> return err;
> }
>
> @@ -3084,6 +3214,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
> remove_proc_entry("ip_mr_cache", net->proc_net);
> remove_proc_entry("ip_mr_vif", net->proc_net);
> #endif
> + ipmr_notifier_exit(net);
> ipmr_rules_exit(net);
> }
>
>
^ permalink raw reply
* Re: [PATCH net] bpf: one perf event close won't free bpf program attached by another perf event
From: Peter Zijlstra @ 2017-09-21 11:17 UTC (permalink / raw)
To: Yonghong Song; +Cc: Steven Rostedt, ast, daniel, netdev, kernel-team
In-Reply-To: <9e968490-87ae-7a79-9e59-0dcc840a93f5@fb.com>
On Wed, Sep 20, 2017 at 10:20:13PM -0700, Yonghong Song wrote:
> > (2). trace_event_call->perf_events are per cpu data structure, that
> > means, some filtering logic is needed to avoid the same perf_event prog
> > is executing twice.
>
> What I mean here is that the trace_event_call->perf_events need to be
> checked on ALL cpus since bpf prog should be executed regardless of
> cpu affiliation. It is possible that the same perf_event in different
> per_cpu bucket and hence filtering is needed to avoid the same
> perf_event bpf_prog is executed twice.
An event will only ever be on a single CPU's list at any one time IIRC.
Now, hysterically perf_event_set_bpf_prog used the tracepoint crud
because that already had bpf bits in. But it might make sense to look at
unifying the bpf stuff across all the different event types. Have them
all use event->prog.
I suspect that would break a fair bunch of bpf proglets, since the data
access to the trace data would be completely different, but it would be
much nicer to not have this distinction based on event type.
^ permalink raw reply
* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 11:14 UTC (permalink / raw)
To: Eric Dumazet
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <22cde020-e13a-3635-512c-25532f754bda@itcare.pl>
W dniu 2017-09-21 o 13:12, Paweł Staszewski pisze:
>
>
> W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
>> On Thu, 2017-09-21 at 11:06 +0200, Paweł Staszewski wrote:
>>> W dniu 2017-09-21 o 03:17, Eric Dumazet pisze:
>>>> On Wed, 2017-09-20 at 18:09 -0700, Wei Wang wrote:
>>>>>> Thanks very much Pawel for the feedback.
>>>>>>
>>>>>> I was looking into the code (specifically IPv4 part) and found
>>>>>> that in
>>>>>> free_fib_info_rcu(), we call free_nh_exceptions() without holding
>>>>>> the
>>>>>> fnhe_lock. I am wondering if that could cause some race condition on
>>>>>> fnhe->fnhe_rth_input/output so a double call on dst_dev_put() on the
>>>>>> same dst could be happening.
>>>>>>
>>>>>> But as we call free_fib_info_rcu() only after the grace period, and
>>>>>> the lookup code which could potentially modify
>>>>>> fnhe->fnhe_rth_input/output all holds rcu_read_lock(), it seems
>>>>>> fine...
>>>>>>
>>>>> Hi Pawel,
>>>>>
>>>>> Could you try the following debug patch on top of net-next branch and
>>>>> reproduce the issue check if there are warning msg showing?
>>>>>
>>>>> diff --git a/include/net/dst.h b/include/net/dst.h
>>>>> index 93568bd0a352..82aff41c6f63 100644
>>>>> --- a/include/net/dst.h
>>>>> +++ b/include/net/dst.h
>>>>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
>>>>> *dst, unsigned long time)
>>>>> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>>>>> {
>>>>> if (dst)
>>>>> - atomic_inc(&dst->__refcnt);
>>>>> + dst_hold(dst);
>>>>> return dst;
>>>>> }
>>>>>
>>>>> Thanks.
>>>>> Wei
>>>>>
>>>> Yes, we believe skb_dst_force() and skb_dst_force_safe() should be
>>>> unified (to the 'safe' version)
>>>>
>>>> We no longer have gc to protect from 0 -> 1 transition of dst
>>>> refcount.
>>>>
>>>>
>>>>
>>>>
>>> After adding patch from Wei
>>> https://bugzilla.kernel.org/show_bug.cgi?id=197005#c14
>>>
>> OK we have two problems here
>>
>> 1) We need to unify skb_dst_force() ( for net tree )
>>
>> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
>> lower device. This will considerably help your performance.
>>
>>
>> For 1), this is what I had in mind, can you try it ?
>>
>> Thanks a lot !
>>
>> diff --git a/include/net/dst.h b/include/net/dst.h
>> index
>> 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894
>> 100644
>> --- a/include/net/dst.h
>> +++ b/include/net/dst.h
>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
>> *dst, unsigned long time)
>> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>> {
>> if (dst)
>> - atomic_inc(&dst->__refcnt);
>> + dst_hold(dst);
>> return dst;
>> }
>> @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff
>> *nskb, const struct sk_buff *oskb
>> __skb_dst_copy(nskb, oskb->_skb_refdst);
>> }
>> -/**
>> - * skb_dst_force - makes sure skb dst is refcounted
>> - * @skb: buffer
>> - *
>> - * If dst is not yet refcounted, let's do it
>> - */
>> -static inline void skb_dst_force(struct sk_buff *skb)
>> -{
>> - if (skb_dst_is_noref(skb)) {
>> - WARN_ON(!rcu_read_lock_held());
>> - skb->_skb_refdst &= ~SKB_DST_NOREF;
>> - dst_clone(skb_dst(skb));
>> - }
>> -}
>> -
>> /**
>> * dst_hold_safe - Take a reference on a dst if possible
>> * @dst: pointer to dst entry
>> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct
>> sk_buff *skb)
>> }
>> }
>> +/**
>> + * skb_dst_force - makes sure skb dst is refcounted
>> + * @skb: buffer
>> + *
>> + * If dst is not yet refcounted, let's do it
>> + */
>> +static inline void skb_dst_force(struct sk_buff *skb)
>> +{
>> + if (skb_dst_is_noref(skb)) {
>> + struct dst_entry *dst = skb_dst(skb);
>> +
>> + WARN_ON(!rcu_read_lock_held());
>> + if (!dst_hold_safe(dst))
>> + dst = NULL;
>> + skb->_skb_refdst = (unsigned long)dst;
>> + }
>> +}
>> /**
>> * __skb_tunnel_rx - prepare skb for rx reinsert
>>
>>
>>
> Thanks
>
> What is weird i have this part in my net-next from git:
> /**
> * skb_dst_force_safe - makes sure skb dst is refcounted
> * @skb: buffer
> *
> * If dst is not yet refcounted and not destroyed, grab a ref on it.
> */
> static inline void skb_dst_force_safe(struct sk_buff *skb)
> {
> if (skb_dst_is_noref(skb)) {
> struct dst_entry *dst = skb_dst(skb);
>
> if (!dst_hold_safe(dst))
> dst = NULL;
>
> skb->_skb_refdst = (unsigned long)dst;
> }
> }
>
>
>
OK, the difference is that this is skb_dst_force_safe, not skb_dst_force.
^ permalink raw reply
* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 11:12 UTC (permalink / raw)
To: Eric Dumazet
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <1505991826.29839.124.camel@edumazet-glaptop3.roam.corp.google.com>
W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
> On Thu, 2017-09-21 at 11:06 +0200, Paweł Staszewski wrote:
>> W dniu 2017-09-21 o 03:17, Eric Dumazet pisze:
>>> On Wed, 2017-09-20 at 18:09 -0700, Wei Wang wrote:
>>>>> Thanks very much Pawel for the feedback.
>>>>>
>>>>> I was looking into the code (specifically IPv4 part) and found that in
>>>>> free_fib_info_rcu(), we call free_nh_exceptions() without holding the
>>>>> fnhe_lock. I am wondering if that could cause some race condition on
>>>>> fnhe->fnhe_rth_input/output so a double call on dst_dev_put() on the
>>>>> same dst could be happening.
>>>>>
>>>>> But as we call free_fib_info_rcu() only after the grace period, and
>>>>> the lookup code which could potentially modify
>>>>> fnhe->fnhe_rth_input/output all holds rcu_read_lock(), it seems
>>>>> fine...
>>>>>
>>>> Hi Pawel,
>>>>
>>>> Could you try the following debug patch on top of net-next branch and
>>>> reproduce the issue check if there are warning msg showing?
>>>>
>>>> diff --git a/include/net/dst.h b/include/net/dst.h
>>>> index 93568bd0a352..82aff41c6f63 100644
>>>> --- a/include/net/dst.h
>>>> +++ b/include/net/dst.h
>>>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
>>>> *dst, unsigned long time)
>>>> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>>>> {
>>>> if (dst)
>>>> - atomic_inc(&dst->__refcnt);
>>>> + dst_hold(dst);
>>>> return dst;
>>>> }
>>>>
>>>> Thanks.
>>>> Wei
>>>>
>>> Yes, we believe skb_dst_force() and skb_dst_force_safe() should be
>>> unified (to the 'safe' version)
>>>
>>> We no longer have gc to protect from 0 -> 1 transition of dst refcount.
>>>
>>>
>>>
>>>
>> After adding patch from Wei
>> https://bugzilla.kernel.org/show_bug.cgi?id=197005#c14
>>
> OK we have two problems here
>
> 1) We need to unify skb_dst_force() ( for net tree )
>
> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
> lower device. This will considerably help your performance.
>
>
> For 1), this is what I had in mind, can you try it ?
>
> Thanks a lot !
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
> {
> if (dst)
> - atomic_inc(&dst->__refcnt);
> + dst_hold(dst);
> return dst;
> }
>
> @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
> __skb_dst_copy(nskb, oskb->_skb_refdst);
> }
>
> -/**
> - * skb_dst_force - makes sure skb dst is refcounted
> - * @skb: buffer
> - *
> - * If dst is not yet refcounted, let's do it
> - */
> -static inline void skb_dst_force(struct sk_buff *skb)
> -{
> - if (skb_dst_is_noref(skb)) {
> - WARN_ON(!rcu_read_lock_held());
> - skb->_skb_refdst &= ~SKB_DST_NOREF;
> - dst_clone(skb_dst(skb));
> - }
> -}
> -
> /**
> * dst_hold_safe - Take a reference on a dst if possible
> * @dst: pointer to dst entry
> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct sk_buff *skb)
> }
> }
>
> +/**
> + * skb_dst_force - makes sure skb dst is refcounted
> + * @skb: buffer
> + *
> + * If dst is not yet refcounted, let's do it
> + */
> +static inline void skb_dst_force(struct sk_buff *skb)
> +{
> + if (skb_dst_is_noref(skb)) {
> + struct dst_entry *dst = skb_dst(skb);
> +
> + WARN_ON(!rcu_read_lock_held());
> + if (!dst_hold_safe(dst))
> + dst = NULL;
> + skb->_skb_refdst = (unsigned long)dst;
> + }
> +}
>
> /**
> * __skb_tunnel_rx - prepare skb for rx reinsert
>
>
>
Thanks
What is weird is that I have this part in my net-next from git:
/**
* skb_dst_force_safe - makes sure skb dst is refcounted
* @skb: buffer
*
* If dst is not yet refcounted and not destroyed, grab a ref on it.
*/
static inline void skb_dst_force_safe(struct sk_buff *skb)
{
if (skb_dst_is_noref(skb)) {
struct dst_entry *dst = skb_dst(skb);
if (!dst_hold_safe(dst))
dst = NULL;
skb->_skb_refdst = (unsigned long)dst;
}
}
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox