* [PATCH net 3/4] net:ethernet:aquantia: Fix transient invalid link down/up indications
From: Igor Russkikh @ 2017-09-21 10:53 UTC (permalink / raw)
To: David S . Miller
Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
Simon Edelhaus, Igor Russkikh
In-Reply-To: <cover.1505915085.git.igor.russkikh@aquantia.com>
Due to a bug in the Aquantia Atlantic card firmware, it sometimes reports
invalid link speed bits. That caused the driver to report link down events,
although the link itself is totally fine.
This patch ignores such out-of-the-blue readings.
Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 4f5ec9a..ab5d3cb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -351,8 +351,7 @@ int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
break;
default:
- link_status->mbps = 0U;
- break;
+ return -1;
}
}
--
2.7.4
^ permalink raw reply related
* [PATCH net 4/4] net:ethernet:atlantic: fix iommu errors
From: Igor Russkikh @ 2017-09-21 10:53 UTC (permalink / raw)
To: David S . Miller
Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
Simon Edelhaus, Pavel Belous, Igor Russkikh
In-Reply-To: <cover.1505915085.git.igor.russkikh@aquantia.com>
From: Pavel Belous <pavel.belous@aquantia.com>
Call skb_frag_dma_map multiple times if tx length is greater than
device max and avoid processing tx ring until entire packet has been
sent.
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
---
drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 43 ++++++++++++++----------
drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 27 ++++++++++-----
drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 6 ++--
3 files changed, 49 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 24f573c..5b18ffc 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -474,6 +474,7 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
unsigned int frag_count = 0U;
unsigned int dx = ring->sw_tail;
+ struct aq_ring_buff_s *first = NULL;
struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx];
if (unlikely(skb_is_gso(skb))) {
@@ -484,6 +485,7 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
dx_buff->len_l4 = tcp_hdrlen(skb);
dx_buff->mss = skb_shinfo(skb)->gso_size;
dx_buff->is_txc = 1U;
+ dx_buff->eop_index = 0xffffU;
dx_buff->is_ipv6 =
(ip_hdr(skb)->version == 6) ? 1U : 0U;
@@ -503,6 +505,7 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa)))
goto exit;
+ first = dx_buff;
dx_buff->len_pkt = skb->len;
dx_buff->is_sop = 1U;
dx_buff->is_mapped = 1U;
@@ -531,40 +534,46 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
for (; nr_frags--; ++frag_count) {
unsigned int frag_len = 0U;
+ unsigned int buff_offset = 0U;
+ unsigned int buff_size = 0U;
dma_addr_t frag_pa;
skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_count];
frag_len = skb_frag_size(frag);
- frag_pa = skb_frag_dma_map(aq_nic_get_dev(self), frag, 0,
- frag_len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(aq_nic_get_dev(self), frag_pa)))
- goto mapping_error;
+ while (frag_len) {
+ if (frag_len > AQ_CFG_TX_FRAME_MAX)
+ buff_size = AQ_CFG_TX_FRAME_MAX;
+ else
+ buff_size = frag_len;
+
+ frag_pa = skb_frag_dma_map(aq_nic_get_dev(self),
+ frag,
+ buff_offset,
+ buff_size,
+ DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(aq_nic_get_dev(self),
+ frag_pa)))
+ goto mapping_error;
- while (frag_len > AQ_CFG_TX_FRAME_MAX) {
dx = aq_ring_next_dx(ring, dx);
dx_buff = &ring->buff_ring[dx];
dx_buff->flags = 0U;
- dx_buff->len = AQ_CFG_TX_FRAME_MAX;
+ dx_buff->len = buff_size;
dx_buff->pa = frag_pa;
dx_buff->is_mapped = 1U;
+ dx_buff->eop_index = 0xffffU;
+
+ frag_len -= buff_size;
+ buff_offset += buff_size;
- frag_len -= AQ_CFG_TX_FRAME_MAX;
- frag_pa += AQ_CFG_TX_FRAME_MAX;
++ret;
}
-
- dx = aq_ring_next_dx(ring, dx);
- dx_buff = &ring->buff_ring[dx];
-
- dx_buff->flags = 0U;
- dx_buff->len = frag_len;
- dx_buff->pa = frag_pa;
- dx_buff->is_mapped = 1U;
- ++ret;
}
+ first->eop_index = dx;
dx_buff->is_eop = 1U;
dx_buff->skb = skb;
goto exit;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 02f79b0..0654e0c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -104,6 +104,12 @@ int aq_ring_init(struct aq_ring_s *self)
return 0;
}
+static inline bool aq_ring_dx_in_range(unsigned int h, unsigned int i,
+ unsigned int t)
+{
+ return (h < t) ? ((h < i) && (i < t)) : ((h < i) || (i < t));
+}
+
void aq_ring_update_queue_state(struct aq_ring_s *ring)
{
if (aq_ring_avail_dx(ring) <= AQ_CFG_SKB_FRAGS_MAX)
@@ -139,23 +145,28 @@ void aq_ring_tx_clean(struct aq_ring_s *self)
struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head];
if (likely(buff->is_mapped)) {
- if (unlikely(buff->is_sop))
+ if (unlikely(buff->is_sop)) {
+ if (!buff->is_eop &&
+ buff->eop_index != 0xffffU &&
+ (!aq_ring_dx_in_range(self->sw_head,
+ buff->eop_index,
+ self->hw_head)))
+ break;
+
dma_unmap_single(dev, buff->pa, buff->len,
DMA_TO_DEVICE);
- else
+ } else {
dma_unmap_page(dev, buff->pa, buff->len,
DMA_TO_DEVICE);
+ }
}
if (unlikely(buff->is_eop))
dev_kfree_skb_any(buff->skb);
- }
-}
-static inline unsigned int aq_ring_dx_in_range(unsigned int h, unsigned int i,
- unsigned int t)
-{
- return (h < t) ? ((h < i) && (i < t)) : ((h < i) || (i < t));
+ buff->pa = 0U;
+ buff->eop_index = 0xffffU;
+ }
}
#define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 24523b5..5844078 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -65,7 +65,7 @@ struct __packed aq_ring_buff_s {
};
union {
struct {
- u32 len:16;
+ u16 len;
u32 is_ip_cso:1;
u32 is_udp_cso:1;
u32 is_tcp_cso:1;
@@ -77,8 +77,10 @@ struct __packed aq_ring_buff_s {
u32 is_cleaned:1;
u32 is_error:1;
u32 rsvd3:6;
+ u16 eop_index;
+ u16 rsvd4;
};
- u32 flags;
+ u64 flags;
};
};
--
2.7.4
^ permalink raw reply related
* Re: [PATCH net-next 2/4] cxgb4: add basic tc flower offload support
From: Yunsheng Lin @ 2017-09-21 10:55 UTC (permalink / raw)
To: Rahul Lakkireddy, netdev; +Cc: davem, kumaras, ganeshgr, nirranjan, indranil
In-Reply-To: <dc5f8e6419ad3439b14f39306245d98537be3306.1505977744.git.rahul.lakkireddy@chelsio.com>
Hi, Kumar
On 2017/9/21 15:33, Rahul Lakkireddy wrote:
> From: Kumar Sanghvi <kumaras@chelsio.com>
>
> Add support to add/remove flows for offload. Following match
> and action are supported for offloading a flow:
>
> Match: ether-protocol, IPv4/IPv6 addresses, L4 ports (TCP/UDP)
> Action: drop, redirect to another port on the device.
>
> The qualifying flows can have accompanying mask information.
>
> Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
> Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
> Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
> ---
> drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 +
> drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 26 ++
> drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +
> .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 285 ++++++++++++++++++++-
> .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 17 ++
> drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 +
> 6 files changed, 332 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
> index ea72d2d2e1b4..26eac599ab2c 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
> @@ -904,6 +904,9 @@ struct adapter {
> /* TC u32 offload */
> struct cxgb4_tc_u32_table *tc_u32;
> struct chcr_stats_debug chcr_stats;
> +
> + /* TC flower offload */
> + DECLARE_HASHTABLE(flower_anymatch_tbl, 9);
> };
>
> /* Support for "sched-class" command to allow a TX Scheduling Class to be
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
> index 45b5853ca2f1..07a4619e2164 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
> @@ -148,6 +148,32 @@ static int get_filter_steerq(struct net_device *dev,
> return iq;
> }
>
> +int cxgb4_get_free_ftid(struct net_device *dev, int family)
> +{
> + struct adapter *adap = netdev2adap(dev);
> + struct tid_info *t = &adap->tids;
> + int ftid;
> +
> + spin_lock_bh(&t->ftid_lock);
> + if (family == PF_INET) {
> + ftid = find_first_zero_bit(t->ftid_bmap, t->nftids);
> + if (ftid >= t->nftids)
> + ftid = -1;
> + } else {
> + ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2);
> + if (ftid < 0) {
> + ftid = -1;
Is the ftid = -1 assignment needed here? ftid is already negative at this point.
> + goto out_unlock;
> + }
> +
> + /* this is only a lookup, keep the found region unallocated */
> + bitmap_release_region(t->ftid_bmap, ftid, 2);
> + }
> +out_unlock:
> + spin_unlock_bh(&t->ftid_lock);
> + return ftid;
> +}
> +
> static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family)
> {
> spin_lock_bh(&t->ftid_lock);
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
> index 8923affbdaf8..3ba4e1ff8486 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
> @@ -5105,6 +5105,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> if (!adapter->tc_u32)
> dev_warn(&pdev->dev,
> "could not offload tc u32, continuing\n");
> +
> + cxgb4_init_tc_flower(adapter);
> }
>
> if (is_offload(adapter)) {
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
> index 16dff71e4d02..1af01101faaf 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
> @@ -38,16 +38,292 @@
> #include "cxgb4.h"
> #include "cxgb4_tc_flower.h"
>
> +static struct ch_tc_flower_entry *allocate_flower_entry(void)
> +{
> + struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
> + return new;
> +}
> +
> +/* Must be called with either RTNL or rcu_read_lock */
> +static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap,
> + unsigned long flower_cookie)
> +{
> + struct ch_tc_flower_entry *flower_entry;
> +
> + hash_for_each_possible_rcu(adap->flower_anymatch_tbl, flower_entry,
> + link, flower_cookie)
> + if (flower_entry->tc_flower_cookie == flower_cookie)
> + return flower_entry;
> + return NULL;
> +}
> +
> +static void cxgb4_process_flow_match(struct net_device *dev,
> + struct tc_cls_flower_offload *cls,
> + struct ch_filter_specification *fs)
> +{
> + u16 addr_type = 0;
> +
> + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
> + struct flow_dissector_key_control *key =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_CONTROL,
> + cls->key);
> +
> + addr_type = key->addr_type;
> + }
> +
> + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
> + struct flow_dissector_key_basic *key =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_BASIC,
> + cls->key);
> + struct flow_dissector_key_basic *mask =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_BASIC,
> + cls->mask);
> + u16 ethtype_key = ntohs(key->n_proto);
> + u16 ethtype_mask = ntohs(mask->n_proto);
> +
> + if (ethtype_key == ETH_P_ALL) {
> + ethtype_key = 0;
> + ethtype_mask = 0;
> + }
> +
> + fs->val.ethtype = ethtype_key;
> + fs->mask.ethtype = ethtype_mask;
> + fs->val.proto = key->ip_proto;
> + fs->mask.proto = mask->ip_proto;
> + }
> +
> + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
> + struct flow_dissector_key_ipv4_addrs *key =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_IPV4_ADDRS,
> + cls->key);
> + struct flow_dissector_key_ipv4_addrs *mask =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_IPV4_ADDRS,
> + cls->mask);
> + fs->type = 0;
> + memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst));
> + memcpy(&fs->val.fip[0], &key->src, sizeof(key->src));
> + memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst));
> + memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src));
> + }
> +
> + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
> + struct flow_dissector_key_ipv6_addrs *key =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_IPV6_ADDRS,
> + cls->key);
> + struct flow_dissector_key_ipv6_addrs *mask =
> + skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_IPV6_ADDRS,
> + cls->mask);
> +
> + fs->type = 1;
> + memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst));
> + memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src));
> + memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst));
> + memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src));
> + }
> +
> + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
> + struct flow_dissector_key_ports *key, *mask;
> +
> + key = skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_PORTS,
> + cls->key);
> + mask = skb_flow_dissector_target(cls->dissector,
> + FLOW_DISSECTOR_KEY_PORTS,
> + cls->mask);
> + fs->val.lport = cpu_to_be16(key->dst);
> + fs->mask.lport = cpu_to_be16(mask->dst);
> + fs->val.fport = cpu_to_be16(key->src);
> + fs->mask.fport = cpu_to_be16(mask->src);
> + }
> +
> + /* Match only packets coming from the ingress port where this
> + * filter will be created.
> + */
> + fs->val.iport = netdev2pinfo(dev)->port_id;
> + fs->mask.iport = ~0;
> +}
> +
> +static int cxgb4_validate_flow_match(struct net_device *dev,
> + struct tc_cls_flower_offload *cls)
> +{
> + if (cls->dissector->used_keys &
> + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
> + BIT(FLOW_DISSECTOR_KEY_BASIC) |
> + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
> + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
> + BIT(FLOW_DISSECTOR_KEY_PORTS))) {
> + netdev_warn(dev, "Unsupported key used: 0x%x\n",
> + cls->dissector->used_keys);
> + return -EOPNOTSUPP;
> + }
> + return 0;
> +}
> +
> +static void cxgb4_process_flow_actions(struct net_device *in,
> + struct tc_cls_flower_offload *cls,
> + struct ch_filter_specification *fs)
> +{
> + const struct tc_action *a;
> + LIST_HEAD(actions);
> +
> + tcf_exts_to_list(cls->exts, &actions);
> + list_for_each_entry(a, &actions, list) {
> + if (is_tcf_gact_shot(a)) {
> + fs->action = FILTER_DROP;
> + } else if (is_tcf_mirred_egress_redirect(a)) {
> + int ifindex = tcf_mirred_ifindex(a);
> + struct net_device *out = __dev_get_by_index(dev_net(in),
> + ifindex);
> + struct port_info *pi = netdev_priv(out);
> +
> + fs->action = FILTER_SWITCH;
> + fs->eport = pi->port_id;
> + }
> + }
> +}
> +
> +static int cxgb4_validate_flow_actions(struct net_device *dev,
> + struct tc_cls_flower_offload *cls)
> +{
> + const struct tc_action *a;
> + LIST_HEAD(actions);
> +
> + tcf_exts_to_list(cls->exts, &actions);
> + list_for_each_entry(a, &actions, list) {
> + if (is_tcf_gact_shot(a)) {
> + /* Do nothing */
> + } else if (is_tcf_mirred_egress_redirect(a)) {
> + struct adapter *adap = netdev2adap(dev);
> + struct net_device *n_dev;
> + unsigned int i, ifindex;
> + bool found = false;
> +
> + ifindex = tcf_mirred_ifindex(a);
> + for_each_port(adap, i) {
> + n_dev = adap->port[i];
> + if (ifindex == n_dev->ifindex) {
> + found = true;
> + break;
> + }
> + }
> +
> + /* If interface doesn't belong to our hw, then
> + * the provided output port is not valid
> + */
> + if (!found) {
> + netdev_err(dev, "%s: Out port invalid\n",
> + __func__);
> + return -EINVAL;
> + }
> + } else {
> + netdev_err(dev, "%s: Unsupported action\n", __func__);
> + return -EOPNOTSUPP;
> + }
> + }
> + return 0;
> +}
> +
> int cxgb4_tc_flower_replace(struct net_device *dev,
> struct tc_cls_flower_offload *cls)
> {
> - return -EOPNOTSUPP;
> + struct adapter *adap = netdev2adap(dev);
> + struct ch_tc_flower_entry *ch_flower;
> + struct ch_filter_specification *fs;
> + struct filter_ctx ctx;
> + int fidx;
> + int ret;
> +
> + if (cxgb4_validate_flow_actions(dev, cls))
> + return -EOPNOTSUPP;
> +
> + if (cxgb4_validate_flow_match(dev, cls))
> + return -EOPNOTSUPP;
> +
> + ch_flower = allocate_flower_entry();
> + if (!ch_flower) {
> + netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__);
> + ret = -ENOMEM;
> + goto err;
Just return here; is the err label needed?
> + }
> +
> + fs = &ch_flower->fs;
> + fs->hitcnts = 1;
> + cxgb4_process_flow_actions(dev, cls, fs);
> + cxgb4_process_flow_match(dev, cls, fs);
> +
> + fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET);
> + if (fidx < 0) {
> + netdev_err(dev, "%s: No fidx for offload.\n", __func__);
> + ret = -ENOMEM;
> + goto free_entry;
> + }
> +
> + init_completion(&ctx.completion);
> + ret = __cxgb4_set_filter(dev, fidx, fs, &ctx);
> + if (ret) {
> + netdev_err(dev, "%s: filter creation err %d\n",
> + __func__, ret);
> + goto free_entry;
> + }
> +
> + /* Wait for reply */
> + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
> + if (!ret) {
> + ret = -ETIMEDOUT;
> + goto free_entry;
> + }
> +
> + ret = ctx.result;
> + /* Check if hw returned error for filter creation */
> + if (ret) {
> + netdev_err(dev, "%s: filter creation err %d\n",
> + __func__, ret);
> + goto free_entry;
> + }
> +
> + INIT_HLIST_NODE(&ch_flower->link);
> + ch_flower->tc_flower_cookie = cls->cookie;
> + ch_flower->filter_id = ctx.tid;
> + hash_add_rcu(adap->flower_anymatch_tbl, &ch_flower->link, cls->cookie);
> +
> + return ret;
> +
> +free_entry:
> + kfree(ch_flower);
> +err:
> + return ret;
> }
>
> int cxgb4_tc_flower_destroy(struct net_device *dev,
> struct tc_cls_flower_offload *cls)
> {
> - return -EOPNOTSUPP;
> + struct adapter *adap = netdev2adap(dev);
> + struct ch_tc_flower_entry *ch_flower;
> + int ret;
> +
> + ch_flower = ch_flower_lookup(adap, cls->cookie);
> + if (!ch_flower) {
> + ret = -ENOENT;
> + goto err;
Same as above
> + }
> +
> + ret = cxgb4_del_filter(dev, ch_flower->filter_id);
> + if (ret)
> + goto err;
> +
> + hash_del_rcu(&ch_flower->link);
> + kfree_rcu(ch_flower, rcu);
> + return ret;
> +
> +err:
> + return ret;
> }
>
> int cxgb4_tc_flower_stats(struct net_device *dev,
> @@ -55,3 +331,8 @@ int cxgb4_tc_flower_stats(struct net_device *dev,
> {
> return -EOPNOTSUPP;
> }
> +
> +void cxgb4_init_tc_flower(struct adapter *adap)
> +{
> + hash_init(adap->flower_anymatch_tbl);
> +}
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
> index b321fc205b5a..6145a9e056eb 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
> @@ -37,10 +37,27 @@
>
> #include <net/pkt_cls.h>
>
> +struct ch_tc_flower_stats {
> + u64 packet_count;
> + u64 byte_count;
> + u64 last_used;
> +};
> +
> +struct ch_tc_flower_entry {
> + struct ch_filter_specification fs;
> + struct ch_tc_flower_stats stats;
> + unsigned long tc_flower_cookie;
> + struct hlist_node link;
> + struct rcu_head rcu;
> + u32 filter_id;
> +};
> +
> int cxgb4_tc_flower_replace(struct net_device *dev,
> struct tc_cls_flower_offload *cls);
> int cxgb4_tc_flower_destroy(struct net_device *dev,
> struct tc_cls_flower_offload *cls);
> int cxgb4_tc_flower_stats(struct net_device *dev,
> struct tc_cls_flower_offload *cls);
> +
> +void cxgb4_init_tc_flower(struct adapter *adap);
> #endif /* __CXGB4_TC_FLOWER_H */
> diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
> index 84541fce94c5..88487095d14f 100644
> --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
> +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
> @@ -212,6 +212,7 @@ struct filter_ctx {
>
> struct ch_filter_specification;
>
> +int cxgb4_get_free_ftid(struct net_device *dev, int family);
> int __cxgb4_set_filter(struct net_device *dev, int filter_id,
> struct ch_filter_specification *fs,
> struct filter_ctx *ctx);
>
^ permalink raw reply
* [PATCH 1/1] net:nfc: use setup_timer
From: Allen Pais @ 2017-09-21 10:59 UTC (permalink / raw)
To: linux-kernel; +Cc: sameo, netdev, Allen Pais
Use setup_timer function instead of initializing timer with the
function and data fields.
Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
net/nfc/core.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df..e5e23c2 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1094,9 +1094,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
dev->targets_generation = 1;
if (ops->check_presence) {
- init_timer(&dev->check_pres_timer);
- dev->check_pres_timer.data = (unsigned long)dev;
- dev->check_pres_timer.function = nfc_check_pres_timeout;
+ setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout,
+ (unsigned long)dev);
INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
}
--
2.7.4
^ permalink raw reply related
* Re: Latest net-next from GIT panic
From: Eric Dumazet @ 2017-09-21 11:03 UTC (permalink / raw)
To: Paweł Staszewski
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <a016d5bc-1cbb-44d7-9ebf-e7e5428e6f98@itcare.pl>
On Thu, 2017-09-21 at 11:06 +0200, Paweł Staszewski wrote:
>
> W dniu 2017-09-21 o 03:17, Eric Dumazet pisze:
> > On Wed, 2017-09-20 at 18:09 -0700, Wei Wang wrote:
> >>> Thanks very much Pawel for the feedback.
> >>>
> >>> I was looking into the code (specifically IPv4 part) and found that in
> >>> free_fib_info_rcu(), we call free_nh_exceptions() without holding the
> >>> fnhe_lock. I am wondering if that could cause some race condition on
> >>> fnhe->fnhe_rth_input/output so a double call on dst_dev_put() on the
> >>> same dst could be happening.
> >>>
> >>> But as we call free_fib_info_rcu() only after the grace period, and
> >>> the lookup code which could potentially modify
> >>> fnhe->fnhe_rth_input/output all holds rcu_read_lock(), it seems
> >>> fine...
> >>>
> >> Hi Pawel,
> >>
> >> Could you try the following debug patch on top of net-next branch and
> >> reproduce the issue check if there are warning msg showing?
> >>
> >> diff --git a/include/net/dst.h b/include/net/dst.h
> >> index 93568bd0a352..82aff41c6f63 100644
> >> --- a/include/net/dst.h
> >> +++ b/include/net/dst.h
> >> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
> >> *dst, unsigned long time)
> >> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
> >> {
> >> if (dst)
> >> - atomic_inc(&dst->__refcnt);
> >> + dst_hold(dst);
> >> return dst;
> >> }
> >>
> >> Thanks.
> >> Wei
> >>
> >
> > Yes, we believe skb_dst_force() and skb_dst_force_safe() should be
> > unified (to the 'safe' version)
> >
> > We no longer have gc to protect from 0 -> 1 transition of dst refcount.
> >
> >
> >
> >
>
> After adding patch from Wei
> https://bugzilla.kernel.org/show_bug.cgi?id=197005#c14
>
OK we have two problems here
1) We need to unify skb_dst_force() ( for net tree )
2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
lower device. This will considerably help your performance.
For 1), this is what I had in mind; can you try it?
Thanks a lot !
diff --git a/include/net/dst.h b/include/net/dst.h
index 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
if (dst)
- atomic_inc(&dst->__refcnt);
+ dst_hold(dst);
return dst;
}
@@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
__skb_dst_copy(nskb, oskb->_skb_refdst);
}
-/**
- * skb_dst_force - makes sure skb dst is refcounted
- * @skb: buffer
- *
- * If dst is not yet refcounted, let's do it
- */
-static inline void skb_dst_force(struct sk_buff *skb)
-{
- if (skb_dst_is_noref(skb)) {
- WARN_ON(!rcu_read_lock_held());
- skb->_skb_refdst &= ~SKB_DST_NOREF;
- dst_clone(skb_dst(skb));
- }
-}
-
/**
* dst_hold_safe - Take a reference on a dst if possible
* @dst: pointer to dst entry
@@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct sk_buff *skb)
}
}
+/**
+ * skb_dst_force - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted, let's do it
+ */
+static inline void skb_dst_force(struct sk_buff *skb)
+{
+ if (skb_dst_is_noref(skb)) {
+ struct dst_entry *dst = skb_dst(skb);
+
+ WARN_ON(!rcu_read_lock_held());
+ if (!dst_hold_safe(dst))
+ dst = NULL;
+ skb->_skb_refdst = (unsigned long)dst;
+ }
+}
/**
* __skb_tunnel_rx - prepare skb for rx reinsert
^ permalink raw reply related
* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 11:12 UTC (permalink / raw)
To: Eric Dumazet
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <1505991826.29839.124.camel@edumazet-glaptop3.roam.corp.google.com>
W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
> On Thu, 2017-09-21 at 11:06 +0200, Paweł Staszewski wrote:
>> W dniu 2017-09-21 o 03:17, Eric Dumazet pisze:
>>> On Wed, 2017-09-20 at 18:09 -0700, Wei Wang wrote:
>>>>> Thanks very much Pawel for the feedback.
>>>>>
>>>>> I was looking into the code (specifically IPv4 part) and found that in
>>>>> free_fib_info_rcu(), we call free_nh_exceptions() without holding the
>>>>> fnhe_lock. I am wondering if that could cause some race condition on
>>>>> fnhe->fnhe_rth_input/output so a double call on dst_dev_put() on the
>>>>> same dst could be happening.
>>>>>
>>>>> But as we call free_fib_info_rcu() only after the grace period, and
>>>>> the lookup code which could potentially modify
>>>>> fnhe->fnhe_rth_input/output all holds rcu_read_lock(), it seems
>>>>> fine...
>>>>>
>>>> Hi Pawel,
>>>>
>>>> Could you try the following debug patch on top of net-next branch and
>>>> reproduce the issue check if there are warning msg showing?
>>>>
>>>> diff --git a/include/net/dst.h b/include/net/dst.h
>>>> index 93568bd0a352..82aff41c6f63 100644
>>>> --- a/include/net/dst.h
>>>> +++ b/include/net/dst.h
>>>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
>>>> *dst, unsigned long time)
>>>> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>>>> {
>>>> if (dst)
>>>> - atomic_inc(&dst->__refcnt);
>>>> + dst_hold(dst);
>>>> return dst;
>>>> }
>>>>
>>>> Thanks.
>>>> Wei
>>>>
>>> Yes, we believe skb_dst_force() and skb_dst_force_safe() should be
>>> unified (to the 'safe' version)
>>>
>>> We no longer have gc to protect from 0 -> 1 transition of dst refcount.
>>>
>>>
>>>
>>>
>> After adding patch from Wei
>> https://bugzilla.kernel.org/show_bug.cgi?id=197005#c14
>>
> OK we have two problems here
>
> 1) We need to unify skb_dst_force() ( for net tree )
>
> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
> lower device. This will considerably help your performance.
>
>
> For 1), this is what I had in mind, can you try it ?
>
> Thanks a lot !
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
> {
> if (dst)
> - atomic_inc(&dst->__refcnt);
> + dst_hold(dst);
> return dst;
> }
>
> @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
> __skb_dst_copy(nskb, oskb->_skb_refdst);
> }
>
> -/**
> - * skb_dst_force - makes sure skb dst is refcounted
> - * @skb: buffer
> - *
> - * If dst is not yet refcounted, let's do it
> - */
> -static inline void skb_dst_force(struct sk_buff *skb)
> -{
> - if (skb_dst_is_noref(skb)) {
> - WARN_ON(!rcu_read_lock_held());
> - skb->_skb_refdst &= ~SKB_DST_NOREF;
> - dst_clone(skb_dst(skb));
> - }
> -}
> -
> /**
> * dst_hold_safe - Take a reference on a dst if possible
> * @dst: pointer to dst entry
> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct sk_buff *skb)
> }
> }
>
> +/**
> + * skb_dst_force - makes sure skb dst is refcounted
> + * @skb: buffer
> + *
> + * If dst is not yet refcounted, let's do it
> + */
> +static inline void skb_dst_force(struct sk_buff *skb)
> +{
> + if (skb_dst_is_noref(skb)) {
> + struct dst_entry *dst = skb_dst(skb);
> +
> + WARN_ON(!rcu_read_lock_held());
> + if (!dst_hold_safe(dst))
> + dst = NULL;
> + skb->_skb_refdst = (unsigned long)dst;
> + }
> +}
>
> /**
> * __skb_tunnel_rx - prepare skb for rx reinsert
>
>
>
Thanks
What is weird is that I already have this part in my net-next from git:
/**
* skb_dst_force_safe - makes sure skb dst is refcounted
* @skb: buffer
*
* If dst is not yet refcounted and not destroyed, grab a ref on it.
*/
static inline void skb_dst_force_safe(struct sk_buff *skb)
{
if (skb_dst_is_noref(skb)) {
struct dst_entry *dst = skb_dst(skb);
if (!dst_hold_safe(dst))
dst = NULL;
skb->_skb_refdst = (unsigned long)dst;
}
}
^ permalink raw reply
* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 11:14 UTC (permalink / raw)
To: Eric Dumazet
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <22cde020-e13a-3635-512c-25532f754bda@itcare.pl>
W dniu 2017-09-21 o 13:12, Paweł Staszewski pisze:
>
>
> W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
>> On Thu, 2017-09-21 at 11:06 +0200, Paweł Staszewski wrote:
>>> W dniu 2017-09-21 o 03:17, Eric Dumazet pisze:
>>>> On Wed, 2017-09-20 at 18:09 -0700, Wei Wang wrote:
>>>>>> Thanks very much Pawel for the feedback.
>>>>>>
>>>>>> I was looking into the code (specifically IPv4 part) and found
>>>>>> that in
>>>>>> free_fib_info_rcu(), we call free_nh_exceptions() without holding
>>>>>> the
>>>>>> fnhe_lock. I am wondering if that could cause some race condition on
>>>>>> fnhe->fnhe_rth_input/output so a double call on dst_dev_put() on the
>>>>>> same dst could be happening.
>>>>>>
>>>>>> But as we call free_fib_info_rcu() only after the grace period, and
>>>>>> the lookup code which could potentially modify
>>>>>> fnhe->fnhe_rth_input/output all holds rcu_read_lock(), it seems
>>>>>> fine...
>>>>>>
>>>>> Hi Pawel,
>>>>>
>>>>> Could you try the following debug patch on top of net-next branch and
>>>>> reproduce the issue check if there are warning msg showing?
>>>>>
>>>>> diff --git a/include/net/dst.h b/include/net/dst.h
>>>>> index 93568bd0a352..82aff41c6f63 100644
>>>>> --- a/include/net/dst.h
>>>>> +++ b/include/net/dst.h
>>>>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
>>>>> *dst, unsigned long time)
>>>>> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>>>>> {
>>>>> if (dst)
>>>>> - atomic_inc(&dst->__refcnt);
>>>>> + dst_hold(dst);
>>>>> return dst;
>>>>> }
>>>>>
>>>>> Thanks.
>>>>> Wei
>>>>>
>>>> Yes, we believe skb_dst_force() and skb_dst_force_safe() should be
>>>> unified (to the 'safe' version)
>>>>
>>>> We no longer have gc to protect from 0 -> 1 transition of dst
>>>> refcount.
>>>>
>>>>
>>>>
>>>>
>>> After adding patch from Wei
>>> https://bugzilla.kernel.org/show_bug.cgi?id=197005#c14
>>>
>> OK we have two problems here
>>
>> 1) We need to unify skb_dst_force() ( for net tree )
>>
>> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
>> lower device. This will considerably help your performance.
>>
>>
>> For 1), this is what I had in mind, can you try it ?
>>
>> Thanks a lot !
>>
>> diff --git a/include/net/dst.h b/include/net/dst.h
>> index
>> 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894
>> 100644
>> --- a/include/net/dst.h
>> +++ b/include/net/dst.h
>> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry
>> *dst, unsigned long time)
>> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
>> {
>> if (dst)
>> - atomic_inc(&dst->__refcnt);
>> + dst_hold(dst);
>> return dst;
>> }
>> @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff
>> *nskb, const struct sk_buff *oskb
>> __skb_dst_copy(nskb, oskb->_skb_refdst);
>> }
>> -/**
>> - * skb_dst_force - makes sure skb dst is refcounted
>> - * @skb: buffer
>> - *
>> - * If dst is not yet refcounted, let's do it
>> - */
>> -static inline void skb_dst_force(struct sk_buff *skb)
>> -{
>> - if (skb_dst_is_noref(skb)) {
>> - WARN_ON(!rcu_read_lock_held());
>> - skb->_skb_refdst &= ~SKB_DST_NOREF;
>> - dst_clone(skb_dst(skb));
>> - }
>> -}
>> -
>> /**
>> * dst_hold_safe - Take a reference on a dst if possible
>> * @dst: pointer to dst entry
>> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct
>> sk_buff *skb)
>> }
>> }
>> +/**
>> + * skb_dst_force - makes sure skb dst is refcounted
>> + * @skb: buffer
>> + *
>> + * If dst is not yet refcounted, let's do it
>> + */
>> +static inline void skb_dst_force(struct sk_buff *skb)
>> +{
>> + if (skb_dst_is_noref(skb)) {
>> + struct dst_entry *dst = skb_dst(skb);
>> +
>> + WARN_ON(!rcu_read_lock_held());
>> + if (!dst_hold_safe(dst))
>> + dst = NULL;
>> + skb->_skb_refdst = (unsigned long)dst;
>> + }
>> +}
>> /**
>> * __skb_tunnel_rx - prepare skb for rx reinsert
>>
>>
>>
> Thanks
>
> What is weird i have this part in my net-next from git:
> /**
> * skb_dst_force_safe - makes sure skb dst is refcounted
> * @skb: buffer
> *
> * If dst is not yet refcounted and not destroyed, grab a ref on it.
> */
> static inline void skb_dst_force_safe(struct sk_buff *skb)
> {
> if (skb_dst_is_noref(skb)) {
> struct dst_entry *dst = skb_dst(skb);
>
> if (!dst_hold_safe(dst))
> dst = NULL;
>
> skb->_skb_refdst = (unsigned long)dst;
> }
> }
>
>
>
OK, the difference is that what I have is skb_dst_force_safe, not skb_dst_force.
^ permalink raw reply
* Re: [PATCH net] bpf: one perf event close won't free bpf program attached by another perf event
From: Peter Zijlstra @ 2017-09-21 11:17 UTC (permalink / raw)
To: Yonghong Song; +Cc: Steven Rostedt, ast, daniel, netdev, kernel-team
In-Reply-To: <9e968490-87ae-7a79-9e59-0dcc840a93f5@fb.com>
On Wed, Sep 20, 2017 at 10:20:13PM -0700, Yonghong Song wrote:
> > (2). trace_event_call->perf_events are per cpu data structure, that
> > means, some filtering logic is needed to avoid the same perf_event prog
> > is executing twice.
>
> What I mean here is that the trace_event_call->perf_events need to be
> checked on ALL cpus since bpf prog should be executed regardless of
> cpu affiliation. It is possible that the same perf_event in different
> per_cpu bucket and hence filtering is needed to avoid the same
> perf_event bpf_prog is executed twice.
An event will only ever be on a single CPU's list at any one time IIRC.
Now, hysterically perf_event_set_bpf_prog used the tracepoint crud
because that already had bpf bits in. But it might make sense to look at
unifying the bpf stuff across all the different event types. Have them
all use event->prog.
I suspect that would break a fair bunch of bpf proglets, since the data
access to the trace data would be completely different, but it would be
much nicer to not have this distinction based on event type.
^ permalink raw reply
* Re: [patch net-next 03/12] ipmr: Add FIB notification access functions
From: Nikolay Aleksandrov @ 2017-09-21 11:19 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-4-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Make the ipmr module register as a FIB notifier. To do that, implement both
> the ipmr_seq_read and ipmr_dump ops.
>
> The ipmr_seq_read op returns a sequence counter that is incremented on
> every notification related operation done by the ipmr. To implement that,
> add a sequence counter in the netns_ipv4 struct and increment it whenever a
> new MFC route or VIF are added or deleted. The sequence operations are
> protected by the RTNL lock.
>
> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
> and sends notifications about them. The entries dump is done under RCU.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> include/linux/mroute.h | 15 ++++++
> include/net/netns/ipv4.h | 3 ++
> net/ipv4/ipmr.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 151 insertions(+), 2 deletions(-)
>
[snip]
> +
> +static int ipmr_dump(struct net *net, struct notifier_block *nb)
> +{
> + struct mr_table *mrt;
> + int err;
> +
> + err = ipmr_rules_dump(net, nb);
> + if (err)
> + return err;
> +
> + ipmr_for_each_table(mrt, net) {
> + struct vif_device *v = &mrt->vif_table[0];
> + struct mfc_cache *mfc;
> + int vifi;
> +
> + /* Notifiy on table VIF entries */
> + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
> + if (!v->dev)
> + continue;
> +
> + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
> + v, vifi, mrt->id);
> + }
The VIF table is protected by mrt_lock (an rwlock); here, with RCU only,
you're not guaranteed to keep v->dev. It can become NULL after the check above.
For details, see vif_delete() in net/ipv4/ipmr.c. You need to hold at least
mrt_lock for reading.
> +
> + /* Notify on table MFC entries */
> + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
> + call_ipmr_mfc_entry_notifier(nb, net,
> + FIB_EVENT_ENTRY_ADD, mfc,
> + mrt->id);
> + }
> +
> + return 0;
> +}
> +
> +static const struct fib_notifier_ops ipmr_notifier_ops_template = {
> + .family = RTNL_FAMILY_IPMR,
> + .fib_seq_read = ipmr_seq_read,
> + .fib_dump = ipmr_dump,
> + .owner = THIS_MODULE,
> +};
> +
> +int __net_init ipmr_notifier_init(struct net *net)
> +{
> + struct fib_notifier_ops *ops;
> +
> + net->ipv4.ipmr_seq = 0;
> +
> + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
> + if (IS_ERR(ops))
> + return PTR_ERR(ops);
> + net->ipv4.ipmr_notifier_ops = ops;
> +
> + return 0;
> +}
> +
> +static void __net_exit ipmr_notifier_exit(struct net *net)
> +{
> + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
> + net->ipv4.ipmr_notifier_ops = NULL;
> +}
> +
> /* Setup for IP multicast routing */
> static int __net_init ipmr_net_init(struct net *net)
> {
> int err;
>
> + err = ipmr_notifier_init(net);
> + if (err)
> + goto ipmr_notifier_fail;
> +
> err = ipmr_rules_init(net);
> if (err < 0)
> - goto fail;
> + goto ipmr_rules_fail;
>
> #ifdef CONFIG_PROC_FS
> err = -ENOMEM;
> @@ -3074,7 +3202,9 @@ static int __net_init ipmr_net_init(struct net *net)
> proc_vif_fail:
> ipmr_rules_exit(net);
> #endif
> -fail:
> +ipmr_rules_fail:
> + ipmr_notifier_exit(net);
> +ipmr_notifier_fail:
> return err;
> }
>
> @@ -3084,6 +3214,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
> remove_proc_entry("ip_mr_cache", net->proc_net);
> remove_proc_entry("ip_mr_vif", net->proc_net);
> #endif
> + ipmr_notifier_exit(net);
> ipmr_rules_exit(net);
> }
>
>
^ permalink raw reply
* Re: [PATCH net 2/4] net:ethernet:aquantia: Fix Tx queue hangups
From: Yunsheng Lin @ 2017-09-21 11:19 UTC (permalink / raw)
To: Igor Russkikh, David S . Miller
Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
Simon Edelhaus
In-Reply-To: <cef3863edd8d504d7406f781c97260c52f21e156.1505915085.git.igor.russkikh@aquantia.com>
Hi, Igor
On 2017/9/21 18:53, Igor Russkikh wrote:
> Driver did a poor job in managing its Tx queues: Sometimes it could stop
> tx queues due to link down condition in aq_nic_xmit - but never waked up
> them. That led to Tx path total suspend.
> This patch fixes this and improves generic queue management:
> - introduces queue restart counter
> - uses generic netif_ interface to disable and enable tx path
> - refactors link up/down condition and introduces dmesg log event when
> link changes.
> - introduces new constant for minimum descriptors count required for queue
> wakeup
>
> Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
> Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
> ---
> drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 4 ++
> drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 91 +++++++++++-------------
> drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 -
> drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 26 +++++++
> drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 4 ++
> drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 8 +--
> 6 files changed, 76 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> index 2149864..0fdaaa6 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> @@ -51,6 +51,10 @@
>
> #define AQ_CFG_SKB_FRAGS_MAX 32U
>
> +/* Number of descriptors available in one ring to resume this ring queue
> + */
> +#define AQ_CFG_RESTART_DESC_THRES (AQ_CFG_SKB_FRAGS_MAX * 2)
> +
> #define AQ_CFG_NAPI_WEIGHT 64U
>
> #define AQ_CFG_MULTICAST_ADDRESS_MAX 32U
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> index f281392..24f573c 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> @@ -119,6 +119,35 @@ int aq_nic_cfg_start(struct aq_nic_s *self)
> return 0;
> }
>
> +static int aq_nic_update_link_status(struct aq_nic_s *self)
> +{
> + int err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
> +
> + if (err < 0)
> + return -1;
why not just return err?
> +
> + if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps)
> + pr_info("%s: link change old %d new %d\n",
> + AQ_CFG_DRV_NAME, self->link_status.mbps,
> + self->aq_hw->aq_link_status.mbps);
You have ndev in struct aq_nic_s *self, so why not use netdev_*?
> +
> + self->link_status = self->aq_hw->aq_link_status;
> + if (!netif_carrier_ok(self->ndev) && self->link_status.mbps) {
> + aq_utils_obj_set(&self->header.flags,
> + AQ_NIC_FLAG_STARTED);
> + aq_utils_obj_clear(&self->header.flags,
> + AQ_NIC_LINK_DOWN);
> + netif_carrier_on(self->ndev);
> + netif_tx_wake_all_queues(self->ndev);
> + }
> + if (netif_carrier_ok(self->ndev) && !self->link_status.mbps) {
> + netif_carrier_off(self->ndev);
> + netif_tx_disable(self->ndev);
> + aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN);
> + }
> + return 0;
> +}
> +
> static void aq_nic_service_timer_cb(unsigned long param)
> {
> struct aq_nic_s *self = (struct aq_nic_s *)param;
> @@ -131,26 +160,13 @@ static void aq_nic_service_timer_cb(unsigned long param)
> if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY))
> goto err_exit;
>
> - err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
> - if (err < 0)
> + err = aq_nic_update_link_status(self);
> + if (err)
> goto err_exit;
>
> - self->link_status = self->aq_hw->aq_link_status;
> -
> self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw,
> self->aq_nic_cfg.is_interrupt_moderation);
>
> - if (self->link_status.mbps) {
> - aq_utils_obj_set(&self->header.flags,
> - AQ_NIC_FLAG_STARTED);
> - aq_utils_obj_clear(&self->header.flags,
> - AQ_NIC_LINK_DOWN);
> - netif_carrier_on(self->ndev);
> - } else {
> - netif_carrier_off(self->ndev);
> - aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN);
> - }
> -
> memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
> memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
> for (i = AQ_DIMOF(self->aq_vec); i--;) {
> @@ -240,7 +256,6 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
> int aq_nic_ndev_register(struct aq_nic_s *self)
> {
> int err = 0;
> - unsigned int i = 0U;
>
> if (!self->ndev) {
> err = -EINVAL;
> @@ -262,8 +277,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
>
> netif_carrier_off(self->ndev);
>
> - for (i = AQ_CFG_VECS_MAX; i--;)
> - aq_nic_ndev_queue_stop(self, i);
> + netif_tx_disable(self->ndev);
>
> err = register_netdev(self->ndev);
> if (err < 0)
> @@ -319,12 +333,8 @@ struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev)
> err = -EINVAL;
> goto err_exit;
> }
> - if (netif_running(ndev)) {
> - unsigned int i;
> -
> - for (i = AQ_CFG_VECS_MAX; i--;)
> - netif_stop_subqueue(ndev, i);
> - }
> + if (netif_running(ndev))
> + netif_tx_disable(ndev);
>
> for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs;
> self->aq_vecs++) {
> @@ -384,16 +394,6 @@ int aq_nic_init(struct aq_nic_s *self)
> return err;
> }
>
> -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx)
> -{
> - netif_start_subqueue(self->ndev, idx);
> -}
> -
> -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx)
> -{
> - netif_stop_subqueue(self->ndev, idx);
> -}
> -
> int aq_nic_start(struct aq_nic_s *self)
> {
> struct aq_vec_s *aq_vec = NULL;
> @@ -452,10 +452,6 @@ int aq_nic_start(struct aq_nic_s *self)
> goto err_exit;
> }
>
> - for (i = 0U, aq_vec = self->aq_vec[0];
> - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
> - aq_nic_ndev_queue_start(self, i);
> -
> err = netif_set_real_num_tx_queues(self->ndev, self->aq_vecs);
> if (err < 0)
> goto err_exit;
> @@ -464,6 +460,8 @@ int aq_nic_start(struct aq_nic_s *self)
> if (err < 0)
> goto err_exit;
>
> + netif_tx_start_all_queues(self->ndev);
> +
> err_exit:
> return err;
> }
> @@ -603,7 +601,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
> unsigned int tc = 0U;
> int err = NETDEV_TX_OK;
> - bool is_nic_in_bad_state;
>
> frags = skb_shinfo(skb)->nr_frags + 1;
>
> @@ -614,13 +611,10 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> goto err_exit;
> }
>
> - is_nic_in_bad_state = aq_utils_obj_test(&self->header.flags,
> - AQ_NIC_FLAGS_IS_NOT_TX_READY) ||
> - (aq_ring_avail_dx(ring) <
> - AQ_CFG_SKB_FRAGS_MAX);
> + aq_ring_update_queue_state(ring);
>
> - if (is_nic_in_bad_state) {
> - aq_nic_ndev_queue_stop(self, ring->idx);
> + /* Above status update may stop the queue. Check this. */
> + if (__netif_subqueue_stopped(self->ndev, ring->idx)) {
> err = NETDEV_TX_BUSY;
> goto err_exit;
> }
> @@ -632,9 +626,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> ring,
> frags);
> if (err >= 0) {
> - if (aq_ring_avail_dx(ring) < AQ_CFG_SKB_FRAGS_MAX + 1)
> - aq_nic_ndev_queue_stop(self, ring->idx);
> -
> ++ring->stats.tx.packets;
> ring->stats.tx.bytes += skb->len;
> }
> @@ -906,9 +897,7 @@ int aq_nic_stop(struct aq_nic_s *self)
> struct aq_vec_s *aq_vec = NULL;
> unsigned int i = 0U;
>
> - for (i = 0U, aq_vec = self->aq_vec[0];
> - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
> - aq_nic_ndev_queue_stop(self, i);
> + netif_tx_disable(self->ndev);
>
> del_timer_sync(&self->service_timer);
>
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> index 7fc2a5e..0ddd556 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> @@ -83,8 +83,6 @@ struct net_device *aq_nic_get_ndev(struct aq_nic_s *self);
> int aq_nic_init(struct aq_nic_s *self);
> int aq_nic_cfg_start(struct aq_nic_s *self);
> int aq_nic_ndev_register(struct aq_nic_s *self);
> -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx);
> -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx);
> void aq_nic_ndev_free(struct aq_nic_s *self);
> int aq_nic_start(struct aq_nic_s *self);
> int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb);
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> index 4eee199..02f79b0 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> @@ -104,6 +104,32 @@ int aq_ring_init(struct aq_ring_s *self)
> return 0;
> }
>
> +void aq_ring_update_queue_state(struct aq_ring_s *ring)
> +{
> + if (aq_ring_avail_dx(ring) <= AQ_CFG_SKB_FRAGS_MAX)
> + aq_ring_queue_stop(ring);
> + else if (aq_ring_avail_dx(ring) > AQ_CFG_RESTART_DESC_THRES)
> + aq_ring_queue_wake(ring);
> +}
> +
> +void aq_ring_queue_wake(struct aq_ring_s *ring)
> +{
> + struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
> +
> + if (__netif_subqueue_stopped(ndev, ring->idx)) {
> + netif_wake_subqueue(ndev, ring->idx);
> + ring->stats.tx.queue_restarts++;
> + }
> +}
> +
> +void aq_ring_queue_stop(struct aq_ring_s *ring)
> +{
> + struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
> +
> + if (!__netif_subqueue_stopped(ndev, ring->idx))
> + netif_stop_subqueue(ndev, ring->idx);
> +}
> +
> void aq_ring_tx_clean(struct aq_ring_s *self)
> {
> struct device *dev = aq_nic_get_dev(self->aq_nic);
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> index 782176c..24523b5 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> @@ -94,6 +94,7 @@ struct aq_ring_stats_tx_s {
> u64 errors;
> u64 packets;
> u64 bytes;
> + u64 queue_restarts;
> };
>
> union aq_ring_stats_s {
> @@ -147,6 +148,9 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self,
> int aq_ring_init(struct aq_ring_s *self);
> void aq_ring_rx_deinit(struct aq_ring_s *self);
> void aq_ring_free(struct aq_ring_s *self);
> +void aq_ring_update_queue_state(struct aq_ring_s *ring);
> +void aq_ring_queue_wake(struct aq_ring_s *ring);
> +void aq_ring_queue_stop(struct aq_ring_s *ring);
> void aq_ring_tx_clean(struct aq_ring_s *self);
> int aq_ring_rx_clean(struct aq_ring_s *self,
> struct napi_struct *napi,
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> index ebf5880..305ff8f 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> @@ -59,12 +59,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
> if (ring[AQ_VEC_TX_ID].sw_head !=
> ring[AQ_VEC_TX_ID].hw_head) {
> aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
> -
> - if (aq_ring_avail_dx(&ring[AQ_VEC_TX_ID]) >
> - AQ_CFG_SKB_FRAGS_MAX) {
> - aq_nic_ndev_queue_start(self->aq_nic,
> - ring[AQ_VEC_TX_ID].idx);
> - }
> + aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]);
> was_tx_cleaned = true;
> }
>
> @@ -364,6 +359,7 @@ void aq_vec_add_stats(struct aq_vec_s *self,
> stats_tx->packets += tx->packets;
> stats_tx->bytes += tx->bytes;
> stats_tx->errors += tx->errors;
> + stats_tx->queue_restarts += tx->queue_restarts;
> }
> }
>
>
^ permalink raw reply
* [PATCH net-next 00/10] Add support for DCB feature in hns3 driver
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
This patchset contains some enhancements related to DCB before
adding support for the DCB feature.
This patchset depends on the following patchset:
https://patchwork.ozlabs.org/cover/815646/
https://patchwork.ozlabs.org/cover/816145/
High Level Architecture:
[ tc qdisc ] [ lldpad ]
| |
| |
| |
[ hns3_enet ] [ hns3_dcbnl ]
\ /
\ /
\ /
[ hclge_dcb ]
/ \
/ \
/ \
[ hclge_main ] [ hclge_tm ]
The current patch-set supports the following functionality:
1. Use of tc qdisc to configure the tc num and prio_tc_map.
2. Use of lldptool to configure the tc schedule mode, tc
bandwidth(if schedule mode is ETS), prio_tc_map and
PFC parameter.
Yunsheng Lin (10):
net: hns3: Support for dynamically assigning tx buffer to TC
net: hns3: Add support for dynamically buffer reallocation
net: hns3: Add support for PFC setting in TM module
net: hns3: Add support for port shaper setting in TM module
net: hns3: Add tc-based TM support for sriov enabled port
net: hns3: Add some interface for the support of DCB feature
net: hns3: Add hclge_dcb module for the support of DCB feature
net: hns3: Add dcb netlink interface for the support of DCB feature
net: hns3: Setting for fc_mode and dcb enable flag in TM module
net: hns3: Add mqprio support when interacting with network stack
drivers/net/ethernet/hisilicon/Kconfig | 9 +
drivers/net/ethernet/hisilicon/hns3/hnae3.h | 20 ++
.../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 4 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 6 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 327 +++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 21 ++
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 219 +++++++++-----
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 8 +-
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 232 +++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 15 +
.../ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c | 106 +++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 137 +++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 7 +
13 files changed, 983 insertions(+), 128 deletions(-)
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
--
1.9.1
^ permalink raw reply
* [PATCH net-next 01/10] net: hns3: Support for dynamically assigning tx buffer to TC
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds support for dynamically assigning tx buffer to a
TC when the TC is enabled.
This saves buffer space for the rx direction to avoid packet loss.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 1 +
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 69 ++++++++++++++++++----
2 files changed, 60 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 758cf39..a81c6cb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -311,6 +311,7 @@ struct hclge_tc_thrd {
struct hclge_priv_buf {
struct hclge_waterline wl; /* Waterline for low and high*/
u32 buf_size; /* TC private buffer size */
+ u32 tx_buf_size;
u32 enable; /* Enable TC private buffer or not */
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d27618b..dfe0fd2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1324,23 +1324,28 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
return 0;
}
-static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
+static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
{
/* TX buffer size is unit by 128 byte */
#define HCLGE_BUF_SIZE_UNIT_SHIFT 7
#define HCLGE_BUF_SIZE_UPDATE_EN_MSK BIT(15)
struct hclge_tx_buff_alloc *req;
+ struct hclge_priv_buf *priv;
struct hclge_desc desc;
+ u32 buf_size;
int ret;
u8 i;
req = (struct hclge_tx_buff_alloc *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
- for (i = 0; i < HCLGE_TC_NUM; i++)
+ for (i = 0; i < HCLGE_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ buf_size = priv->tx_buf_size;
req->tx_pkt_buff[i] =
cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
HCLGE_BUF_SIZE_UPDATE_EN_MSK);
+ }
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -1352,9 +1357,9 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
return 0;
}
-static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size)
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev)
{
- int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size);
+ int ret = hclge_cmd_alloc_tx_buff(hdev);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -1433,6 +1438,18 @@ static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
return rx_priv;
}
+static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ u32 tx_buf = 0, i;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ tx_buf += priv->tx_buf_size;
+ }
+ return tx_buf;
+}
+
static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
{
u32 shared_buf_min, shared_buf_tc, shared_std;
@@ -1477,18 +1494,44 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
return true;
}
+static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ u32 i, total_size;
+
+ total_size = hdev->pkt_buf_size;
+
+ /* alloc tx buffer for all enabled tc */
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+
+ if (total_size < HCLGE_DEFAULT_TX_BUF)
+ return -ENOMEM;
+
+ if (hdev->hw_tc_map & BIT(i))
+ priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+ else
+ priv->tx_buf_size = 0;
+
+ total_size -= priv->tx_buf_size;
+ }
+
+ return 0;
+}
+
/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
* @hdev: pointer to struct hclge_dev
- * @tx_size: the allocated tx buffer for all TCs
* @return: 0: calculate sucessful, negative: fail
*/
-int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
+int hclge_rx_buffer_calc(struct hclge_dev *hdev)
{
- u32 rx_all = hdev->pkt_buf_size - tx_size;
+ u32 rx_all = hdev->pkt_buf_size;
int no_pfc_priv_num, pfc_priv_num;
struct hclge_priv_buf *priv;
int i;
+ rx_all -= hclge_get_tx_buff_alloced(hdev);
+
/* When DCB is not supported, rx private
* buffer is not allocated.
*/
@@ -1771,7 +1814,6 @@ static int hclge_common_wl_config(struct hclge_dev *hdev)
int hclge_buffer_alloc(struct hclge_dev *hdev)
{
- u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF;
int ret;
hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
@@ -1780,14 +1822,21 @@ int hclge_buffer_alloc(struct hclge_dev *hdev)
if (!hdev->priv_buf)
return -ENOMEM;
- ret = hclge_tx_buffer_alloc(hdev, tx_buf_size);
+ ret = hclge_tx_buffer_calc(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not calc tx buffer size for all TCs %d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_tx_buffer_alloc(hdev);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not alloc tx buffers %d\n", ret);
return ret;
}
- ret = hclge_rx_buffer_calc(hdev, tx_buf_size);
+ ret = hclge_rx_buffer_calc(hdev);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc rx priv buffer size for all TCs %d\n",
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 02/10] net: hns3: Add support for dynamically buffer reallocation
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
Currently, buffer allocation can only happen at init. When
doing buffer reallocation after init, care must be taken
with the memory that priv_buf points to.
This patch fixes it by using dynamically allocated temporary
memory. Because we only do buffer reallocation at init or
when setting up the DCB parameters, and priv_buf is only
used during the buffer allocation process, it is OK to use
dynamically allocated temporary memory.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 5 +
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 150 +++++++++++----------
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 -
3 files changed, 87 insertions(+), 70 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index a81c6cb..6b6d28e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -322,6 +322,11 @@ struct hclge_shared_buf {
u32 buf_size;
};
+struct hclge_pkt_buf_alloc {
+ struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM];
+ struct hclge_shared_buf s_buf;
+};
+
#define HCLGE_RX_COM_WL_EN_B 15
struct hclge_rx_com_wl_buf {
__le16 high_wl;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dfe0fd2..c27b460 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1324,7 +1324,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
return 0;
}
-static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
+static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
/* TX buffer size is unit by 128 byte */
#define HCLGE_BUF_SIZE_UNIT_SHIFT 7
@@ -1340,7 +1341,7 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
for (i = 0; i < HCLGE_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
buf_size = priv->tx_buf_size;
req->tx_pkt_buff[i] =
cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
@@ -1357,9 +1358,10 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tx_buffer_alloc(struct hclge_dev *hdev)
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- int ret = hclge_cmd_alloc_tx_buff(hdev);
+ int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -1392,13 +1394,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
}
/* Get the number of pfc enabled TCs, which have private buffer */
-static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
int i, cnt = 0;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if ((hdev->tm_info.hw_pfc_map & BIT(i)) &&
priv->enable)
cnt++;
@@ -1408,13 +1411,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
}
/* Get the number of pfc disabled TCs, which have private buffer */
-static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
int i, cnt = 0;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
!(hdev->tm_info.hw_pfc_map & BIT(i)) &&
priv->enable)
@@ -1424,33 +1428,35 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
return cnt;
}
-static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
+static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 rx_priv = 0;
int i;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (priv->enable)
rx_priv += priv->buf_size;
}
return rx_priv;
}
-static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev)
+static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 tx_buf = 0, i;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
tx_buf += priv->tx_buf_size;
}
return tx_buf;
}
-static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
+static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc,
+ u32 rx_all)
{
u32 shared_buf_min, shared_buf_tc, shared_std;
int tc_num, pfc_enable_num;
@@ -1471,30 +1477,31 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
hdev->mps;
shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
- rx_priv = hclge_get_rx_priv_buff_alloced(hdev);
+ rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc);
if (rx_all <= rx_priv + shared_std)
return false;
shared_buf = rx_all - rx_priv;
- hdev->s_buf.buf_size = shared_buf;
- hdev->s_buf.self.high = shared_buf;
- hdev->s_buf.self.low = 2 * hdev->mps;
+ buf_alloc->s_buf.buf_size = shared_buf;
+ buf_alloc->s_buf.self.high = shared_buf;
+ buf_alloc->s_buf.self.low = 2 * hdev->mps;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
if ((hdev->hw_tc_map & BIT(i)) &&
(hdev->tm_info.hw_pfc_map & BIT(i))) {
- hdev->s_buf.tc_thrd[i].low = hdev->mps;
- hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps;
} else {
- hdev->s_buf.tc_thrd[i].low = 0;
- hdev->s_buf.tc_thrd[i].high = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = 0;
+ buf_alloc->s_buf.tc_thrd[i].high = hdev->mps;
}
}
return true;
}
-static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
+static int hclge_tx_buffer_calc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 i, total_size;
@@ -1503,7 +1510,7 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
/* alloc tx buffer for all enabled tc */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (total_size < HCLGE_DEFAULT_TX_BUF)
return -ENOMEM;
@@ -1521,22 +1528,24 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev)
/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
* @hdev: pointer to struct hclge_dev
+ * @buf_alloc: pointer to buffer calculation data
* @return: 0: calculate sucessful, negative: fail
*/
-int hclge_rx_buffer_calc(struct hclge_dev *hdev)
+int hclge_rx_buffer_calc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
u32 rx_all = hdev->pkt_buf_size;
int no_pfc_priv_num, pfc_priv_num;
struct hclge_priv_buf *priv;
int i;
- rx_all -= hclge_get_tx_buff_alloced(hdev);
+ rx_all -= hclge_get_tx_buff_alloced(buf_alloc);
/* When DCB is not supported, rx private
* buffer is not allocated.
*/
if (!hnae3_dev_dcb_supported(hdev)) {
- if (!hclge_is_rx_buf_ok(hdev, rx_all))
+ if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return -ENOMEM;
return 0;
@@ -1544,7 +1553,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
/* step 1, try to alloc private buffer for all enabled tc */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i)) {
priv->enable = 1;
if (hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1565,14 +1574,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
}
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 2, try to decrease the buffer size of
* no pfc TC's private buffer
*/
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
priv->enable = 0;
priv->wl.low = 0;
@@ -1595,18 +1604,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
}
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 3, try to reduce the number of pfc disabled TCs,
* which have private buffer
*/
/* get the total no pfc enable TC number, which have private buffer */
- no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev);
+ no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc);
/* let the last to be cleared first */
for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
!(hdev->tm_info.hw_pfc_map & BIT(i))) {
@@ -1618,22 +1627,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
no_pfc_priv_num--;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
no_pfc_priv_num == 0)
break;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 4, try to reduce the number of pfc enabled TCs
* which have private buffer.
*/
- pfc_priv_num = hclge_get_pfc_priv_num(hdev);
+ pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc);
/* let the last to be cleared first */
for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1645,17 +1654,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev)
pfc_priv_num--;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
pfc_priv_num == 0)
break;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
return -ENOMEM;
}
-static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
+static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_rx_priv_buff *req;
struct hclge_desc desc;
@@ -1667,7 +1677,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
/* Alloc private buffer TCs */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- struct hclge_priv_buf *priv = &hdev->priv_buf[i];
+ struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
req->buf_num[i] =
cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S);
@@ -1676,7 +1686,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
}
req->shared_buf =
- cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+ cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
(1 << HCLGE_TC0_PRI_BUF_EN_B));
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1691,7 +1701,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
-static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
+static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_rx_priv_wl_buf *req;
struct hclge_priv_buf *priv;
@@ -1711,7 +1722,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
- priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j];
+ u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j;
+
+ priv = &buf_alloc->priv_buf[idx];
req->tc_wl[j].high =
cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
req->tc_wl[j].high |=
@@ -1736,9 +1749,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
return 0;
}
-static int hclge_common_thrd_config(struct hclge_dev *hdev)
+static int hclge_common_thrd_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_shared_buf *s_buf = &hdev->s_buf;
+ struct hclge_shared_buf *s_buf = &buf_alloc->s_buf;
struct hclge_rx_com_thrd *req;
struct hclge_desc desc[2];
struct hclge_tc_thrd *tc;
@@ -1782,9 +1796,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev)
return 0;
}
-static int hclge_common_wl_config(struct hclge_dev *hdev)
+static int hclge_common_wl_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_shared_buf *buf = &hdev->s_buf;
+ struct hclge_shared_buf *buf = &buf_alloc->s_buf;
struct hclge_rx_com_wl *req;
struct hclge_desc desc;
int ret;
@@ -1814,69 +1829,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev)
int hclge_buffer_alloc(struct hclge_dev *hdev)
{
+ struct hclge_pkt_buf_alloc *pkt_buf;
int ret;
- hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
- sizeof(struct hclge_priv_buf),
- GFP_KERNEL | __GFP_ZERO);
- if (!hdev->priv_buf)
+ pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL);
+ if (!pkt_buf)
return -ENOMEM;
- ret = hclge_tx_buffer_calc(hdev);
+ ret = hclge_tx_buffer_calc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc tx buffer size for all TCs %d\n", ret);
- return ret;
+ goto out;
}
- ret = hclge_tx_buffer_alloc(hdev);
+ ret = hclge_tx_buffer_alloc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not alloc tx buffers %d\n", ret);
- return ret;
+ goto out;
}
- ret = hclge_rx_buffer_calc(hdev);
+ ret = hclge_rx_buffer_calc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc rx priv buffer size for all TCs %d\n",
ret);
- return ret;
+ goto out;
}
- ret = hclge_rx_priv_buf_alloc(hdev);
+ ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n",
ret);
- return ret;
+ goto out;
}
if (hnae3_dev_dcb_supported(hdev)) {
- ret = hclge_rx_priv_wl_config(hdev);
+ ret = hclge_rx_priv_wl_config(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not configure rx private waterline %d\n",
ret);
- return ret;
+ goto out;
}
- ret = hclge_common_thrd_config(hdev);
+ ret = hclge_common_thrd_config(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not configure common threshold %d\n",
ret);
- return ret;
+ goto out;
}
}
- ret = hclge_common_wl_config(hdev);
- if (ret) {
+ ret = hclge_common_wl_config(hdev, pkt_buf);
+ if (ret)
dev_err(&hdev->pdev->dev,
"could not configure common waterline %d\n", ret);
- return ret;
- }
- return 0;
+out:
+ kfree(pkt_buf);
+ return ret;
}
static int hclge_init_roce_base_info(struct hclge_vport *vport)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9fcfd93..4fc36f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -463,8 +463,6 @@ struct hclge_dev {
u32 pkt_buf_size; /* Total pf buf size for tx/rx */
u32 mps; /* Max packet size */
- struct hclge_priv_buf *priv_buf;
- struct hclge_shared_buf s_buf;
enum hclge_mta_dmac_sel_type mta_mac_sel_type;
bool enable_mta; /* Mutilcast filter enable */
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 03/10] net: hns3: Add support for PFC setting in TM module
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds a pfc_pause_en cmd, and uses it to configure
the PFC option according to fc_mode in hdev->tm_info.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 68 ++++++++++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 5 ++
2 files changed, 68 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 73a75d7..0b4b5d9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+ u8 pfc_bitmap)
+{
+ struct hclge_desc desc;
+ struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false);
+
+ pfc->tx_rx_en_bitmap = tx_rx_bitmap;
+ pfc->pri_en_bitmap = pfc_bitmap;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
{
u8 tc;
@@ -969,20 +983,64 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
return hclge_tm_schd_mode_hw(hdev);
}
+static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
+{
+ u8 enable_bitmap = 0;
+
+ if (hdev->tm_info.fc_mode == HCLGE_FC_PFC)
+ enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK |
+ HCLGE_RX_MAC_PAUSE_EN_MSK;
+
+ return hclge_pfc_pause_en_cfg(hdev, enable_bitmap,
+ hdev->tm_info.hw_pfc_map);
+}
+
+static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+{
+ bool tx_en, rx_en;
+
+ switch (hdev->tm_info.fc_mode) {
+ case HCLGE_FC_NONE:
+ tx_en = false;
+ rx_en = false;
+ break;
+ case HCLGE_FC_RX_PAUSE:
+ tx_en = false;
+ rx_en = true;
+ break;
+ case HCLGE_FC_TX_PAUSE:
+ tx_en = true;
+ rx_en = false;
+ break;
+ case HCLGE_FC_FULL:
+ tx_en = true;
+ rx_en = true;
+ break;
+ default:
+ tx_en = true;
+ rx_en = true;
+ }
+
+ return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
+}
+
int hclge_pause_setup_hw(struct hclge_dev *hdev)
{
- bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC;
int ret;
u8 i;
- ret = hclge_mac_pause_en_cfg(hdev, en, en);
- if (ret)
- return ret;
+ if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+ return hclge_mac_pause_setup_hw(hdev);
- /* Only DCB-supported dev supports qset back pressure setting */
+ /* Only DCB-supported dev supports qset back pressure and pfc cmd */
if (!hnae3_dev_dcb_supported(hdev))
return 0;
+ /* When MAC is GE Mode, hdev does not support pfc setting */
+ ret = hclge_pfc_setup_hw(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
+
for (i = 0; i < hdev->tm_info.num_tc; i++) {
ret = hclge_tm_qs_bp_cfg(hdev, i);
if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 85158b0..8ecd83c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -94,6 +94,11 @@ struct hclge_bp_to_qs_map_cmd {
u32 rsvd1;
};
+struct hclge_pfc_en_cmd {
+ u8 tx_rx_en_bitmap;
+ u8 pri_en_bitmap;
+};
+
#define hclge_tm_set_field(dest, string, val) \
hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 04/10] net: hns3: Add support for port shaper setting in TM module
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds a tm_port_shaper cmd and sets the port shaper
to HCLGE_ETHER_MAX_RATE during the TM initialization process.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 33 ++++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 4 +++
2 files changed, 37 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 0b4b5d9..33090d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -301,6 +301,35 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_port_shapping_cmd *shap_cfg_cmd;
+ struct hclge_desc desc;
+ u8 ir_u, ir_b, ir_s;
+ int ret;
+
+ ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE,
+ HCLGE_SHAPER_LVL_PORT,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false);
+ shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
+
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para,
+ BS_B, HCLGE_SHAPER_BS_U_DEF);
+ hclge_tm_set_field(shap_cfg_cmd->port_shapping_para,
+ BS_S, HCLGE_SHAPER_BS_S_DEF);
+ shap_cfg_cmd->port_shapping_para =
+ cpu_to_le32(shap_cfg_cmd->port_shapping_para);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
enum hclge_shap_bucket bucket, u8 pri_id,
u8 ir_b, u8 ir_u, u8 ir_s,
@@ -864,6 +893,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev)
{
int ret;
+ ret = hclge_tm_port_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_tm_pg_shaper_cfg(hdev);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 8ecd83c..19a01e4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -99,6 +99,10 @@ struct hclge_pfc_en_cmd {
u8 pri_en_bitmap;
};
+struct hclge_port_shapping_cmd {
+ __le32 port_shapping_para;
+};
+
#define hclge_tm_set_field(dest, string, val) \
hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 05/10] net: hns3: Add tc-based TM support for sriov enabled port
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
When sriov is enabled and TM is in tc-based mode, the vf's TM
parameters are not set in the TM initialization process.
This patch adds tc-based TM support for sriov-enabled ports,
using the information in the vport struct.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 49 ++++++++++++++--------
1 file changed, 31 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 33090d0..2bc7d63c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -389,13 +389,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id)
+static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode)
{
struct hclge_desc desc;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false);
- if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+ if (mode == HCLGE_SCH_MODE_DWRR)
desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
else
desc.data[1] = 0;
@@ -639,17 +639,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
int ret;
- u32 i;
+ u32 i, k;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
/* Cfg qs -> pri mapping, one by one mapping */
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
- ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i);
- if (ret)
- return ret;
- }
+ for (k = 0; k < hdev->num_alloc_vport; k++)
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_tm_qs_to_pri_map_cfg(
+ hdev, vport[k].qs_offset + i, i);
+ if (ret)
+ return ret;
+ }
} else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
- int k;
/* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */
for (k = 0; k < hdev->num_alloc_vport; k++)
for (i = 0; i < HNAE3_MAX_TC; i++) {
@@ -798,10 +799,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev)
static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
{
+ struct hclge_vport *vport = hdev->vport;
struct hclge_pg_info *pg_info;
u8 dwrr;
int ret;
- u32 i;
+ u32 i, k;
for (i = 0; i < hdev->tm_info.num_tc; i++) {
pg_info =
@@ -812,9 +814,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
if (ret)
return ret;
- ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr);
- if (ret)
- return ret;
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ ret = hclge_tm_qs_weight_cfg(
+ hdev, vport[k].qs_offset + i,
+ vport[k].dwrr);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -945,7 +951,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
return ret;
for (i = 0; i < kinfo->num_tc; i++) {
- ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i);
+ u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode;
+
+ ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i,
+ sch_mode);
if (ret)
return ret;
}
@@ -957,7 +966,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
int ret;
- u8 i;
+ u8 i, k;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
for (i = 0; i < hdev->tm_info.num_tc; i++) {
@@ -965,9 +974,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
if (ret)
return ret;
- ret = hclge_tm_qs_schd_mode_cfg(hdev, i);
- if (ret)
- return ret;
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ ret = hclge_tm_qs_schd_mode_cfg(
+ hdev, vport[k].qs_offset + i,
+ HCLGE_SCH_MODE_DWRR);
+ if (ret)
+ return ret;
+ }
}
} else {
for (i = 0; i < hdev->num_alloc_vport; i++) {
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 06/10] net: hns3: Add some interface for the support of DCB feature
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds some interfaces and exports some interfaces from
hclge_tm and hclge_main to support the upcoming DCB feature.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 ++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 48 ++++++++++++++++++++--
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 6 +++
4 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c27b460..49a11d5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -30,7 +30,6 @@
#define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
#define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
-static int hclge_rss_init_hw(struct hclge_dev *hdev);
static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
enum hclge_mta_dmac_sel_type mta_mac_sel,
bool enable);
@@ -2660,7 +2659,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
return hdev->rss_size_max;
}
-static int hclge_rss_init_hw(struct hclge_dev *hdev)
+int hclge_rss_init_hw(struct hclge_dev *hdev)
{
const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
struct hclge_vport *vport = hdev->vport;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 4fc36f0..394b587 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -515,4 +515,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue)
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
bool is_kill, u16 vlan, u8 qos, __be16 proto);
+
+int hclge_buffer_alloc(struct hclge_dev *hdev);
+int hclge_rss_init_hw(struct hclge_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 2bc7d63c..e158e66 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -884,10 +884,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tm_map_cfg(struct hclge_dev *hdev)
+int hclge_tm_map_cfg(struct hclge_dev *hdev)
{
int ret;
+ ret = hclge_up_to_tc_map(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_tm_pg_to_pri_map(hdev);
if (ret)
return ret;
@@ -995,7 +999,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
{
int ret;
@@ -1093,7 +1097,45 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
return ret;
}
- return hclge_up_to_tc_map(hdev);
+ return 0;
+}
+
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_knic_private_info *kinfo;
+ u32 i, k;
+
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+ if (prio_tc[i] >= hdev->tm_info.num_tc)
+ return -EINVAL;
+ hdev->tm_info.prio_tc[i] = prio_tc[i];
+
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ kinfo = &vport[k].nic.kinfo;
+ kinfo->prio_tc[i] = prio_tc[i];
+ }
+ }
+ return 0;
+}
+
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
+{
+ u8 i, bit_map = 0;
+
+ hdev->tm_info.num_tc = num_tc;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++)
+ bit_map |= BIT(i);
+
+ if (!bit_map) {
+ bit_map = 1;
+ hdev->tm_info.num_tc = 1;
+ }
+
+ hdev->hw_tc_map = bit_map;
+
+ hclge_tm_schd_info_init(hdev);
}
int hclge_tm_init_hw(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 19a01e4..bf59961 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -112,4 +112,10 @@ struct hclge_port_shapping_cmd {
int hclge_tm_schd_init(struct hclge_dev *hdev);
int hclge_pause_setup_hw(struct hclge_dev *hdev);
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev);
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
+int hclge_tm_map_cfg(struct hclge_dev *hdev);
+int hclge_tm_init_hw(struct hclge_dev *hdev);
#endif
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 07/10] net: hns3: Add hclge_dcb module for the support of DCB feature
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
The hclge_dcb module calls the interfaces from hclge_main/tm
and provides the interface for the dcb netlink interface.
This patch also updates the Makefiles required to build the DCB
supported code in the HNS3 Ethernet driver and updates the existing
Kconfig file in the hisilicon folder.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
drivers/net/ethernet/hisilicon/Kconfig | 9 +
drivers/net/ethernet/hisilicon/hns3/hnae3.h | 20 ++
.../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 327 +++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 21 ++
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 25 +-
.../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 +
7 files changed, 401 insertions(+), 6 deletions(-)
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 91c7bdb..9d7cb03 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -103,4 +103,13 @@ config HNS3_ENET
family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
devices and their associated operations.
+config HNS3_DCB
+ bool "Hisilicon HNS3 Data Center Bridge Support"
+ default n
+ depends on HNS3 && HNS3_HCLGE && DCB
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver.
+
+ If unsure, say N.
+
endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 1a01cad..5a6fa53 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -28,6 +28,7 @@
*/
#include <linux/acpi.h>
+#include <linux/dcbnl.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -131,6 +132,7 @@ struct hnae3_client_ops {
int (*init_instance)(struct hnae3_handle *handle);
void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
void (*link_status_change)(struct hnae3_handle *handle, bool state);
+ int (*setup_tc)(struct hnae3_handle *handle, u8 tc);
};
#define HNAE3_CLIENT_NAME_LENGTH 16
@@ -363,6 +365,23 @@ struct hnae3_ae_ops {
u16 vlan, u8 qos, __be16 proto);
};
+struct hnae3_dcb_ops {
+ /* IEEE 802.1Qaz std */
+ int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *);
+ int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *);
+ int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *);
+ int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *);
+
+ /* DCBX configuration */
+ u8 (*getdcbx)(struct hnae3_handle *);
+ u8 (*setdcbx)(struct hnae3_handle *, u8);
+
+ /* TC setup */
+ int (*setup_tc)(struct hnae3_handle *, u8, u8 *);
+
+ int (*map_update)(struct hnae3_handle *);
+};
+
struct hnae3_ae_algo {
const struct hnae3_ae_ops *ops;
struct list_head node;
@@ -394,6 +413,7 @@ struct hnae3_knic_private_info {
u16 num_tqps; /* total number of TQPs in this handle */
struct hnae3_queue **tqp; /* array base of all TQPs in this instance */
+ const struct hnae3_dcb_ops *dcb_ops;
};
struct hnae3_roce_private_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 162e8a42..7023dc87 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -7,5 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_HNS3_HCLGE) += hclge.o
hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
+hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
+
obj-$(CONFIG_HNS3_ENET) += hns3.o
hns3-objs = hns3_enet.o hns3_ethtool.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
new file mode 100644
index 0000000..178333b
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hclge_main.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+#define BW_PERCENT 100
+
+static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev,
+ struct ieee_ets *ets)
+{
+ u8 i;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ hdev->tm_info.tc_info[i].tc_sch_mode =
+ HCLGE_SCH_MODE_SP;
+ hdev->tm_info.pg_info[0].tc_dwrr[i] = 0;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ hdev->tm_info.tc_info[i].tc_sch_mode =
+ HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.pg_info[0].tc_dwrr[i] =
+ ets->tc_tx_bw[i];
+ break;
+ default:
+ /* Hardware only supports SP (strict priority)
+ * or ETS (enhanced transmission selection)
+ * algorithms, if we receive some other value
+ * from dcbnl, then throw an error.
+ */
+ return -EINVAL;
+ }
+ }
+
+ return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc);
+}
+
+static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
+ struct ieee_ets *ets)
+{
+ u32 i;
+
+ memset(ets, 0, sizeof(*ets));
+ ets->willing = 1;
+ ets->ets_cap = hdev->tc_max;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
+ ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+
+ if (hdev->tm_info.tc_info[i].tc_sch_mode ==
+ HCLGE_SCH_MODE_SP)
+ ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT;
+ else
+ ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ }
+}
+
+/* IEEE std */
+static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ hclge_tm_info_to_ieee_ets(hdev, ets);
+
+ return 0;
+}
+
+static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+ u8 *tc, bool *changed)
+{
+ u32 total_ets_bw = 0;
+ u8 max_tc = 0;
+ u8 i;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ if (ets->prio_tc[i] >= hdev->tc_max ||
+ i >= hdev->tc_max)
+ return -EINVAL;
+
+ if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
+ *changed = true;
+
+ if (ets->prio_tc[i] > max_tc)
+ max_tc = ets->prio_tc[i];
+
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+ HCLGE_SCH_MODE_SP)
+ *changed = true;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+ HCLGE_SCH_MODE_DWRR)
+ *changed = true;
+
+ total_ets_bw += ets->tc_tx_bw[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (total_ets_bw != BW_PERCENT)
+ return -EINVAL;
+
+ *tc = max_tc + 1;
+ if (*tc != hdev->tm_info.num_tc)
+ *changed = true;
+
+ return 0;
+}
+
+static int hclge_map_update(struct hnae3_handle *h)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_tm_map_cfg(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_schd_mode_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_pause_setup_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_rss_init_hw(hdev);
+}
+
+static int hclge_client_setup_tc(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_client *client;
+ struct hnae3_handle *handle;
+ int ret;
+ u32 i;
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ handle = &vport[i].nic;
+ client = handle->client;
+
+ if (!client || !client->ops || !client->ops->setup_tc)
+ continue;
+
+ ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ bool map_changed = false;
+ u8 num_tc = 0;
+ int ret;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
+ if (ret)
+ return ret;
+
+ hclge_tm_schd_info_update(hdev, num_tc);
+
+ ret = hclge_ieee_ets_to_tm_info(hdev, ets);
+ if (ret)
+ return ret;
+
+ if (map_changed) {
+ ret = hclge_client_setup_tc(hdev);
+ if (ret)
+ return ret;
+ }
+
+ return hclge_tm_dwrr_cfg(hdev);
+}
+
+static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ u8 i, j, pfc_map, *prio_tc;
+
+ memset(pfc, 0, sizeof(*pfc));
+ pfc->pfc_cap = hdev->pfc_max;
+ prio_tc = hdev->tm_info.prio_tc;
+ pfc_map = hdev->tm_info.hw_pfc_map;
+
+ /* Pfc setting is based on TC */
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+ if ((prio_tc[j] == i) && (pfc_map & BIT(i)))
+ pfc->pfc_en |= BIT(j);
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ u8 i, j, pfc_map, *prio_tc;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+ return -EINVAL;
+
+ prio_tc = hdev->tm_info.prio_tc;
+ pfc_map = 0;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+ if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) {
+ pfc_map |= BIT(i);
+ break;
+ }
+ }
+ }
+
+ if (pfc_map == hdev->tm_info.hw_pfc_map)
+ return 0;
+
+ hdev->tm_info.hw_pfc_map = pfc_map;
+
+ return hclge_pause_setup_hw(hdev);
+}
+
+/* DCBX configuration */
+static u8 hclge_getdcbx(struct hnae3_handle *h)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ return hdev->dcbx_cap;
+}
+
+static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ /* No support for LLD_MANAGED modes or CEE */
+ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+ (mode & DCB_CAP_DCBX_VER_CEE) ||
+ !(mode & DCB_CAP_DCBX_HOST))
+ return 1;
+
+ hdev->dcbx_cap = mode;
+
+ return 0;
+}
+
+static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ if (tc > hdev->tc_max) {
+ dev_err(&hdev->pdev->dev,
+ "setup tc failed, tc(%u) > tc_max(%u)\n",
+ tc, hdev->tc_max);
+ return -EINVAL;
+ }
+
+ hclge_tm_schd_info_update(hdev, tc);
+
+ ret = hclge_tm_prio_tc_info_update(hdev, prio_tc);
+ if (ret)
+ return ret;
+
+ return hclge_tm_init_hw(hdev);
+}
+
+static const struct hnae3_dcb_ops hns3_dcb_ops = {
+ .ieee_getets = hclge_ieee_getets,
+ .ieee_setets = hclge_ieee_setets,
+ .ieee_getpfc = hclge_ieee_getpfc,
+ .ieee_setpfc = hclge_ieee_setpfc,
+ .getdcbx = hclge_getdcbx,
+ .setdcbx = hclge_setdcbx,
+ .setup_tc = hclge_setup_tc,
+ .map_update = hclge_map_update,
+};
+
+void hclge_dcb_ops_set(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_knic_private_info *kinfo;
+
+ /* Hdev does not support DCB or vport is
+ * not a pf, then dcb_ops is not set.
+ */
+ if (!hnae3_dev_dcb_supported(hdev) ||
+ vport->vport_id != 0)
+ return;
+
+ kinfo = &vport->nic.kinfo;
+ kinfo->dcb_ops = &hns3_dcb_ops;
+ hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
new file mode 100644
index 0000000..7d808ee
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_DCB_H__
+#define __HCLGE_DCB_H__
+
+#include "hclge_main.h"
+
+#ifdef CONFIG_HNS3_DCB
+void hclge_dcb_ops_set(struct hclge_dev *hdev);
+#else
+static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {}
+#endif
+
+#endif /* __HCLGE_DCB_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 49a11d5..28bd118 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -19,6 +19,7 @@
#include <linux/platform_device.h>
#include "hclge_cmd.h"
+#include "hclge_dcb.h"
#include "hclge_main.h"
#include "hclge_mdio.h"
#include "hclge_tm.h"
@@ -1057,7 +1058,7 @@ static int hclge_configure(struct hclge_dev *hdev)
hdev->hw.mac.phy_addr = cfg.phy_addr;
hdev->num_desc = cfg.tqp_desc_num;
hdev->tm_info.num_pg = 1;
- hdev->tm_info.num_tc = cfg.tc_num;
+ hdev->tc_max = cfg.tc_num;
hdev->tm_info.hw_pfc_map = 0;
ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
@@ -1066,15 +1067,25 @@ static int hclge_configure(struct hclge_dev *hdev)
return ret;
}
- if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) ||
- (hdev->tm_info.num_tc < 1)) {
+ if ((hdev->tc_max > HNAE3_MAX_TC) ||
+ (hdev->tc_max < 1)) {
dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
- hdev->tm_info.num_tc);
- hdev->tm_info.num_tc = 1;
+ hdev->tc_max);
+ hdev->tc_max = 1;
}
+ /* Dev does not support DCB */
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ hdev->tc_max = 1;
+ hdev->pfc_max = 0;
+ } else {
+ hdev->pfc_max = hdev->tc_max;
+ }
+
+ hdev->tm_info.num_tc = hdev->tc_max;
+
/* Currently not support uncontiuous tc */
- for (i = 0; i < cfg.tc_num; i++)
+ for (i = 0; i < hdev->tm_info.num_tc; i++)
hnae_set_bit(hdev->hw_tc_map, i, 1);
if (!hdev->num_vmdq_vport && !hdev->num_req_vfs)
@@ -4242,6 +4253,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ hclge_dcb_ops_set(hdev);
+
setup_timer(&hdev->service_timer, hclge_service_timer,
(unsigned long)hdev);
INIT_WORK(&hdev->service_task, hclge_service_task);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 394b587..7c66c00 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -421,8 +421,11 @@ struct hclge_dev {
#define HCLGE_FLAG_TC_BASE_SCH_MODE 1
#define HCLGE_FLAG_VNET_BASE_SCH_MODE 2
u8 tx_sch_mode;
+ u8 tc_max;
+ u8 pfc_max;
u8 default_up;
+ u8 dcbx_cap;
struct hclge_tm_info tm_info;
u16 num_msi;
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 08/10] net: hns3: Add dcb netlink interface for the support of DCB feature
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
This patch adds the dcb netlink interface by calling the interface from
the hclge_dcb module.
This patch also updates the Makefile in order to build the hns3_dcbnl module.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 +
.../ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c | 106 +++++++++++++++++++++
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 +
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 7 ++
4 files changed, 117 insertions(+)
create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 7023dc87..d2b20d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -11,3 +11,5 @@ hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
obj-$(CONFIG_HNS3_ENET) += hns3.o
hns3-objs = hns3_enet.o hns3_ethtool.o
+
+hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
new file mode 100644
index 0000000..9832172
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+static
+int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_getets)
+ return h->kinfo.dcb_ops->ieee_getets(h, ets);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_setets)
+ return h->kinfo.dcb_ops->ieee_setets(h, ets);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_getpfc)
+ return h->kinfo.dcb_ops->ieee_getpfc(h, pfc);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->ieee_setpfc)
+ return h->kinfo.dcb_ops->ieee_setpfc(h, pfc);
+
+ return -EOPNOTSUPP;
+}
+
+/* DCBX configuration */
+static u8 hns3_dcbnl_getdcbx(struct net_device *ndev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->getdcbx)
+ return h->kinfo.dcb_ops->getdcbx(h);
+
+ return 0;
+}
+
+/* return 0 if successful, otherwise fail */
+static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->kinfo.dcb_ops->setdcbx)
+ return h->kinfo.dcb_ops->setdcbx(h, mode);
+
+ return 1;
+}
+
+static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = {
+ .ieee_getets = hns3_dcbnl_ieee_getets,
+ .ieee_setets = hns3_dcbnl_ieee_setets,
+ .ieee_getpfc = hns3_dcbnl_ieee_getpfc,
+ .ieee_setpfc = hns3_dcbnl_ieee_setpfc,
+ .getdcbx = hns3_dcbnl_getdcbx,
+ .setdcbx = hns3_dcbnl_setdcbx,
+};
+
+/* hns3_dcbnl_setup - DCBNL setup
+ * @handle: the corresponding vport handle
+ * Set up DCBNL
+ */
+void hns3_dcbnl_setup(struct hnae3_handle *handle)
+{
+ struct net_device *dev = handle->kinfo.netdev;
+
+ if (!handle->kinfo.dcb_ops)
+ return;
+
+ dev->dcbnl_ops = &hns3_dcbnl_ops;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 35369e1..11dab26 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -2790,6 +2790,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_reg_netdev_fail;
}
+ hns3_dcbnl_setup(handle);
+
/* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */
netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
index 7e87461..481eada 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -590,4 +590,11 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
void hns3_ethtool_set_ops(struct net_device *netdev);
int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+
+#ifdef CONFIG_HNS3_DCB
+void hns3_dcbnl_setup(struct hnae3_handle *handle);
+#else
+static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {}
+#endif
+
#endif
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 09/10] net: hns3: Setting for fc_mode and dcb enable flag in TM module
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
Now that the DCB feature is supported, fc_mode and the DCB enable flag
must be set according to the DCB parameters.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 34 +++++++++++++++++++---
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index e158e66..cc5efda 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -487,7 +487,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
hdev->tm_info.prio_tc[i] =
(i >= hdev->tm_info.num_tc) ? 0 : i;
- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+ /* DCB is enabled if we have more than 1 TC */
+ if (hdev->tm_info.num_tc > 1)
+ hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+ else
+ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
}
static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
@@ -513,6 +517,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
}
}
+static void hclge_pfc_info_init(struct hclge_dev *hdev)
+{
+ if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) {
+ if (hdev->fc_mode_last_time == HCLGE_FC_PFC)
+ dev_warn(&hdev->pdev->dev,
+ "DCB is disable, but last mode is FC_PFC\n");
+
+ hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
+ } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
+ /* fc_mode_last_time record the last fc_mode when
+ * DCB is enabled, so that fc_mode can be set to
+ * the correct value when DCB is disabled.
+ */
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ hdev->tm_info.fc_mode = HCLGE_FC_PFC;
+ }
+}
+
static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
{
if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
@@ -525,8 +547,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
hclge_tm_vport_info_update(hdev);
- hdev->tm_info.fc_mode = HCLGE_FC_NONE;
- hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ hclge_pfc_info_init(hdev);
return 0;
}
@@ -1159,8 +1180,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev)
int hclge_tm_schd_init(struct hclge_dev *hdev)
{
- int ret = hclge_tm_schd_info_init(hdev);
+ int ret;
+
+ /* fc_mode is HCLGE_FC_FULL on reset */
+ hdev->tm_info.fc_mode = HCLGE_FC_FULL;
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ ret = hclge_tm_schd_info_init(hdev);
if (ret)
return ret;
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 10/10] net: hns3: Add mqprio support when interacting with network stack
From: Yunsheng Lin @ 2017-09-21 11:21 UTC (permalink / raw)
To: davem
Cc: huangdaode, xuwei5, liguozhu, Yisen.Zhuang, gabriele.paoloni,
john.garry, linuxarm, yisen.zhuang, salil.mehta, lipeng321,
netdev, linux-kernel
In-Reply-To: <1505992913-107256-1-git-send-email-linyunsheng@huawei.com>
When using tc qdisc to configure DCB parameters, dcb_ops->setup_tc
is used to tell the hclge_dcb module to do the setup.
When using lldptool to configure DCB parameters, the hclge_dcb module
calls client_ops->setup_tc to tell the network stack which queues
and priorities are used for a specific tc.
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
.../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 135 +++++++++++++++++----
1 file changed, 111 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 11dab26..31fcda4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -196,6 +196,32 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector)
tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
}
+static int hns3_nic_set_real_num_queue(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ unsigned int queue_size = kinfo->rss_size * kinfo->num_tc;
+ int ret;
+
+ ret = netif_set_real_num_tx_queues(netdev, queue_size);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_tx_queues fail, ret=%d!\n",
+ ret);
+ return ret;
+ }
+
+ ret = netif_set_real_num_rx_queues(netdev, queue_size);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -232,26 +258,13 @@ static int hns3_nic_net_up(struct net_device *netdev)
static int hns3_nic_net_open(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
int ret;
netif_carrier_off(netdev);
- ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps);
- if (ret) {
- netdev_err(netdev,
- "netif_set_real_num_tx_queues fail, ret=%d!\n",
- ret);
- return ret;
- }
-
- ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps);
- if (ret) {
- netdev_err(netdev,
- "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ ret = hns3_nic_set_real_num_queue(netdev);
+ if (ret)
return ret;
- }
ret = hns3_nic_net_up(netdev);
if (ret) {
@@ -1193,32 +1206,40 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
}
}
-static int hns3_setup_tc(struct net_device *netdev, u8 tc)
+static int hns3_setup_tc(struct net_device *netdev, u8 tc, u8 *prio_tc)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = priv->ae_handle;
struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ bool if_running = netif_running(netdev);
unsigned int i;
int ret;
if (tc > HNAE3_MAX_TC)
return -EINVAL;
- if (kinfo->num_tc == tc)
- return 0;
-
if (!netdev)
return -EINVAL;
- if (!tc) {
+ if (if_running) {
+ (void)hns3_nic_net_stop(netdev);
+ msleep(100);
+ }
+
+ ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
+ kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
+ if (ret)
+ goto err_out;
+
+ if (tc <= 1) {
netdev_reset_tc(netdev);
- return 0;
+ goto out;
}
/* Set num_tc for netdev */
ret = netdev_set_num_tc(netdev, tc);
if (ret)
- return ret;
+ goto err_out;
/* Set per TC queues for the VSI */
for (i = 0; i < HNAE3_MAX_TC; i++) {
@@ -1229,7 +1250,14 @@ static int hns3_setup_tc(struct net_device *netdev, u8 tc)
kinfo->tc_info[i].tqp_offset);
}
- return 0;
+out:
+ ret = hns3_nic_set_real_num_queue(netdev);
+
+err_out:
+ if (if_running)
+ (void)hns3_nic_net_open(netdev);
+
+ return ret;
}
static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -1240,7 +1268,7 @@ static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
if (type != TC_SETUP_MQPRIO)
return -EOPNOTSUPP;
- return hns3_setup_tc(dev, mqprio->num_tc);
+ return hns3_setup_tc(dev, mqprio->num_tc, mqprio->prio_tc_map);
}
static int hns3_vlan_rx_add_vid(struct net_device *netdev,
@@ -2848,10 +2876,69 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
}
}
+static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct net_device *ndev = kinfo->netdev;
+ bool if_running = netif_running(ndev);
+ int ret;
+ u8 i;
+
+ if (tc > HNAE3_MAX_TC)
+ return -EINVAL;
+
+ if (!ndev)
+ return -ENODEV;
+
+ ret = netdev_set_num_tc(ndev, tc);
+ if (ret)
+ return ret;
+
+ if (if_running) {
+ (void)hns3_nic_net_stop(ndev);
+ msleep(100);
+ }
+
+ ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ?
+ kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP;
+ if (ret)
+ goto err_out;
+
+ if (tc <= 1) {
+ netdev_reset_tc(ndev);
+ goto out;
+ }
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ struct hnae3_tc_info *tc_info = &kinfo->tc_info[i];
+
+ if (tc_info->enable)
+ netdev_set_tc_queue(ndev,
+ tc_info->tc,
+ tc_info->tqp_count,
+ tc_info->tqp_offset);
+ }
+
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+ netdev_set_prio_tc_map(ndev, i,
+ kinfo->prio_tc[i]);
+ }
+
+out:
+ ret = hns3_nic_set_real_num_queue(ndev);
+
+err_out:
+ if (if_running)
+ (void)hns3_nic_net_open(ndev);
+
+ return ret;
+}
+
const struct hnae3_client_ops client_ops = {
.init_instance = hns3_client_init,
.uninit_instance = hns3_client_uninit,
.link_status_change = hns3_link_status_change,
+ .setup_tc = hns3_client_setup_tc,
};
/* hns3_init_module - Driver registration routine
--
1.9.1
^ permalink raw reply related
* [PATCH] net: phy: Fix truncation of large IRQ numbers in phy_attached_print()
From: Geert Uytterhoeven @ 2017-09-21 11:27 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli, David S . Miller
Cc: Romain Perier, netdev, linux-kernel, Geert Uytterhoeven
Given NR_IRQS is 2048 on sparc64, and even 32784 on alpha, 3 digits is
not enough to represent interrupt numbers on all architectures. Hence
PHY interrupt numbers may be truncated during printing.
Increase the buffer size from 4 to 8 bytes to fix this.
Fixes: 5e369aefdce4818c ("net: stmmac: Delete dead code for MDIO registration")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
drivers/net/phy/phy_device.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8cf0c5901f95870f..67f25ac29025c539 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -879,7 +879,7 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
{
const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
char *irq_str;
- char irq_num[4];
+ char irq_num[8];
switch(phydev->irq) {
case PHY_POLL:
--
2.7.4
^ permalink raw reply related
* Re: [patch net-next 05/12] net: ipmr: Add MFC offload indication
From: Nikolay Aleksandrov @ 2017-09-21 11:27 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-6-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Allow drivers registered to the fib notification chain to indicate whether a
> multicast MFC route is offloaded or not, similarly to unicast routes. The
> indication of whether a route is offloaded is done using the mfc_flags
> field on an mfc_cache struct, and the information is sent to the userspace
> via the RTNetlink interface only.
>
> Currently, MFC routes are either offloaded or not, thus there is no need to
> add per-VIF offload indication.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> include/linux/mroute.h | 1 +
> net/ipv4/ipmr.c | 3 +++
> 2 files changed, 4 insertions(+)
>
> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
> index ba5a976..9b64587 100644
> --- a/include/linux/mroute.h
> +++ b/include/linux/mroute.h
> @@ -93,6 +93,7 @@ struct mr_table {
> */
> enum {
> MFC_STATIC = BIT(0),
> + MFC_OFFLOAD = BIT(1),
> };
It may be redundant, but above this enum there is a comment with flag
descriptions; since you're adding a new flag, could you also please add a
short description there?
>
> struct mfc_cache_cmp_arg {
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 7891d95..6804885 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -2268,6 +2268,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
> nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
> return -EMSGSIZE;
>
> + if (c->mfc_flags & MFC_OFFLOAD)
> + rtm->rtm_flags |= RTNH_F_OFFLOAD;
> +
> if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
> return -EMSGSIZE;
>
>
^ permalink raw reply
* Re: Latest net-next from GIT panic
From: Paweł Staszewski @ 2017-09-21 11:31 UTC (permalink / raw)
To: Eric Dumazet
Cc: Wei Wang, Cong Wang, Linux Kernel Network Developers,
Eric Dumazet
In-Reply-To: <1505991826.29839.124.camel@edumazet-glaptop3.roam.corp.google.com>
W dniu 2017-09-21 o 13:03, Eric Dumazet pisze:
> OK we have two problems here
>
> 1) We need to unify skb_dst_force() ( for net tree )
>
> 2) Vlan devices should try to correctly handle IFF_XMIT_DST_RELEASE from
> lower device. This will considerably help your performance.
>
>
> For 1), this is what I had in mind, can you try it ?
>
> Thanks a lot !
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 93568bd0a3520bb7402f04d90cf04ac99c81cfbe..f23851eeaad917e8dafc06b58d23a2575405c894 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
> static inline struct dst_entry *dst_clone(struct dst_entry *dst)
> {
> if (dst)
> - atomic_inc(&dst->__refcnt);
> + dst_hold(dst);
> return dst;
> }
>
> @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
> __skb_dst_copy(nskb, oskb->_skb_refdst);
> }
>
> -/**
> - * skb_dst_force - makes sure skb dst is refcounted
> - * @skb: buffer
> - *
> - * If dst is not yet refcounted, let's do it
> - */
> -static inline void skb_dst_force(struct sk_buff *skb)
> -{
> - if (skb_dst_is_noref(skb)) {
> - WARN_ON(!rcu_read_lock_held());
> - skb->_skb_refdst &= ~SKB_DST_NOREF;
> - dst_clone(skb_dst(skb));
> - }
> -}
> -
> /**
> * dst_hold_safe - Take a reference on a dst if possible
> * @dst: pointer to dst entry
> @@ -356,6 +341,23 @@ static inline void skb_dst_force_safe(struct sk_buff *skb)
> }
> }
>
> +/**
> + * skb_dst_force - makes sure skb dst is refcounted
> + * @skb: buffer
> + *
> + * If dst is not yet refcounted, let's do it
> + */
> +static inline void skb_dst_force(struct sk_buff *skb)
> +{
> + if (skb_dst_is_noref(skb)) {
> + struct dst_entry *dst = skb_dst(skb);
> +
> + WARN_ON(!rcu_read_lock_held());
> + if (!dst_hold_safe(dst))
> + dst = NULL;
> + skb->_skb_refdst = (unsigned long)dst;
> + }
> +}
>
> /**
> * __skb_tunnel_rx - prepare skb for rx reinsert
>
>
Patch applied - so far no problems, and no warnings in dmesg.
^ permalink raw reply
* Re: [patch net-next 04/12] ipmr: Send FIB notifications on MFC and VIF entries
From: Nikolay Aleksandrov @ 2017-09-21 11:48 UTC (permalink / raw)
To: Jiri Pirko, netdev; +Cc: davem, yotamg, idosch, mlxsw
In-Reply-To: <20170921064338.1282-5-jiri@resnulli.us>
On 21/09/17 09:43, Jiri Pirko wrote:
> From: Yotam Gigi <yotamg@mellanox.com>
>
> Use the newly introduced notification chain to send events upon VIF and MFC
> addition and deletion. The MFC notifications are sent only on resolved MFC
> entries, as unresolved cannot be offloaded.
>
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
> net/ipv4/ipmr.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 53 insertions(+)
>
LGTM, I only wish we could consolidate all of these call_ipmr_mfc_entry_notifiers()
calls inside mroute_netlink_event() but it will need an additional argument for the
ADD vs REPLACE cases. Anyway,
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9d331a74..7891d95 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -627,6 +627,27 @@ static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
> return call_fib_notifier(nb, net, event_type, &info.info);
> }
>
> +static int call_ipmr_vif_entry_notifiers(struct net *net,
> + enum fib_event_type event_type,
> + struct vif_device *vif,
> + vifi_t vif_index, u32 tb_id)
> +{
> + struct vif_entry_notifier_info info = {
> + .info = {
> + .family = RTNL_FAMILY_IPMR,
> + .net = net,
> + },
> + .dev = vif->dev,
> + .vif_index = vif_index,
> + .vif_flags = vif->flags,
> + .tb_id = tb_id,
> + };
> +
> + ASSERT_RTNL();
> + net->ipv4.ipmr_seq++;
> + return call_fib_notifiers(net, event_type, &info.info);
> +}
> +
> static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
> struct net *net,
> enum fib_event_type event_type,
> @@ -644,6 +665,24 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
> return call_fib_notifier(nb, net, event_type, &info.info);
> }
>
> +static int call_ipmr_mfc_entry_notifiers(struct net *net,
> + enum fib_event_type event_type,
> + struct mfc_cache *mfc, u32 tb_id)
> +{
> + struct mfc_entry_notifier_info info = {
> + .info = {
> + .family = RTNL_FAMILY_IPMR,
> + .net = net,
> + },
> + .mfc = mfc,
> + .tb_id = tb_id
> + };
> +
> + ASSERT_RTNL();
> + net->ipv4.ipmr_seq++;
> + return call_fib_notifiers(net, event_type, &info.info);
> +}
> +
> /**
> * vif_delete - Delete a VIF entry
> * @notify: Set to 1, if the caller is a notifier_call
> @@ -651,6 +690,7 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
> static int vif_delete(struct mr_table *mrt, int vifi, int notify,
> struct list_head *head)
> {
> + struct net *net = read_pnet(&mrt->net);
> struct vif_device *v;
> struct net_device *dev;
> struct in_device *in_dev;
> @@ -660,6 +700,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
>
> v = &mrt->vif_table[vifi];
>
> + if (VIF_EXISTS(mrt, vifi))
> + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
> + mrt->id);
> +
> write_lock_bh(&mrt_lock);
> dev = v->dev;
> v->dev = NULL;
> @@ -909,6 +953,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
> if (vifi+1 > mrt->maxvif)
> mrt->maxvif = vifi+1;
> write_unlock_bh(&mrt_lock);
> + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
> return 0;
> }
>
> @@ -1209,6 +1254,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
>
> static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
> {
> + struct net *net = read_pnet(&mrt->net);
> struct mfc_cache *c;
>
> /* The entries are added/deleted only under RTNL */
> @@ -1220,6 +1266,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
> return -ENOENT;
> rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> list_del_rcu(&c->list);
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
> mroute_netlink_event(mrt, c, RTM_DELROUTE);
> ipmr_cache_put(c);
>
> @@ -1248,6 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> if (!mrtsock)
> c->mfc_flags |= MFC_STATIC;
> write_unlock_bh(&mrt_lock);
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
> + mrt->id);
> mroute_netlink_event(mrt, c, RTM_NEWROUTE);
> return 0;
> }
> @@ -1297,6 +1346,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> ipmr_cache_resolve(net, mrt, uc, c);
> ipmr_cache_free(uc);
> }
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
> mroute_netlink_event(mrt, c, RTM_NEWROUTE);
> return 0;
> }
> @@ -1304,6 +1354,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
> /* Close the multicast socket, and clear the vif tables etc */
> static void mroute_clean_tables(struct mr_table *mrt, bool all)
> {
> + struct net *net = read_pnet(&mrt->net);
> struct mfc_cache *c, *tmp;
> LIST_HEAD(list);
> int i;
> @@ -1322,6 +1373,8 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
> continue;
> rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
> list_del_rcu(&c->list);
> + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
> + mrt->id);
> mroute_netlink_event(mrt, c, RTM_DELROUTE);
> ipmr_cache_put(c);
> }
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox