* [PATCH net-next 1/3] nfp: flower: use rhashtable for flow caching
2018-10-09 1:57 [PATCH net-next 0/3] nfp: flower: speed up stats update loop Jakub Kicinski
@ 2018-10-09 1:57 ` Jakub Kicinski
2018-10-09 1:57 ` [PATCH net-next 2/3] nfp: flower: use stats array instead of storing stats per flow Jakub Kicinski
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2018-10-09 1:57 UTC (permalink / raw)
To: davem; +Cc: netdev, oss-drivers, Pieter Jansen van Vuuren
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Make use of relativistic hash tables for tracking flows instead
of fixed-size hash tables.
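In practice the flow table is driven entirely through the standard
<linux/rhashtable.h> API with a driver-defined parameter block. A condensed
sketch of the calls this patch adds (error handling and locking context
omitted; see the hunks below for the real code):

	err = rhashtable_init(&priv->flow_table, &nfp_flower_table_params);

	err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
				     nfp_flower_table_params);

	flow = rhashtable_lookup_fast(&priv->flow_table, &flower_cmp_arg,
				      nfp_flower_table_params);

	err = rhashtable_remove_fast(&priv->flow_table, &nfp_flow->fl_node,
				     nfp_flower_table_params);

	rhashtable_free_and_destroy(&priv->flow_table,
				    nfp_check_rhashtable_empty, NULL);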
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/bpf/main.c | 5 --
.../net/ethernet/netronome/nfp/flower/main.h | 8 +-
.../ethernet/netronome/nfp/flower/metadata.c | 73 ++++++++++++++++---
.../ethernet/netronome/nfp/flower/offload.c | 12 ++-
drivers/net/ethernet/netronome/nfp/nfp_app.c | 5 ++
drivers/net/ethernet/netronome/nfp/nfp_app.h | 1 +
6 files changed, 82 insertions(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index d9d37aa860e0..28af263d4577 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -509,11 +509,6 @@ static int nfp_bpf_init(struct nfp_app *app)
return err;
}
-static void nfp_check_rhashtable_empty(void *ptr, void *arg)
-{
- WARN_ON_ONCE(1);
-}
-
static void nfp_bpf_clean(struct nfp_app *app)
{
struct nfp_app_bpf *bpf = app->priv;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 5f27318ecdbd..8b2b656da7ca 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -38,6 +38,7 @@
#include <linux/circ_buf.h>
#include <linux/hashtable.h>
+#include <linux/rhashtable.h>
#include <linux/time64.h>
#include <linux/types.h>
#include <net/pkt_cls.h>
@@ -53,7 +54,6 @@ struct nfp_app;
#define NFP_FL_STATS_ENTRY_RS BIT(20)
#define NFP_FL_STATS_ELEM_RS 4
#define NFP_FL_REPEATED_HASH_MAX BIT(17)
-#define NFP_FLOWER_HASH_BITS 19
#define NFP_FLOWER_MASK_ENTRY_RS 256
#define NFP_FLOWER_MASK_ELEMENT_RS 1
#define NFP_FLOWER_MASK_HASH_BITS 10
@@ -172,7 +172,7 @@ struct nfp_flower_priv {
struct nfp_fl_stats_id stats_ids;
struct nfp_fl_mask_id mask_ids;
DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
- DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
+ struct rhashtable flow_table;
struct work_struct cmsg_work;
struct sk_buff_head cmsg_skbs_high;
struct sk_buff_head cmsg_skbs_low;
@@ -230,7 +230,7 @@ struct nfp_fl_stats {
struct nfp_fl_payload {
struct nfp_fl_rule_metadata meta;
unsigned long tc_flower_cookie;
- struct hlist_node link;
+ struct rhash_head fl_node;
struct rcu_head rcu;
spinlock_t lock; /* lock stats */
struct nfp_fl_stats stats;
@@ -242,6 +242,8 @@ struct nfp_fl_payload {
bool ingress_offload;
};
+extern const struct rhashtable_params nfp_flower_table_params;
+
struct nfp_fl_stats_frame {
__be32 stats_con_id;
__be32 pkt_count;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index c098730544b7..2427c994c91d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -48,6 +48,12 @@ struct nfp_mask_id_table {
u8 mask_id;
};
+struct nfp_fl_flow_table_cmp_arg {
+ struct net_device *netdev;
+ unsigned long cookie;
+ __be32 host_ctx;
+};
+
static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
{
struct nfp_flower_priv *priv = app->priv;
@@ -102,18 +108,15 @@ struct nfp_fl_payload *
nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
struct net_device *netdev, __be32 host_ctx)
{
+ struct nfp_fl_flow_table_cmp_arg flower_cmp_arg;
struct nfp_flower_priv *priv = app->priv;
- struct nfp_fl_payload *flower_entry;
- hash_for_each_possible_rcu(priv->flow_table, flower_entry, link,
- tc_flower_cookie)
- if (flower_entry->tc_flower_cookie == tc_flower_cookie &&
- (!netdev || flower_entry->ingress_dev == netdev) &&
- (host_ctx == NFP_FL_STATS_CTX_DONT_CARE ||
- flower_entry->meta.host_ctx_id == host_ctx))
- return flower_entry;
+ flower_cmp_arg.netdev = netdev;
+ flower_cmp_arg.cookie = tc_flower_cookie;
+ flower_cmp_arg.host_ctx = host_ctx;
- return NULL;
+ return rhashtable_lookup_fast(&priv->flow_table, &flower_cmp_arg,
+ nfp_flower_table_params);
}
static void
@@ -389,12 +392,56 @@ int nfp_modify_flow_metadata(struct nfp_app *app,
return nfp_release_stats_entry(app, temp_ctx_id);
}
+static int nfp_fl_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct nfp_fl_flow_table_cmp_arg *cmp_arg = arg->key;
+ const struct nfp_fl_payload *flow_entry = obj;
+
+ if ((!cmp_arg->netdev || flow_entry->ingress_dev == cmp_arg->netdev) &&
+ (cmp_arg->host_ctx == NFP_FL_STATS_CTX_DONT_CARE ||
+ flow_entry->meta.host_ctx_id == cmp_arg->host_ctx))
+ return flow_entry->tc_flower_cookie != cmp_arg->cookie;
+
+ return 1;
+}
+
+static u32 nfp_fl_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct nfp_fl_payload *flower_entry = data;
+
+ return jhash2((u32 *)&flower_entry->tc_flower_cookie,
+ sizeof(flower_entry->tc_flower_cookie) / sizeof(u32),
+ seed);
+}
+
+static u32 nfp_fl_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct nfp_fl_flow_table_cmp_arg *cmp_arg = data;
+
+ return jhash2((u32 *)&cmp_arg->cookie,
+ sizeof(cmp_arg->cookie) / sizeof(u32), seed);
+}
+
+const struct rhashtable_params nfp_flower_table_params = {
+ .head_offset = offsetof(struct nfp_fl_payload, fl_node),
+ .hashfn = nfp_fl_key_hashfn,
+ .obj_cmpfn = nfp_fl_obj_cmpfn,
+ .obj_hashfn = nfp_fl_obj_hashfn,
+ .automatic_shrinking = true,
+};
+
int nfp_flower_metadata_init(struct nfp_app *app)
{
struct nfp_flower_priv *priv = app->priv;
+ int err;
hash_init(priv->mask_table);
- hash_init(priv->flow_table);
+
+ err = rhashtable_init(&priv->flow_table, &nfp_flower_table_params);
+ if (err)
+ return err;
+
get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
/* Init ring buffer and unallocated mask_ids. */
@@ -402,7 +449,7 @@ int nfp_flower_metadata_init(struct nfp_app *app)
kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
if (!priv->mask_ids.mask_id_free_list.buf)
- return -ENOMEM;
+ goto err_free_flow_table;
priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
@@ -428,6 +475,8 @@ int nfp_flower_metadata_init(struct nfp_app *app)
kfree(priv->mask_ids.last_used);
err_free_mask_id:
kfree(priv->mask_ids.mask_id_free_list.buf);
+err_free_flow_table:
+ rhashtable_destroy(&priv->flow_table);
return -ENOMEM;
}
@@ -438,6 +487,8 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app)
if (!priv)
return;
+ rhashtable_free_and_destroy(&priv->flow_table,
+ nfp_check_rhashtable_empty, NULL);
kfree(priv->mask_ids.mask_id_free_list.buf);
kfree(priv->mask_ids.last_used);
vfree(priv->stats_ids.free_list.buf);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 14f1b91b7b90..3f3649acb78f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -513,9 +513,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
if (err)
goto err_destroy_flow;
- INIT_HLIST_NODE(&flow_pay->link);
flow_pay->tc_flower_cookie = flow->cookie;
- hash_add_rcu(priv->flow_table, &flow_pay->link, flow->cookie);
+ err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
+ nfp_flower_table_params);
+ if (err)
+ goto err_destroy_flow;
+
port->tc_offload_cnt++;
/* Deallocate flow payload when flower rule has been destroyed. */
@@ -550,6 +553,7 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
struct tc_cls_flower_offload *flow, bool egress)
{
struct nfp_port *port = nfp_port_from_netdev(netdev);
+ struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_payload *nfp_flow;
struct net_device *ingr_dev;
int err;
@@ -573,11 +577,13 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
goto err_free_flow;
err_free_flow:
- hash_del_rcu(&nfp_flow->link);
port->tc_offload_cnt--;
kfree(nfp_flow->action_data);
kfree(nfp_flow->mask_data);
kfree(nfp_flow->unmasked_data);
+ WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+ &nfp_flow->fl_node,
+ nfp_flower_table_params));
kfree_rcu(nfp_flow, rcu);
return err;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 8607d09ab732..01116822ddf7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -60,6 +60,11 @@ static const struct nfp_app_type *apps[] = {
#endif
};
+void nfp_check_rhashtable_empty(void *ptr, void *arg)
+{
+ WARN_ON_ONCE(1);
+}
+
struct nfp_app *nfp_app_from_netdev(struct net_device *netdev)
{
if (nfp_netdev_is_nfp_net(netdev)) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index c896eb8f87a1..19f82f24c6ad 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -196,6 +196,7 @@ struct nfp_app {
void *priv;
};
+void nfp_check_rhashtable_empty(void *ptr, void *arg);
bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
--
2.17.1
* [PATCH net-next 2/3] nfp: flower: use stats array instead of storing stats per flow
2018-10-09 1:57 [PATCH net-next 0/3] nfp: flower: speed up stats update loop Jakub Kicinski
2018-10-09 1:57 ` [PATCH net-next 1/3] nfp: flower: use rhashtable for flow caching Jakub Kicinski
@ 2018-10-09 1:57 ` Jakub Kicinski
2018-10-09 1:57 ` [PATCH net-next 3/3] nfp: flower: use host context count provided by firmware Jakub Kicinski
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2018-10-09 1:57 UTC (permalink / raw)
To: davem; +Cc: netdev, oss-drivers, Pieter Jansen van Vuuren
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Make use of a stats array instead of storing stats per flow, which
would require a hash lookup at critical times.
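Because the stats context id reported by firmware is itself the array
index, the update path becomes a single locked loop over the message with
no per-flow lookup. A condensed sketch (the full version is in the
metadata.c hunk below):

	spin_lock(&priv->stats_lock);
	for (i = 0; i < msg_len / sizeof(*stats); i++) {
		stats = (struct nfp_fl_stats_frame *)msg + i;
		ctx_id = be32_to_cpu(stats->stats_con_id);
		priv->stats[ctx_id].pkts += be32_to_cpu(stats->pkt_count);
		priv->stats[ctx_id].bytes += be64_to_cpu(stats->byte_count);
		priv->stats[ctx_id].used = jiffies;
	}
	spin_unlock(&priv->stats_lock);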
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
.../net/ethernet/netronome/nfp/flower/main.h | 6 +-
.../ethernet/netronome/nfp/flower/metadata.c | 56 +++++++++----------
.../ethernet/netronome/nfp/flower/offload.c | 19 ++++---
3 files changed, 40 insertions(+), 41 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 8b2b656da7ca..21a167df90c1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -139,6 +139,8 @@ struct nfp_fl_lag {
* @mask_ids: List of free mask ids
* @mask_table: Hash table used to store masks
* @flow_table: Hash table used to store flower rules
+ * @stats: Stored stats updates for flower rules
+ * @stats_lock: Lock for flower rule stats updates
* @cmsg_work: Workqueue for control messages processing
* @cmsg_skbs_high: List of higher priority skbs for control message
* processing
@@ -173,6 +175,8 @@ struct nfp_flower_priv {
struct nfp_fl_mask_id mask_ids;
DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
struct rhashtable flow_table;
+ struct nfp_fl_stats *stats;
+ spinlock_t stats_lock; /* lock stats */
struct work_struct cmsg_work;
struct sk_buff_head cmsg_skbs_high;
struct sk_buff_head cmsg_skbs_low;
@@ -232,8 +236,6 @@ struct nfp_fl_payload {
unsigned long tc_flower_cookie;
struct rhash_head fl_node;
struct rcu_head rcu;
- spinlock_t lock; /* lock stats */
- struct nfp_fl_stats stats;
__be32 nfp_tun_ipv4_addr;
struct net_device *ingress_dev;
char *unmasked_data;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 2427c994c91d..f0db7f9122d2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -119,42 +119,26 @@ nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie,
nfp_flower_table_params);
}
-static void
-nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
-{
- struct nfp_fl_payload *nfp_flow;
- unsigned long flower_cookie;
-
- flower_cookie = be64_to_cpu(stats->stats_cookie);
-
- rcu_read_lock();
- nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL,
- stats->stats_con_id);
- if (!nfp_flow)
- goto exit_rcu_unlock;
-
- spin_lock(&nfp_flow->lock);
- nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count);
- nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count);
- nfp_flow->stats.used = jiffies;
- spin_unlock(&nfp_flow->lock);
-
-exit_rcu_unlock:
- rcu_read_unlock();
-}
-
void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
{
unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
- struct nfp_fl_stats_frame *stats_frame;
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_stats_frame *stats;
unsigned char *msg;
+ u32 ctx_id;
int i;
msg = nfp_flower_cmsg_get_data(skb);
- stats_frame = (struct nfp_fl_stats_frame *)msg;
- for (i = 0; i < msg_len / sizeof(*stats_frame); i++)
- nfp_flower_update_stats(app, stats_frame + i);
+ spin_lock(&priv->stats_lock);
+ for (i = 0; i < msg_len / sizeof(*stats); i++) {
+ stats = (struct nfp_fl_stats_frame *)msg + i;
+ ctx_id = be32_to_cpu(stats->stats_con_id);
+ priv->stats[ctx_id].pkts += be32_to_cpu(stats->pkt_count);
+ priv->stats[ctx_id].bytes += be64_to_cpu(stats->byte_count);
+ priv->stats[ctx_id].used = jiffies;
+ }
+ spin_unlock(&priv->stats_lock);
}
static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
@@ -348,9 +332,9 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
/* Update flow payload with mask ids. */
nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
- nfp_flow->stats.pkts = 0;
- nfp_flow->stats.bytes = 0;
- nfp_flow->stats.used = jiffies;
+ priv->stats[stats_cxt].pkts = 0;
+ priv->stats[stats_cxt].bytes = 0;
+ priv->stats[stats_cxt].used = jiffies;
check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev,
NFP_FL_STATS_CTX_DONT_CARE);
@@ -469,8 +453,17 @@ int nfp_flower_metadata_init(struct nfp_app *app)
priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX;
+ priv->stats = kvmalloc_array(NFP_FL_STATS_ENTRY_RS,
+ sizeof(struct nfp_fl_stats), GFP_KERNEL);
+ if (!priv->stats)
+ goto err_free_ring_buf;
+
+ spin_lock_init(&priv->stats_lock);
+
return 0;
+err_free_ring_buf:
+ vfree(priv->stats_ids.free_list.buf);
err_free_last_used:
kfree(priv->mask_ids.last_used);
err_free_mask_id:
@@ -489,6 +482,7 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app)
rhashtable_free_and_destroy(&priv->flow_table,
nfp_check_rhashtable_empty, NULL);
+ kvfree(priv->stats);
kfree(priv->mask_ids.mask_id_free_list.buf);
kfree(priv->mask_ids.last_used);
vfree(priv->stats_ids.free_list.buf);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 3f3649acb78f..cad28b44b21a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -428,8 +428,6 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress)
flow_pay->nfp_tun_ipv4_addr = 0;
flow_pay->meta.flags = 0;
- spin_lock_init(&flow_pay->lock);
-
flow_pay->ingress_offload = !egress;
return flow_pay;
@@ -604,8 +602,10 @@ static int
nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
struct tc_cls_flower_offload *flow, bool egress)
{
+ struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_payload *nfp_flow;
struct net_device *ingr_dev;
+ u32 ctx_id;
ingr_dev = egress ? NULL : netdev;
nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev,
@@ -616,13 +616,16 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
if (nfp_flow->ingress_offload && egress)
return 0;
- spin_lock_bh(&nfp_flow->lock);
- tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes,
- nfp_flow->stats.pkts, nfp_flow->stats.used);
+ ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+
+ spin_lock_bh(&priv->stats_lock);
+ tcf_exts_stats_update(flow->exts, priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ priv->stats[ctx_id].used);
- nfp_flow->stats.pkts = 0;
- nfp_flow->stats.bytes = 0;
- spin_unlock_bh(&nfp_flow->lock);
+ priv->stats[ctx_id].pkts = 0;
+ priv->stats[ctx_id].bytes = 0;
+ spin_unlock_bh(&priv->stats_lock);
return 0;
}
--
2.17.1
* [PATCH net-next 3/3] nfp: flower: use host context count provided by firmware
2018-10-09 1:57 [PATCH net-next 0/3] nfp: flower: speed up stats update loop Jakub Kicinski
2018-10-09 1:57 ` [PATCH net-next 1/3] nfp: flower: use rhashtable for flow caching Jakub Kicinski
2018-10-09 1:57 ` [PATCH net-next 2/3] nfp: flower: use stats array instead of storing stats per flow Jakub Kicinski
@ 2018-10-09 1:57 ` Jakub Kicinski
2018-10-10 13:40 ` [PATCH net-next 0/3] nfp: flower: speed up stats update loop Or Gerlitz
2018-10-11 5:33 ` David Miller
4 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2018-10-09 1:57 UTC (permalink / raw)
To: davem; +Cc: netdev, oss-drivers, Pieter Jansen van Vuuren
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Read the host context count symbol provided by firmware and use
it to determine the number of allocated stats ids. Previously it
was not possible to offload more than 2^17 filters even if FW was
able to do so.
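Concretely, the driver reads the firmware rtsym and falls back to the old
fixed limit when the symbol is absent, so older firmware keeps working. A
condensed sketch (full version in the main.c hunk below):

	ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT",
				      &err);
	if (err) {
		/* Symbol missing: assume the previous hard-coded limit. */
		err = 0;
		ctx_count = BIT(17);
	}
	app_priv->stats_ring_size = roundup_pow_of_two(ctx_count);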
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
.../net/ethernet/netronome/nfp/flower/main.c | 15 +++++++++++++--
.../net/ethernet/netronome/nfp/flower/main.h | 9 +++++----
.../ethernet/netronome/nfp/flower/metadata.c | 18 +++++++++---------
3 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 34b0c3602ab2..255a0d28ea15 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -518,8 +518,8 @@ static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn)
static int nfp_flower_init(struct nfp_app *app)
{
const struct nfp_pf *pf = app->pf;
+ u64 version, features, ctx_count;
struct nfp_flower_priv *app_priv;
- u64 version, features;
int err;
if (!pf->eth_tbl) {
@@ -543,6 +543,16 @@ static int nfp_flower_init(struct nfp_app *app)
return err;
}
+ ctx_count = nfp_rtsym_read_le(app->pf->rtbl, "CONFIG_FC_HOST_CTX_COUNT",
+ &err);
+ if (err) {
+ nfp_warn(app->cpp,
+ "FlowerNIC: unsupported host context count: %d\n",
+ err);
+ err = 0;
+ ctx_count = BIT(17);
+ }
+
/* We need to ensure hardware has enough flower capabilities. */
if (version != NFP_FLOWER_ALLOWED_VER) {
nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n");
@@ -553,6 +563,7 @@ static int nfp_flower_init(struct nfp_app *app)
if (!app_priv)
return -ENOMEM;
+ app_priv->stats_ring_size = roundup_pow_of_two(ctx_count);
app->priv = app_priv;
app_priv->app = app;
skb_queue_head_init(&app_priv->cmsg_skbs_high);
@@ -563,7 +574,7 @@ static int nfp_flower_init(struct nfp_app *app)
init_waitqueue_head(&app_priv->mtu_conf.wait_q);
spin_lock_init(&app_priv->mtu_conf.lock);
- err = nfp_flower_metadata_init(app);
+ err = nfp_flower_metadata_init(app, ctx_count);
if (err)
goto err_free_app_priv;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 21a167df90c1..037aa9553597 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -51,9 +51,8 @@ struct net_device;
struct nfp_app;
#define NFP_FL_STATS_CTX_DONT_CARE cpu_to_be32(0xffffffff)
-#define NFP_FL_STATS_ENTRY_RS BIT(20)
-#define NFP_FL_STATS_ELEM_RS 4
-#define NFP_FL_REPEATED_HASH_MAX BIT(17)
+#define NFP_FL_STATS_ELEM_RS FIELD_SIZEOF(struct nfp_fl_stats_id, \
+ init_unalloc)
#define NFP_FLOWER_MASK_ENTRY_RS 256
#define NFP_FLOWER_MASK_ELEMENT_RS 1
#define NFP_FLOWER_MASK_HASH_BITS 10
@@ -138,6 +137,7 @@ struct nfp_fl_lag {
* @stats_ids: List of free stats ids
* @mask_ids: List of free mask ids
* @mask_table: Hash table used to store masks
+ * @stats_ring_size: Maximum number of allowed stats ids
* @flow_table: Hash table used to store flower rules
* @stats: Stored stats updates for flower rules
* @stats_lock: Lock for flower rule stats updates
@@ -174,6 +174,7 @@ struct nfp_flower_priv {
struct nfp_fl_stats_id stats_ids;
struct nfp_fl_mask_id mask_ids;
DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
+ u32 stats_ring_size;
struct rhashtable flow_table;
struct nfp_fl_stats *stats;
spinlock_t stats_lock; /* lock stats */
@@ -253,7 +254,7 @@ struct nfp_fl_stats_frame {
__be64 stats_cookie;
};
-int nfp_flower_metadata_init(struct nfp_app *app);
+int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count);
void nfp_flower_metadata_cleanup(struct nfp_app *app);
int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index f0db7f9122d2..a4cce9a30830 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -61,14 +61,14 @@ static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
ring = &priv->stats_ids.free_list;
/* Check if buffer is full. */
- if (!CIRC_SPACE(ring->head, ring->tail, NFP_FL_STATS_ENTRY_RS *
- NFP_FL_STATS_ELEM_RS -
+ if (!CIRC_SPACE(ring->head, ring->tail,
+ priv->stats_ring_size * NFP_FL_STATS_ELEM_RS -
NFP_FL_STATS_ELEM_RS + 1))
return -ENOBUFS;
memcpy(&ring->buf[ring->head], &stats_context_id, NFP_FL_STATS_ELEM_RS);
ring->head = (ring->head + NFP_FL_STATS_ELEM_RS) %
- (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+ (priv->stats_ring_size * NFP_FL_STATS_ELEM_RS);
return 0;
}
@@ -80,7 +80,7 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
struct circ_buf *ring;
ring = &priv->stats_ids.free_list;
- freed_stats_id = NFP_FL_STATS_ENTRY_RS;
+ freed_stats_id = priv->stats_ring_size;
/* Check for unallocated entries first. */
if (priv->stats_ids.init_unalloc > 0) {
*stats_context_id = priv->stats_ids.init_unalloc - 1;
@@ -98,7 +98,7 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
*stats_context_id = temp_stats_id;
memcpy(&ring->buf[ring->tail], &freed_stats_id, NFP_FL_STATS_ELEM_RS);
ring->tail = (ring->tail + NFP_FL_STATS_ELEM_RS) %
- (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+ (priv->stats_ring_size * NFP_FL_STATS_ELEM_RS);
return 0;
}
@@ -415,7 +415,7 @@ const struct rhashtable_params nfp_flower_table_params = {
.automatic_shrinking = true,
};
-int nfp_flower_metadata_init(struct nfp_app *app)
+int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count)
{
struct nfp_flower_priv *priv = app->priv;
int err;
@@ -447,13 +447,13 @@ int nfp_flower_metadata_init(struct nfp_app *app)
/* Init ring buffer and unallocated stats_ids. */
priv->stats_ids.free_list.buf =
vmalloc(array_size(NFP_FL_STATS_ELEM_RS,
- NFP_FL_STATS_ENTRY_RS));
+ priv->stats_ring_size));
if (!priv->stats_ids.free_list.buf)
goto err_free_last_used;
- priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX;
+ priv->stats_ids.init_unalloc = host_ctx_count;
- priv->stats = kvmalloc_array(NFP_FL_STATS_ENTRY_RS,
+ priv->stats = kvmalloc_array(priv->stats_ring_size,
sizeof(struct nfp_fl_stats), GFP_KERNEL);
if (!priv->stats)
goto err_free_ring_buf;
--
2.17.1
* Re: [PATCH net-next 0/3] nfp: flower: speed up stats update loop
2018-10-09 1:57 [PATCH net-next 0/3] nfp: flower: speed up stats update loop Jakub Kicinski
` (2 preceding siblings ...)
2018-10-09 1:57 ` [PATCH net-next 3/3] nfp: flower: use host context count provided by firmware Jakub Kicinski
@ 2018-10-10 13:40 ` Or Gerlitz
2018-10-10 15:08 ` Jakub Kicinski
2018-10-11 5:33 ` David Miller
4 siblings, 1 reply; 7+ messages in thread
From: Or Gerlitz @ 2018-10-10 13:40 UTC (permalink / raw)
To: Jakub Kicinski; +Cc: David Miller, Linux Netdev List, oss-drivers
On Tue, Oct 9, 2018 at 4:58 AM Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> Given that our statistic IDs are already array indices, and considering
> each statistic is only 24B in size, we decided to forego the use
8B packet + 8B bytes --> 16B -- does your FW/HW provide last use? how do
you express this lastuse value in host jiffies?
* Re: [PATCH net-next 0/3] nfp: flower: speed up stats update loop
2018-10-10 13:40 ` [PATCH net-next 0/3] nfp: flower: speed up stats update loop Or Gerlitz
@ 2018-10-10 15:08 ` Jakub Kicinski
0 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2018-10-10 15:08 UTC (permalink / raw)
To: Or Gerlitz; +Cc: David Miller, Linux Netdev List, oss-drivers
On Wed, 10 Oct 2018 16:40:56 +0300, Or Gerlitz wrote:
> On Tue, Oct 9, 2018 at 4:58 AM Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
>
> > Given that our statistic IDs are already array indices, and considering
> > each statistic is only 24B in size, we decided to forego the use
>
> 8B packet + 8B bytes --> 16B -- does your FW/HW provide last use? how do
> you express this lastuse value in host jiffies?
24B is the size of the structure we keep in the driver, which costs
host memory. The message from FW contains this:
struct nfp_fl_stats_frame {
__be32 stats_con_id;
__be32 pkt_count;
__be64 byte_count;
__be64 stats_cookie;
};
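(For context, the per-context record kept in host memory is not shown in
the quoted hunks; assuming the driver's struct nfp_fl_stats, it is roughly:

	struct nfp_fl_stats {
		u64 pkts;
		u64 bytes;
		u64 used;	/* host jiffies stamped when the FW update lands */
	};

i.e. three 8B fields, which is where the 24B figure above comes from, with
lastuse taken from host jiffies at processing time rather than from the FW
message.)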
* Re: [PATCH net-next 0/3] nfp: flower: speed up stats update loop
2018-10-09 1:57 [PATCH net-next 0/3] nfp: flower: speed up stats update loop Jakub Kicinski
` (3 preceding siblings ...)
2018-10-10 13:40 ` [PATCH net-next 0/3] nfp: flower: speed up stats update loop Or Gerlitz
@ 2018-10-11 5:33 ` David Miller
4 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2018-10-11 5:33 UTC (permalink / raw)
To: jakub.kicinski; +Cc: netdev, oss-drivers
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 8 Oct 2018 18:57:33 -0700
> This set from Pieter improves performance of processing FW stats
> update notifications. The FW seems to send those at relatively
> high rate (roughly ten per second per flow), therefore if we want
> to approach the million flows mark we have to be very careful
> about our data structures.
>
> We tried rhashtable for stat updates, but according to our experiments
> rhashtable lookup on a u32 takes roughly 60ns on an Xeon E5-2670 v3.
> Which translates to a hard limit of 16M lookups per second on this CPU,
> and, according to perf record, jhash and memcmp account for 60% of CPU
> usage on the core handling the updates.
>
> Given that our statistic IDs are already array indices, and considering
> each statistic is only 24B in size, we decided to forego the use
> of hashtables and use a directly indexed array. The CPU savings are
> considerable.
>
> With the recent improvements in TC core and with our own bottlenecks
> out of the way Pieter removes the artificial limit of 128 flows, and
> allows the driver to install as many flows as FW supports.
Series applied.