From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org, Yevgeny Kliteynik <kliteyn@nvidia.com>,
Alex Vesker <valex@nvidia.com>,
Saeed Mahameed <saeedm@nvidia.com>
Subject: [v2 net 02/19] net/mlx5: DR, Cache STE shadow memory
Date: Wed, 23 Feb 2022 16:11:06 -0800 [thread overview]
Message-ID: <20220224001123.365265-3-saeed@kernel.org> (raw)
In-Reply-To: <20220224001123.365265-1-saeed@kernel.org>
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
During rule insertion on each ICM memory chunk we also allocate shadow memory
used for management. This includes the hw_ste, dr_ste and miss list per entry.
Since the scale of these allocations is large we noticed a performance hiccup
that happens once malloc and free are stressed.
In extreme usecases when ~1M chunks are freed at once, it might take up to 40
seconds to complete this, up to the point the kernel sees this as self-detected
stall on CPU:
rcu: INFO: rcu_sched self-detected stall on CPU
To resolve this we will increase the reuse of shadow memory.
Doing this we see that a time in the aforementioned usecase dropped from ~40
seconds to ~8-10 seconds.
Fixes: 29cf8febd185 ("net/mlx5: DR, ICM pool memory allocator")
Signed-off-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
.../mellanox/mlx5/core/steering/dr_icm_pool.c | 109 ++++++++++++------
.../mellanox/mlx5/core/steering/mlx5dr.h | 5 +
2 files changed, 79 insertions(+), 35 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 7f6fd9c5e371..f496b7e9401b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -136,37 +136,35 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
kvfree(icm_mr);
}
-static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk)
+static int dr_icm_buddy_get_ste_size(struct mlx5dr_icm_buddy_mem *buddy)
{
- chunk->ste_arr = kvzalloc(chunk->num_of_entries *
- sizeof(chunk->ste_arr[0]), GFP_KERNEL);
- if (!chunk->ste_arr)
- return -ENOMEM;
-
- chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries *
- DR_STE_SIZE_REDUCED, GFP_KERNEL);
- if (!chunk->hw_ste_arr)
- goto out_free_ste_arr;
-
- chunk->miss_list = kvmalloc(chunk->num_of_entries *
- sizeof(chunk->miss_list[0]), GFP_KERNEL);
- if (!chunk->miss_list)
- goto out_free_hw_ste_arr;
+ /* We support only one type of STE size, both for ConnectX-5 and later
+ * devices. Once the support for match STE which has a larger tag is
+ * added (32B instead of 16B), the STE size for devices later than
+ * ConnectX-5 needs to account for that.
+ */
+ return DR_STE_SIZE_REDUCED;
+}
- return 0;
+static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset)
+{
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+ int index = offset / DR_STE_SIZE;
-out_free_hw_ste_arr:
- kvfree(chunk->hw_ste_arr);
-out_free_ste_arr:
- kvfree(chunk->ste_arr);
- return -ENOMEM;
+ chunk->ste_arr = &buddy->ste_arr[index];
+ chunk->miss_list = &buddy->miss_list[index];
+ chunk->hw_ste_arr = buddy->hw_ste_arr +
+ index * dr_icm_buddy_get_ste_size(buddy);
}
static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk)
{
- kvfree(chunk->miss_list);
- kvfree(chunk->hw_ste_arr);
- kvfree(chunk->ste_arr);
+ struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem;
+
+ memset(chunk->hw_ste_arr, 0,
+ chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy));
+ memset(chunk->ste_arr, 0,
+ chunk->num_of_entries * sizeof(chunk->ste_arr[0]));
}
static enum mlx5dr_icm_type
@@ -189,6 +187,44 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk,
kvfree(chunk);
}
+static int dr_icm_buddy_init_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ int num_of_entries =
+ mlx5dr_icm_pool_chunk_size_to_entries(buddy->pool->max_log_chunk_sz);
+
+ buddy->ste_arr = kvcalloc(num_of_entries,
+ sizeof(struct mlx5dr_ste), GFP_KERNEL);
+ if (!buddy->ste_arr)
+ return -ENOMEM;
+
+ /* Preallocate full STE size on non-ConnectX-5 devices since
+ * we need to support both full and reduced with the same cache.
+ */
+ buddy->hw_ste_arr = kvcalloc(num_of_entries,
+ dr_icm_buddy_get_ste_size(buddy), GFP_KERNEL);
+ if (!buddy->hw_ste_arr)
+ goto free_ste_arr;
+
+ buddy->miss_list = kvmalloc(num_of_entries * sizeof(struct list_head), GFP_KERNEL);
+ if (!buddy->miss_list)
+ goto free_hw_ste_arr;
+
+ return 0;
+
+free_hw_ste_arr:
+ kvfree(buddy->hw_ste_arr);
+free_ste_arr:
+ kvfree(buddy->ste_arr);
+ return -ENOMEM;
+}
+
+static void dr_icm_buddy_cleanup_ste_cache(struct mlx5dr_icm_buddy_mem *buddy)
+{
+ kvfree(buddy->ste_arr);
+ kvfree(buddy->hw_ste_arr);
+ kvfree(buddy->miss_list);
+}
+
static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
{
struct mlx5dr_icm_buddy_mem *buddy;
@@ -208,11 +244,19 @@ static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool)
buddy->icm_mr = icm_mr;
buddy->pool = pool;
+ if (pool->icm_type == DR_ICM_TYPE_STE) {
+ /* Reduce allocations by preallocating and reusing the STE structures */
+ if (dr_icm_buddy_init_ste_cache(buddy))
+ goto err_cleanup_buddy;
+ }
+
/* add it to the -start- of the list in order to search in it first */
list_add(&buddy->list_node, &pool->buddy_mem_list);
return 0;
+err_cleanup_buddy:
+ mlx5dr_buddy_cleanup(buddy);
err_free_buddy:
kvfree(buddy);
free_mr:
@@ -234,6 +278,9 @@ static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy)
mlx5dr_buddy_cleanup(buddy);
+ if (buddy->pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_buddy_cleanup_ste_cache(buddy);
+
kvfree(buddy);
}
@@ -261,26 +308,18 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
chunk->byte_size =
mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type);
chunk->seg = seg;
+ chunk->buddy_mem = buddy_mem_pool;
- if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) {
- mlx5dr_err(pool->dmn,
- "Failed to init ste arrays (order: %d)\n",
- chunk_size);
- goto out_free_chunk;
- }
+ if (pool->icm_type == DR_ICM_TYPE_STE)
+ dr_icm_chunk_ste_init(chunk, offset);
buddy_mem_pool->used_memory += chunk->byte_size;
- chunk->buddy_mem = buddy_mem_pool;
INIT_LIST_HEAD(&chunk->chunk_list);
/* chunk now is part of the used_list */
list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list);
return chunk;
-
-out_free_chunk:
- kvfree(chunk);
- return NULL;
}
static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index c7c93131b762..dfa223415fe2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -160,6 +160,11 @@ struct mlx5dr_icm_buddy_mem {
* sync_ste command sets them free.
*/
struct list_head hot_list;
+
+ /* Memory optimisation */
+ struct mlx5dr_ste *ste_arr;
+ struct list_head *miss_list;
+ u8 *hw_ste_arr;
};
int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy,
--
2.35.1
next prev parent reply other threads:[~2022-02-24 0:12 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-24 0:11 [pull request][v2 net 00/19] mlx5 fixes 2022-02-22 Saeed Mahameed
2022-02-24 0:11 ` [v2 net 01/19] net/mlx5: Update the list of the PCI supported devices Saeed Mahameed
2022-02-24 4:40 ` patchwork-bot+netdevbpf
2022-02-24 0:11 ` Saeed Mahameed [this message]
2022-02-24 0:11 ` [v2 net 03/19] net/mlx5: DR, Fix slab-out-of-bounds in mlx5_cmd_dr_create_fte Saeed Mahameed
2022-02-24 0:11 ` [v2 net 04/19] net/mlx5: DR, Don't allow match on IP w/o matching on full ethertype/ip_version Saeed Mahameed
2022-02-24 0:11 ` [v2 net 05/19] net/mlx5: DR, Fix the threshold that defines when pool sync is initiated Saeed Mahameed
2022-02-24 0:11 ` [v2 net 06/19] net/mlx5: Update log_max_qp value to be 17 at most Saeed Mahameed
2022-02-24 0:11 ` [v2 net 07/19] net/mlx5: Fix wrong limitation of metadata match on ecpf Saeed Mahameed
2022-02-24 0:11 ` [v2 net 08/19] net/mlx5: Fix tc max supported prio for nic mode Saeed Mahameed
2022-02-24 0:11 ` [v2 net 09/19] net/mlx5: Fix possible deadlock on rule deletion Saeed Mahameed
2022-02-24 0:11 ` [v2 net 10/19] net/mlx5e: Fix wrong return value on ioctl EEPROM query failure Saeed Mahameed
2022-02-24 0:11 ` [v2 net 11/19] net/mlx5e: kTLS, Use CHECKSUM_UNNECESSARY for device-offloaded packets Saeed Mahameed
2022-02-24 0:11 ` [v2 net 12/19] net/mlx5e: TC, Reject rules with drop and modify hdr action Saeed Mahameed
2022-02-24 0:11 ` [v2 net 13/19] net/mlx5e: TC, Reject rules with forward and drop actions Saeed Mahameed
2022-02-24 0:11 ` [v2 net 14/19] net/mlx5e: TC, Skip redundant ct clear actions Saeed Mahameed
2022-02-24 0:11 ` [v2 net 15/19] net/mlx5e: Add feature check for set fec counters Saeed Mahameed
2022-02-24 0:11 ` [v2 net 16/19] net/mlx5e: Fix MPLSoUDP encap to use MPLS action information Saeed Mahameed
2022-02-24 0:11 ` [v2 net 17/19] net/mlx5e: MPLSoUDP decap, fix check for unsupported matches Saeed Mahameed
2022-02-24 0:11 ` [v2 net 18/19] net/mlx5e: Add missing increment of count Saeed Mahameed
2022-02-24 0:11 ` [v2 net 19/19] net/mlx5e: Fix VF min/max rate parameters interchange mistake Saeed Mahameed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220224001123.365265-3-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=kliteyn@nvidia.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=saeedm@nvidia.com \
--cc=valex@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).