From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5A924E7BDAC for ; Mon, 16 Feb 2026 13:13:15 +0000 (UTC) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 3860840289; Mon, 16 Feb 2026 14:13:14 +0100 (CET) Received: from dkmailrelay1.smartsharesystems.com (smartserver.smartsharesystems.com [77.243.40.215]) by mails.dpdk.org (Postfix) with ESMTP id AB1EA40269 for ; Mon, 16 Feb 2026 14:13:12 +0100 (CET) Received: from smartserver.smartsharesystems.com (smartserver.smartsharesys.local [192.168.4.10]) by dkmailrelay1.smartsharesystems.com (Postfix) with ESMTP id 81B9C20FCA; Mon, 16 Feb 2026 14:13:12 +0100 (CET) Received: from dkrd4.smartsharesys.local ([192.168.4.26]) by smartserver.smartsharesystems.com with Microsoft SMTPSVC(6.0.3790.4675); Mon, 16 Feb 2026 14:13:11 +0100 From: =?UTF-8?q?Morten=20Br=C3=B8rup?= To: Andrew Rybchenko , dev@dpdk.org Cc: =?UTF-8?q?Morten=20Br=C3=B8rup?= Subject: [RFC PATCH v2 2/2] mempool: de-inline get/put objects unlikely code paths Date: Mon, 16 Feb 2026 13:13:03 +0000 Message-ID: <20260216131303.104297-3-mb@smartsharesystems.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20260216131303.104297-1-mb@smartsharesystems.com> References: <20260216115813.103515-1-mb@smartsharesystems.com> <20260216131303.104297-1-mb@smartsharesystems.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-OriginalArrivalTime: 16 Feb 2026 13:13:11.0603 (UTC) FILETIME=[004FD030:01DC9F46] X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org De-inline unlikely code paths, for smaller footprint. 
Signed-off-by: Morten Brørup --- v2: * Removed review functions. * Changed #if 0 to #ifdef AVOID_RTE_MEMCPY. --- lib/mempool/rte_mempool.c | 114 ++++++++++++++++++++- lib/mempool/rte_mempool.h | 202 +++++++++++++++++++------------------- 2 files changed, 214 insertions(+), 102 deletions(-) diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c index 3042d94c14..c9e6f49de5 100644 --- a/lib/mempool/rte_mempool.c +++ b/lib/mempool/rte_mempool.c @@ -1016,6 +1016,118 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, return NULL; } +/* internal */ +RTE_EXPORT_INTERNAL_SYMBOL(_rte_mempool_do_generic_put_more) +void +_rte_mempool_do_generic_put_more(struct rte_mempool *mp, void * const *obj_table, + unsigned int n, struct rte_mempool_cache *cache) +{ + __rte_assume(cache->flushthresh <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2); + __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2); + __rte_assume(cache->len <= cache->flushthresh); + __rte_assume(cache->len + n > cache->flushthresh); + if (likely(n <= cache->flushthresh)) { + uint32_t len; + void **cache_objs; + + /* + * The cache is big enough for the objects, but - as detected by + * rte_mempool_do_generic_put() - has insufficient room for them. + * Flush the cache to make room for the objects. + */ + len = cache->len; + cache_objs = &cache->objs[0]; + cache->len = n; + rte_mempool_ops_enqueue_bulk(mp, cache_objs, len); + + /* Add the objects to the cache. */ +#ifdef AVOID_RTE_MEMCPY /* Simple alternative to rte_memcpy(). */ + for (uint32_t index = 0; index < n; index++) + *cache_objs++ = *obj_table++; +#else + rte_memcpy(cache_objs, obj_table, sizeof(void *) * n); +#endif + + return; + } + + /* The request itself is too big for the cache. Push objects directly to the backend. 
*/ + rte_mempool_ops_enqueue_bulk(mp, obj_table, n); +} + +/* internal */ +RTE_EXPORT_INTERNAL_SYMBOL(_rte_mempool_do_generic_get_more) +int +_rte_mempool_do_generic_get_more(struct rte_mempool *mp, void **obj_table, + unsigned int n, struct rte_mempool_cache *cache) +{ + int ret; + unsigned int remaining; + uint32_t index, len; + void **cache_objs; + + /* Use the cache as much as we have to return hot objects first. */ + __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2); + len = cache->len; + remaining = n - len; + cache_objs = &cache->objs[len]; + cache->len = 0; + for (index = 0; index < len; index++) + *obj_table++ = *--cache_objs; + + /* Dequeue below would overflow mem allocated for cache? */ + if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE)) + goto driver_dequeue; + + /* Fill the cache from the backend; fetch size + remaining objects. */ + ret = rte_mempool_ops_dequeue_bulk(mp, cache->objs, + cache->size + remaining); + if (unlikely(ret < 0)) { + /* + * We are buffer constrained, and not able to fetch all that. + * Do not fill the cache, just satisfy the remaining part of + * the request directly from the backend. + */ + goto driver_dequeue; + } + + /* Satisfy the remaining part of the request from the filled cache. */ + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); + + __rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE); + __rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE); + cache_objs = &cache->objs[cache->size + remaining]; + cache->len = cache->size; + for (index = 0; index < remaining; index++) + *obj_table++ = *--cache_objs; + + return 0; + +driver_dequeue: + + /* Get remaining objects directly from the backend. 
*/ + ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, remaining); + + if (unlikely(ret < 0)) { + cache->len = n - remaining; + /* + * No further action is required to roll the first part + * of the request back into the cache, as objects in + * the cache are intact. + */ + + RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n); + } else { + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); + RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); + __rte_assume(ret == 0); + } + + return ret; +} + /* Return the number of entries in the mempool */ RTE_EXPORT_SYMBOL(rte_mempool_avail_count) unsigned int @@ -1633,4 +1745,4 @@ RTE_INIT(mempool_init_telemetry) "Returns list of available mempool. Takes no parameters"); rte_telemetry_register_cmd("/mempool/info", mempool_handle_info, "Returns mempool info. Parameters: pool_name"); -} +} \ No newline at end of file diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h index 7989d7a475..86163e5377 100644 --- a/lib/mempool/rte_mempool.h +++ b/lib/mempool/rte_mempool.h @@ -1370,6 +1370,24 @@ rte_mempool_cache_flush(struct rte_mempool_cache *cache, cache->len = 0; } +/** + * @internal + * Put several objects back in the mempool, more than the cache has room for; used internally. + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to store back in the mempool, must be strictly + * positive. + * @param cache + * A pointer to a mempool cache structure. + */ +__rte_internal +void +_rte_mempool_do_generic_put_more(struct rte_mempool *mp, void * const *obj_table, + unsigned int n, struct rte_mempool_cache *cache); + /** * @internal Put several objects back in the mempool; used internally. * @param mp @@ -1388,9 +1406,16 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table, { void **cache_objs; - /* No cache provided? 
*/ - if (unlikely(cache == NULL)) - goto driver_enqueue; + if (unlikely(cache == NULL)) { + /* No cache. Push objects directly to the backend. */ + /* Increment stats now, adding in mempool always succeeds. */ + RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1); + RTE_MEMPOOL_STAT_ADD(mp, put_objs, n); + + rte_mempool_ops_enqueue_bulk(mp, obj_table, n); + + return; + } /* Increment stats now, adding in mempool always succeeds. */ RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_bulk, 1); @@ -1403,35 +1428,43 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table, /* Sufficient room in the cache for the objects. */ cache_objs = &cache->objs[cache->len]; cache->len += n; - } else if (n <= cache->flushthresh) { + +cache_enqueue: +#ifdef AVOID_RTE_MEMCPY /* Simple alternative to rte_memcpy(). */ /* - * The cache is big enough for the objects, but - as detected by - * the comparison above - has insufficient room for them. - * Flush the cache to make room for the objects. + * Add the objects to the cache. + * If the request size is known at build time, + * the compiler unrolls the fixed length copy loop. */ - cache_objs = &cache->objs[0]; - rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache->len); - cache->len = n; - } else { - /* The request itself is too big for the cache. */ - goto driver_enqueue_stats_incremented; - } - - /* Add the objects to the cache. */ - rte_memcpy(cache_objs, obj_table, sizeof(void *) * n); + for (uint32_t index = 0; index < n; index++) + *cache_objs++ = *obj_table++; +#else + /* Add the objects to the cache. */ + rte_memcpy(cache_objs, obj_table, sizeof(void *) * n); +#endif - return; + return; + } -driver_enqueue: + if (__rte_constant(n) && likely(n <= cache->flushthresh)) { + uint32_t len; - /* increment stat now, adding in mempool always success */ - RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, put_objs, n); + /* + * The cache is big enough for the objects, but - as detected + * above - has insufficient room for them. 
+ * Flush the cache to make room for the objects. + */ + len = cache->len; + cache_objs = &cache->objs[0]; + cache->len = n; + rte_mempool_ops_enqueue_bulk(mp, cache_objs, len); -driver_enqueue_stats_incremented: + /* Add the objects to the cache. */ + goto cache_enqueue; + } - /* push objects to the backend */ - rte_mempool_ops_enqueue_bulk(mp, obj_table, n); + /* Insufficient room in the cache for the objects. */ + _rte_mempool_do_generic_put_more(mp, obj_table, n, cache); } @@ -1498,6 +1531,26 @@ rte_mempool_put(struct rte_mempool *mp, void *obj) rte_mempool_put_bulk(mp, &obj, 1); } +/** + * @internal + * Get several objects from the mempool, more than held in the cache; used internally. + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to get, must be strictly positive. + * @param cache + * A pointer to a mempool cache structure. + * @return + * - 0: Success. + * - <0: Error; code of driver dequeue function. + */ +__rte_internal +int +_rte_mempool_do_generic_get_more(struct rte_mempool *mp, void **obj_table, + unsigned int n, struct rte_mempool_cache *cache); + /** * @internal Get several objects from the mempool; used internally. * @param mp @@ -1516,26 +1569,36 @@ static __rte_always_inline int rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table, unsigned int n, struct rte_mempool_cache *cache) { - int ret; - unsigned int remaining; - uint32_t index, len; - void **cache_objs; - - /* No cache provided? */ if (unlikely(cache == NULL)) { - remaining = n; - goto driver_dequeue; - } + int ret; - /* The cache is a stack, so copy will be in reverse order. */ - cache_objs = &cache->objs[cache->len]; + /* No cache. Get objects directly from the backend. 
*/ + ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, n); + + if (unlikely(ret < 0)) { + RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); + RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n); + } else { + RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); + RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n); + __rte_assume(ret == 0); + } + + return ret; + } __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2); if (likely(n <= cache->len)) { + uint32_t index; + void **cache_objs; + /* The entire request can be satisfied from the cache. */ RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); + /* The cache is a stack, so copy will be in reverse order. */ + cache_objs = &cache->objs[cache->len]; + /* * If the request size is known at build time, * the compiler unrolls the fixed length copy loop. @@ -1547,71 +1610,8 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table, return 0; } - /* Use the cache as much as we have to return hot objects first. */ - len = cache->len; - remaining = n - len; - cache->len = 0; - for (index = 0; index < len; index++) - *obj_table++ = *--cache_objs; - - /* Dequeue below would overflow mem allocated for cache? */ - if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE)) - goto driver_dequeue; - - /* Fill the cache from the backend; fetch size + remaining objects. */ - ret = rte_mempool_ops_dequeue_bulk(mp, cache->objs, - cache->size + remaining); - if (unlikely(ret < 0)) { - /* - * We are buffer constrained, and not able to fetch all that. - * Do not fill the cache, just satisfy the remaining part of - * the request directly from the backend. - */ - goto driver_dequeue; - } - - /* Satisfy the remaining part of the request from the filled cache. 
*/ - RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); - RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); - - __rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE); - __rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE); - cache_objs = &cache->objs[cache->size + remaining]; - cache->len = cache->size; - for (index = 0; index < remaining; index++) - *obj_table++ = *--cache_objs; - - return 0; - -driver_dequeue: - - /* Get remaining objects directly from the backend. */ - ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, remaining); - - if (unlikely(ret < 0)) { - if (likely(cache != NULL)) { - cache->len = n - remaining; - /* - * No further action is required to roll the first part - * of the request back into the cache, as objects in - * the cache are intact. - */ - } - - RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n); - } else { - if (likely(cache != NULL)) { - RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1); - RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n); - } else { - RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1); - RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n); - } - __rte_assume(ret == 0); - } - - return ret; + /* The entire request cannot be satisfied from the cache. */ + return _rte_mempool_do_generic_get_more(mp, obj_table, n, cache); } /** -- 2.43.0