From: Ravi Kerur <rkerur@gmail.com>
To: dev@dpdk.org
Subject: [PATCH v3] Implement memcmp using Intel SIMD intrinsics.
Date: Mon, 18 May 2015 13:01:43 -0700 [thread overview]
Message-ID: <1431979303-1346-2-git-send-email-rkerur@gmail.com> (raw)
In-Reply-To: <1431979303-1346-1-git-send-email-rkerur@gmail.com>
This patch implements rte_memcmp using AVX/SSE intrinsics and uses librte_hash
as the first candidate to adopt it.
Tested with the GCC (4.8.2) and Clang (3.4-1) compilers; both show better
performance than memcmp on an Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz running
Ubuntu 14.04 x86_64.
Changes in v3:
Implement complete memcmp functionality.
Implement functional and performance tests and add it to
"make test" infrastructure code.
Changes in v2:
Modified code to support only up to 64 bytes, as that is the maximum number of
bytes used by hash for comparison.
Changes in v1:
Initial changes to support memcmp for up to 128 bytes.
Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
app/test/Makefile | 5 +-
app/test/autotest_data.py | 19 +
app/test/test_hash_perf.c | 36 +-
app/test/test_memcmp.c | 229 ++++++
app/test/test_memcmp_perf.c | 339 ++++++++
.../common/include/arch/ppc_64/rte_memcmp.h | 62 ++
.../common/include/arch/x86/rte_memcmp.h | 900 +++++++++++++++++++++
lib/librte_eal/common/include/generic/rte_memcmp.h | 175 ++++
lib/librte_hash/rte_hash.c | 59 +-
9 files changed, 1789 insertions(+), 35 deletions(-)
create mode 100644 app/test/test_memcmp.c
create mode 100644 app/test/test_memcmp_perf.c
create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h
diff --git a/app/test/Makefile b/app/test/Makefile
index 4aca77c..957e4f1 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -81,6 +81,9 @@ SRCS-y += test_logs.c
SRCS-y += test_memcpy.c
SRCS-y += test_memcpy_perf.c
+SRCS-y += test_memcmp.c
+SRCS-y += test_memcmp_perf.c
+
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
@@ -150,7 +153,7 @@ CFLAGS_test_kni.o += -Wno-deprecated-declarations
endif
CFLAGS += -D_GNU_SOURCE
-# Disable VTA for memcpy test
+# Disable VTA for memcpy tests
ifeq ($(CC), gcc)
ifeq ($(shell test $(GCC_VERSION) -ge 44 && echo 1), 1)
CFLAGS_test_memcpy.o += -fno-var-tracking-assignments
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 618a946..e07f087 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -187,6 +187,12 @@ parallel_test_group_list = [
"Report" : None,
},
{
+ "Name" : "Memcmp autotest",
+ "Command" : "memcmp_autotest",
+ "Func" : default_autotest,
+ "Report" : None,
+ },
+ {
"Name" : "Memzone autotest",
"Command" : "memzone_autotest",
"Func" : default_autotest,
@@ -399,6 +405,19 @@ non_parallel_test_group_list = [
]
},
{
+ "Prefix": "memcmp_perf",
+ "Memory" : all_sockets(512),
+ "Tests" :
+ [
+ {
+ "Name" : "Memcmp performance autotest",
+ "Command" : "memcmp_perf_autotest",
+ "Func" : default_autotest,
+ "Report" : None,
+ },
+ ]
+},
+{
"Prefix": "hash_perf",
"Memory" : all_sockets(512),
"Tests" :
diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..6887629 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -440,7 +440,7 @@ run_single_tbl_perf_test(const struct rte_hash *h, hash_operation func,
uint32_t *invalid_pos_count)
{
uint64_t begin, end, ticks = 0;
- uint8_t *key = NULL;
+ uint8_t * volatile key = NULL;
uint32_t *bucket_occupancies = NULL;
uint32_t num_buckets, i, j;
int32_t pos;
@@ -547,30 +547,30 @@ run_tbl_perf_test(struct tbl_perf_test_params *params)
case ADD_UPDATE:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
- params->num_iterations = num_iterations;
ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
params, &avg_occupancy, &invalid_pos);
+ params->num_iterations = num_iterations;
+ ticks += run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
break;
case DELETE:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
+ ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
params->num_iterations = num_iterations;
- ticks = run_single_tbl_perf_test(handle, rte_hash_del_key,
+ ticks += run_single_tbl_perf_test(handle, rte_hash_del_key,
params, &avg_occupancy, &invalid_pos);
break;
case LOOKUP:
num_iterations = params->num_iterations;
params->num_iterations = params->entries;
- run_single_tbl_perf_test(handle, rte_hash_add_key, params,
- &avg_occupancy, &invalid_pos);
+ ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+ params, &avg_occupancy, &invalid_pos);
params->num_iterations = num_iterations;
- ticks = run_single_tbl_perf_test(handle, rte_hash_lookup,
+ ticks += run_single_tbl_perf_test(handle, rte_hash_lookup,
params, &avg_occupancy, &invalid_pos);
break;
default: return -1;
@@ -623,10 +623,15 @@ static int run_all_tbl_perf_tests(void)
static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
uint32_t key_len)
{
- static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+ static uint8_t * volatile key;
uint64_t ticks = 0, start, end;
unsigned i, j;
+ key = rte_zmalloc("func hash key",
+ key_len * sizeof(uint8_t), 16);
+ if (key == NULL)
+ return;
+
for (i = 0; i < HASHTEST_ITERATIONS; i++) {
for (j = 0; j < key_len; j++)
@@ -638,8 +643,11 @@ static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
ticks += end - start;
}
- printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
- (unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+ rte_free(key);
+
+ printf("%-12s, %-18u, %-13u, %.02f\n",
+ get_hash_name(f), (unsigned) key_len, (unsigned) init_val,
+ (double)ticks / HASHTEST_ITERATIONS);
}
/*
@@ -687,7 +695,7 @@ fbk_hash_perf_test(void)
.socket_id = rte_socket_id(),
};
struct rte_fbk_hash_table *handle = NULL;
- uint32_t *keys = NULL;
+ uint32_t * volatile keys = NULL;
unsigned indexes[TEST_SIZE];
uint64_t lookup_time = 0;
unsigned added = 0;
diff --git a/app/test/test_memcmp.c b/app/test/test_memcmp.c
new file mode 100644
index 0000000..7d9c85f
--- /dev/null
+++ b/app/test/test_memcmp.c
@@ -0,0 +1,229 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_memcmp.h>
+
+#include "test.h"
+
+/*******************************************************************************
+ * Memcmp functional test configuration section.
+ *
+ * The array below lists the buffer lengths, in bytes, that the equal,
+ * less-than and greater-than tests in this file are each run with.
+ */
+static size_t memcmp_sizes[] = {
+ 1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255,
+ 256, 257, 320, 384, 511, 512, 513, 1023, 1024, 1025, 1518, 1522, 1600,
+ 2048, 3072, 4096, 5120, 6144, 7168, 8192, 16384
+};
+
+/******************************************************************************/
+
+#define RTE_MEMCMP_LENGTH_MAX 16384
+
+/*
+ * Compare a randomly filled buffer of len bytes against itself.
+ *
+ * Returns the rte_memcmp() result (0 is expected for a correct
+ * implementation), or -1 if the buffer could not be allocated.
+ */
+static int run_memcmp_eq_func_test(uint32_t len)
+{
+	uint32_t i;
+	int rc;	/* signed: holds the memcmp-style comparison result */
+	uint8_t * volatile key = NULL;
+
+	key = rte_zmalloc("memcmp key", len * sizeof(uint8_t), 16);
+	if (key == NULL)
+		return -1;
+
+	for (i = 0; i < len; i++)
+		key[i] = (uint8_t) rte_rand();
+
+	rc = rte_memcmp(key, key, len);
+	rte_free(key);
+
+	return rc;
+}
+
+/*
+ * Run the equality test once for every length in memcmp_sizes.
+ *
+ * Returns 0 when every length compares equal, 1 on the first failure.
+ */
+static int run_memcmp_eq_func_tests(void)
+{
+	const unsigned num_sizes = sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+	unsigned i;
+
+	for (i = 0; i < num_sizes; i++) {
+		if (run_memcmp_eq_func_test(memcmp_sizes[i]) != 0) {
+			/* memcmp_sizes[] holds size_t, which is printed with %zu */
+			printf("Comparing equal %zu bytes failed\n", memcmp_sizes[i]);
+			return 1;
+		}
+	}
+	printf("RTE memcmp for equality successful\n");
+	return 0;
+}
+
+/*
+ * Compare key_1 = {0, 1, 2, ...} against key_2 = {2, 2, 2, ...}.
+ * The very first byte already differs (0 < 2), so rte_memcmp() is expected
+ * to return a negative value.
+ *
+ * Returns the rte_memcmp() result. On allocation failure a positive value
+ * is returned instead of -1, because a negative return value is what the
+ * less-than test loop accepts as success.
+ */
+static int run_memcmp_lt_func_test(uint32_t len)
+{
+	uint32_t i;
+	int rc;	/* signed: holds the memcmp-style comparison result */
+	uint8_t * volatile key_1 = NULL;
+	uint8_t * volatile key_2 = NULL;
+
+	key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+	if (key_1 == NULL)
+		return 1;
+
+	key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+	if (key_2 == NULL) {
+		/* do not leak the first buffer when the second one fails */
+		rte_free(key_1);
+		return 1;
+	}
+
+	for (i = 0; i < len; i++) {
+		key_1[i] = i;
+		key_2[i] = 2;
+	}
+
+	rc = rte_memcmp(key_1, key_2, len);
+	rte_free(key_1);
+	rte_free(key_2);
+
+	return rc;
+}
+
+/*
+ * Run the less-than test once for every length in memcmp_sizes.
+ *
+ * Returns 0 when every comparison yields a negative result, 1 on the
+ * first length that does not.
+ */
+static int run_memcmp_lt_func_tests(void)
+{
+	const unsigned num_sizes = sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+	unsigned i;
+
+	for (i = 0; i < num_sizes; i++) {
+		if (!(run_memcmp_lt_func_test(memcmp_sizes[i]) < 0)) {
+			/* memcmp_sizes[] holds size_t, which is printed with %zu */
+			printf("Comparing less than for %zu bytes failed\n", memcmp_sizes[i]);
+			return 1;
+		}
+	}
+	printf("RTE memcmp for less than successful\n");
+	return 0;
+}
+
+/*
+ * Compare key_1 = {2, 2, 2, ...} against key_2 = {0, 1, 2, ...}.
+ * The very first byte already differs (2 > 0), so rte_memcmp() is expected
+ * to return a positive value.
+ *
+ * Returns the rte_memcmp() result, or -1 if a buffer could not be
+ * allocated.
+ */
+static int run_memcmp_gt_func_test(uint32_t len)
+{
+	uint32_t i;
+	int rc;	/* signed: holds the memcmp-style comparison result */
+	uint8_t * volatile key_1 = NULL;
+	uint8_t * volatile key_2 = NULL;
+
+	key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+	if (key_1 == NULL)
+		return -1;
+
+	key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+	if (key_2 == NULL) {
+		/* do not leak the first buffer when the second one fails */
+		rte_free(key_1);
+		return -1;
+	}
+
+	for (i = 0; i < len; i++) {
+		key_1[i] = 2;
+		key_2[i] = i;
+	}
+
+	rc = rte_memcmp(key_1, key_2, len);
+	rte_free(key_1);
+	rte_free(key_2);
+
+	return rc;
+}
+
+/*
+ * Run the greater-than test once for every length in memcmp_sizes.
+ *
+ * Returns 0 when every comparison yields a positive result, 1 on the
+ * first length that does not.
+ */
+static int run_memcmp_gt_func_tests(void)
+{
+	const unsigned num_sizes = sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+	unsigned i;
+
+	for (i = 0; i < num_sizes; i++) {
+		if (!(run_memcmp_gt_func_test(memcmp_sizes[i]) > 0)) {
+			/* memcmp_sizes[] holds size_t, which is printed with %zu */
+			printf("Comparing greater than for %zu bytes failed\n", memcmp_sizes[i]);
+			return 1;
+		}
+	}
+	printf("RTE memcmp for greater than successful\n");
+	return 0;
+}
+
+/*
+ * Entry point of the functional test: runs the equal, greater-than and
+ * less-than suites in that order and stops at the first one that fails.
+ *
+ * Returns 0 when all three suites pass, -1 otherwise.
+ */
+static int
+test_memcmp(void)
+{
+	/* || short-circuits, so later suites are skipped after a failure */
+	if (run_memcmp_eq_func_tests() ||
+			run_memcmp_gt_func_tests() ||
+			run_memcmp_lt_func_tests())
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Test command descriptor binding the "memcmp_autotest" command string to
+ * test_memcmp(). It is handed to REGISTER_TEST_COMMAND, which is defined
+ * outside this file.
+ */
+static struct test_command memcmp_cmd = {
+ .command = "memcmp_autotest",
+ .callback = test_memcmp,
+};
+REGISTER_TEST_COMMAND(memcmp_cmd);
diff --git a/app/test/test_memcmp_perf.c b/app/test/test_memcmp_perf.c
new file mode 100644
index 0000000..8b7a0c4
--- /dev/null
+++ b/app/test/test_memcmp_perf.c
@@ -0,0 +1,339 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <sys/times.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_memory.h>
+#include <rte_memcmp.h>
+
+#include "test.h"
+
+/*******************************************************************************
+ * Memcmp function performance test configuration section. Each performance test
+ * will be performed MEMCMP_ITERATIONS times.
+ *
+ * The two arrays below control which buffer lengths are tested.
+ */
+/* Parenthesized so the product stays intact inside a larger expression. */
+#define MEMCMP_ITERATIONS (500 * 500 * 500)
+
+/*
+ * Buffer lengths, in bytes, used by the "equal" performance runs.
+ * NOTE(review): the last entry is 16834; the functional test uses 16384 as
+ * its largest size, so this may be a transposition typo - confirm.
+ */
+static size_t memcmp_sizes[] = {
+ 2, 5, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128,
+ 129, 191, 192, 193, 255, 256, 257, 319, 320, 321, 383, 384, 385, 447, 448,
+ 449, 511, 512, 513, 767, 768, 769, 1023, 1024, 1025, 1522, 1536, 1600,
+ 2048, 2560, 3072, 3584, 4096, 4608, 5632, 6144, 6656, 7168, 7680, 8192,
+ 16834
+};
+
+/* Buffer lengths, in bytes, used by the less-than and greater-than runs. */
+static size_t memcmp_lt_gt_sizes[] = {
+ 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192};
+
+/******************************************************************************/
+
+/*
+ * Time `iterations` comparisons of a len-byte buffer against itself and
+ * print the length together with the average number of ticks per call.
+ *
+ * func_type 1 selects rte_memcmp(); any other value selects memcmp().
+ *
+ * Returns 0 when every comparison reported equality, 1 when at least one
+ * did not, and -1 if the buffer could not be allocated.
+ */
+static int
+run_single_memcmp_eq_perf_test(uint32_t len, int func_type, uint64_t iterations)
+{
+	/* tick counts stay in integers; a double cannot hold every 64-bit value */
+	uint64_t begin, ticks;
+	uint64_t i, j;
+	int mismatch = 0;
+	uint8_t * volatile key = NULL;
+
+	key = rte_zmalloc("memcmp key", len * sizeof(uint8_t), 16);
+	if (key == NULL)
+		return -1;
+
+	/* Prepare inputs for the current iteration */
+	for (j = 0; j < len; j++)
+		key[j] = j / 64;
+
+	begin = rte_rdtsc();
+
+	/* Perform operation, and measure time it takes */
+	for (i = 0; i < iterations; i++) {
+		/*
+		 * Record "any result was non-zero" rather than summing signed
+		 * results, which could cancel out or be truncated on return.
+		 */
+		if (func_type == 1)
+			mismatch |= (rte_memcmp(key, key, len) != 0);
+		else
+			mismatch |= (memcmp(key, key, len) != 0);
+	}
+
+	ticks = rte_rdtsc() - begin;
+
+	printf(" *** %10u, %10.4f ***\n", len, (double)ticks / iterations);
+
+	rte_free(key);
+
+	return mismatch;
+}
+
+/*
+ * Run the "equal" performance test for every length in memcmp_sizes,
+ * first with rte_memcmp() (func_type 1) and then with memcmp()
+ * (func_type 2).
+ *
+ * Returns 0 on success, -1 as soon as a single run reports non-zero.
+ */
+static int run_all_memcmp_eq_perf_tests(void)
+{
+	const unsigned count = sizeof(memcmp_sizes) / sizeof(memcmp_sizes[0]);
+	unsigned idx;
+
+	printf(" *** RTE memcmp equal performance test results ***\n");
+	printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+	for (idx = 0; idx < count; idx++) {
+		int rc = run_single_memcmp_eq_perf_test(memcmp_sizes[idx], 1,
+				MEMCMP_ITERATIONS);
+
+		if (rc != 0)
+			return -1;
+	}
+
+	printf(" *** memcmp equal performance test results ***\n");
+	printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+	for (idx = 0; idx < count; idx++) {
+		int rc = run_single_memcmp_eq_perf_test(memcmp_sizes[idx], 2,
+				MEMCMP_ITERATIONS);
+
+		if (rc != 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Time `iterations` comparisons of two len-byte buffers where the first
+ * compares less than the second: both are filled with 1, then the byte at
+ * index len / 2 of key_2 is raised to 2.
+ *
+ * func_type 1 selects rte_memcmp(); any other value selects memcmp().
+ * On success the length and average ticks per call are printed.
+ *
+ * Returns 0 on success, -1 on allocation failure or as soon as a
+ * comparison does not yield a negative result.
+ */
+static int
+run_single_memcmp_lt_perf_test(uint32_t len, int func_type,
+		uint64_t iterations)
+{
+	/* tick counts stay in integers; a double cannot hold every 64-bit value */
+	uint64_t begin, ticks;
+	uint64_t i, j;
+	int ret = -1;
+	uint8_t * volatile key_1 = NULL;
+	uint8_t * volatile key_2 = NULL;
+
+	key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+	if (key_1 == NULL)
+		return -1;
+
+	key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+	if (key_2 == NULL) {
+		rte_free(key_1);
+		return -1;
+	}
+
+	/* Prepare inputs for the current iteration */
+	for (j = 0; j < len; j++) {
+		key_1[j] = 1;
+		key_2[j] = 1;
+	}
+	key_2[len / 2] = 2;
+
+	begin = rte_rdtsc();
+
+	/* Perform operation, and measure time it takes */
+	for (i = 0; i < iterations; i++) {
+		if (func_type == 1) {
+			if (!(rte_memcmp(key_1, key_2, len) < 0))
+				goto out;	/* wrong result: fail, but free the buffers */
+		} else {
+			if (!(memcmp(key_1, key_2, len) < 0))
+				goto out;
+		}
+	}
+
+	ticks = rte_rdtsc() - begin;
+
+	printf(" *** %10u, %10.4f ***\n", len, (double)ticks / iterations);
+	ret = 0;
+
+out:
+	rte_free(key_1);
+	rte_free(key_2);
+
+	return ret;
+}
+
+/*
+ * Run the "less than" performance test for every length in
+ * memcmp_lt_gt_sizes, first with rte_memcmp() (func_type 1) and then with
+ * memcmp() (func_type 2).
+ *
+ * Returns 0 on success, -1 as soon as a single run fails.
+ */
+static int run_all_memcmp_lt_perf_tests(void)
+{
+	const unsigned count =
+		sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]);
+	unsigned idx;
+
+	printf(" *** RTE memcmp less than performance test results ***\n");
+	printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+	for (idx = 0; idx < count; idx++) {
+		int rc = run_single_memcmp_lt_perf_test(memcmp_lt_gt_sizes[idx], 1,
+				MEMCMP_ITERATIONS);
+
+		if (rc != 0)
+			return -1;
+	}
+
+	printf(" *** memcmp less than performance test results ***\n");
+	printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+	for (idx = 0; idx < count; idx++) {
+		int rc = run_single_memcmp_lt_perf_test(memcmp_lt_gt_sizes[idx], 2,
+				MEMCMP_ITERATIONS);
+
+		if (rc != 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Time `iterations` comparisons of two len-byte buffers where the first
+ * compares greater than the second: both are filled with 1, then the byte
+ * at index len / 2 of key_1 is raised to 2.
+ *
+ * func_type 1 selects rte_memcmp(); any other value selects memcmp().
+ * On success the length and average ticks per call are printed.
+ *
+ * Returns 0 on success, -1 on allocation failure or as soon as a
+ * comparison does not yield a positive result.
+ */
+static int
+run_single_memcmp_gt_perf_test(uint32_t len, int func_type,
+		uint64_t iterations)
+{
+	/* tick counts stay in integers; a double cannot hold every 64-bit value */
+	uint64_t begin, ticks;
+	uint64_t i, j;
+	int ret = -1;
+	uint8_t * volatile key_1 = NULL;
+	uint8_t * volatile key_2 = NULL;
+
+	key_1 = rte_zmalloc("memcmp key_1", len * sizeof(uint8_t), 16);
+	if (key_1 == NULL)
+		return -1;
+
+	key_2 = rte_zmalloc("memcmp key_2", len * sizeof(uint8_t), 16);
+	if (key_2 == NULL) {
+		rte_free(key_1);
+		return -1;
+	}
+
+	/* Prepare inputs for the current iteration */
+	for (j = 0; j < len; j++) {
+		key_1[j] = 1;
+		key_2[j] = 1;
+	}
+	key_1[len / 2] = 2;
+
+	begin = rte_rdtsc();
+
+	/* Perform operation, and measure time it takes */
+	for (i = 0; i < iterations; i++) {
+		if (func_type == 1) {
+			if (!(rte_memcmp(key_1, key_2, len) > 0))
+				goto out;	/* wrong result: fail, but free the buffers */
+		} else {
+			if (!(memcmp(key_1, key_2, len) > 0))
+				goto out;
+		}
+	}
+
+	ticks = rte_rdtsc() - begin;
+
+	printf(" *** %10u, %10.4f ***\n", len, (double)ticks / iterations);
+	ret = 0;
+
+out:
+	rte_free(key_1);
+	rte_free(key_2);
+
+	return ret;
+}
+
+/*
+ * Run the "greater than" performance test for every length in
+ * memcmp_lt_gt_sizes, first with rte_memcmp() (func_type 1) and then with
+ * memcmp() (func_type 2).
+ *
+ * Returns 0 on success, -1 as soon as a single run fails.
+ */
+static int run_all_memcmp_gt_perf_tests(void)
+{
+	const unsigned count =
+		sizeof(memcmp_lt_gt_sizes) / sizeof(memcmp_lt_gt_sizes[0]);
+	unsigned idx;
+
+	printf(" *** RTE memcmp greater than performance test results ***\n");
+	printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+	for (idx = 0; idx < count; idx++) {
+		int rc = run_single_memcmp_gt_perf_test(memcmp_lt_gt_sizes[idx], 1,
+				MEMCMP_ITERATIONS);
+
+		if (rc != 0)
+			return -1;
+	}
+
+	printf(" *** memcmp greater than performance test results ***\n");
+	printf(" *** Length (bytes), Ticks/Op. ***\n");
+
+	for (idx = 0; idx < count; idx++) {
+		int rc = run_single_memcmp_gt_perf_test(memcmp_lt_gt_sizes[idx], 2,
+				MEMCMP_ITERATIONS);
+
+		if (rc != 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Entry point of the performance test: runs the equal, less-than and
+ * greater-than suites in that order and stops at the first failure.
+ *
+ * Returns 0 when all three suites succeed, -1 otherwise.
+ */
+static int
+test_memcmp_perf(void)
+{
+	/* || short-circuits, so later suites are skipped after a failure */
+	if (run_all_memcmp_eq_perf_tests() != 0 ||
+			run_all_memcmp_lt_perf_tests() != 0 ||
+			run_all_memcmp_gt_perf_tests() != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Test command descriptor binding the "memcmp_perf_autotest" command string
+ * to test_memcmp_perf(). It is handed to REGISTER_TEST_COMMAND, which is
+ * defined outside this file.
+ */
+static struct test_command memcmp_perf_cmd = {
+ .command = "memcmp_perf_autotest",
+ .callback = test_memcmp_perf,
+};
+REGISTER_TEST_COMMAND(memcmp_perf_cmd);
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..6e54f3b
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) IBM Corporation 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/*To include altivec.h, GCC version must >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
+/*
+ * Compare n bytes at dst and src.
+ *
+ * When n is a compile-time constant (per __builtin_constant_p) the call goes
+ * straight to memcmp(); otherwise it goes through rte_memcmp_func(). The
+ * macro is a GCC statement expression, so it yields the selected call's
+ * value.
+ */
+#define rte_memcmp(dst, src, n) \
+ ({ (__builtin_constant_p(n)) ? \
+ memcmp((dst), (src), (n)) : \
+ rte_memcmp_func((dst), (src), (n)); })
+
+/**
+ * Compare n bytes between two locations by delegating to memcmp().
+ *
+ * The result is returned as int rather than bool so that the sign of the
+ * comparison (less than / greater than) reaches the caller.
+ *
+ * @param dst
+ *   Pointer to the first buffer; it is only read.
+ * @param src
+ *   Pointer to the second buffer.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if the buffers are equal, a negative value if dst compares less
+ *   than src, a positive value if dst compares greater than src.
+ */
+static inline int
+rte_memcmp_func(const void *dst, const void *src, size_t n)
+{
+	return memcmp(dst, src, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..085dfb2
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,900 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * This is a forward declaration that marks the function always_inline.
+ * Note that the second parameter is spelled `src` here but documented as
+ * src_2 below.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_memcmp(const void *src_1, const void *src,
+ size_t n) __attribute__((always_inline));
+
+/**
+ * Find the lowest bit position in which two 32-bit values differ.
+ *
+ * @param x
+ *   First value.
+ * @param y
+ *   Second value.
+ * @return
+ *   Index (0..31) of the least significant differing bit, or -1 if x and y
+ *   are equal.
+ */
+static inline int
+rte_cmpffd (uint32_t x, uint32_t y)
+{
+	const uint32_t diff = x ^ y;
+	int i;
+
+	for (i = 0; i < 32; i++) {
+		/* unsigned constant: shifting a signed 1 into bit 31 is undefined */
+		if (diff & (UINT32_C(1) << i))
+			return i;
+	}
+	return -1;
+}
+
+/**
+ * Scan two byte sequences for the first position where they differ.
+ *
+ * @param x
+ *   First byte sequence.
+ * @param y
+ *   Second byte sequence.
+ * @param n
+ *   Number of bytes to examine.
+ * @return
+ *   x[i] - y[i] for the first index i at which the bytes differ, or 0 if
+ *   the first n bytes are identical.
+ */
+static inline int
+rte_cmpffdb (const uint8_t *x, const uint8_t *y, size_t n)
+{
+	const uint8_t *const x_end = x + n;
+
+	while (x != x_end) {
+		if (*x != *y)
+			return *x - *y;
+		x++;
+		y++;
+	}
+	return 0;
+}
+
+/**
+ * Compare 16 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Both blocks are loaded into 128-bit registers and XORed; a zero XOR
+ * means they are equal. Otherwise the 64-bit half holding the first
+ * difference is scanned byte by byte with rte_cmpffdb().
+ *
+ * @return
+ *   zero if the blocks are equal, otherwise the rte_cmpffdb() result for
+ *   the first differing 8-byte half.
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2)
+{
+	const __m128i lhs = _mm_lddqu_si128((const __m128i *)src_1);
+	const __m128i rhs = _mm_lddqu_si128((const __m128i *)src_2);
+	const __m128i diff = _mm_xor_si128(lhs, rhs);
+
+	if (unlikely(!_mm_testz_si128(diff, diff))) {
+		const uint64_t lhs_lo = _mm_extract_epi64(lhs, 0);
+		const uint64_t lhs_hi = _mm_extract_epi64(lhs, 1);
+		const uint64_t rhs_lo = _mm_extract_epi64(rhs, 0);
+		const uint64_t rhs_hi = _mm_extract_epi64(rhs, 1);
+
+		/* equal low halves mean the difference is in the high halves */
+		const int in_high = (lhs_lo == rhs_lo);
+		const uint64_t *lhs_half = in_high ? &lhs_hi : &lhs_lo;
+		const uint64_t *rhs_half = in_high ? &rhs_hi : &rhs_lo;
+
+		return rte_cmpffdb((const uint8_t *)lhs_half,
+				(const uint8_t *)rhs_half, 8);
+	}
+
+	return 0;
+}
+
+/**
+ * Compare 0 to 15 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Only the low four bits of n are examined: the length is split into
+ * 8-, 4-, 2- and 1-byte chunks according to those bits. Each chunk is first
+ * tested for equality as a single integer; only when it differs is it
+ * scanned byte by byte with rte_cmpffdb() to produce the result.
+ *
+ * The chunks are loaded with memcpy() instead of dereferencing cast
+ * pointers, so no alignment is assumed for either buffer.
+ *
+ * @param src_1u
+ *   Pointer to the first buffer.
+ * @param src_2u
+ *   Pointer to the second buffer.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if the compared bytes are equal, otherwise the difference of the
+ *   first pair of bytes that differ (first minus second).
+ */
+static inline int
+rte_memcmp_regular(const uint8_t *src_1u, const uint8_t *src_2u, size_t n)
+{
+	if (n & 0x08) {
+		uint64_t a, b;
+
+		memcpy(&a, src_1u, sizeof(a));
+		memcpy(&b, src_2u, sizeof(b));
+		if (a != b)
+			return rte_cmpffdb(src_1u, src_2u, sizeof(a));
+
+		src_1u += sizeof(a);
+		src_2u += sizeof(a);
+	}
+
+	if (n & 0x04) {
+		uint32_t a, b;
+
+		memcpy(&a, src_1u, sizeof(a));
+		memcpy(&b, src_2u, sizeof(b));
+		if (a != b)
+			return rte_cmpffdb(src_1u, src_2u, sizeof(a));
+
+		src_1u += sizeof(a);
+		src_2u += sizeof(a);
+	}
+
+	if (n & 0x02) {
+		uint16_t a, b;
+
+		memcpy(&a, src_1u, sizeof(a));
+		memcpy(&b, src_2u, sizeof(b));
+		if (a != b)
+			return rte_cmpffdb(src_1u, src_2u, sizeof(a));
+
+		src_1u += sizeof(a);
+		src_2u += sizeof(a);
+	}
+
+	/* last odd byte: the difference is zero when the bytes are equal */
+	if (n & 0x01)
+		return *src_1u - *src_2u;
+
+	return 0;
+}
+
+/**
+ * AVX2 implementation below
+ */
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * The 32 bytes are handled as two 16-byte blocks loaded into 128-bit
+ * registers. If the XOR of the inputs is all zero the function returns 0;
+ * otherwise the first differing 16-byte block is selected, the index of
+ * its first differing byte is derived from two byte masks, and the
+ * difference of the two bytes at that index is returned.
+ *
+ * @return
+ *   zero if the blocks are equal, otherwise s1[i] - s2[i] for the byte
+ *   index i chosen by rte_cmpffd().
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+ const __m128i* src1 = (const __m128i*)src_1;
+ const __m128i* src2 = (const __m128i*)src_2;
+ const uint8_t *s1, *s2;
+
+ /* mmXY: X = source (1 or 2), Y = 16-byte block (1 = first, 2 = second) */
+ __m128i mm11 = _mm_lddqu_si128(src1);
+ __m128i mm12 = _mm_lddqu_si128(src1 + 1);
+ __m128i mm21 = _mm_lddqu_si128(src2);
+ __m128i mm22 = _mm_lddqu_si128(src2 + 1);
+
+ /* per-block XOR; mm is non-zero if either block differs */
+ __m128i mm1 = _mm_xor_si128(mm11, mm21);
+ __m128i mm2 = _mm_xor_si128(mm12, mm22);
+ __m128i mm = _mm_or_si128(mm1, mm2);
+
+ if (unlikely(!_mm_testz_si128(mm, mm))) {
+
+ /*
+ * Find out which of the two 16-byte blocks
+ * are different. If the first block is identical, continue
+ * with the second block and point s1/s2 at it.
+ */
+ if (_mm_testz_si128(mm1, mm1)) {
+ mm11 = mm12;
+ mm21 = mm22;
+ mm1 = mm2;
+ s1 = (const uint8_t *)(src1 + 1);
+ s2 = (const uint8_t *)(src2 + 1);
+ } else {
+ s1 = (const uint8_t *)src1;
+ s2 = (const uint8_t *)src2;
+ }
+
+ /*
+ * Produce the comparison result.
+ * Both greater-than masks are XORed with the same mm1, and
+ * rte_cmpffd() XORs the two extracted masks again, so mm1 cancels.
+ * NOTE(review): the lowest set bit of that XOR appears to be the
+ * index of the first byte where the two blocks differ - confirm
+ * against the intrinsic documentation.
+ */
+ __m128i mm_cmp = _mm_cmpgt_epi8(mm11, mm21);
+ __m128i mm_rcmp = _mm_cmpgt_epi8(mm21, mm11);
+ mm_cmp = _mm_xor_si128(mm1, mm_cmp);
+ mm_rcmp = _mm_xor_si128(mm1, mm_rcmp);
+
+ uint32_t cmp = _mm_movemask_epi8(mm_cmp);
+ uint32_t rcmp = _mm_movemask_epi8(mm_rcmp);
+
+ int cmp_b = rte_cmpffd(cmp, rcmp);
+
+ /* bytes are read as uint8_t, so the difference is taken unsigned */
+ int ret = (cmp_b == -1) ? 0 : (s1[cmp_b] - s2[cmp_b]);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * The first 32 bytes go through rte_cmp32(); only if they are equal are
+ * the remaining 16 bytes compared with rte_cmp16().
+ *
+ * @return
+ *   The first non-zero result of the two partial comparisons, or zero.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	const int head = rte_cmp32(lhs, rhs);
+
+	if (unlikely(head != 0))
+		return head;
+
+	return rte_cmp16(lhs + 32, rhs + 32);
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * The 64 bytes are handled as two 32-byte blocks loaded into 256-bit
+ * registers. If the XOR of the inputs is all zero the function returns 0;
+ * otherwise the first differing 32-byte block is selected, the index of
+ * its first differing byte is derived from two byte masks, and the
+ * difference of the two bytes at that index is returned.
+ *
+ * @return
+ *   zero if the blocks are equal, otherwise s1[i] - s2[i] for the byte
+ *   index i chosen by rte_cmpffd().
+ */
+static inline int
+rte_cmp64 (const void* src_1, const void* src_2)
+{
+ const __m256i* src1 = (const __m256i*)src_1;
+ const __m256i* src2 = (const __m256i*)src_2;
+ const uint8_t *s1, *s2;
+
+ /* mmXY: X = source (1 or 2), Y = 32-byte block (1 = first, 2 = second) */
+ __m256i mm11 = _mm256_lddqu_si256(src1);
+ __m256i mm12 = _mm256_lddqu_si256(src1 + 1);
+ __m256i mm21 = _mm256_lddqu_si256(src2);
+ __m256i mm22 = _mm256_lddqu_si256(src2 + 1);
+
+ /* per-block XOR; mm is non-zero if either block differs */
+ __m256i mm1 = _mm256_xor_si256(mm11, mm21);
+ __m256i mm2 = _mm256_xor_si256(mm12, mm22);
+ __m256i mm = _mm256_or_si256(mm1, mm2);
+
+ if (unlikely(!_mm256_testz_si256(mm, mm))) {
+ /*
+ * Find out which of the two 32-byte blocks
+ * are different. If the first block is identical, continue
+ * with the second block and point s1/s2 at it.
+ */
+ if (_mm256_testz_si256(mm1, mm1)) {
+ mm11 = mm12;
+ mm21 = mm22;
+ mm1 = mm2;
+ s1 = (const uint8_t *)(src1 + 1);
+ s2 = (const uint8_t *)(src2 + 1);
+ } else {
+ s1 = (const uint8_t *)src1;
+ s2 = (const uint8_t *)src2;
+ }
+
+ /*
+ * Produce the comparison result.
+ * Both greater-than masks are XORed with the same mm1, and
+ * rte_cmpffd() XORs the two extracted masks again, so mm1 cancels.
+ * NOTE(review): the lowest set bit of that XOR appears to be the
+ * index of the first byte where the two blocks differ - confirm
+ * against the intrinsic documentation.
+ */
+ __m256i mm_cmp = _mm256_cmpgt_epi8(mm11, mm21);
+ __m256i mm_rcmp = _mm256_cmpgt_epi8(mm21, mm11);
+ mm_cmp = _mm256_xor_si256(mm1, mm_cmp);
+ mm_rcmp = _mm256_xor_si256(mm1, mm_rcmp);
+
+ uint32_t cmp = _mm256_movemask_epi8(mm_cmp);
+ uint32_t rcmp = _mm256_movemask_epi8(mm_rcmp);
+
+ int cmp_b = rte_cmpffd(cmp, rcmp);
+
+ /* bytes are read as uint8_t, so the difference is taken unsigned */
+ int ret = (cmp_b == -1) ? 0 : (s1[cmp_b] - s2[cmp_b]);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Implemented as two consecutive rte_cmp64() calls; the second half is
+ * only inspected when the first half compares equal.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The result of rte_cmp64() on bytes 0..63 when it is non-zero, otherwise
+ *   the result of rte_cmp64() on bytes 64..127.
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	const int first_half = rte_cmp64(lhs, rhs);
+
+	if (unlikely(first_half != 0))
+		return first_half;
+
+	return rte_cmp64(lhs + 64, rhs + 64);
+}
+
+/**
+ * Compare 256 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Walks the buffers in four 64-byte steps using rte_cmp64() and stops at
+ * the first step that reports a difference.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The first non-zero rte_cmp64() result, or zero when all four 64-byte
+ *   blocks compare equal.
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	unsigned int off;
+	int ret = 0;
+
+	for (off = 0; off < 256; off += 64) {
+		ret = rte_cmp64(lhs + off, rhs + off);
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * Lengths below 16 are delegated to rte_memcmp_regular(). Larger lengths
+ * are covered with fixed-size block comparisons; any remainder is handled
+ * by one extra block comparison positioned so that it ends exactly at
+ * byte n - 1. That last window may overlap bytes that were already found
+ * equal, but it never reads past the end of either buffer.
+ *
+ * @param _src_1
+ *   Pointer to the first source of the data.
+ * @param _src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if src_1 equal src_2
+ *   -ve if src_1 less than src_2
+ *   +ve if src_1 greater than src_2
+ */
+static inline int
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	int ret = 0;
+
+	if (n < 16)
+		return rte_memcmp_regular(src_1, src_2, n);
+
+	/*
+	 * Tail windows are written as "src + n - size" (not "src - size + n")
+	 * so that no intermediate pointer lies before the start of the buffer.
+	 */
+
+	/* 16..32 bytes: head block plus a 16-byte window ending at n. */
+	if (n <= 32) {
+		ret = rte_cmp16(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/* 33..48 bytes. */
+	if (n <= 48) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/* 49..64 bytes. */
+	if (n <= 64) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp16(src_1 + 32, src_2 + 32);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/*
+	 * 65..96 bytes: the remainder after the first 64 bytes is 1..32 bytes,
+	 * so a single 32-byte window ending at n covers it. A fixed 16-byte
+	 * compare at offset 64 would read past the end when n < 80.
+	 */
+	if (n <= 96) {
+		ret = rte_cmp64(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp32(src_1 + n - 32, src_2 + n - 32);
+	}
+
+	/*
+	 * 97..128 bytes: bytes 0..95 are covered by fixed blocks, the
+	 * remaining 1..32 bytes by a 32-byte window ending at n. A fixed
+	 * 16-byte compare at offset 96 would read past the end when n < 112.
+	 */
+	if (n <= 128) {
+		ret = rte_cmp64(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp32(src_1 + 64, src_2 + 64);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp32(src_1 + n - 32, src_2 + n - 32);
+	}
+
+	/* n > 128 from here on: consume 512 bytes per iteration while possible. */
+	while (n > 512) {
+		ret = rte_cmp256(src_1 + 0 * 256, src_2 + 0 * 256);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp256(src_1 + 1 * 256, src_2 + 1 * 256);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 512;
+		src_2 = src_2 + 512;
+		n -= 512;
+	}
+
+	/* 1 <= n <= 512: peel off 256-, 128- and 64-byte blocks. */
+	if (n >= 256) {
+		ret = rte_cmp256(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+		src_1 = src_1 + 256;
+		src_2 = src_2 + 256;
+		n -= 256;
+	}
+	if (n >= 128) {
+		ret = rte_cmp128(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+		src_1 = src_1 + 128;
+		src_2 = src_2 + 128;
+		n -= 128;
+	}
+	if (n >= 64) {
+		ret = rte_cmp64(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+		src_1 = src_1 + 64;
+		src_2 = src_2 + 64;
+		n -= 64;
+	}
+
+	/* 33..63 bytes left: one block plus an overlapping tail window. */
+	if (n > 32) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp32(src_1 + n - 32, src_2 + n - 32);
+	}
+
+	/*
+	 * 1..32 bytes left: the window starts inside data that was already
+	 * compared equal; at least 128 bytes precede src_1/src_2 here.
+	 */
+	if (n > 0)
+		ret = rte_cmp32(src_1 + n - 32, src_2 + n - 32);
+
+	return ret;
+}
+
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Built from two rte_cmp16() calls; the upper 16 bytes are only examined
+ * when the lower 16 bytes compare equal.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The result of rte_cmp16() on bytes 0..15 when it is non-zero, otherwise
+ *   the result of rte_cmp16() on bytes 16..31.
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	const int lower = rte_cmp16(lhs, rhs);
+
+	if (unlikely(lower != 0))
+		return lower;
+
+	return rte_cmp16(lhs + 16, rhs + 16);
+}
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Walks the buffers in three 16-byte steps using rte_cmp16() and stops at
+ * the first step that reports a difference.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The first non-zero rte_cmp16() result, or zero when all three 16-byte
+ *   blocks compare equal.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	unsigned int off;
+	int ret = 0;
+
+	for (off = 0; off < 48; off += 16) {
+		ret = rte_cmp16(lhs + off, rhs + off);
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Walks the buffers in four 16-byte steps using rte_cmp16() and stops at
+ * the first step that reports a difference.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The first non-zero rte_cmp16() result, or zero when all four 16-byte
+ *   blocks compare equal.
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	unsigned int off;
+	int ret = 0;
+
+	for (off = 0; off < 64; off += 16) {
+		ret = rte_cmp16(lhs + off, rhs + off);
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Walks the buffers in eight 16-byte steps using rte_cmp16() and stops at
+ * the first step that reports a difference.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The first non-zero rte_cmp16() result, or zero when all eight 16-byte
+ *   blocks compare equal.
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	unsigned int off;
+	int ret = 0;
+
+	for (off = 0; off < 128; off += 16) {
+		ret = rte_cmp16(lhs + off, rhs + off);
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * Compare 256 bytes between two locations.
+ * Locations should not overlap.
+ *
+ * Walks the buffers in sixteen 16-byte steps using rte_cmp16() and stops
+ * at the first step that reports a difference.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   The first non-zero rte_cmp16() result, or zero when all sixteen
+ *   16-byte blocks compare equal.
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2)
+{
+	const uint8_t *lhs = (const uint8_t *)src_1;
+	const uint8_t *rhs = (const uint8_t *)src_2;
+	unsigned int off;
+	int ret = 0;
+
+	for (off = 0; off < 256; off += 16) {
+		ret = rte_cmp16(lhs + off, rhs + off);
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * Lengths below 16 are delegated to rte_memcmp_regular(). Larger lengths
+ * are covered with fixed-size block comparisons; any remainder is handled
+ * by one extra block comparison positioned so that it ends exactly at
+ * byte n - 1. That last window may overlap bytes that were already found
+ * equal, but it never reads past the end of either buffer.
+ *
+ * @param _src_1
+ *   Pointer to the first source of the data.
+ * @param _src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if src_1 equal src_2
+ *   -ve if src_1 less than src_2
+ *   +ve if src_1 greater than src_2
+ */
+static inline int
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	int ret = 0;
+
+	if (n < 16)
+		return rte_memcmp_regular(src_1, src_2, n);
+
+	/*
+	 * Tail windows are written as "src + n - size" (not "src - size + n")
+	 * so that no intermediate pointer lies before the start of the buffer.
+	 */
+
+	/* 16..32 bytes: head block plus a 16-byte window ending at n. */
+	if (n <= 32) {
+		ret = rte_cmp16(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/* 33..48 bytes. */
+	if (n <= 48) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/* 49..64 bytes. */
+	if (n <= 64) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp16(src_1 + 32, src_2 + 32);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/*
+	 * 65..96 bytes: the remainder after the first 64 bytes is 1..32 bytes,
+	 * so a single 32-byte window ending at n covers it. A fixed 16-byte
+	 * compare at offset 64 would read past the end when n < 80.
+	 */
+	if (n <= 96) {
+		ret = rte_cmp64(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp32(src_1 + n - 32, src_2 + n - 32);
+	}
+
+	/* n > 96 from here on: peel off 256-, 128-, 64- and 32-byte blocks. */
+	while (n >= 256) {
+		ret = rte_cmp256(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 256;
+		src_2 = src_2 + 256;
+		n -= 256;
+	}
+
+	if (n >= 128) {
+		ret = rte_cmp128(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 128;
+		src_2 = src_2 + 128;
+		n -= 128;
+	}
+
+	if (n >= 64) {
+		ret = rte_cmp64(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 64;
+		src_2 = src_2 + 64;
+		n -= 64;
+	}
+
+	if (n >= 32) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 32;
+		src_2 = src_2 + 32;
+		n -= 32;
+	}
+
+	/* 17..31 bytes left: one block plus an overlapping tail window. */
+	if (n > 16) {
+		ret = rte_cmp16(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+	}
+
+	/*
+	 * 1..16 bytes left: the window starts inside data that was already
+	 * compared equal; at least 64 bytes precede src_1/src_2 here.
+	 */
+	if (n > 0)
+		ret = rte_cmp16(src_1 + n - 16, src_2 + n - 16);
+
+	return ret;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..5e68036
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,175 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
+/**
+ * Find the first different bit for comparison.
+ */
+static inline int
+rte_cmpffd (uint32_t x, uint32_t y);
+
+/**
+ * Find the first different byte for comparison.
+ */
+static inline int
+rte_cmpffdb (const uint8_t *x, const uint8_t *y, size_t n);
+
+/**
+ * Compare 16 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2);
+
+/**
+ * Compare 32 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2);
+
+/**
+ * Compare 64 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2);
+
+/**
+ * Compare 48 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2);
+
+/**
+ * Compare 128 bytes between two locations using
+ * optimised instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2);
+
+/**
+ * Compare 256 bytes between two locations using
+ * optimised instructions. The locations should not overlap.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so its address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param src_1
+ * Pointer to the first source of the data.
+ * @param src_2
+ * Pointer to the second source of the data.
+ * @param n
+ * Number of bytes to compare.
+ * @return
+ * zero if src_1 equal src_2
+ * -ve if src_1 less than src_2
+ * +ve if src_1 greater than src_2
+ */
+static int
+rte_memcmp(const void *src_1, const void *src_2, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcmp() function used by rte_memcmp macro
+ */
+static inline int
+rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
index 9245716..075da62 100644
--- a/lib/librte_hash/rte_hash.c
+++ b/lib/librte_hash/rte_hash.c
@@ -42,6 +42,7 @@
#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
#include <rte_log.h>
#include <rte_memcpy.h>
+#include <rte_memcmp.h>
#include <rte_prefetch.h>
#include <rte_branch_prediction.h>
#include <rte_memzone.h>
@@ -299,6 +300,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
int32_t pos;
+ const void * volatile key_1 = key;
/* Get the hash signature and bucket index */
sig |= h->sig_msb;
@@ -308,10 +310,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+ return bucket_index * h->bucket_entries + i;
}
}
@@ -350,6 +355,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
+ const void * volatile key_1 = key;
+
/* Get the hash signature and bucket index */
sig = sig | h->sig_msb;
bucket_index = sig & h->bucket_bitmask;
@@ -358,11 +365,14 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- sig_bucket[i] = NULL_SIGNATURE;
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) {
+ sig_bucket[i] = NULL_SIGNATURE;
+ return bucket_index * h->bucket_entries + i;
+ }
}
}
@@ -392,6 +402,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
uint8_t *key_bucket;
uint32_t bucket_index, i;
+ const void * volatile key_1 = key;
+
/* Get the hash signature and bucket index */
sig |= h->sig_msb;
bucket_index = sig & h->bucket_bitmask;
@@ -400,10 +412,13 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
/* Check if key is already present in the hash */
for (i = 0; i < h->bucket_entries; i++) {
- if ((sig == sig_bucket[i]) &&
- likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
- h->key_len) == 0)) {
- return bucket_index * h->bucket_entries + i;
+ if (sig == sig_bucket[i]) {
+
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, i);
+
+ if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+ return bucket_index * h->bucket_entries + i;
}
}
@@ -456,13 +471,17 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
positions[i] = -ENOENT;
for (j = 0; j < h->bucket_entries; j++) {
- if ((sigs[i] == sig_bucket[j]) &&
- likely(memcmp(keys[i],
- get_key_from_bucket(h, key_bucket, j),
- h->key_len) == 0)) {
- positions[i] = bucket_index *
- h->bucket_entries + j;
- break;
+ if (sigs[i] == sig_bucket[j]) {
+
+ const void * volatile key_1 = keys[i];
+ const void * volatile key_2 =
+ get_key_from_bucket(h, key_bucket, j);
+ if (likely(rte_memcmp(key_1, key_2,
+ h->key_len) == 0)) {
+ positions[i] = bucket_index *
+ h->bucket_entries + j;
+ break;
+ }
}
}
}
--
1.9.1
next prev parent reply other threads:[~2015-05-18 20:01 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-05-18 20:01 [PATCH v3] Implement memcmp using SIMD intrinsics Ravi Kerur
2015-05-18 20:01 ` Ravi Kerur [this message]
2015-10-14 0:32 ` [PATCH v3] Implement memcmp using Intel SIMD instrinsics Stephen Hemminger
2016-01-28 3:08 ` [dpdk-dev, " Zhihong Wang
2016-02-19 17:50 ` Ravi Kerur
2016-02-23 12:22 ` Wang, Zhihong
2016-02-24 4:00 ` Ravi Kerur
2015-06-12 8:30 ` [PATCH v3] Implement memcmp using SIMD intrinsics Ondřej Bílka
2015-06-12 9:03 ` Bruce Richardson
2015-06-15 20:47 ` Ravi Kerur
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1431979303-1346-2-git-send-email-rkerur@gmail.com \
--to=rkerur@gmail.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.