* [RFC] pcapng: improve performance of timestamping
@ 2025-11-26 5:12 Stephen Hemminger
2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger
` (4 more replies)
0 siblings, 5 replies; 58+ messages in thread
From: Stephen Hemminger @ 2025-11-26 5:12 UTC (permalink / raw)
To: dev; +Cc: Stephen Hemminger
Avoid doing expensive divide operations when converting
timestamps from cycles (TSC) to pcapng scaled value (ns).
This logic was derived from the math used by Linux kernel
virtual system call with help from AI.
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/pcapng/rte_pcapng.c | 61 ++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index 7c3c400c71..283962fa2d 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -38,8 +38,14 @@
struct rte_pcapng {
int outfd; /* output file */
unsigned int ports; /* number of interfaces added */
- uint64_t offset_ns; /* ns since 1/1/1970 when initialized */
- uint64_t tsc_base; /* TSC when started */
+
+ struct pcapng_time_conv {
+ uint64_t tsc_base; /* TSC when started */
+ uint64_t ns_base; /* ns since 1/1/1970 when initialized */
+ uint64_t mult; /* scaling factor relative to TSC hz */
+ uint32_t shift; /* shift for scaling (24) */
+ uint64_t mask; /* mask of bits used (56) */
+ } tc;
/* DPDK port id to interface index in file */
uint32_t port_index[RTE_MAX_ETHPORTS];
@@ -95,21 +101,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
#define if_indextoname(ifindex, ifname) NULL
#endif
+/* Initialize time conversion based on logic similar to rte_cyclecounter */
+static void
+pcapng_timestamp_init(struct pcapng_time_conv *tc)
+{
+ struct timespec ts;
+ uint64_t cycles = rte_get_tsc_cycles();
+
+ /* record start time in ns since 1/1/1970 */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Compute baseline TSC which occurred during clock_gettime */
+ tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+ tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+
+ /* Set conversion factors for reasonable precision with no overflow */
+ uint64_t tsc_hz = rte_get_tsc_hz();
+ tc->shift = 24;
+ tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz;
+ tc->mask = RTE_BIT64(56) - 1;
+}
+
/* Convert from TSC (CPU cycles) to nanoseconds */
static uint64_t
-pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
+pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles)
{
- uint64_t delta, rem, secs, ns;
- const uint64_t hz = rte_get_tsc_hz();
-
- delta = cycles - self->tsc_base;
+ /* Compute TSC delta with mask to avoid wraparound */
+ uint64_t delta = (cycles - tc->tsc_base) & tc->mask;
- /* Avoid numeric wraparound by computing seconds first */
- secs = delta / hz;
- rem = delta % hz;
- ns = (rem * NS_PER_S) / hz;
+ /* Convert TSC delta to nanoseconds (no division) */
+ uint64_t ns_delta = (delta * tc->mult) >> tc->shift;
- return secs * NS_PER_S + ns + self->offset_ns;
+ return tc->ns_base + ns_delta;
}
/* length of option including padding */
@@ -329,7 +352,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
{
struct pcapng_statistics *hdr;
struct pcapng_option *opt;
- uint64_t start_time = self->offset_ns;
+ uint64_t start_time = self->tc.ns_base;
uint64_t sample_time;
uint32_t optlen, len;
uint32_t *buf;
@@ -379,7 +402,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
hdr->block_length = len;
hdr->interface_id = self->port_index[port_id];
- sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
+ sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles());
hdr->timestamp_hi = sample_time >> 32;
hdr->timestamp_lo = (uint32_t)sample_time;
@@ -658,7 +681,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
/* adjust timestamp recorded in packet */
cycles = (uint64_t)epb->timestamp_hi << 32;
cycles += epb->timestamp_lo;
- timestamp = pcapng_timestamp(self, cycles);
+ timestamp = pcapng_timestamp(&self->tc, cycles);
epb->timestamp_hi = timestamp >> 32;
epb->timestamp_lo = (uint32_t)timestamp;
@@ -704,8 +727,6 @@ rte_pcapng_fdopen(int fd,
{
unsigned int i;
rte_pcapng_t *self;
- struct timespec ts;
- uint64_t cycles;
self = malloc(sizeof(*self));
if (!self) {
@@ -716,11 +737,7 @@ rte_pcapng_fdopen(int fd,
self->outfd = fd;
self->ports = 0;
- /* record start time in ns since 1/1/1970 */
- cycles = rte_get_tsc_cycles();
- clock_gettime(CLOCK_REALTIME, &ts);
- self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
- self->offset_ns = rte_timespec_to_ns(&ts);
+ pcapng_timestamp_init(&self->tc);
for (i = 0; i < RTE_MAX_ETHPORTS; i++)
self->port_index[i] = UINT32_MAX;
--
2.51.0
^ permalink raw reply related [flat|nested] 58+ messages in thread* [PATCH v2 0/6] pcapng: timestamping and comment fixes 2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger ` (5 more replies) 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger ` (3 subsequent siblings) 4 siblings, 6 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger Improve performance and tests for pcapng. Stephen Hemminger (6): pcapng: use alloca instead of fixed buffer pcapng: add additional mbuf if space required on copy test: add more tests for comments in pcapng test: vary size of packets in pcapng test test: increase gap in pcapng test pcapng: improve performance of timestamping app/test/test_pcapng.c | 134 +++++++++++++++++++++++++++++----------- lib/pcapng/rte_pcapng.c | 101 +++++++++++++++++++----------- 2 files changed, 163 insertions(+), 72 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy Stephen Hemminger ` (4 subsequent siblings) 5 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan This is an API that accepts strings as options, and user could potentially ask for very large string as comment. The dynamic way to fix is to use alloca() to allocate the buffer used to hold options. Bugzilla ID: 1820 Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 21bc94cea1..3067033e89 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,9 +34,6 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -145,7 +142,7 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; len = sizeof(*hdr); @@ -162,7 +159,8 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = alloca(len); + if (buf == NULL) return -1; hdr = (struct pcapng_section_header *)buf; @@ -214,7 +212,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + 
uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -268,7 +266,8 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = alloca(len); + if (buf == NULL) return -1; hdr = (struct pcapng_interface_block *)buf; @@ -333,7 +332,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -353,7 +352,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if (len > sizeof(buf)) + + buf = alloca(len); + if (buf == NULL) return -1; hdr = (struct pcapng_statistics *)buf; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 3/6] test: add more tests for comments in pcapng Stephen Hemminger ` (3 subsequent siblings) 5 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan If comment is passed to rte_pcapng_copy(), additional space maybe needed at the end of the mbuf. To handle this case generate a segmented mbuf with additional space for the options. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 3067033e89..7c3c400c71 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -540,11 +540,24 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strlen(comment)); - /* reserve trailing options and block length */ + /* + * Try to put options at the end of this mbuf. + * If not use an mbuf chain. + */ opt = (struct pcapng_option *) rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; + + opt = (struct pcapng_option *)rte_pktmbuf_append(ml, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL || rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; + } + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v2 3/6] test: add more tests for comments in pcapng 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 4/6] test: vary size of packets in pcapng test Stephen Hemminger ` (2 subsequent siblings) 5 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Add some more cases where comment is set in pcapng file. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index bcf99724fa..f2b49c31c7 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -125,8 +125,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", - MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -149,6 +148,13 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) unsigned int burst_size; unsigned int count; ssize_t len; + const char *examples[] = { + "EAL init complete. May the cores be ever in your favor.", + "No packets were harmed in the making of this burst.", + "rte_eth_dev_start(): crossing fingers and enabling queues...", + "Congratulations, you’ve reached the end of the RX path. Please collect your free cache miss.", + "Lockless and fearless — that’s how we roll in userspace." 
+ }; /* make a dummy packet */ mbuf1_prepare(&mbfs, pkt_len); @@ -162,9 +168,14 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put comment on occasional packets */ + if ((count + i) % 42 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -386,7 +397,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,6 +424,13 @@ test_write_packets(void) goto fail; } + /* write a statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + count = fill_pcapng_file(pcapng, TOTAL_PACKETS); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v2 4/6] test: vary size of packets in pcapng test 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger ` (2 preceding siblings ...) 2025-12-29 23:01 ` [PATCH v2 3/6] test: add more tests for comments in pcapng Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 5/6] test: increase gap " Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 6/6] pcapng: improve performance of timestamping Stephen Hemminger 5 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan In order exercise more logic in pcapng, vary the size of the packets. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 94 +++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index f2b49c31c7..5010355df5 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -28,10 +28,9 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 #define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +40,36 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! 
*/ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + + while (n < len - 1) { + char ch = FILL_START + (n % FILL_LINE_LENGTH) % FILL_RANGE; + for (unsigned int i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch == FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + } +} +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +79,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t *)rte_pktmbuf_append(mb, data_len); - - for (i = 0; i != data_len; i++) - db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +102,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); + + fill_mbuf(&dm->mb[1]); + rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + 
rte_mbuf_sanity_check(&dm->mb[1], 0); +} - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + dm->mb[1].data_len = len; + dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); + len += sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -125,7 +158,8 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", - MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len), + MAX_BURST * 32, 0, 0, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -156,8 +190,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) "Lockless and fearless — that’s how we roll in userspace." 
}; - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; for (count = 0; count < num_packets; count += burst_size) { @@ -174,6 +207,9 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) if ((count + i) % 42 == 0) comment = examples[rte_rand_max(RTE_DIM(examples))]; + /* Vary the size of the packets */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); + mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v2 5/6] test: increase gap in pcapng test 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger ` (3 preceding siblings ...) 2025-12-29 23:01 ` [PATCH v2 4/6] test: vary size of packets in pcapng test Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 6/6] pcapng: improve performance of timestamping Stephen Hemminger 5 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Want to make sure that test takes long enough that 32 bit counter wraps around. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 5010355df5..73557eb2f1 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -27,7 +27,6 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 -#define MAX_GAP_US 100000 #define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; @@ -175,7 +174,7 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; @@ -193,7 +192,15 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); + + /* Want overall test to take to wraparound at least twice. 
*/ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); + + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -229,8 +236,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -467,7 +473,7 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v2 6/6] pcapng: improve performance of timestamping 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger ` (4 preceding siblings ...) 2025-12-29 23:01 ` [PATCH v2 5/6] test: increase gap " Stephen Hemminger @ 2025-12-29 23:01 ` Stephen Hemminger 5 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2025-12-29 23:01 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to pcapng scaled value (ns). This logic was derived from the math used by Linux kernel virtual system call with help from AI. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 63 ++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 7c3c400c71..b12814e305 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -38,8 +38,14 @@ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct pcapng_time_conv { + uint64_t tsc_base; /* TSC when started */ + uint64_t ns_base; /* ns since 1/1/1970 when initialized */ + uint64_t mult; /* scaling factor relative to TSC hz */ + uint32_t shift; /* shift for scaling (24) */ + uint64_t mask; /* mask of bits used (56) */ + } tc; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -95,21 +101,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif +/* Initialize time conversion based on logic similar to rte_cyclecounter */ +static void +pcapng_timestamp_init(struct pcapng_time_conv *tc) +{ + struct timespec ts; + uint64_t cycles = rte_get_tsc_cycles(); + + /* record start time in ns since 
1/1/1970 */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Compute baseline TSC which occurred during clock_gettime */ + tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; + + /* Set conversion factors for reasonable precision with no overflow */ + uint64_t tsc_hz = rte_get_tsc_hz(); + tc->shift = 24; + tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz; + tc->mask = RTE_BIT64(56) - 1; +} + /* Convert from TSC (CPU cycles) to nanoseconds */ static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); - - delta = cycles - self->tsc_base; + /* Compute TSC delta with mask to avoid wraparound */ + uint64_t delta = (cycles - tc->tsc_base) & tc->mask; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + /* Convert TSC delta to nanoseconds (no division) */ + uint64_t ns_delta = (delta * tc->mult) >> tc->shift; - return secs * NS_PER_S + ns + self->offset_ns; + return tc->ns_base + ns_delta; } /* length of option including padding */ @@ -309,7 +332,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); - /* clone block_length after optionsa */ + /* clone block_length after options */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); /* remember the file index */ @@ -329,7 +352,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->tc.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -379,7 +402,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = 
self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles()); hdr->timestamp_hi = sample_time >> 32; hdr->timestamp_lo = (uint32_t)sample_time; @@ -658,7 +681,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, /* adjust timestamp recorded in packet */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = pcapng_timestamp(&self->tc, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -704,8 +727,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; self = malloc(sizeof(*self)); if (!self) { @@ -716,11 +737,7 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + pcapng_timestamp_init(&self->tc); for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 0/7] pcapng: fixes and improvements 2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 1/7] pcapng: add length checks to string arguments Stephen Hemminger ` (6 more replies) 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger ` (2 subsequent siblings) 4 siblings, 7 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger This series addresses several issues in the pcapng library and improves test coverage. Several of these patches were submitted previously in other series but have been more thoroughly tested and passed AI review. Bug Fixes: Patches 1-3 fix potential issues with string handling in the pcapng administrative APIs. The pcapng file format uses 16-bit length fields for option data, but the library did not validate input string lengths, and used fixed-size stack buffers that could be overrun by large comments or metadata strings. Patch 1 adds length validation against the format's 16-bit limit Patch 2 replaces fixed stack buffers with dynamic allocation Patch 3 handles the case where comment options exceed available mbuf tailroom by chaining an additional mbuf segment Test Improvements: Patches 4-6 improve the pcapng unit test coverage: Patch 4 exercises the comment option code path with varied test strings Patch 5 varies packet sizes to exercise more of the copy logic Patch 6 ensures the test runs long enough to verify 32-bit TSC wraparound handling Performance: Patch 7 optimizes timestamp conversion by replacing division operations with shift-multiply arithmetic, following the approach used by the Linux kernel vDSO. This eliminates expensive divides from the packet capture hot path.
Stephen Hemminger (7): pcapng: add length checks to string arguments pcapng: use malloc instead of fixed buffer size pcapng: add additional mbuf if space required on copy test: add more tests for comments in pcapng test: vary size of packets in pcapng test test: increase gap in pcapng test pcapng: improve performance of timestamping app/test/test_pcapng.c | 134 ++++++++++++++++++++++++++---------- lib/pcapng/rte_pcapng.c | 147 +++++++++++++++++++++++++++++----------- lib/pcapng/rte_pcapng.h | 8 ++- 3 files changed, 211 insertions(+), 78 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v3 1/7] pcapng: add length checks to string arguments 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger ` (5 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan The pcapng file format has a maximum possible string length of 16 bits since information is recorded as type, value, length. The API should check these lengths before possible memory allocation or overwrite failures. Update Doxygen comments to include return value. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 31 ++++++++++++++++++++++++++++--- lib/pcapng/rte_pcapng.h | 8 +++++++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 21bc94cea1..863706a365 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,6 +34,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull +/* upper bound for strings in pcapng option data */ +#define PCAPNG_STR_MAX UINT16_MAX + /* upper bound for section, stats and interface blocks (in uint32_t) */ #define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) @@ -218,9 +221,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; + int ret; - if (rte_eth_dev_info_get(port, &dev_info) < 0) - return -1; + ret = rte_eth_dev_info_get(port, &dev_info); + if (ret < 0) + return ret; /* make something like an interface name */ if (ifname == NULL) { @@ -230,8 +235,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); ifname = ifname_buf; } + } else if (strlen(ifname) > 
PCAPNG_STR_MAX) { + return -EINVAL; } + if ((ifdescr && strlen(ifdescr) > PCAPNG_STR_MAX) || + (filter && strlen(filter) > PCAPNG_STR_MAX)) + return -EINVAL; + /* make a useful device hardware string */ dev = dev_info.device; if (dev) @@ -337,6 +348,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return -1; + optlen = 0; if (ifrecv != UINT64_MAX) @@ -489,6 +503,9 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, #ifdef RTE_LIBRTE_ETHDEV_DEBUG RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return NULL; #endif orig_len = rte_pktmbuf_pkt_len(md); @@ -693,8 +710,16 @@ rte_pcapng_fdopen(int fd, struct timespec ts; uint64_t cycles; + if ((osname && strlen(osname) > PCAPNG_STR_MAX) || + (hardware && strlen(hardware) > PCAPNG_STR_MAX) || + (appname && strlen(appname) > PCAPNG_STR_MAX) || + (comment && strlen(comment) > PCAPNG_STR_MAX)) { + rte_errno = ENAMETOOLONG; + return NULL; + } + self = malloc(sizeof(*self)); - if (!self) { + if (self == NULL) { rte_errno = ENOMEM; return NULL; } diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index de1bf953e9..4f085f5c86 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -89,6 +89,12 @@ rte_pcapng_close(rte_pcapng_t *self); * Interfaces must be added to the output file after opening * and before any packet record. All ports used in packet capture * must be added. + * + * @return + * - (0) if successful. + * - (-ENOTSUP) if support for dev_infos_get() does not exist for the device. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. */ int rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, @@ -192,7 +198,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. 
* @return - * number of bytes written to file, -1 on failure to write file + * number of bytes written to file, -1 on failure to write file or memory allocation failure. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 2/7] pcapng: use malloc instead of fixed buffer size 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 1/7] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 3/7] pcapng: add additional mbuf if space required on copy Stephen Hemminger ` (4 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella The administrative APIs accept comments and other meta data as strings. This leads to possibility of very large strings which can overrun the use of fixed size buffers. Instead, use malloc to allocate a buffer of the necessary size and handle potential allocation failures. Bugzilla ID: 1820 Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 863706a365..c2635d8b03 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,9 +37,6 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -148,7 +145,7 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; len = sizeof(*hdr); @@ -165,8 +162,11 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); 
+ if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_section_header *)buf; *hdr = (struct pcapng_section_header) { @@ -199,7 +199,9 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ssize_t ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* Write an interface block for a DPDK port */ @@ -217,7 +219,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -279,8 +281,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -327,7 +332,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, /* remember the file index */ self->port_index[port] = self->ports++; - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* @@ -344,7 +351,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -367,8 +374,11 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_statistics *)buf; opt = (struct pcapng_option *)(hdr + 1); @@ -399,7 +409,9 @@ 
rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, /* clone block_length after option */ memcpy(opt, &len, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ssize_t ret = write(self->outfd, buf, len); + free(buf); + return ret; } RTE_EXPORT_SYMBOL(rte_pcapng_mbuf_size) -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 3/7] pcapng: add additional mbuf if space required on copy 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 1/7] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 4/7] test: add more tests for comments in pcapng Stephen Hemminger ` (3 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Amit Prakash Shukla If comment is passed to rte_pcapng_copy(), additional space may be needed at the end of the mbuf. To handle this case generate a segmented mbuf with additional space for the options. Fixes: c1abd1e93dbd ("pcapng: support comment in enhanced packet block") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index c2635d8b03..f53e6dfecd 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -568,11 +568,24 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strlen(comment)); - /* reserve trailing options and block length */ + /* + * Try to put options at the end of this mbuf. + * If not use an mbuf chain. 
+ */ opt = (struct pcapng_option *) rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; + + opt = (struct pcapng_option *)rte_pktmbuf_append(ml, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL || rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; + } + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 4/7] test: add more tests for comments in pcapng 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger ` (2 preceding siblings ...) 2026-01-12 4:50 ` [PATCH v3 3/7] pcapng: add additional mbuf if space required on copy Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 5/7] test: vary size of packets in pcapng test Stephen Hemminger ` (2 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Add some more cases where comment is set in pcapng file. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index bcf99724fa..228724d4c9 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -125,8 +125,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", - MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -149,6 +148,14 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) unsigned int burst_size; unsigned int count; ssize_t len; + static const char *examples[] = { + "EAL init complete. May the cores be ever in your favor.", + "No packets were harmed in the making of this burst.", + "rte_eth_dev_start(): crossing fingers and enabling queues...", + "Congratulations, you’ve reached the end of the RX path. " + "Please collect your free cache miss.", + "Lockless and fearless - that’s how we roll in userspace." 
+ }; /* make a dummy packet */ mbuf1_prepare(&mbfs, pkt_len); @@ -162,9 +169,14 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put comment on occasional packets */ + if ((count + i) % 42 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -386,7 +398,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,6 +425,13 @@ test_write_packets(void) goto fail; } + /* write a statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + count = fill_pcapng_file(pcapng, TOTAL_PACKETS); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 5/7] test: vary size of packets in pcapng test 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger ` (3 preceding siblings ...) 2026-01-12 4:50 ` [PATCH v3 4/7] test: add more tests for comments in pcapng Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 6/7] test: increase gap " Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 7/7] pcapng: improve performance of timestamping Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan In order to exercise more logic in pcapng, vary the size of the packets. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 94 +++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 228724d4c9..6e3df17525 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -28,10 +28,9 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 #define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +40,36 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! 
*/ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + + while (n < len - 1) { + char ch = FILL_START + (n % FILL_LINE_LENGTH) % FILL_RANGE; + for (unsigned int i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch == FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + } +} +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +79,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t *)rte_pktmbuf_append(mb, data_len); - - for (i = 0; i != data_len; i++) - db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +102,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); + + fill_mbuf(&dm->mb[1]); + rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + 
rte_mbuf_sanity_check(&dm->mb[1], 0); +} - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + dm->mb[1].data_len = len; + dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); + len += sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -125,7 +158,8 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", - MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len), + MAX_BURST * 32, 0, 0, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -157,8 +191,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) "Lockless and fearless - that’s how we roll in userspace." 
}; - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; for (count = 0; count < num_packets; count += burst_size) { @@ -175,6 +208,9 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) if ((count + i) % 42 == 0) comment = examples[rte_rand_max(RTE_DIM(examples))]; + /* Vary the size of the packets */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); + mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 6/7] test: increase gap in pcapng test 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger ` (4 preceding siblings ...) 2026-01-12 4:50 ` [PATCH v3 5/7] test: vary size of packets in pcapng test Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 7/7] pcapng: improve performance of timestamping Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Want to make sure that test takes long enough that 32 bit counter wraps around. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 6e3df17525..f93bbb26ac 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -27,7 +27,6 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 -#define MAX_GAP_US 100000 #define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; @@ -175,7 +174,7 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; @@ -190,11 +189,18 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) "Please collect your free cache miss.", "Lockless and fearless - that’s how we roll in userspace." }; + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); + + /* Want overall test to take to wraparound at least twice. 
*/ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -230,8 +236,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -468,7 +473,7 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 7/7] pcapng: improve performance of timestamping 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger ` (5 preceding siblings ...) 2026-01-12 4:50 ` [PATCH v3 6/7] test: increase gap " Stephen Hemminger @ 2026-01-12 4:50 ` Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-12 4:50 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to pcapng scaled value (ns). This logic was derived from the math used by Linux kernel virtual system call with help from AI. Also fix a typo. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 63 ++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index f53e6dfecd..b5333aca7a 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -41,8 +41,14 @@ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct pcapng_time_conv { + uint64_t tsc_base; /* TSC when started */ + uint64_t ns_base; /* ns since 1/1/1970 when initialized */ + uint64_t mult; /* scaling factor relative to TSC hz */ + uint32_t shift; /* shift for scaling (24) */ + uint64_t mask; /* mask of bits used (56) */ + } tc; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -98,21 +104,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif +/* Initialize time conversion based on logic similar to rte_cyclecounter */ +static void +pcapng_timestamp_init(struct pcapng_time_conv *tc) +{ + struct timespec ts; + uint64_t cycles = rte_get_tsc_cycles(); + + /* record start time in ns 
since 1/1/1970 */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Compute baseline TSC which occurred during clock_gettime */ + tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; + + /* Set conversion factors for reasonable precision with no overflow */ + uint64_t tsc_hz = rte_get_tsc_hz(); + tc->shift = 24; + tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz; + tc->mask = RTE_BIT64(56) - 1; +} + /* Convert from TSC (CPU cycles) to nanoseconds */ static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); - - delta = cycles - self->tsc_base; + /* Compute TSC delta with mask to avoid wraparound */ + uint64_t delta = (cycles - tc->tsc_base) & tc->mask; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + /* Convert TSC delta to nanoseconds (no division) */ + uint64_t ns_delta = (delta * tc->mult) >> tc->shift; - return secs * NS_PER_S + ns + self->offset_ns; + return tc->ns_base + ns_delta; } /* length of option including padding */ @@ -326,7 +349,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); - /* clone block_length after optionsa */ + /* clone block_length after options */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); /* remember the file index */ @@ -348,7 +371,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->tc.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -402,7 +425,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = 
self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles()); hdr->timestamp_hi = sample_time >> 32; hdr->timestamp_lo = (uint32_t)sample_time; @@ -686,7 +709,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, /* adjust timestamp recorded in packet */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = pcapng_timestamp(&self->tc, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -732,8 +755,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || (hardware && strlen(hardware) > PCAPNG_STR_MAX) || @@ -752,11 +773,7 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + pcapng_timestamp_init(&self->tc); for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 0/7] pcapng: fixes and improvements 2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 1/7] pcapng: add length checks to string arguments Stephen Hemminger ` (6 more replies) 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger 4 siblings, 7 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger This series addresses several issues in the pcapng library and improves test coverage. Several of these patches were submitted previously in other series but have been more thoroughly tested and passed AI review. Bug Fixes: Patches 1-3 fix potential issues with string handling in the pcapng administrative APIs. The pcapng file format uses 16-bit length fields for option data, but the library did not validate input string lengths, and used fixed-size stack buffers that could be overrun by large comments or metadata strings. Patch 1 adds length validation against the format's 16-bit limit Patch 2 replaces fixed stack buffers with dynamic allocation Patch 3 handles the case where comment options exceed available mbuf tailroom by chaining an additional mbuf segment Test Improvements: Patches 4-6 improve the pcapng unit test coverage: Patch 4 exercises the comment option code path with varied test strings Patch 5 varies packet sizes to exercise more of the copy logic Patch 6 ensures the test runs long enough to verify 32-bit TSC wraparound handling Performance: Patch 7 optimizes timestamp conversion by replacing division operations with shift-multiply arithmetic, following the approach used by the Linux kernel vDSO.
This eliminates expensive divides from the packet capture hot path. v4 - add one longer comment, and handle the case where clang wants parentheses for multi-line string constants. Stephen Hemminger (7): pcapng: add length checks to string arguments pcapng: use malloc instead of fixed buffer size pcapng: add additional mbuf if space required on copy test: add more tests for comments in pcapng test: vary size of packets in pcapng test test: increase gap in pcapng test pcapng: improve performance of timestamping app/test/test_pcapng.c | 142 ++++++++++++++++++++++++++++---------- lib/pcapng/rte_pcapng.c | 147 +++++++++++++++++++++++++++++----------- lib/pcapng/rte_pcapng.h | 8 ++- 3 files changed, 219 insertions(+), 78 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v4 1/7] pcapng: add length checks to string arguments 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger ` (5 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan The pcapng file format has a maximum possible string length of 16 bits since information is recorded as type, value, length. The API should check these lengths before possible memory allocation or overwrite failures. Update Doxygen comments to include return value. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 31 ++++++++++++++++++++++++++++--- lib/pcapng/rte_pcapng.h | 8 +++++++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 21bc94cea1..863706a365 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,6 +34,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull +/* upper bound for strings in pcapng option data */ +#define PCAPNG_STR_MAX UINT16_MAX + /* upper bound for section, stats and interface blocks (in uint32_t) */ #define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) @@ -218,9 +221,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; + int ret; - if (rte_eth_dev_info_get(port, &dev_info) < 0) - return -1; + ret = rte_eth_dev_info_get(port, &dev_info); + if (ret < 0) + return ret; /* make something like an interface name */ if (ifname == NULL) { @@ -230,8 +235,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); ifname = ifname_buf; } + } else if (strlen(ifname) > 
PCAPNG_STR_MAX) { + return -EINVAL; } + if ((ifdescr && strlen(ifdescr) > PCAPNG_STR_MAX) || + (filter && strlen(filter) > PCAPNG_STR_MAX)) + return -EINVAL; + /* make a useful device hardware string */ dev = dev_info.device; if (dev) @@ -337,6 +348,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return -1; + optlen = 0; if (ifrecv != UINT64_MAX) @@ -489,6 +503,9 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, #ifdef RTE_LIBRTE_ETHDEV_DEBUG RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return NULL; #endif orig_len = rte_pktmbuf_pkt_len(md); @@ -693,8 +710,16 @@ rte_pcapng_fdopen(int fd, struct timespec ts; uint64_t cycles; + if ((osname && strlen(osname) > PCAPNG_STR_MAX) || + (hardware && strlen(hardware) > PCAPNG_STR_MAX) || + (appname && strlen(appname) > PCAPNG_STR_MAX) || + (comment && strlen(comment) > PCAPNG_STR_MAX)) { + rte_errno = ENAMETOOLONG; + return NULL; + } + self = malloc(sizeof(*self)); - if (!self) { + if (self == NULL) { rte_errno = ENOMEM; return NULL; } diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index de1bf953e9..4f085f5c86 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -89,6 +89,12 @@ rte_pcapng_close(rte_pcapng_t *self); * Interfaces must be added to the output file after opening * and before any packet record. All ports used in packet capture * must be added. + * + * @return + * - (0) if successful. + * - (-ENOTSUP) if support for dev_infos_get() does not exist for the device. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. */ int rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, @@ -192,7 +198,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. 
* @return - * number of bytes written to file, -1 on failure to write file + * number of bytes written to file, -1 on failure to write file or memory allocation failure. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 2/7] pcapng: use malloc instead of fixed buffer size 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 1/7] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 3/7] pcapng: add additional mbuf if space required on copy Stephen Hemminger ` (4 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella The administrative APIs accept comments and other meta data as strings. This leads to possibility of very large strings which can overrun the use of fixed size buffers. Instead, use malloc to allocate a buffer of the necessary size and handle potential allocation failures. Bugzilla ID: 1820 Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 863706a365..c2635d8b03 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,9 +37,6 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -148,7 +145,7 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; len = sizeof(*hdr); @@ -165,8 +162,11 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); 
+ if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_section_header *)buf; *hdr = (struct pcapng_section_header) { @@ -199,7 +199,9 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ssize_t ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* Write an interface block for a DPDK port */ @@ -217,7 +219,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -279,8 +281,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -327,7 +332,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, /* remember the file index */ self->port_index[port] = self->ports++; - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* @@ -344,7 +351,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -367,8 +374,11 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_statistics *)buf; opt = (struct pcapng_option *)(hdr + 1); @@ -399,7 +409,9 @@ 
rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, /* clone block_length after option */ memcpy(opt, &len, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ssize_t ret = write(self->outfd, buf, len); + free(buf); + return ret; } RTE_EXPORT_SYMBOL(rte_pcapng_mbuf_size) -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 3/7] pcapng: add additional mbuf if space required on copy 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 1/7] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 4/7] test: add more tests for comments in pcapng Stephen Hemminger ` (3 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Amit Prakash Shukla If comment is passed to rte_pcapng_copy(), additional space may be needed at the end of the mbuf. To handle this case generate a segmented mbuf with additional space for the options. Fixes: c1abd1e93dbd ("pcapng: support comment in enhanced packet block") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index c2635d8b03..f53e6dfecd 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -568,11 +568,24 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strlen(comment)); - /* reserve trailing options and block length */ + /* + * Try to put options at the end of this mbuf. + * If not use an mbuf chain. 
+ */ opt = (struct pcapng_option *) rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; + + opt = (struct pcapng_option *)rte_pktmbuf_append(ml, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL || rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; + } + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 4/7] test: add more tests for comments in pcapng 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger ` (2 preceding siblings ...) 2026-01-13 0:51 ` [PATCH v4 3/7] pcapng: add additional mbuf if space required on copy Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 5/7] test: vary size of packets in pcapng test Stephen Hemminger ` (2 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Add some more cases where comment is set in pcapng file. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index bcf99724fa..09a963c676 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -125,8 +125,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", - MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -149,6 +148,22 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) unsigned int burst_size; unsigned int count; ssize_t len; + /* These are some silly comments to test various lengths and alignments sprinkle into the file */ + static const char * const examples[] = { + "Lockless and fearless - that’s how we roll in userspace.", + "Memory pool deep / Mbufs swim in lockless rings / Zero copy dreams," + "Poll mode driver waits / No interrupts disturb its zen / Busy loop finds peace," + "Memory barriers / rte_atomic_thread_fence() / Guards our shared state", + "Hugepages so vast / Two megabytes of glory / TLB misses weep", + "Packets flow like streams / 
Through the graph node pipeline / Iterate in place", + + /* Long one to make sure we can do > 256 characters */ + ("Dear future maintainer: I am sorry. This packet was captured at 3 AM while " + "debugging a priority flow control issue that turned out to be a loose cable. " + "The rte_eth_tx_burst() call you see here has been cargo-culted through four " + "generations of example code. The magic number 32 is not documented because " + "nobody remembers why. Trust the process."), + }; /* make a dummy packet */ mbuf1_prepare(&mbfs, pkt_len); @@ -162,9 +177,14 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put comment on occasional packets */ + if ((count + i) % 41 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -386,7 +406,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,6 +433,13 @@ test_write_packets(void) goto fail; } + /* write a statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + count = fill_pcapng_file(pcapng, TOTAL_PACKETS); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 5/7] test: vary size of packets in pcapng test 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger ` (3 preceding siblings ...) 2026-01-13 0:51 ` [PATCH v4 4/7] test: add more tests for comments in pcapng Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 6/7] test: increase gap " Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 7/7] pcapng: improve performance of timestamping Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan In order to exercise more logic in pcapng, vary the size of the packets. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 94 +++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 09a963c676..f1405cfa7c 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -28,10 +28,9 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 #define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +40,36 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! 
*/ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + + while (n < len - 1) { + char ch = FILL_START + (n % FILL_LINE_LENGTH) % FILL_RANGE; + for (unsigned int i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch == FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + } +} +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +79,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t *)rte_pktmbuf_append(mb, data_len); - - for (i = 0; i != data_len; i++) - db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +102,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); + + fill_mbuf(&dm->mb[1]); + rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + 
rte_mbuf_sanity_check(&dm->mb[1], 0); +} - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + dm->mb[1].data_len = len; + dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); + len += sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -125,7 +158,8 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", - MAX_BURST * 32, 0, 0, rte_pcapng_mbuf_size(pkt_len), + MAX_BURST * 32, 0, 0, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -165,8 +199,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) "nobody remembers why. 
Trust the process."), }; - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; for (count = 0; count < num_packets; count += burst_size) { @@ -183,6 +216,9 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) if ((count + i) % 41 == 0) comment = examples[rte_rand_max(RTE_DIM(examples))]; + /* Vary the size of the packets */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); + mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 6/7] test: increase gap in pcapng test 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger ` (4 preceding siblings ...) 2026-01-13 0:51 ` [PATCH v4 5/7] test: vary size of packets in pcapng test Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 7/7] pcapng: improve performance of timestamping Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Want to make sure that test takes long enough that 32 bit counter wraps around. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index f1405cfa7c..8a311c92de 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -27,7 +27,6 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 -#define MAX_GAP_US 100000 #define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; @@ -175,7 +174,7 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; @@ -198,11 +197,18 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) "generations of example code. The magic number 32 is not documented because " "nobody remembers why. Trust the process."), }; + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); + + /* Want overall test to take to wraparound at least twice. 
*/ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -238,8 +244,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -476,7 +481,7 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v4 7/7] pcapng: improve performance of timestamping 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger ` (5 preceding siblings ...) 2026-01-13 0:51 ` [PATCH v4 6/7] test: increase gap " Stephen Hemminger @ 2026-01-13 0:51 ` Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-13 0:51 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to pcapng scaled value (ns). This logic was derived from the math used by Linux kernel virtual system call with help from AI. Also fix a typo. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 63 ++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index f53e6dfecd..b5333aca7a 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -41,8 +41,14 @@ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct pcapng_time_conv { + uint64_t tsc_base; /* TSC when started */ + uint64_t ns_base; /* ns since 1/1/1970 when initialized */ + uint64_t mult; /* scaling factor relative to TSC hz */ + uint32_t shift; /* shift for scaling (24) */ + uint64_t mask; /* mask of bits used (56) */ + } tc; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -98,21 +104,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif +/* Initialize time conversion based on logic similar to rte_cyclecounter */ +static void +pcapng_timestamp_init(struct pcapng_time_conv *tc) +{ + struct timespec ts; + uint64_t cycles = rte_get_tsc_cycles(); + + /* record start time in ns 
since 1/1/1970 */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Compute baseline TSC which occurred during clock_gettime */ + tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; + + /* Set conversion factors for reasonable precision with no overflow */ + uint64_t tsc_hz = rte_get_tsc_hz(); + tc->shift = 24; + tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz; + tc->mask = RTE_BIT64(56) - 1; +} + /* Convert from TSC (CPU cycles) to nanoseconds */ static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); - - delta = cycles - self->tsc_base; + /* Compute TSC delta with mask to avoid wraparound */ + uint64_t delta = (cycles - tc->tsc_base) & tc->mask; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + /* Convert TSC delta to nanoseconds (no division) */ + uint64_t ns_delta = (delta * tc->mult) >> tc->shift; - return secs * NS_PER_S + ns + self->offset_ns; + return tc->ns_base + ns_delta; } /* length of option including padding */ @@ -326,7 +349,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); - /* clone block_length after optionsa */ + /* clone block_length after options */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); /* remember the file index */ @@ -348,7 +371,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->tc.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -402,7 +425,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = 
self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles()); hdr->timestamp_hi = sample_time >> 32; hdr->timestamp_lo = (uint32_t)sample_time; @@ -686,7 +709,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, /* adjust timestamp recorded in packet */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = pcapng_timestamp(&self->tc, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -732,8 +755,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || (hardware && strlen(hardware) > PCAPNG_STR_MAX) || @@ -752,11 +773,7 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + pcapng_timestamp_init(&self->tc); for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v5 0/5] pcapng: fixes and improvements 2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger ` (2 preceding siblings ...) 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-19 18:18 ` Stephen Hemminger 2026-01-19 18:18 ` [PATCH v5 1/5] pcapng: add length checks to string arguments Stephen Hemminger ` (4 more replies) 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger 4 siblings, 5 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-19 18:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger This series addresses several issues in the pcapng library and improves test coverage. Bug Fixes: Patches 1-3 fix potential issues with string handling in the pcapng administrative APIs. The pcapng file format uses 16-bit length fields for option data, but the library did not validate input string lengths, and used fixed-size stack buffers that could be overrun by large comments or metadata strings. Patch 1 adds length validation against the format's 16-bit limit. Patch 2 replaces fixed stack buffers with dynamic allocation. Patch 3 handles the case where comment options exceed available mbuf tailroom by chaining an additional mbuf segment. Performance: Patch 4 optimizes timestamp conversion by replacing division operations with shift-multiply arithmetic, following the approach used by the Linux kernel vDSO. This eliminates expensive divides from the packet capture hot path. Test Improvements: Patch 5 improves the pcapng unit test coverage by exercising comment options with varied test strings, varying packet sizes to test more copy logic paths, and ensuring the test runs long enough to verify 32-bit TSC wraparound handling.
v5: - Fix missing commas in test string array (AI review feedback) - Reorder patches to put library changes before test changes v4: - Handle case where clang wants parentheses around multi-line string constants - Add one longer comment test case v3: - More thorough testing - Passed AI review Stephen Hemminger (5): pcapng: add length checks to string arguments pcapng: use malloc instead of fixed buffer size pcapng: add additional mbuf if space required on copy pcapng: improve performance of timestamping test: add more tests for pcapng app/test/test_pcapng.c | 145 ++++++++++++++++++++++++++++++---------- lib/pcapng/rte_pcapng.c | 145 +++++++++++++++++++++++++++++----------- lib/pcapng/rte_pcapng.h | 8 ++- 3 files changed, 220 insertions(+), 78 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v5 1/5] pcapng: add length checks to string arguments 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-19 18:18 ` Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 2/5] pcapng: use malloc instead of fixed buffer size Stephen Hemminger ` (3 subsequent siblings) 4 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-19 18:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan The pcapng file format has a maximum possible string length of 16 bits since information is recorded as type, value, length. The API should check these lengths before possible memory allocation or overwrite failures. Update Doxygen comments to include return value. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 33 +++++++++++++++++++++++++++++---- lib/pcapng/rte_pcapng.h | 8 +++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 21bc94cea1..ac46e43b38 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,6 +34,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull +/* upper bound for strings in pcapng option data */ +#define PCAPNG_STR_MAX UINT16_MAX + /* upper bound for section, stats and interface blocks (in uint32_t) */ #define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) @@ -218,9 +221,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; + int ret; - if (rte_eth_dev_info_get(port, &dev_info) < 0) - return -1; + ret = rte_eth_dev_info_get(port, &dev_info); + if (ret < 0) + return ret; /* make something like an interface name */ if (ifname == NULL) { @@ -230,8 +235,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); ifname = ifname_buf; } + } else if 
(strlen(ifname) > PCAPNG_STR_MAX) { + return -EINVAL; } + if ((ifdescr && strlen(ifdescr) > PCAPNG_STR_MAX) || + (filter && strlen(filter) > PCAPNG_STR_MAX)) + return -EINVAL; + /* make a useful device hardware string */ dev = dev_info.device; if (dev) @@ -269,7 +280,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += sizeof(uint32_t); if (len > sizeof(buf)) - return -1; + return -EINVAL; hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -337,6 +348,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return -1; + optlen = 0; if (ifrecv != UINT64_MAX) @@ -489,6 +503,9 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, #ifdef RTE_LIBRTE_ETHDEV_DEBUG RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return NULL; #endif orig_len = rte_pktmbuf_pkt_len(md); @@ -693,8 +710,16 @@ rte_pcapng_fdopen(int fd, struct timespec ts; uint64_t cycles; + if ((osname && strlen(osname) > PCAPNG_STR_MAX) || + (hardware && strlen(hardware) > PCAPNG_STR_MAX) || + (appname && strlen(appname) > PCAPNG_STR_MAX) || + (comment && strlen(comment) > PCAPNG_STR_MAX)) { + rte_errno = ENAMETOOLONG; + return NULL; + } + self = malloc(sizeof(*self)); - if (!self) { + if (self == NULL) { rte_errno = ENOMEM; return NULL; } diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index de1bf953e9..4f085f5c86 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -89,6 +89,12 @@ rte_pcapng_close(rte_pcapng_t *self); * Interfaces must be added to the output file after opening * and before any packet record. All ports used in packet capture * must be added. + * + * @return + * - (0) if successful. + * - (-ENOTSUP) if support for dev_infos_get() does not exist for the device. + * - (-ENODEV) if *port_id* invalid. 
+ * - (-EINVAL) if bad parameter. */ int rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, @@ -192,7 +198,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. * @return - * number of bytes written to file, -1 on failure to write file + * number of bytes written to file, -1 on failure to write file or memory allocation failure. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v5 2/5] pcapng: use malloc instead of fixed buffer size 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-19 18:18 ` [PATCH v5 1/5] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-01-19 18:19 ` Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 3/5] pcapng: add additional mbuf if space required on copy Stephen Hemminger ` (2 subsequent siblings) 4 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-19 18:19 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella The administrative APIs accept comments and other meta data as strings. This leads to possibility of very large strings which can overrun the use of fixed size buffers. Instead, use malloc to allocate a buffer of the necessary size and handle potential allocation failures. Bugzilla ID: 1820 Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index ac46e43b38..49fac60a3d 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,9 +37,6 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -148,7 +145,7 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; len = sizeof(*hdr); @@ -165,8 +162,11 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = 
malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_section_header *)buf; *hdr = (struct pcapng_section_header) { @@ -199,7 +199,9 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ssize_t ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* Write an interface block for a DPDK port */ @@ -217,7 +219,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -279,8 +281,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) - return -EINVAL; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -327,7 +330,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, /* remember the file index */ self->port_index[port] = self->ports++; - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* @@ -344,7 +349,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -367,8 +372,11 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_statistics *)buf; opt = (struct pcapng_option *)(hdr + 1); @@ -399,7 
+407,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, /* clone block_length after option */ memcpy(opt, &len, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ssize_t ret = write(self->outfd, buf, len); + free(buf); + return ret; } RTE_EXPORT_SYMBOL(rte_pcapng_mbuf_size) -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v5 3/5] pcapng: add additional mbuf if space required on copy 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-19 18:18 ` [PATCH v5 1/5] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 2/5] pcapng: use malloc instead of fixed buffer size Stephen Hemminger @ 2026-01-19 18:19 ` Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 4/5] pcapng: improve performance of timestamping Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 5/5] test: add more tests for pcapng Stephen Hemminger 4 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-19 18:19 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Amit Prakash Shukla If a comment is passed to rte_pcapng_copy(), additional space may be needed at the end of the mbuf. To handle this case, generate a segmented mbuf with additional space for the options. Fixes: c1abd1e93dbd ("pcapng: support comment in enhanced packet block") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 49fac60a3d..7deef7639f 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -566,11 +566,24 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strlen(comment)); - /* reserve trailing options and block length */ + /* + * Try to put options at the end of this mbuf. + * If not use an mbuf chain.
+ */ opt = (struct pcapng_option *) rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; + + opt = (struct pcapng_option *)rte_pktmbuf_append(ml, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL || rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; + } + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v5 4/5] pcapng: improve performance of timestamping 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger ` (2 preceding siblings ...) 2026-01-19 18:19 ` [PATCH v5 3/5] pcapng: add additional mbuf if space required on copy Stephen Hemminger @ 2026-01-19 18:19 ` Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 5/5] test: add more tests for pcapng Stephen Hemminger 4 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-19 18:19 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to pcapng scaled value (ns). This logic was derived from the math used by Linux kernel virtual system call with help from AI. Also fix a typo. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 61 ++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 7deef7639f..8572fe9e7b 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -41,8 +41,14 @@ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct pcapng_time_conv { + uint64_t tsc_base; /* TSC when started */ + uint64_t ns_base; /* ns since 1/1/1970 when initialized */ + uint64_t mult; /* scaling factor relative to TSC hz */ + uint32_t shift; /* shift for scaling (24) */ + uint64_t mask; /* mask of bits used (56) */ + } tc; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -98,21 +104,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif +/* Initialize time conversion based on logic similar to rte_cyclecounter */ +static void +pcapng_timestamp_init(struct 
pcapng_time_conv *tc) +{ + struct timespec ts; + uint64_t cycles = rte_get_tsc_cycles(); + + /* record start time in ns since 1/1/1970 */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Compute baseline TSC which occurred during clock_gettime */ + tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; + + /* Set conversion factors for reasonable precision with no overflow */ + uint64_t tsc_hz = rte_get_tsc_hz(); + tc->shift = 24; + tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz; + tc->mask = RTE_BIT64(56) - 1; +} + /* Convert from TSC (CPU cycles) to nanoseconds */ static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); - - delta = cycles - self->tsc_base; + /* Compute TSC delta with mask to avoid wraparound */ + uint64_t delta = (cycles - tc->tsc_base) & tc->mask; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + /* Convert TSC delta to nanoseconds (no division) */ + uint64_t ns_delta = (delta * tc->mult) >> tc->shift; - return secs * NS_PER_S + ns + self->offset_ns; + return tc->ns_base + ns_delta; } /* length of option including padding */ @@ -346,7 +369,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->tc.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -400,7 +423,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles()); hdr->timestamp_hi = sample_time >> 32; 
hdr->timestamp_lo = (uint32_t)sample_time; @@ -684,7 +707,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, /* adjust timestamp recorded in packet */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = pcapng_timestamp(&self->tc, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -730,8 +753,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || (hardware && strlen(hardware) > PCAPNG_STR_MAX) || @@ -750,11 +771,7 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + pcapng_timestamp_init(&self->tc); for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v5 5/5] test: add more tests for pcapng 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger ` (3 preceding siblings ...) 2026-01-19 18:19 ` [PATCH v5 4/5] pcapng: improve performance of timestamping Stephen Hemminger @ 2026-01-19 18:19 ` Stephen Hemminger 4 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-19 18:19 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Add some more cases where comment is set in pcapng file. In order to exercise more logic in pcapng, vary the size of the packets. Want to make sure that test takes long enough that 32 bit counter wraps around. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 145 ++++++++++++++++++++++++++++++----------- 1 file changed, 108 insertions(+), 37 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 5d362ec70e..283a6496d4 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -27,11 +27,9 @@ #define TOTAL_PACKETS 4096 #define MAX_BURST 64 -#define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +39,36 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! 
*/ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + + while (n < len - 1) { + char ch = FILL_START + (n % FILL_LINE_LENGTH) % FILL_RANGE; + for (unsigned int i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch == FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + } +} +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +78,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t *)rte_pktmbuf_append(mb, data_len); - - for (i = 0; i != data_len; i++) - db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +101,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); + fill_mbuf(&dm->mb[1]); + 
rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); +} - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + dm->mb[1].data_len = len; + dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; + + len += sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); + + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -126,7 +158,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -142,19 +174,44 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; unsigned int burst_size; unsigned int count; ssize_t len; + /* + * These are some silly comments to test various 
lengths and alignments sprinkle + * into the file. You can see these comments by using the dumpcap program on the file + */ + static const char * const examples[] = { + "Lockless and fearless - that’s how we roll in userspace.", + "Memory pool deep / Mbufs swim in lockless rings / Zero copy dreams,", + "Poll mode driver waits / No interrupts disturb its zen / Busy loop finds peace,", + "Memory barriers / rte_atomic_thread_fence() / Guards our shared state", + "Hugepages so vast / Two megabytes of glory / TLB misses weep", + "Packets flow like streams / Through the graph node pipeline / Iterate in place", + + /* Long one to make sure we can do > 256 characters */ + ("Dear future maintainer: I am sorry. This packet was captured at 3 AM while " + "debugging a priority flow control issue that turned out to be a loose cable. " + "The rte_eth_tx_burst() call you see here has been cargo-culted through four " + "generations of example code. The magic number 32 is not documented because " + "nobody remembers why. Trust the process."), + }; + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + /* Want overall test to take to wraparound at least twice.
*/ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); + + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -162,9 +219,17 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put comment on occasional packets */ + if ((count + i) % 41 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; + + /* Vary the size of the packets */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -182,8 +247,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -386,7 +450,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,7 +477,14 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + /* write a statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v6 0/5] pcapng: fixes and improvements 2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger ` (3 preceding siblings ...) 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-26 21:04 ` Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 1/5] pcapng: add length checks to string arguments Stephen Hemminger ` (6 more replies) 4 siblings, 7 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-26 21:04 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger This series addresses several issues in the pcapng library and improves test coverage. Bug Fixes: Patches 1-3 fix potential issues with string handling in the pcapng administrative APIs. The pcapng file format uses 16-bit length fields for option data, but the library did not validate input string lengths, and used fixed-size stack buffers that could be overrun by large comments or metadata strings. Patch 1 adds length validation against the format's 16-bit limit Patch 2 replaces fixed stack buffers with dynamic allocation Patch 3 handles the case where comment options exceed available mbuf tailroom by chaining an additional mbuf segment Performance: Patch 4 optimizes timestamp conversion by replacing division operations with shift-multiply arithmetic, following the approach used by the Linux kernel vDSO. This eliminates expensive divides from the packet capture hot path. Test Improvements: Patch 5 improves the pcapng unit test coverage by exercising comment options with varied test strings, varying packet sizes to test more copy logic paths, and ensuring the test runs long enough to verify 32-bit TSC wraparound handling. 
v6: - AI review feedback - Simplify return value documentation for rte_pcapng_add_interface() - Add explanatory comment for debug-only validation in rte_pcapng_copy() - Adjust test comment frequency to 1% of packets for clearer intent - Add comment clarifying that zero-sized packets are acceptable in test v5: - Fix missing commas in test string array (AI review feedback) - Reorder patches to put library changes before test changes v4: - Handle case where clang wants parentheses for multi-line string constants - Add one longer comment test case v3: - More thorough testing - Passed AI review Stephen Hemminger (5): pcapng: add length checks to string arguments pcapng: use malloc instead of fixed buffer size pcapng: chain additional mbuf when comment exceeds tailroom pcapng: improve performance of timestamping test/pcapng: add tests for comments app/test/test_pcapng.c | 147 +++++++++++++++++------ doc/guides/rel_notes/release_26_03.rst | 3 + lib/pcapng/rte_pcapng.c | 160 ++++++++++++++++++------- lib/pcapng/rte_pcapng.h | 7 +- 4 files changed, 234 insertions(+), 83 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v6 1/5] pcapng: add length checks to string arguments 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger @ 2026-01-26 21:04 ` Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 2/5] pcapng: use malloc instead of fixed buffer size Stephen Hemminger ` (5 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-26 21:04 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan The pcapng file format uses a 16-bit length field in the option TLV (Type-Length-Value) encoding, limiting strings to UINT16_MAX bytes. Add validation for string arguments to prevent silent truncation or buffer issues when callers pass excessively long strings. Also update the Doxygen comments for rte_pcapng_add_interface() and rte_pcapng_write_stats() to document return values. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/guides/rel_notes/release_26_03.rst | 3 ++ lib/pcapng/rte_pcapng.c | 39 ++++++++++++++++++++++---- lib/pcapng/rte_pcapng.h | 6 +++- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst index 15dabee7a1..0909be140e 100644 --- a/doc/guides/rel_notes/release_26_03.rst +++ b/doc/guides/rel_notes/release_26_03.rst @@ -84,6 +84,9 @@ API Changes Also, make sure to start the actual text at the margin. ======================================================= +* pcapng: The maximum length of option strings is now validated. + The pcapng file format only allows up to 65535 characters.
+ ABI Changes ----------- diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 2cc9e2040d..61d37b4462 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,6 +34,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull +/* upper bound for strings in pcapng option data */ +#define PCAPNG_STR_MAX UINT16_MAX + /* upper bound for section, stats and interface blocks (in uint32_t) */ #define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) @@ -218,9 +221,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; + int ret; - if (rte_eth_dev_info_get(port, &dev_info) < 0) - return -1; + ret = rte_eth_dev_info_get(port, &dev_info); + if (ret < 0) + return ret; /* make something like an interface name */ if (ifname == NULL) { @@ -230,8 +235,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); ifname = ifname_buf; } + } else if (strlen(ifname) > PCAPNG_STR_MAX) { + return -EINVAL; } + if ((ifdescr && strlen(ifdescr) > PCAPNG_STR_MAX) || + (filter && strlen(filter) > PCAPNG_STR_MAX)) + return -EINVAL; + /* make a useful device hardware string */ dev = dev_info.device; if (dev) @@ -269,7 +280,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += sizeof(uint32_t); if (len > sizeof(buf)) - return -1; + return -EINVAL; hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -334,7 +345,10 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint32_t optlen, len; uint32_t buf[PCAPNG_BLKSIZ]; - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return -EINVAL; optlen = 0; @@ -487,7 +501,14 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, bool rss_hash; #ifdef 
RTE_LIBRTE_ETHDEV_DEBUG + /* + * Since this function is used in the fast path for packet capture + * skip argument validation checks unless debug is enabled. + */ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return NULL; #endif orig_len = rte_pktmbuf_pkt_len(md); @@ -692,8 +713,16 @@ rte_pcapng_fdopen(int fd, struct timespec ts; uint64_t cycles; + if ((osname && strlen(osname) > PCAPNG_STR_MAX) || + (hardware && strlen(hardware) > PCAPNG_STR_MAX) || + (appname && strlen(appname) > PCAPNG_STR_MAX) || + (comment && strlen(comment) > PCAPNG_STR_MAX)) { + rte_errno = ENAMETOOLONG; + return NULL; + } + self = malloc(sizeof(*self)); - if (!self) { + if (self == NULL) { rte_errno = ENOMEM; return NULL; } diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index de1bf953e9..d866042b66 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -89,6 +89,10 @@ rte_pcapng_close(rte_pcapng_t *self); * Interfaces must be added to the output file after opening * and before any packet record. All ports used in packet capture * must be added. + * + * @return + * - returns number of bytes written on success, + * or negative errno on failure. */ int rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, @@ -192,7 +196,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. * @return - * number of bytes written to file, -1 on failure to write file + * number of bytes written to file, -1 on failure to write file or memory allocation failure. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v6 2/5] pcapng: use malloc instead of fixed buffer size 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 1/5] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-01-26 21:04 ` Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 3/5] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger ` (4 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-26 21:04 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella The administrative APIs accept comments and other metadata as strings. Since these strings can be arbitrarily long (up to UINT16_MAX bytes), they may overflow the fixed-size stack buffers previously used for block construction. Replace the fixed-size buffers with dynamically allocated memory sized to the actual block length. Return appropriate error codes on allocation failure. Bugzilla ID: 1820 Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 39 +++++++++++++++++++++++++-------------- lib/pcapng/rte_pcapng.h | 3 ++- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 61d37b4462..d5b3a0cd29 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,9 +37,6 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -148,8 +145,9 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; + ssize_t ret; len = 
sizeof(*hdr); if (hw) @@ -165,8 +163,11 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) + buf = malloc(len); + if (buf == NULL) { + errno = ENOMEM; return -1; + } hdr = (struct pcapng_section_header *)buf; *hdr = (struct pcapng_section_header) { @@ -199,7 +200,9 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* Write an interface block for a DPDK port */ @@ -217,7 +220,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -279,8 +282,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) - return -EINVAL; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -326,7 +330,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, /* remember the file index */ self->port_index[port] = self->ports++; - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* @@ -343,7 +349,8 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; + ssize_t ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -366,8 +373,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if (len > 
sizeof(buf)) - return -1; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_statistics *)buf; opt = (struct pcapng_option *)(hdr + 1); @@ -398,7 +406,10 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, /* clone block_length after option */ memcpy(opt, &len, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + + return ret < 0 ? -errno : ret; } RTE_EXPORT_SYMBOL(rte_pcapng_mbuf_size) diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index d866042b66..85abf1d93f 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -196,7 +196,8 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. * @return - * number of bytes written to file, -1 on failure to write file or memory allocation failure. + * number of bytes written to file on success, + * negative errno value on error. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v6 3/5] pcapng: chain additional mbuf when comment exceeds tailroom 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 1/5] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 2/5] pcapng: use malloc instead of fixed buffer size Stephen Hemminger @ 2026-01-26 21:04 ` Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 4/5] pcapng: improve performance of timestamping Stephen Hemminger ` (3 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-26 21:04 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Amit Prakash Shukla When rte_pcapng_copy() is called with a comment, the option data may not fit in the mbuf's remaining tailroom, causing the append to fail and the packet to be dropped. Fix this by allocating and chaining an additional mbuf segment when rte_pktmbuf_append() fails. This allows comments of any length (up to UINT16_MAX) to be attached to captured packets. Fixes: c1abd1e93dbd ("pcapng: support comment in enhanced packet block") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index d5b3a0cd29..b9de51849b 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -570,11 +570,26 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strlen(comment)); - /* reserve trailing options and block length */ - opt = (struct pcapng_option *) - rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + /* + * Try to put options at the end of this mbuf. + * If not extend the mbuf by adding another segment. 
+ */ + opt = (struct pcapng_option *)rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; /* mbuf pool is empty */ + + if (unlikely(rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; /* too many segments in the mbuf */ + } + + opt = (struct pcapng_option *)rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL)) + goto fail; /* additional segment and still no space */ + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v6 4/5] pcapng: improve performance of timestamping 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger ` (2 preceding siblings ...) 2026-01-26 21:04 ` [PATCH v6 3/5] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger @ 2026-01-26 21:04 ` Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 5/5] test/pcapng: add tests for comments Stephen Hemminger ` (2 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-26 21:04 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to pcapng scaled value (ns). This logic was derived from the math used by Linux kernel virtual system call with help from AI. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 61 ++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index b9de51849b..27ae5e1fd6 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -41,8 +41,14 @@ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct pcapng_time_conv { + uint64_t tsc_base; /* TSC when started */ + uint64_t ns_base; /* ns since 1/1/1970 when initialized */ + uint64_t mult; /* scaling factor relative to TSC hz */ + uint32_t shift; /* shift for scaling (24) */ + uint64_t mask; /* mask of bits used (56) */ + } tc; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -98,21 +104,38 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif +/* Initialize time conversion based on logic similar to rte_cyclecounter */ +static void 
+pcapng_timestamp_init(struct pcapng_time_conv *tc) +{ + struct timespec ts; + uint64_t cycles = rte_get_tsc_cycles(); + + /* record start time in ns since 1/1/1970 */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Compute baseline TSC which occurred during clock_gettime */ + tc->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + tc->ns_base = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; + + /* Set conversion factors for reasonable precision with no overflow */ + uint64_t tsc_hz = rte_get_tsc_hz(); + tc->shift = 24; + tc->mult = ((uint64_t)1000000000ULL << tc->shift) / tsc_hz; + tc->mask = RTE_BIT64(56) - 1; +} + /* Convert from TSC (CPU cycles) to nanoseconds */ static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +pcapng_timestamp(const struct pcapng_time_conv *tc, uint64_t cycles) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); - - delta = cycles - self->tsc_base; + /* Compute TSC delta with mask to avoid wraparound */ + uint64_t delta = (cycles - tc->tsc_base) & tc->mask; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + /* Convert TSC delta to nanoseconds (no division) */ + uint64_t ns_delta = (delta * tc->mult) >> tc->shift; - return secs * NS_PER_S + ns + self->offset_ns; + return tc->ns_base + ns_delta; } /* length of option including padding */ @@ -346,7 +369,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->tc.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -399,7 +422,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = pcapng_timestamp(&self->tc, rte_get_tsc_cycles()); hdr->timestamp_hi = 
sample_time >> 32; hdr->timestamp_lo = (uint32_t)sample_time; @@ -690,7 +713,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, /* adjust timestamp recorded in packet */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = pcapng_timestamp(&self->tc, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -736,8 +759,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || (hardware && strlen(hardware) > PCAPNG_STR_MAX) || @@ -756,11 +777,7 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + pcapng_timestamp_init(&self->tc); for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v6 5/5] test/pcapng: add tests for comments 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger ` (3 preceding siblings ...) 2026-01-26 21:04 ` [PATCH v6 4/5] pcapng: improve performance of timestamping Stephen Hemminger @ 2026-01-26 21:04 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-01-26 21:04 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Expand pcapng test coverage to exercise comment handling and additional code paths: * Add packet comments of varying lengths, including strings longer than 256 bytes, to verify option block encoding. * Vary packet sizes randomly to test different capture lengths. * Calculate inter-packet delays to ensure timestamp values wrap around 32 bits at least twice during the test. * Add a statistics block write before packet capture. * Use RFC 864 chargen pattern for predictable fill data. Increase total packets from 4096 to 10000 and refactor mbuf preparation for more flexible packet construction. 
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 147 ++++++++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 38 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 5d362ec70e..e1f722791c 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -25,13 +25,11 @@ #define PCAPNG_TEST_DEBUG 0 -#define TOTAL_PACKETS 4096 +#define TOTAL_PACKETS 10000 #define MAX_BURST 64 -#define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +39,36 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! */ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + + while (n < len - 1) { + char ch = FILL_START + (n % FILL_LINE_LENGTH) % FILL_RANGE; + for (unsigned int i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch == FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + } +} +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +78,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t *)rte_pktmbuf_append(mb, data_len); - - for (i = 0; i != data_len; i++) - 
db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +101,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); + fill_mbuf(&dm->mb[1]); + rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); +} - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + dm->mb[1].data_len = len; + dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; + + len += 
sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); + + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -126,7 +158,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -142,19 +174,44 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; unsigned int burst_size; unsigned int count; ssize_t len; + /* + * These are some silly comments to test various lengths and alignments sprinkle + * into the file. You can see these comments by using the dumpcap program on the file + */ + static const char * const examples[] = { + "Lockless and fearless - that’s how we roll in userspace.", + "Memory pool deep / Mbufs swim in lockless rings / Zero copy dreams,", + "Poll mode driver waits / No interrupts disturb its zen / Busy loop finds peace,", + "Memory barriers / rte_atomic_thread_fence() / Guards our shared state", + "Hugepages so vast / Two megabytes of glory / TLB misses weep", + "Packets flow like streams / Through the graph node pipeline / Iterate in place", + + /* Long one to make sure we can do > 256 characters */ + ("Dear future maintainer: I am sorry. This packet was captured at 3 AM while " + "debugging a priority flow control issue that turned out to be a loose cable. " + "The rte_eth_tx_burst() call you see here has been cargo-culted through four " + "generations of example code. The magic number 32 is not documented because " + "nobody remembers why. 
Trust the process."), + }; + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + /* Want overall test to take to wraparound at least twice. */ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); + + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -162,9 +219,17 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put randomized comment on every 100th packet (1%) */ + if (count % 100 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; + + /* Vary the size of the packets, okay to allow 0 sized packet */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -182,8 +247,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -386,7 +450,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,7 +477,14 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + /* write a statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + 
if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v7 0/7] pcapng: fixes and improvements 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger ` (4 preceding siblings ...) 2026-01-26 21:04 ` [PATCH v6 5/5] test/pcapng: add tests for comments Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 1/7] pcapng: add length checks to string arguments Stephen Hemminger ` (6 more replies) 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger 6 siblings, 7 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger This series addresses several issues in the pcapng library, improves test coverage, and updates dumpcap error handling. Patches 1-3 fix string handling in the pcapng administrative APIs. The pcapng format uses 16-bit length fields for option data, but the library did not validate input lengths and used fixed-size stack buffers that could overflow with large strings. Patch 4 optimizes timestamp conversion by replacing per-packet divisions with precomputed reciprocal multiply-shift operations. Patch 5 improves pcapng unit test coverage with varied comment strings, randomized packet sizes, and 32-bit TSC wraparound verification. Patch 6 fixes the test build dependency so the pcapng test is skipped when the null PMD is disabled. Patch 7 improves error reporting in dumpcap for pcapng API failures. 
v7: - Add meson build fix for disabled null PMD - Add dumpcap error reporting improvements - Update timestamp commit message to describe current implementation Stephen Hemminger (7): pcapng: add length checks to string arguments pcapng: use malloc instead of fixed buffer size pcapng: chain additional mbuf when comment exceeds tailroom pcapng: improve performance of timestamping test/pcapng: add tests for comments test/pcapng: skip test if null driver missing dumpcap: improve pcapng error reporting app/dumpcap/main.c | 36 +++-- app/test/meson.build | 2 +- app/test/test_pcapng.c | 151 ++++++++++++++----- doc/guides/rel_notes/release_26_03.rst | 9 ++ lib/pcapng/rte_pcapng.c | 199 +++++++++++++++++++------ lib/pcapng/rte_pcapng.h | 12 +- 6 files changed, 307 insertions(+), 102 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v7 1/7] pcapng: add length checks to string arguments 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger ` (5 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan The pcapng file format uses a 16-bit length field in the option TLV (Type-Length-Value) encoding, limiting strings to UINT16_MAX bytes. Add validation for string arguments to prevent silent truncation or buffer issues when callers pass excessively long strings. Also update the Doxygen comments for rte_pcapng_add_interface() and rte_pcapng_write_stats() to document return values. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/guides/rel_notes/release_26_03.rst | 9 +++++++ lib/pcapng/rte_pcapng.c | 37 +++++++++++++++++++++++--- lib/pcapng/rte_pcapng.h | 11 ++++++-- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst index afdf1af06c..7a99420453 100644 --- a/doc/guides/rel_notes/release_26_03.rst +++ b/doc/guides/rel_notes/release_26_03.rst @@ -140,6 +140,15 @@ API Changes * cfgfile: name must be less than CFG_NAME_LEN and value must be less than CFG_VALUE_LEN. +* **Updated the pcapng library.** + + API functions now do more validation. + + * The length of comment strings is now validated. + Maximum allowable length is 2^16-1 because of pcapng file format. + * Passing an invalid port id now returns ENODEV rather than EINVAL + for consistency with ethdev APIs. 
+ ABI Changes ----------- diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 2cc9e2040d..4ccf833777 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,6 +34,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull +/* upper bound for strings in pcapng option data */ +#define PCAPNG_STR_MAX UINT16_MAX + /* upper bound for section, stats and interface blocks (in uint32_t) */ #define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) @@ -218,9 +221,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; + int ret; - if (rte_eth_dev_info_get(port, &dev_info) < 0) - return -1; + ret = rte_eth_dev_info_get(port, &dev_info); + if (ret < 0) + return ret; /* make something like an interface name */ if (ifname == NULL) { @@ -230,8 +235,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); ifname = ifname_buf; } + } else if (strlen(ifname) > PCAPNG_STR_MAX) { + return -EINVAL; } + if ((ifdescr && strlen(ifdescr) > PCAPNG_STR_MAX) || + (filter && strlen(filter) > PCAPNG_STR_MAX)) + return -EINVAL; + /* make a useful device hardware string */ dev = dev_info.device; if (dev) @@ -269,7 +280,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += sizeof(uint32_t); if (len > sizeof(buf)) - return -1; + return -EINVAL; hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -334,7 +345,10 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint32_t optlen, len; uint32_t buf[PCAPNG_BLKSIZ]; - RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return -EINVAL; optlen = 0; @@ -487,7 +501,14 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, bool rss_hash; #ifdef 
RTE_LIBRTE_ETHDEV_DEBUG + /* + * Since this function is used in the fast path for packet capture + * skip argument validation checks unless debug is enabled. + */ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return NULL; #endif orig_len = rte_pktmbuf_pkt_len(md); @@ -692,6 +713,14 @@ rte_pcapng_fdopen(int fd, struct timespec ts; uint64_t cycles; + if ((osname && strlen(osname) > PCAPNG_STR_MAX) || + (hardware && strlen(hardware) > PCAPNG_STR_MAX) || + (appname && strlen(appname) > PCAPNG_STR_MAX) || + (comment && strlen(comment) > PCAPNG_STR_MAX)) { + rte_errno = EINVAL; + return NULL; + } + self = malloc(sizeof(*self)); if (!self) { rte_errno = ENOMEM; diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index de1bf953e9..2dd438842d 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -89,6 +89,10 @@ rte_pcapng_close(rte_pcapng_t *self); * Interfaces must be added to the output file after opening * and before any packet record. All ports used in packet capture * must be added. + * + * @return + * - returns number of bytes written on success, + * or negative errno on failure. */ int rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, @@ -128,7 +132,7 @@ enum rte_pcapng_direction { * * @return * - The pointer to the new mbuf formatted for pcapng_write - * - NULL if allocation fails. + * - NULL on error such as invalid port or out of memory. */ struct rte_mbuf * rte_pcapng_copy(uint16_t port_id, uint32_t queue, @@ -192,7 +196,10 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. * @return - * number of bytes written to file, -1 on failure to write file + * On success number of bytes written to file, + * -1 on failure to write file (and errno is set) + * - (-ENODEV) if *port_id* is invalid. 
+ * - (-EINVAL) if bad parameter */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v7 2/7] pcapng: use malloc instead of fixed buffer size 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 1/7] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 3/7] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger ` (4 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella The administrative APIs accept comments and other metadata as strings. Since these strings can be arbitrarily long (up to UINT16_MAX bytes), they may overflow the fixed-size stack buffers previously used for block construction. Replace the fixed-size buffers with dynamically allocated memory sized to the actual block length. Return appropriate error codes on allocation failure. Bugzilla ID: 1820 Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 44 ++++++++++++++++++++++++++--------------- lib/pcapng/rte_pcapng.h | 1 + 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 4ccf833777..413e6f1f4a 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,9 +37,6 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -148,8 +145,9 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; + ssize_t ret; len = 
sizeof(*hdr); if (hw) @@ -165,8 +163,9 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) - return -1; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_section_header *)buf; *hdr = (struct pcapng_section_header) { @@ -199,7 +198,9 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret < 0 ? -errno : 0; } /* Write an interface block for a DPDK port */ @@ -217,7 +218,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -279,8 +280,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) - return -EINVAL; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -326,7 +328,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, /* remember the file index */ self->port_index[port] = self->ports++; - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } /* @@ -343,7 +347,8 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; + ssize_t ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -366,8 +371,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if 
(len > sizeof(buf)) - return -1; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_statistics *)buf; opt = (struct pcapng_option *)(hdr + 1); @@ -398,7 +404,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, /* clone block_length after option */ memcpy(opt, &len, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } RTE_EXPORT_SYMBOL(rte_pcapng_mbuf_size) @@ -712,6 +720,7 @@ rte_pcapng_fdopen(int fd, rte_pcapng_t *self; struct timespec ts; uint64_t cycles; + int ret; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || (hardware && strlen(hardware) > PCAPNG_STR_MAX) || @@ -739,8 +748,11 @@ rte_pcapng_fdopen(int fd, for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; - if (pcapng_section_block(self, osname, hardware, appname, comment) < 0) + ret = pcapng_section_block(self, osname, hardware, appname, comment); + if (ret < 0) { + rte_errno = -ret; goto fail; + } return self; fail: diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index 2dd438842d..ff39fa7b49 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -200,6 +200,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * -1 on failure to write file (and errno is set) * - (-ENODEV) if *port_id* is invalid. * - (-EINVAL) if bad parameter + * - (-ENOMEM) could not allocate memory for buffer */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v7 3/7] pcapng: chain additional mbuf when comment exceeds tailroom 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 1/7] pcapng: add length checks to string arguments Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 4/7] pcapng: improve performance of timestamping Stephen Hemminger ` (3 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Amit Prakash Shukla When rte_pcapng_copy() is called with a comment, the option data may not fit in the mbuf's remaining tailroom, causing the append to fail and the packet to be dropped. Fix this by allocating and chaining an additional mbuf segment when rte_pktmbuf_append() fails. This allows comments of any length (up to UINT16_MAX) to be attached to captured packets. Fixes: c1abd1e93dbd ("pcapng: support comment in enhanced packet block") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 413e6f1f4a..a2254ba807 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -567,11 +567,26 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strlen(comment)); - /* reserve trailing options and block length */ - opt = (struct pcapng_option *) - rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + /* + * Try to put options at the end of this mbuf. + * If not extend the mbuf by adding another segment. 
+ */ + opt = (struct pcapng_option *)rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; /* mbuf pool is empty */ + + if (unlikely(rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; /* too many segments in the mbuf */ + } + + opt = (struct pcapng_option *)rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL)) + goto fail; /* additional segment and still no space */ + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v7 4/7] pcapng: improve performance of timestamping 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger ` (2 preceding siblings ...) 2026-02-13 19:18 ` [PATCH v7 3/7] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 5/7] test/pcapng: add tests for comments Stephen Hemminger ` (2 subsequent siblings) 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to nanoseconds for pcapng. Precompute a rte_reciprocal_u64 inverse of the TSC frequency and a right-shift count chosen so that the intermediate product (delta >> shift) * NSEC_PER_SEC cannot overflow uint64_t. The per-packet conversion then requires only a shift, a multiply, and a reciprocal divide—no division. For TSC frequencies less than 18.4 GHz the shift value will be zero but code is defensive to be future proof. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 97 +++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index a2254ba807..7eedbaf298 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,12 +37,23 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX +/* + * Converter from TSC values to nanoseconds since Unix epoch. + * Uses reciprocal multiply to avoid runtime division. + */ +struct tsc_clock { + uint64_t tsc_base; /* TSC value at initialization. */ + uint64_t ns_base; /* Nanoseconds since epoch at init. */ + struct rte_reciprocal_u64 tsc_hz_inv; /* Reciprocal of TSC frequency. */ + uint32_t shift; /* Pre-shift to avoid overflow. 
*/ +}; + /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct tsc_clock clock; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -98,21 +109,59 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif -/* Convert from TSC (CPU cycles) to nanoseconds */ -static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +/* + * Initialize TSC-to-epoch-ns converter. + * + * Captures current TSC and system clock as a reference point. + */ +static int +tsc_clock_init(struct tsc_clock *clk) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); + struct timespec ts; + uint64_t cycles, tsc_hz, divisor; + uint32_t shift; + + memset(clk, 0, sizeof(*clk)); + + /* If Hz is zero, something is seriously broken. */ + tsc_hz = rte_get_tsc_hz(); + if (tsc_hz == 0) + return -1; + + /* + * Choose shift so (delta >> shift) * NSEC_PER_SEC fits in uint64_t. + * For typical GHz-range TSC and ~1s deltas this is 0. + */ + shift = 0; + divisor = tsc_hz; + while (divisor > UINT64_MAX / NSEC_PER_SEC) { + divisor >>= 1; + shift++; + } + + clk->shift = shift; + clk->tsc_hz_inv = rte_reciprocal_value_u64(divisor); + + /* Sample TSC and system clock as close together as possible. */ + cycles = rte_get_tsc_cycles(); + clock_gettime(CLOCK_REALTIME, &ts); + clk->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + clk->ns_base = (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + + return 0; +} - delta = cycles - self->tsc_base; +/* Convert a TSC value to nanoseconds since Unix epoch. 
*/ +static inline uint64_t +tsc_to_ns_epoch(const struct tsc_clock *clk, uint64_t tsc) +{ + uint64_t delta, ns; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + delta = tsc - clk->tsc_base; + ns = (delta >> clk->shift) * NSEC_PER_SEC; + ns = rte_reciprocal_divide_u64(ns, &clk->tsc_hz_inv); - return secs * NS_PER_S + ns + self->offset_ns; + return clk->ns_base + ns; } /* length of option including padding */ @@ -344,7 +393,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->clock.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -397,7 +446,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = tsc_to_ns_epoch(&self->clock, rte_get_tsc_cycles()); hdr->timestamp_hi = sample_time >> 32; hdr->timestamp_lo = (uint32_t)sample_time; @@ -684,10 +733,13 @@ rte_pcapng_write_packets(rte_pcapng_t *self, return -1; } - /* adjust timestamp recorded in packet */ + /* + * When data is captured pcapng_copy the current TSC is stored. + * Adjust the value recorded in file to PCAP epoch units. 
+ */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = tsc_to_ns_epoch(&self->clock, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -733,8 +785,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; int ret; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || @@ -754,11 +804,10 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + if (tsc_clock_init(&self->clock) < 0) { + rte_errno = ENODEV; + goto fail; + } for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v7 5/7] test/pcapng: add tests for comments 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger ` (3 preceding siblings ...) 2026-02-13 19:18 ` [PATCH v7 4/7] pcapng: improve performance of timestamping Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 6/7] test/pcapng: skip test if null driver missing Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 7/7] dumpcap: improve pcapng error reporting Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Expand pcapng test coverage to exercise comment handling and additional code paths: * Add packet comments of varying lengths, including strings longer than 256 bytes, to verify option block encoding. * Vary packet sizes randomly to test different capture lengths * Calculate inter-packet delays to ensure timestamp values wrap around 32 bits at least twice during the test. * Add a statistics block write before packet capture. * Use RFC 864 chargen pattern for predictable fill data. Increase total packets from 4096 to 10000 and refactor mbuf preparation for more flexible packet construction. 
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 151 ++++++++++++++++++++++++++++++----------- 1 file changed, 113 insertions(+), 38 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 5d362ec70e..50cc92ad7d 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -25,13 +25,11 @@ #define PCAPNG_TEST_DEBUG 0 -#define TOTAL_PACKETS 4096 +#define TOTAL_PACKETS 10000 #define MAX_BURST 64 -#define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +39,40 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! */ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + unsigned int line = 0; + while (n < len - 1) { + char ch = FILL_START + (line % FILL_RANGE); + unsigned int i; + + for (i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch > FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + line++; + } +} + +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +82,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t *)rte_pktmbuf_append(mb, data_len); - - for (i = 
0; i != data_len; i++) - db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +105,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); + + fill_mbuf(&dm->mb[1]); + rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); +} + +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); + dm->mb[1].data_len = len; + dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + len += sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store 
of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -126,7 +162,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -142,19 +178,44 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; unsigned int burst_size; unsigned int count; ssize_t len; + /* + * These are some silly comments to test various lengths and alignments sprinkle + * into the file. You can see these comments by using the dumpcap program on the file + */ + static const char * const examples[] = { + "Lockless and fearless - that’s how we roll in userspace.", + "Memory pool deep / Mbufs swim in lockless rings / Zero copy dreams,", + "Poll mode driver waits / No interrupts disturb its zen / Busy loop finds peace,", + "Memory barriers / rte_atomic_thread_fence() / Guards our shared state", + "Hugepages so vast / Two megabytes of glory / TLB misses weep", + "Packets flow like streams / Through the graph node pipeline / Iterate in place", + + /* Long one to make sure we can do > 256 characters */ + ("Dear future maintainer: I am sorry. This packet was captured at 3 AM while " + "debugging a priority flow control issue that turned out to be a loose cable. " + "The rte_eth_tx_burst() call you see here has been cargo-culted through four " + "generations of example code. 
The magic number 32 is not documented because " + "nobody remembers why. Trust the process."), + }; + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); + + /* Want overall test to take to wraparound at least twice. */ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -162,9 +223,17 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put randomized comment on every 100th packet (1%) */ + if (count % 100 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; + + /* Vary the size of the packets, okay to allow 0 sized packet */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -182,8 +251,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -386,7 +454,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,7 +481,14 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + /* write a 
statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v7 6/7] test/pcapng: skip test if null driver missing 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger ` (4 preceding siblings ...) 2026-02-13 19:18 ` [PATCH v7 5/7] test/pcapng: add tests for comments Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 2026-02-16 10:01 ` David Marchand 2026-02-13 19:18 ` [PATCH v7 7/7] dumpcap: improve pcapng error reporting Stephen Hemminger 6 siblings, 1 reply; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, David Marchand If null PMD is disabled in the build via -Ddisable_drivers=null then do not build pcapng test. Fixes: 6f01a3ca5c7f ("test: fix dependency on pcapng") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test/meson.build b/app/test/meson.build index 48874037eb..4fd8670e05 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -135,7 +135,7 @@ source_file_deps = { 'test_mp_secondary.c': ['hash'], 'test_net_ether.c': ['net'], 'test_net_ip6.c': ['net'], - 'test_pcapng.c': ['ethdev', 'net', 'pcapng', 'bus_vdev'], + 'test_pcapng.c': ['net_null', 'net', 'ethdev', 'pcapng', 'bus_vdev'], 'test_pdcp.c': ['eventdev', 'pdcp', 'net', 'timer', 'security'], 'test_pdump.c': ['pdump'] + sample_packet_forward_deps, 'test_per_lcore.c': [], -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH v7 6/7] test/pcapng: skip test if null driver missing 2026-02-13 19:18 ` [PATCH v7 6/7] test/pcapng: skip test if null driver missing Stephen Hemminger @ 2026-02-16 10:01 ` David Marchand 2026-02-16 16:26 ` Stephen Hemminger 0 siblings, 1 reply; 58+ messages in thread From: David Marchand @ 2026-02-16 10:01 UTC (permalink / raw) To: Stephen Hemminger; +Cc: dev, stable On Fri, 13 Feb 2026 at 20:21, Stephen Hemminger <stephen@networkplumber.org> wrote: > > If null PMD is disabled in the build via -Ddisable_drivers=null > then do not build pcapng test. > > Fixes: 6f01a3ca5c7f ("test: fix dependency on pcapng") There was no dependency on net/null at this point in git history. Instead, this fix is about: Fixes: 0c614a0fa6c6 ("test/pcapng: cleanup") > Cc: stable@dpdk.org > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> -- David Marchand ^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v7 6/7] test/pcapng: skip test if null driver missing 2026-02-16 10:01 ` David Marchand @ 2026-02-16 16:26 ` Stephen Hemminger 2026-02-16 16:43 ` David Marchand 0 siblings, 1 reply; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 16:26 UTC (permalink / raw) To: David Marchand; +Cc: dev, stable On Mon, 16 Feb 2026 11:01:34 +0100 David Marchand <david.marchand@redhat.com> wrote: > On Fri, 13 Feb 2026 at 20:21, Stephen Hemminger > <stephen@networkplumber.org> wrote: > > > > If null PMD is disabled in the build via -Ddisable_drivers=null > > then do not build pcapng test. > > > > Fixes: 6f01a3ca5c7f ("test: fix dependency on pcapng") > > There was no dependency on net/null at this point in git history. > Instead, this fix is about: > Fixes: 0c614a0fa6c6 ("test/pcapng: cleanup") > > > > Cc: stable@dpdk.org > > > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > > The point is that if the null driver is disabled, the test would always fail. The commit log is incorrect; the flag would be -Ddisable_drivers=net/null. It seemed better to just not build the test if the driver isn't there. The alternative is to detect it and mark the test as SKIPPED. But vdev_init error codes are very broad, and my preference is to save space/time. # DPDK_TEST=pcapng_autotest ./build/app/dpdk-test --no-huge -m 2048 EAL: Detected CPU lcores: 32 EAL: Detected NUMA nodes: 1 EAL: Detected static linkage of DPDK EAL: Multi-process socket /var/run/dpdk/rte/mp_socket EAL: Selected IOVA mode 'VA' APP: HPET is not enabled, using TSC as default timer RTE>>pcapng_autotest + ------------------------------------------------------- + + Test Suite : Test Pcapng Unit Test Suite Failed to create vdev 'net_null0' ^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v7 6/7] test/pcapng: skip test if null driver missing 2026-02-16 16:26 ` Stephen Hemminger @ 2026-02-16 16:43 ` David Marchand 0 siblings, 0 replies; 58+ messages in thread From: David Marchand @ 2026-02-16 16:43 UTC (permalink / raw) To: Stephen Hemminger; +Cc: dev, stable On Mon, 16 Feb 2026 at 17:26, Stephen Hemminger <stephen@networkplumber.org> wrote: > > On Mon, 16 Feb 2026 11:01:34 +0100 > David Marchand <david.marchand@redhat.com> wrote: > > > On Fri, 13 Feb 2026 at 20:21, Stephen Hemminger > > <stephen@networkplumber.org> wrote: > > > > > > If null PMD is disabled in the build via -Ddisable_drivers=null > > > then do not build pcapng test. > > > > > > Fixes: 6f01a3ca5c7f ("test: fix dependency on pcapng") > > > > There was no dependency on net/null at this point in git history. > > Instead, this fix is about: > > Fixes: 0c614a0fa6c6 ("test/pcapng: cleanup") > > > > > > > Cc: stable@dpdk.org > > > > > > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> > > > > > > The point is if the null driver is disabled the test would always fail. > The commit log is incorrect, the flag would be -Ddisable_drivers=net/null > > Seemed better to just not build the test if the driver isn't there. > Alternative is to detect it and marked as SKIPPED. But vdev_init > error codes are very broad, and my preference is to save space/time. > > # DPDK_TEST=pcapng_autotest ./build/app/dpdk-test --no-huge -m 2048 > EAL: Detected CPU lcores: 32 > EAL: Detected NUMA nodes: 1 > EAL: Detected static linkage of DPDK > EAL: Multi-process socket /var/run/dpdk/rte/mp_socket > EAL: Selected IOVA mode 'VA' > APP: HPET is not enabled, using TSC as default timer > RTE>>pcapng_autotest > + ------------------------------------------------------- + > + Test Suite : Test Pcapng Unit Test Suite > Failed to create vdev 'net_null0' Then please fix the commitlog and the Fixes: tag. Thanks. -- David Marchand ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v7 7/7] dumpcap: improve pcapng error reporting 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger ` (5 preceding siblings ...) 2026-02-13 19:18 ` [PATCH v7 6/7] test/pcapng: skip test if null driver missing Stephen Hemminger @ 2026-02-13 19:18 ` Stephen Hemminger 6 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-13 19:18 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Print meaningful error messages when rte_pcapng_add_interface() or rte_pcapng_write_stats() fails instead of silently ignoring the error or printing a generic message. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/dumpcap/main.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/app/dumpcap/main.c b/app/dumpcap/main.c index 46a6cb251e..f105ee92c2 100644 --- a/app/dumpcap/main.c +++ b/app/dumpcap/main.c @@ -587,9 +587,16 @@ report_packet_stats(dumpcap_out_t out) ifrecv = pdump_stats.accepted + pdump_stats.filtered; ifdrop = pdump_stats.nombuf + pdump_stats.ringfull; - if (use_pcapng) - rte_pcapng_write_stats(out.pcapng, intf->port, - ifrecv, ifdrop, NULL); + if (use_pcapng) { + ssize_t written; + + written = rte_pcapng_write_stats(out.pcapng, intf->port, + ifrecv, ifdrop, NULL); + if (written < 0) { + fprintf(stderr, "Failed to write stats for %s: %s\n", + intf->name, rte_strerror(-written)); + } + } if (ifrecv == 0) percent = 0; @@ -760,7 +767,7 @@ static char *get_os_info(void) static dumpcap_out_t create_output(void) { - dumpcap_out_t ret; + dumpcap_out_t out; static char tmp_path[PATH_MAX]; int fd; @@ -802,19 +809,20 @@ static dumpcap_out_t create_output(void) struct interface *intf; char *os = get_os_info(); - ret.pcapng = rte_pcapng_fdopen(fd, os, NULL, + out.pcapng = rte_pcapng_fdopen(fd, os, NULL, version(), capture_comment); - if (ret.pcapng == NULL) + if (out.pcapng == NULL) rte_exit(EXIT_FAILURE, "pcapng_fdopen failed: 
%s\n", strerror(rte_errno)); free(os); TAILQ_FOREACH(intf, &interfaces, next) { - if (rte_pcapng_add_interface(ret.pcapng, intf->port, DLT_EN10MB, - intf->ifname, intf->ifdescr, - intf->opts.filter) < 0) - rte_exit(EXIT_FAILURE, "rte_pcapng_add_interface %u failed\n", - intf->port); + int ret = rte_pcapng_add_interface(out.pcapng, intf->port, DLT_EN10MB, + intf->ifname, intf->ifdescr, + intf->opts.filter); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_pcapng_add_interface %u failed: %s\n", + intf->port, rte_strerror(-ret)); } } else { pcap_t *pcap; @@ -825,13 +833,13 @@ static dumpcap_out_t create_output(void) if (pcap == NULL) rte_exit(EXIT_FAILURE, "pcap_open_dead failed\n"); - ret.dumper = pcap_dump_fopen(pcap, fdopen(fd, "w")); - if (ret.dumper == NULL) + out.dumper = pcap_dump_fopen(pcap, fdopen(fd, "w")); + if (out.dumper == NULL) rte_exit(EXIT_FAILURE, "pcap_dump_fopen failed: %s\n", pcap_geterr(pcap)); } - return ret; + return out; } static void enable_pdump(struct rte_ring *r, struct rte_mempool *mp) -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 0/8] pcapng: fixes and improvements 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger ` (5 preceding siblings ...) 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger @ 2026-02-16 21:37 ` Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 1/8] pcapng: correct typo in comment Stephen Hemminger ` (8 more replies) 6 siblings, 9 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:37 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger Fix string handling in the pcapng library and improve test coverage. The pcapng format uses 16-bit length fields for option data, but the library did not validate input lengths and used fixed-size stack buffers that could overflow with large strings. Patches 1-4 add string validation, dynamic allocation, and return value documentation. Existing return value conventions are preserved to avoid an ABI break. Patch 5 chains an additional mbuf segment when a comment exceeds tailroom instead of dropping the packet. Patch 6 replaces per-packet timestamp divisions with a precomputed reciprocal multiply-shift. Patches 7-8 improve test coverage and fix the build dependency on the null PMD. 
v8: - Split patches for clarity (typo, Doxygen, validation, malloc) - Preserve existing API/ABI — no error return changes - Drop dumpcap patch (no API change to handle) - Truncate overlong comments via strnlen on fast path - Defer port_index update until after successful write v7: - Add meson build fix for disabled null PMD - Add dumpcap error reporting improvements Stephen Hemminger (8): pcapng: correct typo in comment pcapng: document return values pcapng: add length checks to string arguments pcapng: use malloc instead of fixed buffer size pcapng: chain additional mbuf when comment exceeds tailroom pcapng: improve performance of timestamping test/pcapng: skip test if null driver missing test/pcapng: add tests for comments app/test/meson.build | 2 +- app/test/test_pcapng.c | 154 +++++++++++++----- doc/guides/rel_notes/release_26_03.rst | 5 + lib/pcapng/rte_pcapng.c | 207 +++++++++++++++++++------ lib/pcapng/rte_pcapng.h | 13 +- 5 files changed, 288 insertions(+), 93 deletions(-) -- 2.51.0 ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v8 1/8] pcapng: correct typo in comment 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger @ 2026-02-16 21:37 ` Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 2/8] pcapng: document return values Stephen Hemminger ` (7 subsequent siblings) 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:37 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Minor spelling typo. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 2cc9e2040d..5789c2cdca 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -309,7 +309,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0); - /* clone block_length after optionsa */ + /* clone block_length after options */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); /* remember the file index */ -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 2/8] pcapng: document return values 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 1/8] pcapng: correct typo in comment Stephen Hemminger @ 2026-02-16 21:37 ` Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 3/8] pcapng: add length checks to string arguments Stephen Hemminger ` (6 subsequent siblings) 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:37 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella Update the Doxygen comments for rte_pcapng_add_interface() and rte_pcapng_write_stats() to document return values. Return values management in this library is not as consistent as other libraries like ethdev but fixing it would break API and ABI compatibility. Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index de1bf953e9..8d0974c3fd 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -89,6 +89,9 @@ rte_pcapng_close(rte_pcapng_t *self); * Interfaces must be added to the output file after opening * and before any packet record. All ports used in packet capture * must be added. + * + * @return + * number of bytes written to file, -1 on failure */ int rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, @@ -128,7 +131,7 @@ enum rte_pcapng_direction { * * @return * - The pointer to the new mbuf formatted for pcapng_write - * - NULL if allocation fails. + * - NULL on error such as invalid port or out of memory. */ struct rte_mbuf * rte_pcapng_copy(uint16_t port_id, uint32_t queue, @@ -192,7 +195,9 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * @param comment * Optional comment to add to statistics. 
* @return - * number of bytes written to file, -1 on failure to write file + * On success number of bytes written to file, + * -1 on failure to write file (and errno is set) + * - (-EINVAL) if bad parameter. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 3/8] pcapng: add length checks to string arguments 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 1/8] pcapng: correct typo in comment Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 2/8] pcapng: document return values Stephen Hemminger @ 2026-02-16 21:38 ` Stephen Hemminger 2026-02-17 14:34 ` Thomas Monjalon 2026-02-16 21:38 ` [PATCH v8 4/8] pcapng: use malloc instead of fixed buffer size Stephen Hemminger ` (5 subsequent siblings) 8 siblings, 1 reply; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:38 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan The pcapng file format uses a 16-bit length field in the option TLV (Type-Length-Value) encoding, limiting strings to UINT16_MAX bytes. Add validation for string arguments to prevent silent truncation or buffer issues when callers pass excessively long strings. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- doc/guides/rel_notes/release_26_03.rst | 5 ++++ lib/pcapng/rte_pcapng.c | 37 ++++++++++++++++++++++---- lib/pcapng/rte_pcapng.h | 3 ++- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst index 39f381e561..eea01e44e9 100644 --- a/doc/guides/rel_notes/release_26_03.rst +++ b/doc/guides/rel_notes/release_26_03.rst @@ -148,6 +148,11 @@ API Changes * cfgfile: name must be less than CFG_NAME_LEN and value must be less than CFG_VALUE_LEN. +* **Updated the pcapng library.** + + * The length of comment strings is now validated. + Maximum allowable length is 2^16-1 because of pcapng file format. 
+ ABI Changes ----------- diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 5789c2cdca..914d6e7095 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -34,6 +34,9 @@ /* conversion from DPDK speed to PCAPNG */ #define PCAPNG_MBPS_SPEED 1000000ull +/* upper bound for strings in pcapng option data */ +#define PCAPNG_STR_MAX UINT16_MAX + /* upper bound for section, stats and interface blocks (in uint32_t) */ #define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) @@ -218,9 +221,11 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; + int ret; - if (rte_eth_dev_info_get(port, &dev_info) < 0) - return -1; + ret = rte_eth_dev_info_get(port, &dev_info); + if (ret < 0) + return -1; /* should be ret */ /* make something like an interface name */ if (ifname == NULL) { @@ -230,8 +235,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, snprintf(ifname_buf, IF_NAMESIZE, "dpdk:%u", port); ifname = ifname_buf; } + } else if (strlen(ifname) > PCAPNG_STR_MAX) { + return -1; /* ENAMETOOLONG */ } + if ((ifdescr && strlen(ifdescr) > PCAPNG_STR_MAX) || + (filter && strlen(filter) > PCAPNG_STR_MAX)) + return -1; /* EINVAL */ + /* make a useful device hardware string */ dev = dev_info.device; if (dev) @@ -269,7 +280,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += sizeof(uint32_t); if (len > sizeof(buf)) - return -1; + return -1; /* EINVAL */ hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -336,6 +347,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (comment && strlen(comment) > PCAPNG_STR_MAX) + return -EINVAL; + optlen = 0; if (ifrecv != UINT64_MAX) @@ -487,6 +501,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, bool rss_hash; #ifdef RTE_LIBRTE_ETHDEV_DEBUG + /* + * 
Since this function is used in the fast path for packet capture + * skip argument validation checks unless debug is enabled. + */ RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); #endif orig_len = rte_pktmbuf_pkt_len(md); @@ -535,8 +553,9 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (rss_hash) optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t)); + /* Comment is silently truncated if necessary */ if (comment) - optlen += pcapng_optlen(strlen(comment)); + optlen += pcapng_optlen(strnlen(comment, PCAPNG_STR_MAX)); /* reserve trailing options and block length */ opt = (struct pcapng_option *) @@ -577,7 +596,7 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment, - strlen(comment)); + strnlen(comment, PCAPNG_STR_MAX)); /* Note: END_OPT necessary here. Wireshark doesn't do it. */ @@ -692,6 +711,14 @@ rte_pcapng_fdopen(int fd, struct timespec ts; uint64_t cycles; + if ((osname && strlen(osname) > PCAPNG_STR_MAX) || + (hardware && strlen(hardware) > PCAPNG_STR_MAX) || + (appname && strlen(appname) > PCAPNG_STR_MAX) || + (comment && strlen(comment) > PCAPNG_STR_MAX)) { + rte_errno = EINVAL; + return NULL; + } + self = malloc(sizeof(*self)); if (!self) { rte_errno = ENOMEM; diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index 8d0974c3fd..68e13c67e4 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -127,7 +127,8 @@ enum rte_pcapng_direction { * @param direction * The direction of the packer: receive, transmit or unknown. * @param comment - * Packet comment. + * Optional per packet comment. + * Truncated to UINT16_MAX characters. * * @return * - The pointer to the new mbuf formatted for pcapng_write -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH v8 3/8] pcapng: add length checks to string arguments 2026-02-16 21:38 ` [PATCH v8 3/8] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-02-17 14:34 ` Thomas Monjalon 0 siblings, 0 replies; 58+ messages in thread From: Thomas Monjalon @ 2026-02-17 14:34 UTC (permalink / raw) To: Stephen Hemminger; +Cc: dev, Reshma Pattan 16/02/2026 22:38, Stephen Hemminger: > The pcapng file format uses a 16-bit length field in the option > TLV (Type-Length-Value) encoding, limiting strings to UINT16_MAX > bytes. > > Add validation for string arguments to prevent silent truncation > or buffer issues when callers pass excessively long strings. GCC warns about a remaining issue: In function 'pcapng_add_option', inlined from 'rte_pcapng_write_stats' at ../../dpdk/lib/pcapng/rte_pcapng.c:376:9: ../../dpdk/lib/pcapng/rte_pcapng.c:136:17: error: 'memcpy' forming offset [2048, 65552] is out of the bounds [0, 2048] of object 'buf' with type 'uint32_t[512]' {aka 'unsigned int[512]'} [-Werror=array-bounds=] 136 | memcpy(popt->data, data, len); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../../dpdk/lib/pcapng/rte_pcapng.c: In function 'rte_pcapng_write_stats': ../../dpdk/lib/pcapng/rte_pcapng.c:346:18: note: 'buf' declared here 346 | uint32_t buf[PCAPNG_BLKSIZ]; | ^~~ I have to make this change: if (comment) opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, - comment, strlen(comment)); + comment, strnlen(comment, PCAPNG_STR_MAX)); I'm not sure I understand why it fixes the problem with the buffer of size 2048, but it works. ^ permalink raw reply [flat|nested] 58+ messages in thread
* [PATCH v8 4/8] pcapng: use malloc instead of fixed buffer size 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger ` (2 preceding siblings ...) 2026-02-16 21:38 ` [PATCH v8 3/8] pcapng: add length checks to string arguments Stephen Hemminger @ 2026-02-16 21:38 ` Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 5/8] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger ` (4 subsequent siblings) 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:38 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Ray Kinsella The administrative APIs accept comments and other metadata as strings. Since these strings can be arbitrarily long (up to UINT16_MAX bytes), they may overflow the fixed-size stack buffers previously used for block construction. Replace the fixed-size buffers with dynamically allocated memory sized to the actual block length. Return appropriate error codes on allocation failure. 
Bugzilla ID: 1820 Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 50 ++++++++++++++++++++++++++--------------- lib/pcapng/rte_pcapng.h | 1 + 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 914d6e7095..a5a9a827bb 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,9 +37,6 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX -/* upper bound for section, stats and interface blocks (in uint32_t) */ -#define PCAPNG_BLKSIZ (2048 / sizeof(uint32_t)) - /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ @@ -148,8 +145,9 @@ pcapng_section_block(rte_pcapng_t *self, { struct pcapng_section_header *hdr; struct pcapng_option *opt; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; uint32_t len; + ssize_t ret; len = sizeof(*hdr); if (hw) @@ -165,8 +163,9 @@ pcapng_section_block(rte_pcapng_t *self, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) - return -1; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_section_header *)buf; *hdr = (struct pcapng_section_header) { @@ -199,7 +198,9 @@ pcapng_section_block(rte_pcapng_t *self, /* clone block_length after option */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - return write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret < 0 ? 
-errno : 0; } /* Write an interface block for a DPDK port */ @@ -217,7 +218,7 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, struct pcapng_option *opt; const uint8_t tsresol = 9; /* nanosecond resolution */ uint32_t len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; char ifname_buf[IF_NAMESIZE]; char ifhw[256]; uint64_t speed = 0; @@ -279,8 +280,9 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, len += pcapng_optlen(0); len += sizeof(uint32_t); - if (len > sizeof(buf)) - return -1; /* EINVAL */ + buf = malloc(len); + if (buf == NULL) + return -1; /* ENOMEM */ hdr = (struct pcapng_interface_block *)buf; *hdr = (struct pcapng_interface_block) { @@ -323,10 +325,14 @@ rte_pcapng_add_interface(rte_pcapng_t *self, uint16_t port, uint16_t link_type, /* clone block_length after options */ memcpy(opt, &hdr->block_length, sizeof(uint32_t)); - /* remember the file index */ - self->port_index[port] = self->ports++; + ret = write(self->outfd, buf, len); + free(buf); + + /* remember the file index only after successful write */ + if (ret > 0) + self->port_index[port] = self->ports++; - return write(self->outfd, buf, len); + return ret; } /* @@ -343,7 +349,8 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, uint64_t start_time = self->offset_ns; uint64_t sample_time; uint32_t optlen, len; - uint32_t buf[PCAPNG_BLKSIZ]; + uint32_t *buf; + ssize_t ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -366,8 +373,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, optlen += pcapng_optlen(0); len = sizeof(*hdr) + optlen + sizeof(uint32_t); - if (len > sizeof(buf)) - return -1; + buf = malloc(len); + if (buf == NULL) + return -ENOMEM; hdr = (struct pcapng_statistics *)buf; opt = (struct pcapng_option *)(hdr + 1); @@ -398,7 +406,9 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, /* clone block_length after option */ memcpy(opt, &len, sizeof(uint32_t)); - return 
write(self->outfd, buf, len); + ret = write(self->outfd, buf, len); + free(buf); + return ret; } RTE_EXPORT_SYMBOL(rte_pcapng_mbuf_size) @@ -710,6 +720,7 @@ rte_pcapng_fdopen(int fd, rte_pcapng_t *self; struct timespec ts; uint64_t cycles; + int ret; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || (hardware && strlen(hardware) > PCAPNG_STR_MAX) || @@ -737,8 +748,11 @@ rte_pcapng_fdopen(int fd, for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; - if (pcapng_section_block(self, osname, hardware, appname, comment) < 0) + ret = pcapng_section_block(self, osname, hardware, appname, comment); + if (ret < 0) { + rte_errno = -ret; goto fail; + } return self; fail: diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index 68e13c67e4..d8d328f710 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -199,6 +199,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * On success number of bytes written to file, * -1 on failure to write file (and errno is set) * - (-EINVAL) if bad parameter. + * - (-ENOMEM) if unable to allocate resources. */ ssize_t rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port, -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 5/8] pcapng: chain additional mbuf when comment exceeds tailroom 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger ` (3 preceding siblings ...) 2026-02-16 21:38 ` [PATCH v8 4/8] pcapng: use malloc instead of fixed buffer size Stephen Hemminger @ 2026-02-16 21:38 ` Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 6/8] pcapng: improve performance of timestamping Stephen Hemminger ` (3 subsequent siblings) 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:38 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable, Reshma Pattan, Amit Prakash Shukla When rte_pcapng_copy() is called with a comment, the option data may not fit in the mbuf's remaining tailroom, causing the append to fail and the packet to be dropped. Fix this by allocating and chaining an additional mbuf segment when rte_pktmbuf_append() fails. This allows comments of any length (up to UINT16_MAX) to be attached to captured packets. Fixes: c1abd1e93dbd ("pcapng: support comment in enhanced packet block") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index a5a9a827bb..2cc2ea2f2f 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -567,11 +567,26 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (comment) optlen += pcapng_optlen(strnlen(comment, PCAPNG_STR_MAX)); - /* reserve trailing options and block length */ - opt = (struct pcapng_option *) - rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); - if (unlikely(opt == NULL)) - goto fail; + /* + * Try to put options at the end of this mbuf. + * If not extend the mbuf by adding another segment. 
+ */ + opt = (struct pcapng_option *)rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL)) { + struct rte_mbuf *ml = rte_pktmbuf_alloc(mp); + + if (unlikely(ml == NULL)) + goto fail; /* mbuf pool is empty */ + + if (unlikely(rte_pktmbuf_chain(mc, ml) != 0)) { + rte_pktmbuf_free(ml); + goto fail; /* too many segments in the mbuf */ + } + + opt = (struct pcapng_option *)rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); + if (unlikely(opt == NULL)) + goto fail; /* additional segment and still no space */ + } switch (direction) { case RTE_PCAPNG_DIRECTION_IN: -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 6/8] pcapng: improve performance of timestamping 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger ` (4 preceding siblings ...) 2026-02-16 21:38 ` [PATCH v8 5/8] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger @ 2026-02-16 21:38 ` Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 7/8] test/pcapng: skip test if null driver missing Stephen Hemminger ` (2 subsequent siblings) 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:38 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Avoid doing expensive divide operations when converting timestamps from cycles (TSC) to nanoseconds for pcapng. Precompute a rte_reciprocal_u64 inverse of the TSC frequency and a right-shift count chosen so that the intermediate product (delta >> shift) * NSEC_PER_SEC cannot overflow uint64_t. The per-packet conversion then requires only a shift, a multiply, and a reciprocal divide—no division. For TSC frequencies less than 18.4 GHz the shift value will be zero but code is defensive to be future proof. Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- lib/pcapng/rte_pcapng.c | 97 +++++++++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 2cc2ea2f2f..38fc518515 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -37,12 +37,23 @@ /* upper bound for strings in pcapng option data */ #define PCAPNG_STR_MAX UINT16_MAX +/* + * Converter from TSC values to nanoseconds since Unix epoch. + * Uses reciprocal multiply to avoid runtime division. + */ +struct tsc_clock { + uint64_t tsc_base; /* TSC value at initialization. */ + uint64_t ns_base; /* Nanoseconds since epoch at init. */ + struct rte_reciprocal_u64 tsc_hz_inv; /* Reciprocal of TSC frequency. */ + uint32_t shift; /* Pre-shift to avoid overflow. 
*/ +}; + /* Format of the capture file handle */ struct rte_pcapng { int outfd; /* output file */ unsigned int ports; /* number of interfaces added */ - uint64_t offset_ns; /* ns since 1/1/1970 when initialized */ - uint64_t tsc_base; /* TSC when started */ + + struct tsc_clock clock; /* DPDK port id to interface index in file */ uint32_t port_index[RTE_MAX_ETHPORTS]; @@ -98,21 +109,59 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt) #define if_indextoname(ifindex, ifname) NULL #endif -/* Convert from TSC (CPU cycles) to nanoseconds */ -static uint64_t -pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles) +/* + * Initialize TSC-to-epoch-ns converter. + * + * Captures current TSC and system clock as a reference point. + */ +static int +tsc_clock_init(struct tsc_clock *clk) { - uint64_t delta, rem, secs, ns; - const uint64_t hz = rte_get_tsc_hz(); + struct timespec ts; + uint64_t cycles, tsc_hz, divisor; + uint32_t shift; + + memset(clk, 0, sizeof(*clk)); + + /* If Hz is zero, something is seriously broken. */ + tsc_hz = rte_get_tsc_hz(); + if (tsc_hz == 0) + return -1; + + /* + * Choose shift so (delta >> shift) * NSEC_PER_SEC fits in uint64_t. + * For typical GHz-range TSC and ~1s deltas this is 0. + */ + shift = 0; + divisor = tsc_hz; + while (divisor > UINT64_MAX / NSEC_PER_SEC) { + divisor >>= 1; + shift++; + } + + clk->shift = shift; + clk->tsc_hz_inv = rte_reciprocal_value_u64(divisor); + + /* Sample TSC and system clock as close together as possible. */ + cycles = rte_get_tsc_cycles(); + clock_gettime(CLOCK_REALTIME, &ts); + clk->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; + clk->ns_base = (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + + return 0; +} - delta = cycles - self->tsc_base; +/* Convert a TSC value to nanoseconds since Unix epoch. 
*/ +static inline uint64_t +tsc_to_ns_epoch(const struct tsc_clock *clk, uint64_t tsc) +{ + uint64_t delta, ns; - /* Avoid numeric wraparound by computing seconds first */ - secs = delta / hz; - rem = delta % hz; - ns = (rem * NS_PER_S) / hz; + delta = tsc - clk->tsc_base; + ns = (delta >> clk->shift) * NSEC_PER_SEC; + ns = rte_reciprocal_divide_u64(ns, &clk->tsc_hz_inv); - return secs * NS_PER_S + ns + self->offset_ns; + return clk->ns_base + ns; } /* length of option including padding */ @@ -346,7 +395,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, { struct pcapng_statistics *hdr; struct pcapng_option *opt; - uint64_t start_time = self->offset_ns; + uint64_t start_time = self->clock.ns_base; uint64_t sample_time; uint32_t optlen, len; uint32_t *buf; @@ -399,7 +448,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id, hdr->block_length = len; hdr->interface_id = self->port_index[port_id]; - sample_time = pcapng_timestamp(self, rte_get_tsc_cycles()); + sample_time = tsc_to_ns_epoch(&self->clock, rte_get_tsc_cycles()); hdr->timestamp_hi = sample_time >> 32; hdr->timestamp_lo = (uint32_t)sample_time; @@ -684,10 +733,13 @@ rte_pcapng_write_packets(rte_pcapng_t *self, return -1; } - /* adjust timestamp recorded in packet */ + /* + * When data is captured by pcapng_copy the current TSC is stored. + * Adjust the value recorded in file to PCAP epoch units. 
+ */ cycles = (uint64_t)epb->timestamp_hi << 32; cycles += epb->timestamp_lo; - timestamp = pcapng_timestamp(self, cycles); + timestamp = tsc_to_ns_epoch(&self->clock, cycles); epb->timestamp_hi = timestamp >> 32; epb->timestamp_lo = (uint32_t)timestamp; @@ -733,8 +785,6 @@ rte_pcapng_fdopen(int fd, { unsigned int i; rte_pcapng_t *self; - struct timespec ts; - uint64_t cycles; int ret; if ((osname && strlen(osname) > PCAPNG_STR_MAX) || @@ -754,11 +804,10 @@ rte_pcapng_fdopen(int fd, self->outfd = fd; self->ports = 0; - /* record start time in ns since 1/1/1970 */ - cycles = rte_get_tsc_cycles(); - clock_gettime(CLOCK_REALTIME, &ts); - self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2; - self->offset_ns = rte_timespec_to_ns(&ts); + if (tsc_clock_init(&self->clock) < 0) { + rte_errno = ENODEV; + goto fail; + } for (i = 0; i < RTE_MAX_ETHPORTS; i++) self->port_index[i] = UINT32_MAX; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 7/8] test/pcapng: skip test if null driver missing 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger ` (5 preceding siblings ...) 2026-02-16 21:38 ` [PATCH v8 6/8] pcapng: improve performance of timestamping Stephen Hemminger @ 2026-02-16 21:38 ` Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 8/8] test/pcapng: add tests for comments Stephen Hemminger 2026-02-17 16:39 ` [PATCH v8 0/8] pcapng: fixes and improvements Thomas Monjalon 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:38 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, stable The pcapng test requires the net_null driver to create a virtual device. If the null driver is excluded from the build, the test fails with: RTE>>pcapng_autotest + ------------------------------------------------------- + + Test Suite : Test Pcapng Unit Test Suite Failed to create vdev 'net_null0' Add net_null to the meson build dependency list for the test so that it is automatically skipped when the driver is not available. Fixes: 0c614a0fa6c6 ("test/pcapng: cleanup") Cc: stable@dpdk.org Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test/meson.build b/app/test/meson.build index 48874037eb..4fd8670e05 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -135,7 +135,7 @@ source_file_deps = { 'test_mp_secondary.c': ['hash'], 'test_net_ether.c': ['net'], 'test_net_ip6.c': ['net'], - 'test_pcapng.c': ['ethdev', 'net', 'pcapng', 'bus_vdev'], + 'test_pcapng.c': ['net_null', 'net', 'ethdev', 'pcapng', 'bus_vdev'], 'test_pdcp.c': ['eventdev', 'pdcp', 'net', 'timer', 'security'], 'test_pdump.c': ['pdump'] + sample_packet_forward_deps, 'test_per_lcore.c': [], -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v8 8/8] test/pcapng: add tests for comments 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger ` (6 preceding siblings ...) 2026-02-16 21:38 ` [PATCH v8 7/8] test/pcapng: skip test if null driver missing Stephen Hemminger @ 2026-02-16 21:38 ` Stephen Hemminger 2026-02-17 16:39 ` [PATCH v8 0/8] pcapng: fixes and improvements Thomas Monjalon 8 siblings, 0 replies; 58+ messages in thread From: Stephen Hemminger @ 2026-02-16 21:38 UTC (permalink / raw) To: dev; +Cc: Stephen Hemminger, Reshma Pattan Expand pcapng test coverage to exercise comment handling and additional code paths: * Add packet comments of varying lengths, including strings longer than 256 bytes, to verify option block encoding. * Vary packet sizes randomly to test different capture lengths. * Calculate inter-packet delays to ensure timestamp values wrap around 32 bits at least twice during the test. * Add a statistics block write before packet capture. * Use RFC 864 chargen pattern for predictable fill data. Increase total packets from 4096 to 10000 and refactor mbuf preparation for more flexible packet construction. 
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org> --- app/test/test_pcapng.c | 154 +++++++++++++++++++++++++++++++---------- 1 file changed, 116 insertions(+), 38 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 5d362ec70e..ad9ad51f4c 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -25,13 +25,11 @@ #define PCAPNG_TEST_DEBUG 0 -#define TOTAL_PACKETS 4096 +#define TOTAL_PACKETS 10000 #define MAX_BURST 64 -#define MAX_GAP_US 100000 -#define DUMMY_MBUF_NUM 3 +#define DUMMY_MBUF_NUM 2 static struct rte_mempool *mp; -static const uint32_t pkt_len = 200; static uint16_t port_id; static const char null_dev[] = "net_null0"; @@ -41,13 +39,43 @@ struct dummy_mbuf { uint8_t buf[DUMMY_MBUF_NUM][RTE_MBUF_DEFAULT_BUF_SIZE]; }; +#define MAX_DATA_SIZE (RTE_MBUF_DEFAULT_BUF_SIZE - RTE_PKTMBUF_HEADROOM) + +/* RFC 864 chargen pattern used for comment testing */ +#define FILL_LINE_LENGTH 72 +#define FILL_START 0x21 /* ! */ +#define FILL_END 0x7e /* ~ */ +#define FILL_RANGE (FILL_END - FILL_START) + static void -dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, - uint32_t data_len) +fill_mbuf(struct rte_mbuf *mb) { - uint32_t i; - uint8_t *db; + unsigned int len = rte_pktmbuf_tailroom(mb); + char *buf = rte_pktmbuf_append(mb, len); + unsigned int n = 0; + unsigned int line = 0; + + if (len == 0) + return; + + while (n < len - 1) { + char ch = FILL_START + (line % FILL_RANGE); + unsigned int i; + for (i = 0; i < FILL_LINE_LENGTH && n < len - 1; i++) { + buf[n++] = ch; + if (++ch > FILL_END) + ch = FILL_START; + } + if (n < len - 1) + buf[n++] = '\n'; + line++; + } +} + +static void +dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len) +{ mb->buf_addr = buf; rte_mbuf_iova_set(mb, (uintptr_t)buf); mb->buf_len = buf_len; @@ -57,15 +85,11 @@ dummy_mbuf_prep(struct rte_mbuf *mb, uint8_t buf[], uint32_t buf_len, mb->pool = (void *)buf; rte_pktmbuf_reset(mb); - db = (uint8_t 
*)rte_pktmbuf_append(mb, data_len); - - for (i = 0; i != data_len; i++) - db[i] = i; } /* Make an IP packet consisting of chain of one packets */ static void -mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) +mbuf1_prepare(struct dummy_mbuf *dm) { struct { struct rte_ether_hdr eth; @@ -84,32 +108,47 @@ mbuf1_prepare(struct dummy_mbuf *dm, uint32_t plen) .dst_addr = rte_cpu_to_be_32(RTE_IPV4_BROADCAST), }, .udp = { + .src_port = rte_cpu_to_be_16(19), /* Chargen port */ .dst_port = rte_cpu_to_be_16(9), /* Discard port */ }, }; memset(dm, 0, sizeof(*dm)); - dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0]), plen); + dummy_mbuf_prep(&dm->mb[0], dm->buf[0], sizeof(dm->buf[0])); + dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1])); rte_eth_random_addr(pkt.eth.src_addr.addr_bytes); - plen -= sizeof(struct rte_ether_hdr); + memcpy(rte_pktmbuf_append(&dm->mb[0], sizeof(pkt)), &pkt, sizeof(pkt)); - pkt.ip.total_length = rte_cpu_to_be_16(plen); - pkt.ip.hdr_checksum = rte_ipv4_cksum(&pkt.ip); + fill_mbuf(&dm->mb[1]); + rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - plen -= sizeof(struct rte_ipv4_hdr); - pkt.udp.src_port = rte_rand(); - pkt.udp.dgram_len = rte_cpu_to_be_16(plen); + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); +} - memcpy(rte_pktmbuf_mtod(dm->mb, void *), &pkt, sizeof(pkt)); +static void +mbuf1_resize(struct dummy_mbuf *dm, uint16_t len) +{ + struct { + struct rte_ether_hdr eth; + struct rte_ipv4_hdr ip; + struct rte_udp_hdr udp; + } *pkt = rte_pktmbuf_mtod(&dm->mb[0], void *); - /* Idea here is to create mbuf chain big enough that after mbuf deep copy they won't be - * compressed into single mbuf to properly test store of chained mbufs - */ - dummy_mbuf_prep(&dm->mb[1], dm->buf[1], sizeof(dm->buf[1]), pkt_len); - dummy_mbuf_prep(&dm->mb[2], dm->buf[2], sizeof(dm->buf[2]), pkt_len); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[1]); - rte_pktmbuf_chain(&dm->mb[0], &dm->mb[2]); + dm->mb[1].data_len = len; + 
dm->mb[0].pkt_len = dm->mb[0].data_len + dm->mb[1].data_len; + + len += sizeof(struct rte_udp_hdr); + pkt->udp.dgram_len = rte_cpu_to_be_16(len); + + len += sizeof(struct rte_ipv4_hdr); + pkt->ip.total_length = rte_cpu_to_be_16(len); + pkt->ip.hdr_checksum = 0; + pkt->ip.hdr_checksum = rte_ipv4_cksum(&pkt->ip); + + rte_mbuf_sanity_check(&dm->mb[0], 1); + rte_mbuf_sanity_check(&dm->mb[1], 0); } static int @@ -126,7 +165,7 @@ test_setup(void) /* Make a pool for cloned packets */ mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", MAX_BURST * 32, 0, 0, - rte_pcapng_mbuf_size(pkt_len) + 128, + rte_pcapng_mbuf_size(MAX_DATA_SIZE), SOCKET_ID_ANY, "ring_mp_sc"); if (mp == NULL) { fprintf(stderr, "Cannot create mempool\n"); @@ -142,19 +181,44 @@ test_setup(void) } static int -fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) +fill_pcapng_file(rte_pcapng_t *pcapng) { struct dummy_mbuf mbfs; struct rte_mbuf *orig; unsigned int burst_size; unsigned int count; ssize_t len; + /* + * These are some silly comments to test various lengths and alignments sprinkle + * into the file. You can see these comments by using the dumpcap program on the file + */ + static const char * const examples[] = { + "Lockless and fearless - that’s how we roll in userspace.", + "Memory pool deep / Mbufs swim in lockless rings / Zero copy dreams,", + "Poll mode driver waits / No interrupts disturb its zen / Busy loop finds peace,", + "Memory barriers / rte_atomic_thread_fence() / Guards our shared state", + "Hugepages so vast / Two megabytes of glory / TLB misses weep", + "Packets flow like streams / Through the graph node pipeline / Iterate in place", + + /* Long one to make sure we can do > 256 characters */ + ("Dear future maintainer: I am sorry. This packet was captured at 3 AM while " + "debugging a priority flow control issue that turned out to be a loose cable. " + "The rte_eth_tx_burst() call you see here has been cargo-culted through four " + "generations of example code. 
The magic number 32 is not documented because " + "nobody remembers why. Trust the process."), + }; + /* How many microseconds does it take TSC to wrap around 32 bits */ + const unsigned wrap_us + = (US_PER_S * (uint64_t)UINT32_MAX) / rte_get_tsc_hz(); + + /* Want overall test to take to wraparound at least twice. */ + const unsigned int avg_gap = (2 * wrap_us) + / (TOTAL_PACKETS / (MAX_BURST / 2)); - /* make a dummy packet */ - mbuf1_prepare(&mbfs, pkt_len); + mbuf1_prepare(&mbfs); orig = &mbfs.mb[0]; - for (count = 0; count < num_packets; count += burst_size) { + for (count = 0; count < TOTAL_PACKETS; count += burst_size) { struct rte_mbuf *clones[MAX_BURST]; unsigned int i; @@ -162,9 +226,17 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) burst_size = rte_rand_max(MAX_BURST) + 1; for (i = 0; i < burst_size; i++) { struct rte_mbuf *mc; + const char *comment = NULL; + + /* Put randomized comment on every 100th packet (1%) */ + if (count % 100 == 0) + comment = examples[rte_rand_max(RTE_DIM(examples))]; + + /* Vary the size of the packets, okay to allow 0 sized packet */ + mbuf1_resize(&mbfs, rte_rand_max(MAX_DATA_SIZE)); mc = rte_pcapng_copy(port_id, 0, orig, mp, rte_pktmbuf_pkt_len(orig), - RTE_PCAPNG_DIRECTION_IN, NULL); + RTE_PCAPNG_DIRECTION_IN, comment); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -182,8 +254,7 @@ fill_pcapng_file(rte_pcapng_t *pcapng, unsigned int num_packets) return -1; } - /* Leave a small gap between packets to test for time wrap */ - usleep(rte_rand_max(MAX_GAP_US)); + rte_delay_us_block(rte_rand_max(2 * avg_gap)); } return count; @@ -386,7 +457,7 @@ static int test_write_packets(void) { char file_name[] = "/tmp/pcapng_test_XXXXXX.pcapng"; - static rte_pcapng_t *pcapng; + rte_pcapng_t *pcapng = NULL; int ret, tmp_fd, count; uint64_t now = current_timestamp(); @@ -413,7 +484,14 @@ test_write_packets(void) goto fail; } - count = fill_pcapng_file(pcapng, TOTAL_PACKETS); + /* write a 
statistics block */ + ret = rte_pcapng_write_stats(pcapng, port_id, 0, 0, NULL); + if (ret <= 0) { + fprintf(stderr, "Write of statistics failed\n"); + goto fail; + } + + count = fill_pcapng_file(pcapng); if (count < 0) goto fail; -- 2.51.0 ^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH v8 0/8] pcapng: fixes and improvements 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger ` (7 preceding siblings ...) 2026-02-16 21:38 ` [PATCH v8 8/8] test/pcapng: add tests for comments Stephen Hemminger @ 2026-02-17 16:39 ` Thomas Monjalon 8 siblings, 0 replies; 58+ messages in thread From: Thomas Monjalon @ 2026-02-17 16:39 UTC (permalink / raw) To: Stephen Hemminger; +Cc: dev 16/02/2026 22:37, Stephen Hemminger: > Fix string handling in the pcapng library and improve test coverage. > > The pcapng format uses 16-bit length fields for option data, but the > library did not validate input lengths and used fixed-size stack > buffers that could overflow with large strings. > > Patches 1-4 add string validation, dynamic allocation, and return > value documentation. Existing return value conventions are preserved > to avoid an ABI break. > > Patch 5 chains an additional mbuf segment when a comment exceeds > tailroom instead of dropping the packet. > > Patch 6 replaces per-packet timestamp divisions with a precomputed > reciprocal multiply-shift. > > Patches 7-8 improve test coverage and fix the build dependency on > the null PMD. Applied, thanks. ^ permalink raw reply [flat|nested] 58+ messages in thread
end of thread, other threads:[~2026-02-17 16:39 UTC | newest] Thread overview: 58+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-11-26 5:12 [RFC] pcapng: improve performance of timestamping Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 0/6] pcapng: timestamping and comment fixes Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 1/6] pcapng: use alloca instead of fixed buffer Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 2/6] pcapng: add additional mbuf if space required on copy Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 3/6] test: add more tests for comments in pcapng Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 4/6] test: vary size of packets in pcapng test Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 5/6] test: increase gap " Stephen Hemminger 2025-12-29 23:01 ` [PATCH v2 6/6] pcapng: improve performance of timestamping Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 1/7] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 3/7] pcapng: add additional mbuf if space required on copy Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 4/7] test: add more tests for comments in pcapng Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 5/7] test: vary size of packets in pcapng test Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 6/7] test: increase gap " Stephen Hemminger 2026-01-12 4:50 ` [PATCH v3 7/7] pcapng: improve performance of timestamping Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 1/7] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 3/7] pcapng: add additional mbuf if 
space required on copy Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 4/7] test: add more tests for comments in pcapng Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 5/7] test: vary size of packets in pcapng test Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 6/7] test: increase gap " Stephen Hemminger 2026-01-13 0:51 ` [PATCH v4 7/7] pcapng: improve performance of timestamping Stephen Hemminger 2026-01-19 18:18 ` [PATCH v5 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-19 18:18 ` [PATCH v5 1/5] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 2/5] pcapng: use malloc instead of fixed buffer size Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 3/5] pcapng: add additional mbuf if space required on copy Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 4/5] pcapng: improve performance of timestamping Stephen Hemminger 2026-01-19 18:19 ` [PATCH v5 5/5] test: add more tests for pcapng Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 0/5] pcapng: fixes and improvements Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 1/5] pcapng: add length checks to string arguments Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 2/5] pcapng: use malloc instead of fixed buffer size Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 3/5] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 4/5] pcapng: improve performance of timestamping Stephen Hemminger 2026-01-26 21:04 ` [PATCH v6 5/5] test/pcapng: add tests for comments Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 0/7] pcapng: fixes and improvements Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 1/7] pcapng: add length checks to string arguments Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 2/7] pcapng: use malloc instead of fixed buffer size Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 3/7] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 4/7] pcapng: improve performance of 
timestamping Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 5/7] test/pcapng: add tests for comments Stephen Hemminger 2026-02-13 19:18 ` [PATCH v7 6/7] test/pcapng: skip test if null driver missing Stephen Hemminger 2026-02-16 10:01 ` David Marchand 2026-02-16 16:26 ` Stephen Hemminger 2026-02-16 16:43 ` David Marchand 2026-02-13 19:18 ` [PATCH v7 7/7] dumpcap: improve pcapng error reporting Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 0/8] pcapng: fixes and improvements Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 1/8] pcapng: correct typo in comment Stephen Hemminger 2026-02-16 21:37 ` [PATCH v8 2/8] pcapng: document return values Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 3/8] pcapng: add length checks to string arguments Stephen Hemminger 2026-02-17 14:34 ` Thomas Monjalon 2026-02-16 21:38 ` [PATCH v8 4/8] pcapng: use malloc instead of fixed buffer size Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 5/8] pcapng: chain additional mbuf when comment exceeds tailroom Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 6/8] pcapng: improve performance of timestamping Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 7/8] test/pcapng: skip test if null driver missing Stephen Hemminger 2026-02-16 21:38 ` [PATCH v8 8/8] test/pcapng: add tests for comments Stephen Hemminger 2026-02-17 16:39 ` [PATCH v8 0/8] pcapng: fixes and improvements Thomas Monjalon
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.