linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Xu Kuohai <xukuohai@huaweicloud.com>
To: bpf@vger.kernel.org, linux-kselftest@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Martin KaFai Lau <martin.lau@linux.dev>,
	Eduard Zingerman <eddyz87@gmail.com>, Yonghong Song <yhs@fb.com>,
	Song Liu <song@kernel.org>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@kernel.org>,
	Stanislav Fomichev <sdf@google.com>, Hao Luo <haoluo@google.com>,
	Jiri Olsa <jolsa@kernel.org>, Mykola Lysenko <mykolal@fb.com>,
	Shuah Khan <shuah@kernel.org>,
	Stanislav Fomichev <sdf@fomichev.me>,
	Willem de Bruijn <willemb@google.com>,
	Jason Xing <kerneljasonxing@gmail.com>,
	Paul Chaignon <paul.chaignon@gmail.com>,
	Tao Chen <chen.dylane@linux.dev>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Martin Kelly <martin.kelly@crowdstrike.com>
Subject: [PATCH bpf-next 2/4] libbpf: ringbuf: Add overwrite ring buffer process
Date: Mon,  4 Aug 2025 10:20:58 +0800	[thread overview]
Message-ID: <20250804022101.2171981-3-xukuohai@huaweicloud.com> (raw)
In-Reply-To: <20250804022101.2171981-1-xukuohai@huaweicloud.com>

From: Xu Kuohai <xukuohai@huawei.com>

In overwrite mode, the producer does not wait for the consumer, so the
consumer is responsible for handling conflicts. An optimistic method
is used to resolve the conflicts: the consumer first reads consumer_pos,
producer_pos and overwrite_pos, then calculates a read window and copies
data in the window from the ring buffer. After copying, it checks the
positions to decide if the data in the copy window has been overwritten
by the producer. If so, it discards the copy and tries again. Once the
copy succeeds, the consumer processes the events in the copy.

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 tools/lib/bpf/ringbuf.c | 103 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 102 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 9702b70da444..9c072af675ff 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -27,10 +27,13 @@ struct ring {
 	ring_buffer_sample_fn sample_cb;
 	void *ctx;
 	void *data;
+	void *read_buffer;
 	unsigned long *consumer_pos;
 	unsigned long *producer_pos;
+	unsigned long *overwrite_pos;
 	unsigned long mask;
 	int map_fd;
+	bool overwrite_mode;
 };
 
 struct ring_buffer {
@@ -69,6 +72,9 @@ static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r)
 		r->producer_pos = NULL;
 	}
 
+	if (r->read_buffer)
+		free(r->read_buffer);
+
 	free(r);
 }
 
@@ -119,6 +125,14 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 	r->sample_cb = sample_cb;
 	r->ctx = ctx;
 	r->mask = info.max_entries - 1;
+	r->overwrite_mode = info.map_flags & BPF_F_OVERWRITE;
+	if (unlikely(r->overwrite_mode)) {
+		r->read_buffer = malloc(info.max_entries);
+		if (!r->read_buffer) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+	}
 
 	/* Map writable consumer page */
 	tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
@@ -148,6 +162,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		goto err_out;
 	}
 	r->producer_pos = tmp;
+	r->overwrite_pos = r->producer_pos + 1; /* overwrite_pos is next to producer_pos */
 	r->data = tmp + rb->page_size;
 
 	e = &rb->events[rb->ring_cnt];
@@ -232,7 +247,7 @@ static inline int roundup_len(__u32 len)
 	return (len + 7) / 8 * 8;
 }
 
-static int64_t ringbuf_process_ring(struct ring *r, size_t n)
+static int64_t ringbuf_process_normal_ring(struct ring *r, size_t n)
 {
 	int *len_ptr, len, err;
 	/* 64-bit to avoid overflow in case of extreme application behavior */
@@ -278,6 +293,92 @@ static int64_t ringbuf_process_ring(struct ring *r, size_t n)
 	return cnt;
 }
 
+static int64_t ringbuf_process_overwrite_ring(struct ring *r, size_t n)
+{
+
+	int err;
+	uint32_t *len_ptr, len;
+	/* 64-bit to avoid overflow in case of extreme application behavior */
+	int64_t cnt = 0;
+	size_t size, offset;
+	unsigned long cons_pos, prod_pos, over_pos, tmp_pos;
+	bool got_new_data;
+	void *sample;
+	bool copied;
+
+	size = r->mask + 1;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	do {
+		got_new_data = false;
+
+		/* grab a copy of data */
+		prod_pos = smp_load_acquire(r->producer_pos);
+		do {
+			over_pos = READ_ONCE(*r->overwrite_pos);
+			/* prod_pos may be outdated now */
+			if (over_pos < prod_pos) {
+				tmp_pos = max(cons_pos, over_pos);
+				/* smp_load_acquire(r->producer_pos) before
+				 * READ_ONCE(*r->overwrite_pos) ensures that
+				 * over_pos + r->mask < prod_pos never occurs,
+				 * so size is never larger than r->mask
+				 */
+				size = prod_pos - tmp_pos;
+				if (!size)
+					goto done;
+				memcpy(r->read_buffer,
+				       r->data + (tmp_pos & r->mask), size);
+				copied = true;
+			} else {
+				copied = false;
+			}
+			prod_pos = smp_load_acquire(r->producer_pos);
+		/* retry if data is overwritten by producer */
+		} while (!copied || prod_pos - tmp_pos > r->mask);
+
+		cons_pos = tmp_pos;
+
+		for (offset = 0; offset < size; offset += roundup_len(len)) {
+			len_ptr = r->read_buffer + (offset & r->mask);
+			len = *len_ptr;
+
+			if (len & BPF_RINGBUF_BUSY_BIT)
+				goto done;
+
+			got_new_data = true;
+			cons_pos += roundup_len(len);
+
+			if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+				sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+				err = r->sample_cb(r->ctx, sample, len);
+				if (err < 0) {
+					/* update consumer pos and bail out */
+					smp_store_release(r->consumer_pos,
+							  cons_pos);
+					return err;
+				}
+				cnt++;
+			}
+
+			if (cnt >= n)
+				goto done;
+		}
+	} while (got_new_data);
+
+done:
+	smp_store_release(r->consumer_pos, cons_pos);
+	return cnt;
+}
+
+static int64_t ringbuf_process_ring(struct ring *r, size_t n)
+{
+	if (likely(!r->overwrite_mode))
+		return ringbuf_process_normal_ring(r, n);
+	else
+		return ringbuf_process_overwrite_ring(r, n);
+}
+
 /* Consume available ring buffer(s) data without event polling, up to n
  * records.
  *
-- 
2.43.0


  parent reply	other threads:[~2025-08-04  2:27 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-04  2:20 [PATCH bpf-next 0/4] Add overwrite mode for bpf ring buffer Xu Kuohai
2025-08-04  2:20 ` [PATCH bpf-next 1/4] bpf: " Xu Kuohai
2025-08-08 21:39   ` Alexei Starovoitov
2025-08-12  4:02     ` Xu Kuohai
2025-08-13 13:22       ` Jordan Rome
2025-08-14 13:59         ` Xu Kuohai
2025-08-04  2:20 ` Xu Kuohai [this message]
2025-08-13 18:21   ` [PATCH bpf-next 2/4] libbpf: ringbuf: Add overwrite ring buffer process Zvi Effron
2025-08-14 14:10     ` Xu Kuohai
2025-08-14 19:34   ` Eduard Zingerman
2025-08-14 21:20     ` Zvi Effron
2025-08-22 21:23   ` Andrii Nakryiko
2025-08-23 14:38     ` Xu Kuohai
2025-08-04  2:20 ` [PATCH bpf-next 3/4] selftests/bpf: Add test for overwrite ring buffer Xu Kuohai
2025-08-04  2:21 ` [PATCH bpf-next 4/4] selftests/bpf/benchs: Add overwrite mode bench for rb-libbpf Xu Kuohai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250804022101.2171981-3-xukuohai@huaweicloud.com \
    --to=xukuohai@huaweicloud.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=chen.dylane@linux.dev \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kerneljasonxing@gmail.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=martin.kelly@crowdstrike.com \
    --cc=martin.lau@linux.dev \
    --cc=memxor@gmail.com \
    --cc=mykolal@fb.com \
    --cc=paul.chaignon@gmail.com \
    --cc=sdf@fomichev.me \
    --cc=sdf@google.com \
    --cc=shuah@kernel.org \
    --cc=song@kernel.org \
    --cc=willemb@google.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).