Re: [PATCH v2] perf/core: restore __perf_remove_from_context when DETACH_EXIT not set

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "David Wang" <00107082@163.com>
To: "Peter Zijlstra" <peterz@infradead.org>
Cc: mingo@redhat.com, acme@kernel.org, namhyung@kernel.org,
	mingo@kernel.org, yeoreum.yun@arm.com, leo.yan@arm.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2] perf/core: restore __perf_remove_from_context when DETACH_EXIT not set
Date: Tue, 3 Jun 2025 21:49:17 +0800 (CST)	[thread overview]
Message-ID: <b3624f4.bde4.197360db3ea.Coremail.00107082@163.com> (raw)
In-Reply-To: <6487969b.b907.19735e42e05.Coremail.00107082@163.com>




At 2025-06-03 21:03:55, "David Wang" <00107082@163.com> wrote:
>
>At 2025-06-03 20:54:40, "Peter Zijlstra" <peterz@infradead.org> wrote:
>>On Tue, Jun 03, 2025 at 02:50:56PM +0200, Peter Zijlstra wrote:
>>> On Tue, Jun 03, 2025 at 06:44:58PM +0800, David Wang wrote:
>>> 
>>> 
>>> > (As yeoreum.yun@arm.com pointed out,  the change in perf_remove_from_context() made
>>> > perf_event_set_state() happened before list_del_event(), resulting in perf_cgroup_event_disable()
>>> > not called.)
>>> 
>>> Aah, d'0h. Let me see what we should do there.
>>
>>Does this help? This way event_sched_out() will call
>>perf_cgroup_event_disable().
>>
>>
>>diff --git a/kernel/events/core.c b/kernel/events/core.c
>>index f34c99f8ce8f..adbb0372825f 100644
>>--- a/kernel/events/core.c
>>+++ b/kernel/events/core.c
>>@@ -2494,9 +2494,9 @@ __perf_remove_from_context(struct perf_event *event,
>> 	if (flags & DETACH_REVOKE)
>> 		state = PERF_EVENT_STATE_REVOKED;
>> 	if (flags & DETACH_DEAD) {
>>-		event->pending_disable = 1;
>> 		state = PERF_EVENT_STATE_DEAD;
>> 	}
>>+	event->pending_disable = 1;
>> 	event_sched_out(event, ctx);
>> 	perf_event_set_state(event, min(event->state, state));
>> 
>
>Ok, I will give it a try and update later.

Sadly no, caught a kernel panic at the first round....

I tried to use perf to reproduce this, but no luck so far. Following is the code I used to reproduce.

(The code is silly, but valid I think....)
To reproduce, I use following steps:
Open two terminals:
1. In terminal A
mkdir /sys/fs/cgroup/mytest
echo $$ > /sys/fs/cgroup/mytest/cgroup.procs
2. In terminal B
[g++ following code if not done yet g++ -o profiler xx.cpp]
./profiler mytest
3. Do something in terminal A, usually I would run following command under kernel source tree
for i in {1..200}; do find ./ -name nottobefound > /dev/null; done
4. wait for 5~10mintes
5. In terminal B, ctrl-C stop the profiler
6. reboot
(On my system, with 6.15 at most 4 rounds of test would catch a kernel panic.)

I could not reproduce it with my KVM, maybe I need more trials.
Not sure whether anyone else could reproduce this. 


---
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <elf.h>
#include <string.h>

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
using namespace std;


#define MAXN  512
#define MAXCPU 128
#define error(msg) do { perror(msg); exit(1); } while(0)

static long perf_event_open(struct perf_event_attr *perf_event,
		pid_t pid, int cpu, int group_fd, unsigned long flags) {
    return syscall(__NR_perf_event_open, perf_event,
		    pid, cpu, group_fd, flags);
}

struct pollfd polls[MAXCPU];
// res for cleanup
static long long psize;
map<int, pair<void*, long long>> res;
static long long eventc = 0;

void int_exit(int _) {
    for (auto x: res) {
        auto y = x.second;
        void* addr = y.first;
        munmap(addr, (1+MAXN)*psize);
        close(x.first);
    }
    res.clear();
    printf("total %lld events collect\n", eventc);
    exit(0);
}
int process_event(char *base, unsigned long long size, unsigned long long offset) {
	struct perf_event_header* p = NULL;
	offset%=size;
	p = (struct perf_event_header*) (base+offset);
	eventc++;
	return p->size;
}

int main(int argc, char *argv[]) {
	if (argc<2) { printf("Need cgroup name\n"); return 1; }
	char xb[256];
	snprintf(xb, sizeof(xb), "/sys/fs/cgroup/%s", argv[1]);
	int cgroup_id = open(xb, O_CLOEXEC);
	if (cgroup_id <= 0) error("error open cgroup dir");
	int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
	psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
	struct perf_event_attr attr;
	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_freq = 9999;//777; // adjust it
	attr.freq = 1;
	attr.wakeup_events = 16;
	attr.sample_type = PERF_SAMPLE_CALLCHAIN;
	attr.sample_max_stack = 32;
	attr.exclude_callchain_user = 1;
	// start perf event
	int i, k, fd;
	void* addr;
	for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
		printf("attaching cpu %d\n", i);
		fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
		if (fd<0) error("fail to open perf event");
		addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
		if (addr == MAP_FAILED) error("mmap failed");
		res[fd] = {addr, 0};
		polls[k].fd = fd;
		polls[k].events = POLLIN;
		polls[k].revents = 0;
		k++;
	}
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	unsigned long long head;
	int event_size;
	struct perf_event_mmap_page *mp;
	while (poll(polls, k, -1)>0) {
		for (i=0; i<k; i++) {
			if ((polls[i].revents&POLLIN)==0) continue;
			fd = polls[i].fd;
			addr = res[fd].first;
			mp = (struct perf_event_mmap_page *)addr;
			head = res[fd].second;
			ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
			if (head>mp->data_head) head=mp->data_head;
			head = mp->data_head-((mp->data_head-head)%mp->data_size);
			while(head<mp->data_head) {
				head += process_event((char*)addr+mp->data_offset, mp->data_size, head);
			}
			res[fd].second = mp->data_head;
			ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
		}
	}
	int_exit(0);
	return 0;
}

next prev parent reply	other threads:[~2025-06-03 13:49 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-03  3:26 [PATCH] perf/core: restore __perf_remove_from_context when DETACH_EXIT not set David Wang
2025-06-03  8:33 ` [PATCH v2] " David Wang
2025-06-03  9:13   ` Peter Zijlstra
2025-06-03 10:44     ` David Wang
2025-06-03 12:50       ` Peter Zijlstra
2025-06-03 12:54         ` Peter Zijlstra
2025-06-03 13:03           ` David Wang
2025-06-03 13:49             ` David Wang [this message]
2025-06-03 13:22           ` Yeoreum Yun
2025-06-03 14:08             ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b3624f4.bde4.197360db3ea.Coremail.00107082@163.com \
    --to=00107082@163.com \
    --cc=acme@kernel.org \
    --cc=leo.yan@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=yeoreum.yun@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.