public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "David Wang" <00107082@163.com>
To: "Peter Zijlstra" <peterz@infradead.org>
Cc: mingo@redhat.com, acme@kernel.org, namhyung@kernel.org,
	mingo@kernel.org, yeoreum.yun@arm.com, leo.yan@arm.com,
	linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2] perf/core: restore __perf_remove_from_context when DETACH_EXIT not set
Date: Tue, 3 Jun 2025 21:49:17 +0800 (CST)	[thread overview]
Message-ID: <b3624f4.bde4.197360db3ea.Coremail.00107082@163.com> (raw)
In-Reply-To: <6487969b.b907.19735e42e05.Coremail.00107082@163.com>




At 2025-06-03 21:03:55, "David Wang" <00107082@163.com> wrote:
>
>At 2025-06-03 20:54:40, "Peter Zijlstra" <peterz@infradead.org> wrote:
>>On Tue, Jun 03, 2025 at 02:50:56PM +0200, Peter Zijlstra wrote:
>>> On Tue, Jun 03, 2025 at 06:44:58PM +0800, David Wang wrote:
>>> 
>>> 
>>> > (As yeoreum.yun@arm.com pointed out,  the change in perf_remove_from_context() made
>>> > perf_event_set_state() happened before list_del_event(), resulting in perf_cgroup_event_disable()
>>> > not called.)
>>> 
>>> Aah, d'0h. Let me see what we should do there.
>>
>>Does this help? This way event_sched_out() will call
>>perf_cgroup_event_disable().
>>
>>
>>diff --git a/kernel/events/core.c b/kernel/events/core.c
>>index f34c99f8ce8f..adbb0372825f 100644
>>--- a/kernel/events/core.c
>>+++ b/kernel/events/core.c
>>@@ -2494,9 +2494,9 @@ __perf_remove_from_context(struct perf_event *event,
>> 	if (flags & DETACH_REVOKE)
>> 		state = PERF_EVENT_STATE_REVOKED;
>> 	if (flags & DETACH_DEAD) {
>>-		event->pending_disable = 1;
>> 		state = PERF_EVENT_STATE_DEAD;
>> 	}
>>+	event->pending_disable = 1;
>> 	event_sched_out(event, ctx);
>> 	perf_event_set_state(event, min(event->state, state));
>> 
>
>Ok, I will give it a try and update later.

Sadly no, caught a kernel panic at the first round....

I tried to use perf to reproduce this, but no luck so far. Following is the code I used to reproduce.

(The code is silly, but valid I think....)
To reproduce, I use following steps:
Open two terminals:
1. In terminal A
mkdir /sys/fs/cgroup/mytest
echo $$ > /sys/fs/cgroup/mytest/cgroup.procs
2. In terminal B
[compile the code below first, if not done yet: g++ -o profiler xx.cpp]
./profiler mytest
3. Do something in terminal A, usually I would run following command under kernel source tree
for i in {1..200}; do find ./ -name nottobefound > /dev/null; done
4. Wait for 5~10 minutes
5. In terminal B, ctrl-C stop the profiler
6. reboot
(On my system, with 6.15 at most 4 rounds of test would catch a kernel panic.)

I could not reproduce it with my KVM, maybe I need more trials.
Not sure whether anyone else could reproduce this. 


---
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <elf.h>
#include <string.h>

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
using namespace std;


#define MAXN  512
#define MAXCPU 128
#define error(msg) do { perror(msg); exit(1); } while(0)

/*
 * Thin wrapper for the perf_event_open(2) system call, which glibc does
 * not provide a stub for. Forwards all arguments unchanged and returns
 * the new event file descriptor, or -1 with errno set on failure.
 */
static long perf_event_open(struct perf_event_attr *perf_event,
                            pid_t pid, int cpu, int group_fd,
                            unsigned long flags)
{
	long ret = syscall(__NR_perf_event_open, perf_event, pid, cpu,
	                   group_fd, flags);
	return ret;
}

// One poll slot per opened per-CPU perf event fd (filled in main()).
struct pollfd polls[MAXCPU];
// res for cleanup
static long long psize;  // page size, set once in main() via sysconf()
// Maps each perf event fd -> {mmap'ed ring-buffer base address,
// last consumed data_head position}; walked by int_exit() for cleanup.
map<int, pair<void*, long long>> res;
static long long eventc = 0;  // total sample records seen; printed on exit

void int_exit(int _) {
    for (auto x: res) {
        auto y = x.second;
        void* addr = y.first;
        munmap(addr, (1+MAXN)*psize);
        close(x.first);
    }
    res.clear();
    printf("total %lld events collect\n", eventc);
    exit(0);
}
// Account one sample record in the perf ring buffer. The record lives at
// base + (offset wrapped to the buffer size); its header's size field
// tells the caller how far to advance the read position.
// NOTE(review): assumes a record never straddles the buffer wrap point —
// true for perf ring buffers, whose records do not cross the boundary.
int process_event(char *base, unsigned long long size, unsigned long long offset) {
	const struct perf_event_header *hdr =
		(const struct perf_event_header *)(base + (offset % size));
	++eventc;
	return hdr->size;
}

// Attach a software CPU-clock sampling event to the named cgroup on every
// online CPU, then drain the per-CPU ring buffers from a poll loop until
// interrupted (SIGINT/SIGTERM), counting sample records as it goes.
int main(int argc, char *argv[]) {
	if (argc<2) { printf("Need cgroup name\n"); return 1; }
	char xb[256];
	snprintf(xb, sizeof(xb), "/sys/fs/cgroup/%s", argv[1]);
	// The cgroup directory fd is what PERF_FLAG_PID_CGROUP expects in
	// the "pid" argument of perf_event_open().
	int cgroup_id = open(xb, O_CLOEXEC);
	if (cgroup_id <= 0) error("error open cgroup dir");
	int cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
	psize = sysconf(_SC_PAGE_SIZE); // getpagesize();
	struct perf_event_attr attr;
	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_freq = 9999;//777; // adjust it
	attr.freq = 1;
	attr.wakeup_events = 16;
	attr.sample_type = PERF_SAMPLE_CALLCHAIN;
	attr.sample_max_stack = 32;
	attr.exclude_callchain_user = 1;
	// start perf event
	int i, k, fd;
	void* addr;
	// One event + ring buffer per CPU; the mapping is 1 metadata page
	// plus MAXN data pages (so MAXN must be a power of two for perf).
	for (i=0, k=0; i<cpu_num&&i<MAXCPU; i++) {
		printf("attaching cpu %d\n", i);
		fd = perf_event_open(&attr, cgroup_id, i, -1, PERF_FLAG_FD_CLOEXEC|PERF_FLAG_PID_CGROUP);
		if (fd<0) error("fail to open perf event");
		addr = mmap(NULL, (1+MAXN)*psize, PROT_READ, MAP_SHARED, fd, 0);
		if (addr == MAP_FAILED) error("mmap failed");
		res[fd] = {addr, 0};
		polls[k].fd = fd;
		polls[k].events = POLLIN;
		polls[k].revents = 0;
		k++;
	}
	signal(SIGINT, int_exit);
	signal(SIGTERM, int_exit);

	unsigned long long head;
	int event_size;
	struct perf_event_mmap_page *mp;
	while (poll(polls, k, -1)>0) {
		for (i=0; i<k; i++) {
			if ((polls[i].revents&POLLIN)==0) continue;
			fd = polls[i].fd;
			addr = res[fd].first;
			mp = (struct perf_event_mmap_page *)addr;
			head = res[fd].second;
			// Pause the ring output so data_head stays stable while
			// we read records; resumed after the drain below.
			ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
			// If the kernel overwrote data we had not consumed yet
			// (read-only mapping => overwrite mode, tail is not
			// updated), clamp and re-align our position so it stays
			// congruent with a record boundary modulo data_size.
			if (head>mp->data_head) head=mp->data_head;
			head = mp->data_head-((mp->data_head-head)%mp->data_size);
			// NOTE(review): no explicit read barrier after loading
			// data_head; presumably the PAUSE ioctl makes this safe —
			// worth confirming against the perf mmap documentation.
			while(head<mp->data_head) {
				head += process_event((char*)addr+mp->data_offset, mp->data_size, head);
			}
			res[fd].second = mp->data_head;
			ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);
		}
	}
	int_exit(0);
	return 0;
}

  reply	other threads:[~2025-06-03 13:49 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-03  3:26 [PATCH] perf/core: restore __perf_remove_from_context when DETACH_EXIT not set David Wang
2025-06-03  8:33 ` [PATCH v2] " David Wang
2025-06-03  9:13   ` Peter Zijlstra
2025-06-03 10:44     ` David Wang
2025-06-03 12:50       ` Peter Zijlstra
2025-06-03 12:54         ` Peter Zijlstra
2025-06-03 13:03           ` David Wang
2025-06-03 13:49             ` David Wang [this message]
2025-06-03 13:22           ` Yeoreum Yun
2025-06-03 14:08             ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b3624f4.bde4.197360db3ea.Coremail.00107082@163.com \
    --to=00107082@163.com \
    --cc=acme@kernel.org \
    --cc=leo.yan@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=yeoreum.yun@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox