From: Stephane Eranian <eranian@hpl.hp.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [Linux-ia64] [BUG] perfmon doesn't send SIGPROF in kernel 2.5.64
Date: Wed, 16 Apr 2003 21:50:33 +0000 [thread overview]
Message-ID: <marc-linux-ia64-105590723705517@msgid-missing> (raw)
In-Reply-To: <marc-linux-ia64-105590723705454@msgid-missing>
[-- Attachment #1: Type: text/plain, Size: 623 bytes --]
Eric,
On Fri, Apr 11, 2003 at 04:33:33PM +0200, Eric Piel wrote:
> > Let me know how it goes, especially on SMP.
> For now I would say it doesn't work :-( I get the same results from
> realfeel4 than before.
Yes, it does not work. I found the source of the problem and it is
pretty subtle especially in system-wide mode. The deferred notification
does not work with kernel only threads which is likely what you have
running in system-wide mode with a mostly idle system.
The attached patch against 2.5.67 corrects the problem. Note that this
patch also includes other updates.
Let me know how it goes.
--
-Stephane
[-- Attachment #2: perfmon-2.5-030416 --]
[-- Type: text/plain, Size: 12191 bytes --]
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/arch/ia64/kernel/perfmon.c bk-base/arch/ia64/kernel/perfmon.c
--- bk-official-extra/arch/ia64/kernel/perfmon.c Thu Apr 10 14:01:35 2003
+++ bk-base/arch/ia64/kernel/perfmon.c Tue Apr 15 15:58:21 2003
@@ -225,8 +225,9 @@
unsigned int protected:1; /* allow access to creator of context only */
unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
unsigned int excl_idle:1; /* exclude idle task in system wide session */
+ unsigned int unsecure:1; /* sp = 0 for non self-monitored task */
unsigned int trap_reason:2; /* reason for going into pfm_block_ovfl_reset() */
- unsigned int reserved:21;
+ unsigned int reserved:20;
} pfm_context_flags_t;
#define PFM_TRAP_REASON_NONE 0x0 /* default value */
@@ -279,6 +280,7 @@
#define ctx_fl_using_dbreg ctx_flags.using_dbreg
#define ctx_fl_excl_idle ctx_flags.excl_idle
#define ctx_fl_trap_reason ctx_flags.trap_reason
+#define ctx_fl_unsecure ctx_flags.unsecure
/*
* global information about all sessions
@@ -1077,10 +1079,15 @@
* and it must be a valid CPU
*/
cpu = ffz(~pfx->ctx_cpu_mask);
+#ifdef CONFIG_SMP
if (cpu_online(cpu) == 0) {
+#else
+ if (cpu != 0) {
+#endif
DBprintk(("CPU%d is not online\n", cpu));
return -EINVAL;
}
+
/*
* check for pre-existing pinning, if conflicting reject
*/
@@ -1226,6 +1233,7 @@
ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+ ctx->ctx_fl_unsecure = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
ctx->ctx_fl_frozen = 0;
ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
@@ -1252,9 +1260,11 @@
DBprintk(("context=%p, pid=%d notify_task=%p\n",
(void *)ctx, task->pid, ctx->ctx_notify_task));
- DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d\n",
+ DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d unsecure=%d\n",
(void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit,
- ctx->ctx_fl_block, ctx->ctx_fl_system, ctx->ctx_fl_excl_idle));
+ ctx->ctx_fl_block, ctx->ctx_fl_system,
+ ctx->ctx_fl_excl_idle,
+ ctx->ctx_fl_unsecure));
/*
* when no notification is required, we can make this visible at the last moment
@@ -1883,7 +1893,10 @@
DBprintk(("unblocking %d \n", task->pid));
up(sem);
} else {
+ struct thread_info *info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
task->thread.pfm_ovfl_block_reset = 1;
+ ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
+ set_bit(TIF_NOTIFY_RESUME, &info->flags);
}
#if 0
/*
@@ -2052,7 +2065,7 @@
/*
* reinforce secure monitoring: cannot toggle psr.up
*/
- ia64_psr(regs)->sp = 1;
+ if (ctx->ctx_fl_unsecure == 0) ia64_psr(regs)->sp = 1;
return 0;
}
@@ -2733,12 +2746,13 @@
* again
*/
th->pfm_ovfl_block_reset = 0;
+ clear_thread_flag(TIF_NOTIFY_RESUME);
/*
* do some sanity checks first
*/
if (!ctx) {
- printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid);
+ printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
return;
}
/*
@@ -2901,14 +2915,17 @@
/*
* main overflow processing routine.
* it can be called from the interrupt path or explicitely during the context switch code
+ * Arguments:
+ * mode: 0=coming from PMU interrupt, 1=coming from ctxsw
+ *
* Return:
* new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
*/
static unsigned long
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
+pfm_overflow_handler(int mode, struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
{
- unsigned long mask;
struct thread_struct *t;
+ unsigned long mask;
unsigned long old_val;
unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
int i;
@@ -2999,10 +3016,10 @@
/*
* check for sampling buffer
*
- * if present, record sample. We propagate notification ONLY when buffer
- * becomes full.
+ * if present, record sample only when a 64-bit counter has overflowed.
+ * We propagate notification ONLY when buffer becomes full.
*/
- if(CTX_HAS_SMPL(ctx)) {
+ if(CTX_HAS_SMPL(ctx) && ovfl_pmds) {
ret = pfm_record_sample(task, ctx, ovfl_pmds, regs);
if (ret == 1) {
/*
@@ -3047,12 +3064,55 @@
* ctx_notify_task could already be NULL, checked in pfm_notify_user()
*/
if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) {
- t->pfm_ovfl_block_reset = 1; /* will cause blocking */
ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCKSIG;
} else {
- t->pfm_ovfl_block_reset = 1; /* will cause blocking */
ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_SIG;
}
+ /*
+ * we cannot block in system wide mode and we do not go
+ * through the PMU ctxsw code. Therefore we can generate
+ * the notification here. In system wide mode, the current
+ * task maybe different from the task controlling the session
+ * on this CPU, therefore owner can be different from current.
+ *
+ * In per-process mode, this function gets called from
+ * the interrupt handler or pfm_load_regs(). The mode argument
+ * tells where we are coming from. When coming from the interrupt
+ * handler, it is safe to notify (send signal) right here because
+ * we do not hold any runqueue locks needed by send_sig_info().
+ *
+ * However when coming from ctxsw, we cannot send the signal here.
+ * It must be deferred until we are sure we do not hold any runqueue
+ * related locks. The current task maybe different from the owner
+ * only in UP mode. The deferral is implemented using the
+ * TIF_NOTIFY_RESUME mechanism. In this case, the pending work
+ * is checked when the task is about to leave the kernel (see
+ * entry.S). As of this version of perfmon, a kernel only
+ * task cannot be monitored in per-process mode. Therefore,
+ * when this function gets called from pfm_load_regs(), we know
+ * we have a user level task which will eventually either exit
+ * or leave the kernel, and thereby go through the checkpoint
+ * for TIF_*.
+ */
+ if (ctx->ctx_fl_system || mode == 0) {
+ pfm_notify_user(ctx);
+ ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
+ } else {
+ struct thread_info *info;
+
+ /*
+ * given that TIF_NOTIFY_RESUME is not specific to
+ * perfmon, we need to have a second level check to
+ * verify the source of the notification.
+ */
+ task->thread.pfm_ovfl_block_reset = 1;
+ /*
+ * when coming from ctxsw, current still points to the
+ * previous task, therefore we must work with task and not current.
+ */
+ info = ((struct thread_info *) ((char *) task + IA64_TASK_SIZE));
+ set_bit(TIF_NOTIFY_RESUME, &info->flags);
+ }
/*
* keep the PMU frozen until either pfm_restart() or
@@ -3060,7 +3120,10 @@
*/
ctx->ctx_fl_frozen = 1;
- DBprintk_ovfl(("return pmc0=0x%x must_block=%ld reason=%d\n",
+ DBprintk_ovfl(("current [%d] owner [%d] mode=%d return pmc0=0x%x must_block=%ld reason=%d\n",
+ current->pid,
+ PMU_OWNER() ? PMU_OWNER()->pid : -1,
+ mode,
ctx->ctx_fl_frozen ? 0x1 : 0x0,
t->pfm_ovfl_block_reset,
ctx->ctx_fl_trap_reason));
@@ -3115,7 +3178,7 @@
/*
* assume PMC[0].fr = 1 at this point
*/
- pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
+ pmc0 = pfm_overflow_handler(0, task, ctx, pmc0, regs);
/*
* we can only update pmc0 when the overflow
* is for the current context. In UP the current
@@ -3455,7 +3518,7 @@
* Side effect on ctx_fl_frozen is possible.
*/
if (t->pmc[0] & ~0x1) {
- t->pmc[0] = pfm_overflow_handler(task, ctx, t->pmc[0], NULL);
+ t->pmc[0] = pfm_overflow_handler(1, task, ctx, t->pmc[0], NULL);
}
/*
@@ -3755,11 +3818,15 @@
preempt_disable();
/*
- * make sure child cannot mess up the monitoring session
+ * for secure sessions, make sure child cannot mess up
+ * the monitoring session.
*/
- ia64_psr(regs)->sp = 1;
- DBprintk(("enabling psr.sp for [%d]\n", task->pid));
-
+ if (ctx->ctx_fl_unsecure == 0) {
+ ia64_psr(regs)->sp = 1;
+ DBprintk(("enabling psr.sp for [%d]\n", task->pid));
+ } else {
+ DBprintk(("psr.sp=%d [%d]\n", ia64_psr(regs)->sp, task->pid));
+ }
/*
* if there was a virtual mapping for the sampling buffer
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/arch/ia64/kernel/perfmon_mckinley.h bk-base/arch/ia64/kernel/perfmon_mckinley.h
--- bk-official-extra/arch/ia64/kernel/perfmon_mckinley.h Thu Apr 10 14:01:35 2003
+++ bk-base/arch/ia64/kernel/perfmon_mckinley.h Thu Apr 10 13:46:37 2003
@@ -25,8 +25,8 @@
/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_reserved, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_reserved, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_reserved, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff9fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff9fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_reserved, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_reserved, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_reserved, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
@@ -143,10 +143,7 @@
case 8: val8 = *val;
val13 = th->pmc[13];
val14 = th->pmc[14];
- *val |= 1UL << 2; /* bit 2 must always be 1 */
check_case1 = 1;
- break;
- case 9: *val |= 1UL << 2; /* bit 2 must always be 1 */
break;
case 13: val8 = th->pmc[8];
val13 = *val;
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/include/asm-ia64/perfmon.h bk-base/include/asm-ia64/perfmon.h
--- bk-official-extra/include/asm-ia64/perfmon.h Thu Apr 10 14:02:30 2003
+++ bk-base/include/asm-ia64/perfmon.h Tue Apr 15 15:59:04 2003
@@ -41,6 +41,7 @@
#define PFM_FL_NOTIFY_BLOCK 0x04 /* block task on user level notifications */
#define PFM_FL_SYSTEM_WIDE 0x08 /* create a system wide context */
#define PFM_FL_EXCL_IDLE 0x20 /* exclude idle task from system wide session */
+#define PFM_FL_UNSECURE 0x40 /* allow unsecure monitoring for non self-monitoring task */
/*
* PMC flags
@@ -125,7 +126,7 @@
* Define the version numbers for both perfmon as a whole and the sampling buffer format.
*/
#define PFM_VERSION_MAJ 1U
-#define PFM_VERSION_MIN 3U
+#define PFM_VERSION_MIN 4U
#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
#define PFM_SMPL_VERSION_MAJ 1U
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/include/asm-ia64/processor.h bk-base/include/asm-ia64/processor.h
--- bk-official-extra/include/asm-ia64/processor.h Thu Apr 10 14:02:30 2003
+++ bk-base/include/asm-ia64/processor.h Tue Apr 15 16:06:17 2003
@@ -291,7 +291,7 @@
#define start_thread(regs,new_ip,new_sp) do { \
set_fs(USER_DS); \
- regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL | IA64_PSR_SP)) \
+ regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL)) \
& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS)); \
regs->cr_iip = new_ip; \
regs->ar_rsc = 0xf; /* eager mode, privilege level 3 */ \
prev parent reply other threads:[~2003-04-16 21:50 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-04-10 13:13 [Linux-ia64] [BUG] perfmon doesn't send SIGPROF in kernel 2.5.64 Eric Piel
2003-04-11 1:24 ` Stephane Eranian
2003-04-11 14:33 ` Eric Piel
2003-04-16 21:50 ` Stephane Eranian [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=marc-linux-ia64-105590723705517@msgid-missing \
--to=eranian@hpl.hp.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.