public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
From: Stephane Eranian <eranian@hpl.hp.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [Linux-ia64] [BUG] perfmon doesn't send SIGPROF in kernel 2.5.64
Date: Wed, 16 Apr 2003 21:50:33 +0000	[thread overview]
Message-ID: <marc-linux-ia64-105590723705517@msgid-missing> (raw)
In-Reply-To: <marc-linux-ia64-105590723705454@msgid-missing>

[-- Attachment #1: Type: text/plain, Size: 623 bytes --]

Eric,

On Fri, Apr 11, 2003 at 04:33:33PM +0200, Eric Piel wrote:
> > Let me know how it goes, especially on SMP.
> For now I would say it doesn't work :-( I get the same results from
> realfeel4 than before.

Yes, it does not work. I found the source of the problem and it is 
pretty subtle especially in system-wide mode. The deferred notification
does not work with kernel only threads which is likely what you have 
running in system-wide mode with a mostly idle system.

The attached patch against 2.5.67 corrects the problem. Note that this
patch also includes other updates.

Let me know how it goes.

-- 
-Stephane

[-- Attachment #2: perfmon-2.5-030416 --]
[-- Type: text/plain, Size: 12191 bytes --]

diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/arch/ia64/kernel/perfmon.c bk-base/arch/ia64/kernel/perfmon.c
--- bk-official-extra/arch/ia64/kernel/perfmon.c	Thu Apr 10 14:01:35 2003
+++ bk-base/arch/ia64/kernel/perfmon.c	Tue Apr 15 15:58:21 2003
@@ -225,8 +225,9 @@
 	unsigned int protected:1;	/* allow access to creator of context only */
 	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
 	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
+	unsigned int unsecure:1;	/* sp = 0 for non self-monitored task */
 	unsigned int trap_reason:2;	/* reason for going into pfm_block_ovfl_reset() */
-	unsigned int reserved:21;
+	unsigned int reserved:20;
 } pfm_context_flags_t;
 
 #define PFM_TRAP_REASON_NONE		0x0	/* default value */
@@ -279,6 +280,7 @@
 #define ctx_fl_using_dbreg	ctx_flags.using_dbreg
 #define ctx_fl_excl_idle	ctx_flags.excl_idle
 #define ctx_fl_trap_reason	ctx_flags.trap_reason
+#define ctx_fl_unsecure		ctx_flags.unsecure
 
 /*
  * global information about all sessions
@@ -1077,10 +1079,15 @@
 		 * and it must be a valid CPU
 		 */
 		cpu = ffz(~pfx->ctx_cpu_mask);
+#ifdef CONFIG_SMP
 		if (cpu_online(cpu) == 0) {
+#else
+		if (cpu != 0) {
+#endif
 			DBprintk(("CPU%d is not online\n", cpu));
 			return -EINVAL;
 		}
+
 		/*
 		 * check for pre-existing pinning, if conflicting reject
 		 */
@@ -1226,6 +1233,7 @@
 	ctx->ctx_fl_block     = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
 	ctx->ctx_fl_system    = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
 	ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+	ctx->ctx_fl_unsecure  = (ctx_flags & PFM_FL_UNSECURE) ? 1: 0;
 	ctx->ctx_fl_frozen    = 0;
 	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
 
@@ -1252,9 +1260,11 @@
 	DBprintk(("context=%p, pid=%d notify_task=%p\n",
 			(void *)ctx, task->pid, ctx->ctx_notify_task));
 
-	DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d\n", 
+	DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d excl_idle=%d unsecure=%d\n", 
 			(void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit, 
-			ctx->ctx_fl_block, ctx->ctx_fl_system, ctx->ctx_fl_excl_idle));
+			ctx->ctx_fl_block, ctx->ctx_fl_system, 
+			ctx->ctx_fl_excl_idle,
+			ctx->ctx_fl_unsecure));
 
 	/*
 	 * when no notification is required, we can make this visible at the last moment
@@ -1883,7 +1893,10 @@
 		DBprintk(("unblocking %d \n", task->pid));
 		up(sem);
 	} else {
+		struct thread_info *info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
 		task->thread.pfm_ovfl_block_reset = 1;
+		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
+		set_bit(TIF_NOTIFY_RESUME, &info->flags);
 	}
 #if 0
 	/*
@@ -2052,7 +2065,7 @@
 	/*
 	 * reinforce secure monitoring: cannot toggle psr.up
 	 */
-	ia64_psr(regs)->sp = 1;
+	if (ctx->ctx_fl_unsecure == 0) ia64_psr(regs)->sp = 1;
 
 	return 0;
 }
@@ -2733,12 +2746,13 @@
 	 * again
 	 */
 	th->pfm_ovfl_block_reset = 0;
+	clear_thread_flag(TIF_NOTIFY_RESUME);
 
 	/*
 	 * do some sanity checks first
 	 */
 	if (!ctx) {
-		printk(KERN_DEBUG "perfmon: [%d] has no PFM context\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
 		return;
 	}
 	/*
@@ -2901,14 +2915,17 @@
 /*
  * main overflow processing routine.
  * it can be called from the interrupt path or explicitely during the context switch code
+ * Arguments:
+ *	mode: 0=coming from PMU interrupt, 1=coming from ctxsw 
+ *	
  * Return:
  *	new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
  */
 static unsigned long
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
+pfm_overflow_handler(int mode, struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
 {
-	unsigned long mask;
 	struct thread_struct *t;
+	unsigned long mask;
 	unsigned long old_val;
 	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
 	int i;
@@ -2999,10 +3016,10 @@
 	/*
 	 * check for sampling buffer
 	 *
-	 * if present, record sample. We propagate notification ONLY when buffer
-	 * becomes full.
+	 * if present, record sample only when a 64-bit counter has overflowed.
+	 * We propagate notification ONLY when buffer becomes full.
 	 */
-	if(CTX_HAS_SMPL(ctx)) {
+	if(CTX_HAS_SMPL(ctx) && ovfl_pmds) {
 		ret = pfm_record_sample(task, ctx, ovfl_pmds, regs);
 		if (ret == 1) {
 			/*
@@ -3047,12 +3064,55 @@
 	 * ctx_notify_task could already be NULL, checked in pfm_notify_user() 
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_notify_task != task) {
-		t->pfm_ovfl_block_reset = 1; /* will cause blocking */
 		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCKSIG;
 	} else {
-		t->pfm_ovfl_block_reset = 1; /* will cause blocking */
 		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_SIG;
 	}
+	/*
+	 * we cannot block in system wide mode and we do not go
+	 * through the PMU ctxsw code. Therefore we can generate
+	 * the notification here. In system wide mode, the current
+	 * task maybe different from the task controlling the session
+	 * on this CPU, therefore owner can be different from current.
+	 *
+	 * In per-process mode, this function gets called from 
+	 * the interrupt handler or pfm_load_regs(). The mode argument
+	 * tells where we are coming from. When coming from the interrupt
+	 * handler, it is safe to notify (send signal) right here because
+	 * we do not hold any runqueue locks needed by send_sig_info(). 
+	 *
+	 * However when coming from ctxsw, we cannot send the signal here.
+	 * It must be deferred until we are sure we do not hold any runqueue
+	 * related locks. The current task maybe different from the owner
+	 * only in UP mode. The deferral is implemented using the 
+	 * TIF_NOTIFY_RESUME mechanism. In this case, the pending work
+	 * is checked when the task is about to leave the kernel (see
+	 * entry.S). As of this version of perfmon, a kernel only
+	 * task cannot be monitored in per-process mode. Therefore,
+	 * when this function gets called from pfm_load_regs(), we know
+	 * we have a user level task which will eventually either exit
+	 * or leave the kernel, and thereby go through the checkpoint
+	 * for TIF_*.
+	 */
+	if (ctx->ctx_fl_system || mode == 0) {
+		pfm_notify_user(ctx);
+		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
+	} else {
+		struct thread_info *info;
+
+		/*
+		 * given that TIF_NOTIFY_RESUME is not specific to
+		 * perfmon, we need to have a second level check to
+		 * verify the source of the notification.
+		 */
+		task->thread.pfm_ovfl_block_reset = 1;
+		/*
+		 * when coming from ctxsw, current still points to the
+		 * previous task, therefore we must work with task and not current.
+		 */
+		info = ((struct thread_info *) ((char *) task + IA64_TASK_SIZE));
+		set_bit(TIF_NOTIFY_RESUME, &info->flags);
+	}
 
 	/*
 	 * keep the PMU frozen until either pfm_restart() or 
@@ -3060,7 +3120,10 @@
 	 */
 	ctx->ctx_fl_frozen = 1;
 
-	DBprintk_ovfl(("return pmc0=0x%x must_block=%ld reason=%d\n",
+	DBprintk_ovfl(("current [%d] owner [%d] mode=%d return pmc0=0x%x must_block=%ld reason=%d\n",
+		current->pid, 
+		PMU_OWNER() ? PMU_OWNER()->pid : -1,
+		mode,
 		ctx->ctx_fl_frozen ? 0x1 : 0x0, 
 		t->pfm_ovfl_block_reset,
 		ctx->ctx_fl_trap_reason));
@@ -3115,7 +3178,7 @@
 		/* 
 		 * assume PMC[0].fr = 1 at this point 
 		 */
-		pmc0 = pfm_overflow_handler(task, ctx, pmc0, regs);
+		pmc0 = pfm_overflow_handler(0, task, ctx, pmc0, regs);
 		/*
 		 * we can only update pmc0 when the overflow
 		 * is for the current context. In UP the current
@@ -3455,7 +3518,7 @@
 	 * Side effect on ctx_fl_frozen is possible.
 	 */
 	if (t->pmc[0] & ~0x1) {
-		t->pmc[0] = pfm_overflow_handler(task, ctx, t->pmc[0], NULL);
+		t->pmc[0] = pfm_overflow_handler(1, task, ctx, t->pmc[0], NULL);
 	}
 
 	/*
@@ -3755,11 +3818,15 @@
 
 	preempt_disable();
 	/*
-	 * make sure child cannot mess up the monitoring session
+	 * for secure sessions, make sure child cannot mess up 
+	 * the monitoring session.
 	 */
-	 ia64_psr(regs)->sp = 1;
-	 DBprintk(("enabling psr.sp for [%d]\n", task->pid));
-
+	if (ctx->ctx_fl_unsecure == 0) {
+		ia64_psr(regs)->sp = 1;
+	 	DBprintk(("enabling psr.sp for [%d]\n", task->pid));
+	} else {
+	 	DBprintk(("psr.sp=%d [%d]\n", ia64_psr(regs)->sp, task->pid));
+	}
 
 	/*
 	 * if there was a virtual mapping for the sampling buffer
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/arch/ia64/kernel/perfmon_mckinley.h bk-base/arch/ia64/kernel/perfmon_mckinley.h
--- bk-official-extra/arch/ia64/kernel/perfmon_mckinley.h	Thu Apr 10 14:01:35 2003
+++ bk-base/arch/ia64/kernel/perfmon_mckinley.h	Thu Apr 10 13:46:37 2003
@@ -25,8 +25,8 @@
 /* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_reserved, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
 /* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_reserved, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
 /* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_reserved, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff9fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
-/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff9fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff3fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff3fffffffUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
 /* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_reserved, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
 /* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL,  pfm_mck_reserved, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
 /* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL,  pfm_mck_reserved, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
@@ -143,10 +143,7 @@
 		case  8: val8 = *val;
 			 val13 = th->pmc[13];
 			 val14 = th->pmc[14];
-			 *val |= 1UL << 2; /* bit 2 must always be 1 */
 			 check_case1 = 1;
-			 break;
-		case  9: *val |= 1UL << 2; /* bit 2 must always be 1 */
 			 break;
 		case 13: val8  = th->pmc[8];
 			 val13 = *val;
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/include/asm-ia64/perfmon.h bk-base/include/asm-ia64/perfmon.h
--- bk-official-extra/include/asm-ia64/perfmon.h	Thu Apr 10 14:02:30 2003
+++ bk-base/include/asm-ia64/perfmon.h	Tue Apr 15 15:59:04 2003
@@ -41,6 +41,7 @@
 #define PFM_FL_NOTIFY_BLOCK    	 0x04	/* block task on user level notifications */
 #define PFM_FL_SYSTEM_WIDE	 0x08	/* create a system wide context */
 #define PFM_FL_EXCL_IDLE         0x20   /* exclude idle task from system wide session */
+#define PFM_FL_UNSECURE		 0x40   /* allow unsecure monitoring for non self-monitoring task */
 
 /*
  * PMC flags
@@ -125,7 +126,7 @@
  * Define the version numbers for both perfmon as a whole and the sampling buffer format.
  */
 #define PFM_VERSION_MAJ		1U
-#define PFM_VERSION_MIN		3U
+#define PFM_VERSION_MIN		4U
 #define PFM_VERSION		(((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
 
 #define PFM_SMPL_VERSION_MAJ	1U
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet bk-official-extra/include/asm-ia64/processor.h bk-base/include/asm-ia64/processor.h
--- bk-official-extra/include/asm-ia64/processor.h	Thu Apr 10 14:02:30 2003
+++ bk-base/include/asm-ia64/processor.h	Tue Apr 15 16:06:17 2003
@@ -291,7 +291,7 @@
 
 #define start_thread(regs,new_ip,new_sp) do {							\
 	set_fs(USER_DS);									\
-	regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL | IA64_PSR_SP))	\
+	regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL))		\
 			 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS));		\
 	regs->cr_iip = new_ip;									\
 	regs->ar_rsc = 0xf;		/* eager mode, privilege level 3 */			\

      parent reply	other threads:[~2003-04-16 21:50 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-04-10 13:13 [Linux-ia64] [BUG] perfmon doesn't send SIGPROF in kernel 2.5.64 Eric Piel
2003-04-11  1:24 ` Stephane Eranian
2003-04-11 14:33 ` Eric Piel
2003-04-16 21:50 ` Stephane Eranian [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=marc-linux-ia64-105590723705517@msgid-missing \
    --to=eranian@hpl.hp.com \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox