All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
To: linux-ia64@vger.kernel.org
Subject: Re: accessed/dirty bit handler tuning
Date: Mon, 03 Apr 2006 13:45:13 +0000	[thread overview]
Message-ID: <443126E9.6080209@bull.net> (raw)
In-Reply-To: <44157CF1.5060902@bull.net>

[-- Attachment #1: Type: text/plain, Size: 1458 bytes --]

Chen, Kenneth W wrote:

> Can you do some stress test experiments and let us know how many time ptc.l
> was actually executed in vhpt_miss/tlb_miss/dirty/access
> handler? Thanks.

Here is a patch that adds a small syscall to display or clear (./stat -clear)
the statistics.
Please verify that this is what you wanted (and check it for potential bugs...).

The first version I tried (enclosed in "#if 0") should have worked in virtual
mode. Unfortunately, I could not make it work within this short deadline.
Could you have a look at it and tell me why it fails?

I ran a "make -j 16" of the kernel on an 8 processor machine.
(It is actually 2 Tiger boxes connected via a Scalability Port Switch.)
Unfortunately, the I/O subsystem is weak: a single SCSI disk.

Here is what I got:

             VHPT miss counter: 1674978
       VHPT miss - hash purged:       0
        VHPT miss - PTE purged:       0
             ITLB miss counter:     293
            ITLB miss - purged:       0
             DTLB miss counter:    3806
            DTLB miss - purged:       0
            DIRTY trap counter:     224
                DIRTY - purged:       0
         I-ACCESS trap counter:       2
             I-ACCESS - purged:       0
         D-ACCESS trap counter:  173227
             D-ACCESS - purged:       0

Unless I am mistaken, no purge was observed.
It is quite curious to see so few dirty and i-access traps...

Have you got some good & stressing tests?

Zoltan


[-- Attachment #2: stat.diff --]
[-- Type: text/plain, Size: 9633 bytes --]

--- save/arch/ia64/kernel/entry.S	2006-03-15 11:07:38.000000000 +0100
+++ linux-2.6.16/arch/ia64/kernel/entry.S	2006-04-03 12:48:03.000000000 +0200
@@ -1619,5 +1619,6 @@ sys_call_table:
 	data8 sys_ni_syscall			// reserved for pselect
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_unshare
+	data8 sys_trap_statistics		// 1297
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
--- save/arch/ia64/kernel/ivt.S	2006-03-30 16:19:18.000000000 +0200
+++ linux-2.6.16/arch/ia64/kernel/ivt.S	2006-04-03 14:58:20.000000000 +0200
@@ -108,6 +108,36 @@ ENTRY(vhpt_miss)
 	movl r18=PAGE_SHIFT
 	mov r25=cr.itir
 #endif
+#if 0
+	/*
+	 * Increment the VHPT miss counter - must be identity mapped.
+	 */
+	LOAD_PHYSICAL(p0, r17, fault_statistics + VHPT_idx * _ENTRY_SIZE_)
+	movl r21=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	movl r19=PAGE_OFFSET
+	movl r20=PAGE_SHIFT << 2		// ... and protection key == 0
+	movl r22=PAGE_KERNEL
+	;;
+	or r19=r19,r17				// __va(&fault_statistics[VHPT_idx]
+	and r21=r21,r17				// Clear ed, reserved bits, and PTE control bits
+	;;
+	mov cr.ifa=r19
+	mov cr.itir=r20
+	or r21=r21,r22				// Insert control bits
+	;;
+	itc.d r21
+	;;
+	srlz.d
+	// Unsafe: the translation can be killed in the mean time
+	ld8.bias.nta r17=[r19]			// = fault_statistics[VHPT_idx]
+	;;
+	add r17=1,r17
+	mov cr.ifa=r16				// Restore
+	mov cr.itir=r25
+	;;
+	// Unsafe: the translation can be killed in the mean time
+	st8 [r19]=r17				// Not atomic increment - who cares?
+#endif
 	;;
 	rsm psr.dt				// use physical addressing for data
 	mov r31=pr				// save the predicate registers
@@ -132,6 +162,17 @@ ENTRY(vhpt_miss)
 (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
 
 	srlz.d
+	/*
+	 * Increment the VHPT miss counter.
+	 */
+	LOAD_PHYSICAL(p0, r19, fault_statistics + VHPT_idx * _ENTRY_SIZE_)
+	;;
+	ld8.bias.nta r28=[r19]
+	;;
+	add r28=1,r28
+	;;
+	st8 [r19]=r28				// Not atomic increment - who cares?
+
 	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
 
 	.pred.rel "mutex", p6, p7
@@ -197,11 +238,12 @@ ENTRY(vhpt_miss)
 	;;
 #ifdef CONFIG_SMP
 	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
+	 * We make sure the visibility of itc.* to generated purges (like ptc.ga)
+	 * before we re-read the *pgd ... PTE.
+	 * Having itc.i-d a new translation, there is no need for srlz.i, the rfi below
+	 * will do the serialization.
 	 */
-	dv_serialize_data
-
+(p7)	srlz.d
 	/*
 	 * Re-check pagetable entry.  If they changed, we may have received a ptc.g
 	 * between reading the pagetable and the "itc".  If so, flush the entry we
@@ -229,9 +271,34 @@ ENTRY(vhpt_miss)
 	mov r27=PAGE_SHIFT<<2
 	;;
 (p6)	ptc.l r22,r27				// purge PTE page translation
+	/*
+	 * Increment the VHPT miss - purge PTE page counter.
+	 * It is the next long. Accessed via its physical address.
+	 */
+	dv_serialize_data
+	LOAD_PHYSICAL(p6, r19, fault_statistics + (VHPT_idx + 1) * _ENTRY_SIZE_)
+	;;
+(p6)	ld8.bias.nta r17=[r19]
+	;;
+(p6)	add r17=1,r17
+	;;
+(p6)	st8 [r19]=r17				// Not atomic increment - who cares?
+
 (p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did *pte change
 	;;
 (p6)	ptc.l r16,r27				// purge translation
+	/*
+	 * Increment the VHPT miss - PTE purged counter.
+	 * It is in the 2nd next long. Accessed via its physical address.
+	 */
+	dv_serialize_data
+	LOAD_PHYSICAL(p6, r19, fault_statistics + (VHPT_idx + 2) * _ENTRY_SIZE_)
+	;;
+(p6)	ld8.bias.nta r17=[r19]
+	;;
+(p6)	add r17=1,r17
+	;;
+(p6)	st8 [r19]=r17				// Not atomic increment - who cares?
 #endif
 
 	mov pr=r31,-1				// restore predicate registers
@@ -266,16 +333,36 @@ ENTRY(itlb_miss)
 	;;
 #ifdef CONFIG_SMP
 	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
+	 * We make sure the visibility of itc.i to generated purges (like ptc.ga)
+	 * before we re-read the PTE.
+	 * There is no need for srlz.i, the rfi below will do the serialization.
 	 */
-	dv_serialize_data
-
+	srlz.d
 	ld8 r19=[r17]				// read *pte again and see if same
 	mov r20=PAGE_SHIFT<<2			// setup page size for purge
 	;;
 	cmp.ne p7,p0=r18,r19
 	;;
+	/*
+	 * Increment the ITLB miss counters.
+	 */
+	rsm psr.dt				// use physical addressing for data
+	;;
+	srlz.d
+	LOAD_PHYSICAL(p0, r19, fault_statistics + ITLB_idx * _ENTRY_SIZE_)
+	;;
+	ld8.bias.nta r28=[r19]
+	;;
+	add r28=1,r28
+	;;
+	st8 [r19]=r28,8				// Not atomic increment - who cares?
+	;;
+(p7)	ld8.bias.nta r28=[r19]			// Next long: ITLB miss - purged
+	;;
+(p7)	add r28=1,r28
+	;;
+(p7)	st8 [r19]=r28				// Not atomic increment - who cares?
+
 (p7)	ptc.l r16,r20
 #endif
 	mov pr=r31,-1
@@ -310,16 +397,35 @@ dtlb_fault:
 	;;
 #ifdef CONFIG_SMP
 	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
+	 * We make sure the visibility of itc.d to generated purges (like ptc.ga)
+	 * before we re-read the PTE.
 	 */
-	dv_serialize_data
-
+	srlz.d
 	ld8 r19=[r17]				// read *pte again and see if same
 	mov r20=PAGE_SHIFT<<2			// setup page size for purge
 	;;
 	cmp.ne p7,p0=r18,r19
 	;;
+	/*
+	 * Increment the DTLB miss counters.
+	 */
+	rsm psr.dt				// use physical addressing for data
+	;;
+	srlz.d
+	LOAD_PHYSICAL(p0, r19, fault_statistics + DTLB_idx * _ENTRY_SIZE_)
+	;;
+	ld8.bias.nta r28=[r19]
+	;;
+	add r28=1,r28
+	;;
+	st8 [r19]=r28,8				// Not atomic increment - who cares?
+	;;
+(p7)	ld8.bias.nta r28=[r19]			// Next long: DTLB miss - purged
+	;;
+(p7)	add r28=1,r28
+	;;
+(p7)	st8 [r19]=r28				// Not atomic increment - who cares?
+
 (p7)	ptc.l r16,r20
 #endif
 	mov pr=r31,-1
@@ -589,6 +695,26 @@ ENTRY(dirty_bit)
 	 *     very same dirty bit as we wanted to => our new translation is correct)
 	 */
 (p7)	ptc.l r16,r24
+	/*
+	 * Increment the DIRTY trap counters.
+	 */
+	rsm psr.dt				// use physical addressing for data
+	;;
+	srlz.d
+	LOAD_PHYSICAL(p0, r19, fault_statistics + DIRTY_idx * _ENTRY_SIZE_)
+	;;
+	ld8.bias.nta r27=[r19]
+	;;
+	add r27=1,r27
+	;;
+	st8 [r19]=r27,8				// Not atomic increment - who cares?
+	;;
+(p7)	ld8.bias.nta r27=[r19]			// Next long: DIRTY - purged
+	;;
+(p7)	add r27=1,r27
+	;;
+(p7)	st8 [r19]=r27				// Not atomic increment - who cares?
+
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
 #else
@@ -650,6 +776,26 @@ ENTRY(iaccess_bit)
 	mov r24=PAGE_SHIFT << 2
 	;;
 (p7)	ptc.l r16,r24
+	/*
+	 * Increment the I-ACCESS trap counters.
+	 */
+	rsm psr.dt				// use physical addressing for data
+	;;
+	srlz.d
+	LOAD_PHYSICAL(p0, r19, fault_statistics + IACC_idx * _ENTRY_SIZE_)
+	;;
+	ld8.bias.nta r27=[r19]
+	;;
+	add r27=1,r27
+	;;
+	st8 [r19]=r27,8				// Not atomic increment - who cares?
+	;;
+(p7)	ld8.bias.nta r27=[r19]			// Next long: I-ACCESS - purged
+	;;
+(p7)	add r27=1,r27
+	;;
+(p7)	st8 [r19]=r27				// Not atomic increment - who cares?
+
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
 #else
@@ -700,6 +846,26 @@ ENTRY(daccess_bit)
 	mov r24=PAGE_SHIFT << 2
 	;;
 (p7)	ptc.l r16,r24
+	/*
+	 * Increment the D-ACCESS trap counters.
+	 */
+	rsm psr.dt				// use physical addressing for data
+	;;
+	srlz.d
+	LOAD_PHYSICAL(p0, r19, fault_statistics + DACC_idx * _ENTRY_SIZE_)
+	;;
+	ld8.bias.nta r27=[r19]
+	;;
+	add r27=1,r27
+	;;
+	st8 [r19]=r27,8				// Not atomic increment - who cares?
+	;;
+(p7)	ld8.bias.nta r27=[r19]			// Next long: D-ACCESS - purged
+	;;
+(p7)	add r27=1,r27
+	;;
+(p7)	st8 [r19]=r27				// Not atomic increment - who cares?
+
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
 #else
--- save/kernel/sched.c	2006-03-15 11:09:03.000000000 +0100
+++ linux-2.6.16/kernel/sched.c	2006-04-03 15:06:57.000000000 +0200
@@ -199,6 +192,26 @@ struct prio_array {
 	struct list_head queue[MAX_PRIO];
 };
 
+
+long fault_statistics[MAX_TRAP_idx];
+
+
+/*
+ * Read / clear trap statistics
+ */
+asmlinkage long sys_trap_statistics(int index)
+{
+	if (index >= 0 && index < MAX_TRAP_idx)
+		return fault_statistics[index];
+	if (index == -1){
+		for (index = 0; index < MAX_TRAP_idx; index++)
+			fault_statistics[index] = 0;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
--- save/include/asm-ia64/system.h	2006-03-15 11:08:53.000000000 +0100
+++ linux-2.6.16/include/asm-ia64/system.h	2006-04-03 14:26:19.000000000 +0200
@@ -262,4 +262,28 @@ void sched_cacheflush(void);
 
 #endif /* __ASSEMBLY__ */
 
+
+/*
+ * For trap statistics
+ */
+#define	VHPT_idx		0		// VHPT miss counter
+#define	VHPT_HASH_PTC_idx	1		// VHPT miss - hash purged
+#define	VHPT_PTE_PTC_idx	2		// VHPT miss - PTE purged
+#define	ITLB_idx		3		// ITLB miss counter
+#define	ITLB_PTC_idx		4		// ITLB miss - purged
+#define	DTLB_idx		5		// DTLB miss counter
+#define	DTLB_PTC_idx		6		// DTLB miss - purged
+#define	DIRTY_idx		7		// DIRTY trap counter
+#define	DIRTY_PTC_idx		8		// DIRTY - purged
+#define	IACC_idx		9		// I-ACCESS trap counter
+#define	IACC_PTC_idx		10		// I-ACCESS - purged
+#define	DACC_idx		11		// D-ACCESS trap counter
+#define	DACC_PTC_idx		12		// D-ACCESS - purged
+
+
+#define	MAX_TRAP_idx		13
+
+#define	_ENTRY_SIZE_		8
+
+
 #endif /* _ASM_IA64_SYSTEM_H */
--- save/include/asm-ia64/unistd.h	2006-03-15 11:08:53.000000000 +0100
+++ linux-2.6.16/include/asm-ia64/unistd.h	2006-04-03 12:46:40.000000000 +0200
@@ -290,7 +290,7 @@
 
 #include <linux/config.h>
 
-#define NR_syscalls			273 /* length of syscall table */
+#define NR_syscalls			274 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 

[-- Attachment #3: stat.c --]
[-- Type: text/plain, Size: 1578 bytes --]

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>


/*
 * For trap statistics
 * Use "-1" to clear the counters
 */
#define	VHPT_idx		0		// VHPT miss counter
#define	VHPT_HASH_PTC_idx	1		// VHPT miss - hash purged
#define	VHPT_PTE_PTC_idx	2		// VHPT miss - PTE purged
#define	ITLB_idx		3		// ITLB miss counter
#define	ITLB_PTC_idx		4		// ITLB miss - purged
#define	DTLB_idx		5		// DTLB miss counter
#define	DTLB_PTC_idx		6		// DTLB miss - purged
#define	DIRTY_idx		7		// DIRTY trap counter
#define	DIRTY_PTC_idx		8		// DIRTY - purged
#define	IACC_idx		9		// I-ACCESS trap counter
#define	IACC_PTC_idx		10		// I-ACCESS - purged
#define	DACC_idx		11		// D-ACCESS trap counter
#define	DACC_PTC_idx		12		// D-ACCESS - purged


#define	MAX_TRAP_idx		13

#define	_ENTRY_SIZE_		8


/*
 * System call number passed to syscall(2) below; it must match the
 * sys_call_table slot that the kernel patch assigns to sys_trap_statistics.
 */
#define	sys_trap_statistics	1297


/*
 * Printable label for each counter. main() indexes this table with the
 * counter index (0 .. MAX_TRAP_idx - 1), so the entries must stay in the
 * same order as the *_idx definitions above.
 */
char *names[] = {
	"VHPT miss counter",
	"VHPT miss - hash purged",
	"VHPT miss - PTE purged",
	"ITLB miss counter",
	"ITLB miss - purged",
	"DTLB miss counter",
	"DTLB miss - purged",
	"DIRTY trap counter",
	"DIRTY - purged",
	"I-ACCESS trap counter",
	"I-ACCESS - purged",
	"D-ACCESS trap counter",
	"D-ACCESS - purged",
};


/*
 * Display or clear the trap statistics through the sys_trap_statistics
 * system call.
 *
 *	stat		print every counter, one per line
 *	stat -clear	ask the kernel to reset the counters (index -1)
 *
 * Any other argument list is treated like the plain display form.
 * Returns 0 on success, 1 if the system call reports an error.
 */
int main(int cnt, char *args[])
{
	int index;
	long count;

	if (cnt == 2 && strcmp(args[1], "-clear") == 0){
		if (syscall(sys_trap_statistics, -1) == -1){
			perror("sys_trap_statistics");
			return 1;
		}
		return 0;
	}
	for (index = 0; index < MAX_TRAP_idx; index++){
		/*
		 * The call returns the raw counter value, so -1 on its own
		 * does not prove a failure: clear errno first and report an
		 * error only if the library wrapper actually set it.
		 */
		errno = 0;
		count = syscall(sys_trap_statistics, index);
		if (count == -1 && errno != 0){
			perror("sys_trap_statistics");
			return 1;
		}
		printf("%30s: %7ld\n", names[index], count);
	}
	return 0;
}

  parent reply	other threads:[~2006-04-03 13:45 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-13 14:08 accessed/dirty bit handler tuning Zoltan Menyhart
2006-03-13 16:31 ` Christoph Lameter
2006-03-13 16:55 ` Zoltan Menyhart
2006-03-13 19:46 ` Chen, Kenneth W
2006-03-13 20:05 ` Luck, Tony
2006-03-13 20:14 ` Chen, Kenneth W
2006-03-13 22:53 ` Chen, Kenneth W
2006-03-14 10:12 ` Zoltan Menyhart
2006-03-14 19:33 ` Chen, Kenneth W
2006-03-15 13:29 ` Zoltan Menyhart
2006-03-15 17:37 ` Chen, Kenneth W
2006-03-16  9:57 ` Zoltan Menyhart
2006-03-16 10:19 ` Luck, Tony
2006-03-16 19:12 ` Chen, Kenneth W
2006-03-29  8:11 ` Zoltan Menyhart
2006-03-29  8:28 ` Chen, Kenneth W
2006-03-29 13:37 ` Zoltan Menyhart
2006-03-29 17:01 ` Zoltan Menyhart
2006-03-29 22:57 ` Luck, Tony
2006-03-29 22:59 ` Chen, Kenneth W
2006-03-30 15:13 ` Zoltan Menyhart
2006-03-31 16:23 ` Zoltan Menyhart
2006-03-31 19:08 ` Chen, Kenneth W
2006-03-31 21:18 ` Zoltan Menyhart
2006-03-31 21:51 ` Chen, Kenneth W
2006-03-31 22:14 ` Chen, Kenneth W
2006-03-31 22:57 ` Zoltan Menyhart
2006-04-03  8:46 ` Zoltan Menyhart
2006-04-03 13:45 ` Zoltan Menyhart [this message]
2006-04-03 15:49 ` Luck, Tony
2006-04-03 15:57 ` Luck, Tony
2006-04-03 16:33 ` Zoltan Menyhart
2006-04-03 16:42 ` David Mosberger-Tang
2006-04-03 17:23 ` Zoltan Menyhart
2006-04-03 17:50 ` Luck, Tony
2006-04-03 18:27 ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=443126E9.6080209@bull.net \
    --to=zoltan.menyhart@bull.net \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.