public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [Linux-ia64] gettimeofday patch
@ 2003-02-18  3:09 Louis Yu-Kiu Kwan
  0 siblings, 0 replies; only message in thread
From: Louis Yu-Kiu Kwan @ 2003-02-18  3:09 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: TEXT/PLAIN, Size: 253 bytes --]

Hi David,
   Louis has been working with me to produce a fast version of
gettimeofday.

This version executes in around 300 cycles on Itanium I (down from 900
or so for the original version), and so can be said to have
microsecond precision.  

Peter C

[-- Attachment #2: Type: TEXT/PLAIN, Size: 6007 bytes --]

===== arch/ia64/kernel/fsys.S 1.5 vs ? (writable without lock!)  =====
--- 1.5/arch/ia64/kernel/fsys.S	Thu Jan 30 17:16:51 2003
+++ ?/arch/ia64/kernel/fsys.S	Tue Feb 18 11:24:50 2003
@@ -123,6 +123,189 @@
 	br.ret.sptk.many b6
 END(fsys_set_tid_address)
 
+ENTRY(fsys_gettimeofday)
+	// Computes the time of day from xtime plus an ITC-derived offset and stores sec/usec through r32; r8 = 0 on success.
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = address of the flags word (r16: presumably the current task pointer -- confirm)
+	;;
+	ld4 r9=[r9]
+	;;	
+	and r9=TIF_ALLWORK_MASK,r9	// r9 = flags & TIF_ALLWORK_MASK
+	;;
+	// r32, r33 should contain the 2 args of gettimeofday
+	// NOTE(review): r33 is only tested for NaT, and r32 is stored through with no NULL/fault handling visible here -- confirm this is intended.
+	tnat.nz p6,p7=r32		// in case the args are NaT
+	cmp.ne p8, p0=0, r9		// p8 = some TIF_ALLWORK_MASK flag is set
+	;;
+
+(p7)	tnat.nz p6,p0=r33		// r32 is not NaT: p6 = (r33 is NaT)
+(p8)	br.spnt.many fsys_fallback_syscall
+	;;
+(p6)	adds r8=EINVAL, r0		// r8 = EINVAL
+(p6)	adds r10=-1, r0			// r10 = -1	
+(p6)	br.ret.spnt.many b6		// return with r8 set to EINVAL
+	// Load the addresses of everything that is read inside the retry loop.
+	movl r17=xtime_lock
+	movl r19=xtime			// xtime is a timespec struct
+	movl r20=cpu_info__per_cpu
+	movl r26=jiffies
+	movl r27=wall_jiffies
+	movl r31=last_nsec_offset
+	movl r24=2361183241434822607	// = ceil(2^68 / 125); for division hack (only for / 1000)
+	;;	
+	setf.sig f9=r24			// f9 is used for division hack
+	adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r20
+	adds r22=IA64_CPUINFO_ITM_DELTA_OFFSET, r20
+	adds r30=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r20
+	adds r3=IA64_TIMESPEC_TV_NSEC_OFFSET, r19	
+					// r3 = &xtime->tv_nsec
+
+
+while_loop_1:
+
+	// *** seq = read_seqbegin(&xtime_lock); ***
+
+	ld4 r23=[r17]			// since &xtime_lock == &xtime_lock->sequence
+#ifdef CONFIG_SMP
+	mf
+#endif
+	;;				// barrier()
+	// now r23 = seq
+
+	ld8 r14=[r31]			// r14 = old = last_nsec_offset		
+
+	ld8 r28=[r26]			// r28 = jiffies
+	ld8 r29=[r27]			// r29 = wall_jiffies
+	;;
+
+	ld8 r24=[r21]			// r24 now contains itm_next
+	ld8 r25=[r22]			// r25 now contains itm_delta
+
+	sub r28=r28, r29		// r28 now contains "lost"
+	;;
+	adds r28=1, r28			// r28 now contains "lost + 1"
+	;;
+	setf.sig f6=r28
+	setf.sig f7=r25
+
+	ld8 r2=[r19]			// r2 = sec = xtime.tv_sec
+	;;
+
+	ld8 r28=[r3]			// r28 = nsec = xtime.tv_nsec
+	xma.l f8=f6, f7, f0		// put lower 64-bits result of f6 * f7 in f8
+	;;
+	getf.sig r18=f8			// r18 now contains the (lost + 1) * itm_delta
+	;;
+	sub r18=r24, r18		// r18 is last_tick 
+	mov r25=ar.itc			// put time stamp into r25 (ITC) == now
+	;;
+	cmp.leu p7, p8 = r18, r25	// if last_tick <= now, p7 = 1; otherwise p8 = 1
+	;;
+(p7)	ld8 r24=[r30]			// r24 contains local_cpu_data->nsec_per_cyc value
+(p7)	sub r25=r25, r18		// elapsed_cycles in r25
+	;;
+(p7)	setf.sig f6=r24
+(p7)	setf.sig f7=r25
+	;;
+(p7)	xma.l f8=f6, f7, f0
+
+	;;
+(p7)	getf.sig r18=f8			// r18 = elapsed_cycles * local_cpu_data->nsec_per_cyc
+	;;	
+(p7)	shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT
+
+(p8) 	ld8 r18=[r31]			// now < last_tick: r18 = last_nsec_offset (is unsigned long)
+
+	// now end of gettimeoffset, r18 should contain the desired result (offset)
+
+	// *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
+
+	;; 				// barrier()
+
+#ifdef CONFIG_SMP
+	mf
+#endif
+	adds r24=1, r0			// r24 = 1
+	ld4 r25=[r17]			// r25 = xtime_lock->sequence (load again)
+	;;
+	and r24=r24, r23		// r24 = seq & 1
+	xor r25=r25, r23		// r25 = xtime_lock->sequence ^ seq
+	;;
+	or r24=r24, r25			// now r24 = read_seqretry(&xtime_lock, seq)
+	;;
+	cmp.ne p7, p0=r24, r0
+	;;
+(p7)	br.spnt.many while_loop_1	// continue
+
+	cmp.leu p7, p8 = r18, r14	// if (offset <= old)
+	;;
+(p7)	mov r18=r14			// offset = old
+(p7)	br.spnt.few loop_exit_1		// break
+	// cmpxchg compares memory with ar.ccv and, on a match, stores its second source operand.
+	mov ar.ccv=r14			// ar.ccv = old (the value expected in memory)
+	;;
+	cmpxchg8.acq r25=[r31], r18, ar.ccv	
+					// if (last_nsec_offset == old) last_nsec_offset = offset; r25 = value read (atomic!)
+	;;
+	cmp.eq p8,p0 = r25, r14		// p8 = memory still held old, so the store happened
+	;;
+(p8)	br.sptk.many loop_exit_1
+	br.sptk.many while_loop_1	// last_nsec_offset changed underneath us; try again
+
+loop_exit_1:
+
+	// at this point, r28 is nsec and r18 is offset
+
+	add r3=r28, r18			// r3 = (nsec + offset)
+	;;
+	// now we try to divide r3 by 1000 to get the value in usec instead of nsec
+	// r3 / 1000 == (((r3 >> 3) * f9) >> 64) >> 4, with f9 = ceil(2^68 / 125)
+	shr.u r24 = r3, 3
+	;;
+	setf.sig f7 = r24
+	;;
+	xmpy.hu f6 = f7, f9		// f6 = high 64 bits of the unsigned product f7 * f9
+	;;
+	getf.sig r3 = f6
+	;;
+	shr.u r3 = r3, 4
+	// end of division, r3 is divided by 1000 (=usec)
+
+	addl r24=1000000, r0		// r24 = 1000000
+	;;
+
+while_loop_2:
+
+	cmp.geu p7, p8=r3, r24		// while (usec >= 1000000)
+	;;
+(p8)	br.sptk.many loop_exit_2
+
+	sub r3=r3, r24			// usec -= 1000000
+	adds r2=1, r2			// ++sec
+
+	br.many while_loop_2
+
+loop_exit_2:
+
+	// finally, 	r2 = sec
+	// 		r3 = usec
+
+	mov r24=r32			// we need to preserve this...
+	;;
+	st8 [r32]=r2, 8			// store sec, then post-increment r32 by 8
+	;;
+	st8 [r32]=r3			// store them in the timeval struct
+	;;
+	mov r32=r24			// restore the original r32
+
+	mov r8=r0			// success
+
+	MCKINLEY_E9_WORKAROUND
+
+	br.ret.sptk.many b6
+	// return to caller
+
+END(fsys_gettimeofday)
+
 	.rodata
 	.align 8
 	.globl fsyscall_table
@@ -190,7 +373,7 @@
 	data8 fsys_fallback_syscall	// setrlimit
 	data8 fsys_fallback_syscall	// getrlimit		// 1085
 	data8 fsys_fallback_syscall	// getrusage
-	data8 fsys_fallback_syscall	// gettimeofday
+	data8 fsys_gettimeofday		// gettimeofday
 	data8 fsys_fallback_syscall	// settimeofday
 	data8 fsys_fallback_syscall	// select
 	data8 fsys_fallback_syscall	// poll			// 1090
===== arch/ia64/tools/print_offsets.c 1.14 vs ? (writable without lock!)  =====
--- 1.14/arch/ia64/tools/print_offsets.c	Tue Feb 11 13:22:24 2003
+++ ?/arch/ia64/tools/print_offsets.c	Tue Feb 18 11:03:39 2003
@@ -170,6 +170,12 @@
     /* for assembly files which can't include sched.h: */
     { "IA64_CLONE_VFORK",		CLONE_VFORK },
     { "IA64_CLONE_VM",			CLONE_VM },
+	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+    { "IA64_CPUINFO_ITM_DELTA_OFFSET", 		offsetof (struct cpuinfo_ia64, itm_delta) },
+    { "IA64_CPUINFO_ITM_NEXT_OFFSET", 		offsetof (struct cpuinfo_ia64, itm_next) },
+    { "IA64_CPUINFO_NSEC_PER_CYC_OFFSET",	offsetof (struct cpuinfo_ia64, nsec_per_cyc) },
+    { "IA64_TIMESPEC_TV_NSEC_OFFSET", 		offsetof (struct timespec, tv_nsec) },
+
 };
 
 static const char *tabs = "\t\t\t\t\t\t\t\t\t\t";

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2003-02-18  3:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-02-18  3:09 [Linux-ia64] gettimeofday patch Louis Yu-Kiu Kwan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox