Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH 09/15] ia64/pv_ops/pv_time_ops: add sched_clock hook.
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

Add a sched_clock() hook to paravirtualize sched_clock().
The ia64 sched_clock() is based on ar.itc, which isn't stable
in a virtualized environment because a vcpu may move around
between pcpus, so it needs paravirtualization.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/include/asm/paravirt.h |    7 +++++++
 arch/ia64/include/asm/timex.h    |    1 +
 arch/ia64/kernel/head.S          |   10 ++++++++--
 arch/ia64/kernel/paravirt.c      |    1 +
 arch/ia64/kernel/time.c          |    9 +++++++++
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 56f69f9..a73e77a 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -225,6 +225,8 @@ struct pv_time_ops {
 	int (*do_steal_accounting)(unsigned long *new_itm);
 
 	void (*clocksource_resume)(void);
+
+	unsigned long long (*sched_clock)(void);
 };
 
 extern struct pv_time_ops pv_time_ops;
@@ -242,6 +244,11 @@ paravirt_do_steal_accounting(unsigned long *new_itm)
 	return pv_time_ops.do_steal_accounting(new_itm);
 }
 
+static inline unsigned long long paravirt_sched_clock(void)
+{
+	return pv_time_ops.sched_clock();
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #else
diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h
index 4e03cfe..86c7db8 100644
--- a/arch/ia64/include/asm/timex.h
+++ b/arch/ia64/include/asm/timex.h
@@ -40,5 +40,6 @@ get_cycles (void)
 }
 
 extern void ia64_cpu_local_tick (void);
+extern unsigned long long ia64_native_sched_clock (void);
 
 #endif /* _ASM_IA64_TIMEX_H */
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 59301c4..23f846d 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1050,7 +1050,7 @@ END(ia64_delay_loop)
  * except that the multiplication and the shift are done with 128-bit
  * intermediate precision so that we can produce a full 64-bit result.
  */
-GLOBAL_ENTRY(sched_clock)
+GLOBAL_ENTRY(ia64_native_sched_clock)
 	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
 	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
 	;;
@@ -1066,7 +1066,13 @@ GLOBAL_ENTRY(sched_clock)
 	;;
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
-END(sched_clock)
+END(ia64_native_sched_clock)
+#ifndef CONFIG_PARAVIRT
+	//unsigned long long
+	//sched_clock(void) __attribute__((alias("ia64_native_sched_clock")));
+	.global sched_clock
+sched_clock = ia64_native_sched_clock
+#endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 GLOBAL_ENTRY(cycle_to_cputime)
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 9f14c16..6bc33a6 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -366,4 +366,5 @@ ia64_native_do_steal_accounting(unsigned long *new_itm)
 
 struct pv_time_ops pv_time_ops = {
 	.do_steal_accounting = ia64_native_do_steal_accounting,
+	.sched_clock = ia64_native_sched_clock,
 };
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a4..91047f8 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -50,6 +50,15 @@ EXPORT_SYMBOL(last_cli_ip);
 #endif
 
 #ifdef CONFIG_PARAVIRT
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+unsigned long long sched_clock(void)
+{
+        return paravirt_sched_clock();
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
 static void
 paravirt_clocksource_resume(void)
 {
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 08/15] ia64/pv_ops/xen: paravirtualize read/write ar.itc and ar.itm
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

paravirtualize ar.itc and ar.itm in order to support save/restore.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/include/asm/xen/inst.h      |   21 +++++++++
 arch/ia64/include/asm/xen/interface.h |    9 ++++
 arch/ia64/include/asm/xen/minstate.h  |   11 ++++-
 arch/ia64/include/asm/xen/privop.h    |    2 +
 arch/ia64/kernel/asm-offsets.c        |    2 +
 arch/ia64/xen/xen_pv_ops.c            |   80 ++++++++++++++++++++++++++++++++-
 6 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
index e8e01b2..90537dc 100644
--- a/arch/ia64/include/asm/xen/inst.h
+++ b/arch/ia64/include/asm/xen/inst.h
@@ -113,6 +113,27 @@
 .endm
 #define MOV_FROM_PSR(pred, reg, clob)	__MOV_FROM_PSR pred, reg, clob
 
+/* assuming ar.itc is read with interrupt disabled. */
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)		\
+(pred)	movl clob = XSI_ITC_OFFSET;				\
+	;;							\
+(pred)	ld8 clob = [clob];					\
+(pred)	mov reg = ar.itc;					\
+	;;							\
+(pred)	add reg = reg, clob;					\
+	;;							\
+(pred)	movl clob = XSI_ITC_LAST;				\
+	;;							\
+(pred)	ld8 clob = [clob];					\
+	;;							\
+(pred)	cmp.geu.unc pred_clob, p0 = clob, reg;			\
+	;;							\
+(pred_clob)	add reg = 1, clob;				\
+	;;							\
+(pred)	movl clob = XSI_ITC_LAST;				\
+	;;							\
+(pred)	st8 [clob] = reg
+
 
 #define MOV_TO_IFA(reg, clob)	\
 	movl clob = XSI_IFA;	\
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index f00fab4..e951e74 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -209,6 +209,15 @@ struct mapped_regs {
 			unsigned long krs[8];	/* kernel registers */
 			unsigned long tmp[16];	/* temp registers
 						   (e.g. for hyperprivops) */
+
+			/* itc paravirtualization
+			 * vAR.ITC = mAR.ITC + itc_offset
+			 * itc_last is one which was lastly passed to
+			 * the guest OS in order to prevent it from
+			 * going backwords.
+			 */
+			unsigned long itc_offset;
+			unsigned long itc_last;
 		};
 	};
 };
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index 4d92d9b..c57fa91 100644
--- a/arch/ia64/include/asm/xen/minstate.h
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -1,3 +1,12 @@
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define XEN_ACCOUNT_GET_STAMP		\
+	MOV_FROM_ITC(pUStk, p6, r20, r2);
+#else
+#define XEN_ACCOUNT_GET_STAMP
+#endif
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -123,7 +132,7 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
-	ACCOUNT_GET_STAMP									\
+	XEN_ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h
index 71ec754..2261dda 100644
--- a/arch/ia64/include/asm/xen/privop.h
+++ b/arch/ia64/include/asm/xen/privop.h
@@ -55,6 +55,8 @@
 #define XSI_BANK1_R16			(XSI_BASE + XSI_BANK1_R16_OFS)
 #define XSI_BANKNUM			(XSI_BASE + XSI_BANKNUM_OFS)
 #define XSI_IHA				(XSI_BASE + XSI_IHA_OFS)
+#define XSI_ITC_OFFSET			(XSI_BASE + XSI_ITC_OFFSET_OFS)
+#define XSI_ITC_LAST			(XSI_BASE + XSI_ITC_LAST_OFS)
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 742dbb1..af56501 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -316,5 +316,7 @@ void foo(void)
 	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
 	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
 	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
+	DEFINE_MAPPED_REG_OFS(XSI_ITC_OFFSET_OFS, itc_offset);
+	DEFINE_MAPPED_REG_OFS(XSI_ITC_LAST_OFS, itc_last);
 #endif /* CONFIG_XEN */
 }
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
index 46b418a..e83ede7 100644
--- a/arch/ia64/xen/xen_pv_ops.c
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -183,6 +183,75 @@ struct pv_fsys_data xen_fsys_data __initdata = {
  * intrinsics hooks.
  */
 
+static void
+xen_set_itm_with_offset(unsigned long val)
+{
+	/* ia64_cpu_local_tick() calls this with interrupt enabled. */
+	/* WARN_ON(!irqs_disabled()); */
+	xen_set_itm(val - XEN_MAPPEDREGS->itc_offset);
+}
+
+static unsigned long
+xen_get_itm_with_offset(void)
+{
+	/* unused at this moment */
+	printk(KERN_DEBUG "%s is called.\n", __func__);
+
+	WARN_ON(!irqs_disabled());
+	return ia64_native_getreg(_IA64_REG_CR_ITM) +
+		XEN_MAPPEDREGS->itc_offset;
+}
+
+/* ia64_set_itc() is only called by
+ * cpu_init() with ia64_set_itc(0) and ia64_sync_itc().
+ * So XEN_MAPPEDRESG->itc_offset cal be considered as almost constant.
+ */
+static void
+xen_set_itc(unsigned long val)
+{
+	unsigned long mitc;
+
+	WARN_ON(!irqs_disabled());
+	mitc = ia64_native_getreg(_IA64_REG_AR_ITC);
+	XEN_MAPPEDREGS->itc_offset = val - mitc;
+	XEN_MAPPEDREGS->itc_last = val;
+}
+
+static unsigned long
+xen_get_itc(void)
+{
+	unsigned long res;
+	unsigned long itc_offset;
+	unsigned long itc_last;
+	unsigned long ret_itc_last;
+
+	itc_offset = XEN_MAPPEDREGS->itc_offset;
+	do {
+		itc_last = XEN_MAPPEDREGS->itc_last;
+		res = ia64_native_getreg(_IA64_REG_AR_ITC);
+		res += itc_offset;
+		if (itc_last >= res)
+			res = itc_last + 1;
+		ret_itc_last = cmpxchg(&XEN_MAPPEDREGS->itc_last,
+				       itc_last, res);
+	} while (unlikely(ret_itc_last != itc_last));
+	return res;
+
+#if 0
+	/* ia64_itc_udelay() calls ia64_get_itc() with interrupt enabled.
+	   Should it be paravirtualized instead? */
+	WARN_ON(!irqs_disabled());
+	itc_offset = XEN_MAPPEDREGS->itc_offset;
+	itc_last = XEN_MAPPEDREGS->itc_last;
+	res = ia64_native_getreg(_IA64_REG_AR_ITC);
+	res += itc_offset;
+	if (itc_last >= res)
+		res = itc_last + 1;
+	XEN_MAPPEDREGS->itc_last = res;
+	return res;
+#endif
+}
+
 static void xen_setreg(int regnum, unsigned long val)
 {
 	switch (regnum) {
@@ -194,11 +263,14 @@ static void xen_setreg(int regnum, unsigned long val)
 		xen_set_eflag(val);
 		break;
 #endif
+	case _IA64_REG_AR_ITC:
+		xen_set_itc(val);
+		break;
 	case _IA64_REG_CR_TPR:
 		xen_set_tpr(val);
 		break;
 	case _IA64_REG_CR_ITM:
-		xen_set_itm(val);
+		xen_set_itm_with_offset(val);
 		break;
 	case _IA64_REG_CR_EOI:
 		xen_eoi(val);
@@ -222,6 +294,12 @@ static unsigned long xen_getreg(int regnum)
 		res = xen_get_eflag();
 		break;
 #endif
+	case _IA64_REG_AR_ITC:
+		res = xen_get_itc();
+		break;
+	case _IA64_REG_CR_ITM:
+		res = xen_get_itm_with_offset();
+		break;
 	case _IA64_REG_CR_IVR:
 		res = xen_get_ivr();
 		break;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 07/15] ia64/pv_ops: paravirtualize mov = ar.itc.
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

paravirtualize mov reg = ar.itc.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/include/asm/native/inst.h |    5 +++++
 arch/ia64/kernel/entry.S            |    4 ++--
 arch/ia64/kernel/fsys.S             |    4 ++--
 arch/ia64/kernel/ivt.S              |    2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
index 5e4e151..ad59fc6 100644
--- a/arch/ia64/include/asm/native/inst.h
+++ b/arch/ia64/include/asm/native/inst.h
@@ -77,6 +77,11 @@
 (pred)	mov reg = psr			\
 	CLOBBER(clob)
 
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+(pred)	mov reg = ar.itc				\
+	CLOBBER(clob)					\
+	CLOBBER_PRED(pred_clob)
+
 #define MOV_TO_IFA(reg, clob)	\
 	mov cr.ifa = reg	\
 	CLOBBER(clob)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index d435f4a..c5709c6 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -735,7 +735,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 __paravirt_work_processed_syscall:
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	adds r2=PT(LOADRS)+16,r12
-(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
 	;;
 (p6)	ld4 r31=[r18]				// load current_thread_info()->flags
@@ -984,7 +984,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
-(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
 	nop.i 0
 	;;
 #else
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 3544d75..3567d54 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -280,7 +280,7 @@ ENTRY(fsys_gettimeofday)
 (p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
 	;;
 	.pred.rel.mutex p8,p9
-(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
+	MOV_FROM_ITC(p8, p6, r2, r10)	// CPU_TIMER. 36 clocks latency!!!
 (p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
 (p13)	ld8 r25 = [r19]		// get itc_lastcycle value
 	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
@@ -684,7 +684,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-	mov.m r30=ar.itc			// M    get cycle for accounting
+	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
 #else
 	nop.m 0
 #endif
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index f675d8e..ec9a5fd 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -804,7 +804,7 @@ ENTRY(break_fault)
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-	mov.m r30=ar.itc			// M    get cycle for accounting
+	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
 #else
 	mov b6=r30				// I0   setup syscall handler branch reg early
 #endif
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 06/15] ia64/pv_ops/pvchecker: support mov = ar.itc paravirtualization
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

Add support for mov = ar.itc to pvchecker.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/include/asm/native/pvchk_inst.h |    5 +++++
 arch/ia64/scripts/pvcheck.sed             |    1 +
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h
index b8e6eb1..13b289e 100644
--- a/arch/ia64/include/asm/native/pvchk_inst.h
+++ b/arch/ia64/include/asm/native/pvchk_inst.h
@@ -180,6 +180,11 @@
 	IS_PRED_IN(pred)			\
 	IS_RREG_OUT(reg)			\
 	IS_RREG_CLOB(clob)
+#define MOV_FROM_ITC(pred, pred_clob, reg, clob)	\
+	IS_PRED_IN(pred)				\
+	IS_PRED_CLOB(pred_clob)				\
+	IS_RREG_OUT(reg)				\
+	IS_RREG_CLOB(clob)
 #define MOV_TO_IFA(reg, clob)			\
 	IS_RREG_IN(reg)				\
 	IS_RREG_CLOB(clob)
diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed
index ba66ac2..e59809a 100644
--- a/arch/ia64/scripts/pvcheck.sed
+++ b/arch/ia64/scripts/pvcheck.sed
@@ -17,6 +17,7 @@ s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g
 s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g
 s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g	# avoid ar.fpsr
 s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g
+s/mov.*=.*ar\.itc.*/.warning \"ar.itc should not used directly\"/g
 s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g
 s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g
 s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 05/15] ia64/pv_ops: paravirtualize fsys.S.
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

paravirtualize fsys.S.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/kernel/fsys.S |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 788319f..3544d75 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -419,7 +419,7 @@ EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
 	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
 	;;
 
-	rsm psr.i				// mask interrupt delivery
+	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
 	mov ar.ccv=0
 	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
 
@@ -492,7 +492,7 @@ EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
 #ifdef CONFIG_SMP
 	st4.rel [r31]=r0			// release the lock
 #endif
-	ssm psr.i
+	SSM_PSR_I(p0, p9, r31)
 	;;
 
 	srlz.d					// ensure psr.i is set again
@@ -514,7 +514,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
 #ifdef CONFIG_SMP
 	st4.rel [r31]=r0			// release the lock
 #endif
-	ssm psr.i
+	SSM_PSR_I(p0, p9, r17)
 	;;
 	srlz.d
 	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall
@@ -522,7 +522,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
 #ifdef CONFIG_SMP
 .lock_contention:
 	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
-	ssm psr.i
+	SSM_PSR_I(p0, p9, r17)
 	;;
 	srlz.d
 	br.sptk.many fsys_fallback_syscall
@@ -593,11 +593,11 @@ ENTRY(fsys_fallback_syscall)
 	adds r17=-1024,r15
 	movl r14=sys_call_table
 	;;
-	rsm psr.i
+	RSM_PSR_I(p0, r26, r27)
 	shladd r18=r17,3,r14
 	;;
 	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
-	mov r29=psr				// read psr (12 cyc load latency)
+	MOV_FROM_PSR(p0, r29, r26)		// read psr (12 cyc load latency)
 	mov r27=ar.rsc
 	mov r21=ar.fpsr
 	mov r26=ar.pfs
@@ -735,7 +735,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
 	;;
-	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
+	SSM_PSR_I(p0, p6, r22)			// M2   we're on kernel stacks now, reenable irqs
 	cmp.eq p8,p0=r3,r0			// A
 (p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
 
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 04/15] ia64/pv_ops/xen: preliminary to paravirtualizing fsys.S for xen.
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

This is a preliminary patch for paravirtualizing fsys.S.
Compile fsys.S twice, once for native and once for xen, and switch
between them at run time.
fsys.S itself will be paravirtualized later.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/include/asm/xen/inst.h |    3 +++
 arch/ia64/xen/Makefile           |    2 +-
 arch/ia64/xen/xen_pv_ops.c       |   14 ++++++++++++++
 3 files changed, 18 insertions(+), 1 deletions(-)

diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h
index 19c2ae1..e8e01b2 100644
--- a/arch/ia64/include/asm/xen/inst.h
+++ b/arch/ia64/include/asm/xen/inst.h
@@ -33,6 +33,9 @@
 #define __paravirt_work_processed_syscall_target \
 						xen_work_processed_syscall
 
+#define paravirt_fsyscall_table			xen_fsyscall_table
+#define paravirt_fsys_bubble_down		xen_fsys_bubble_down
+
 #define MOV_FROM_IFA(reg)	\
 	movl reg = XSI_IFA;	\
 	;;			\
diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile
index 0ad0224..b4ca2e6 100644
--- a/arch/ia64/xen/Makefile
+++ b/arch/ia64/xen/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_IA64_GENERIC) += machvec.o
 AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN
 
 # xen multi compile
-ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S
+ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S fsys.S
 ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o))
 obj-y += $(ASM_PARAVIRT_OBJS)
 define paravirtualized_xen
diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
index 5d491d9..46b418a 100644
--- a/arch/ia64/xen/xen_pv_ops.c
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -24,6 +24,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/pm.h>
+#include <linux/unistd.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/xencomm.h>
@@ -166,6 +167,18 @@ static struct pv_init_ops xen_init_ops __initdata = {
 };
 
 /***************************************************************************
+ * pv_fsys_data
+ * addresses for fsys
+ */
+
+extern unsigned long xen_fsyscall_table[NR_syscalls];
+extern char xen_fsys_bubble_down[];
+struct pv_fsys_data xen_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)xen_fsyscall_table,
+	.fsys_bubble_down = (void *)xen_fsys_bubble_down,
+};
+
+/***************************************************************************
  * pv_cpu_ops
  * intrinsics hooks.
  */
@@ -355,6 +368,7 @@ xen_setup_pv_ops(void)
 	xen_info_init();
 	pv_info = xen_info;
 	pv_init_ops = xen_init_ops;
+	pv_fsys_data = xen_fsys_data;
 	pv_cpu_ops = xen_cpu_ops;
 	pv_iosapic_ops = xen_iosapic_ops;
 	pv_irq_ops = xen_irq_ops;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 03/15] ia64/pv_ops: add hooks to paravirtualize fsyscall implementation.
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

Add two hooks, paravirt_get_fsyscall_table() and
paravirt_get_fsys_bubble_down(), to paravirtualize the fsyscall implementation.
This patch just adds the hooks for fsyscall and doesn't paravirtualize it.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/include/asm/native/inst.h |    3 +++
 arch/ia64/include/asm/paravirt.h    |   15 +++++++++++++++
 arch/ia64/kernel/Makefile           |    4 ++--
 arch/ia64/kernel/fsys.S             |   17 +++++++++--------
 arch/ia64/kernel/patch.c            |   26 +++++++++++++++++++++++---
 arch/ia64/mm/init.c                 |    3 ++-
 6 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h
index 0a1026c..5e4e151 100644
--- a/arch/ia64/include/asm/native/inst.h
+++ b/arch/ia64/include/asm/native/inst.h
@@ -30,6 +30,9 @@
 #define __paravirt_work_processed_syscall_target \
 						ia64_work_processed_syscall
 
+#define paravirt_fsyscall_table			ia64_native_fsyscall_table
+#define paravirt_fsys_bubble_down		ia64_native_fsys_bubble_down
+
 #ifdef CONFIG_PARAVIRT_GUEST_ASM_CLOBBER_CHECK
 # define PARAVIRT_POISON	0xdeadbeefbaadf00d
 # define CLOBBER(clob)				\
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 2bf3636..56f69f9 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -22,6 +22,21 @@
 #ifndef __ASM_PARAVIRT_H
 #define __ASM_PARAVIRT_H
 
+#ifndef __ASSEMBLY__
+/******************************************************************************
+ * fsys related addresses
+ */
+struct pv_fsys_data {
+	unsigned long *fsyscall_table;
+	void *fsys_bubble_down;
+};
+
+extern struct pv_fsys_data pv_fsys_data;
+
+unsigned long *paravirt_get_fsyscall_table(void);
+char *paravirt_get_fsys_bubble_down(void);
+#endif
+
 #ifdef CONFIG_PARAVIRT_GUEST
 
 #define PARAVIRT_HYPERVISOR_TYPE_DEFAULT	0
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea9..1ab150e 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -111,9 +111,9 @@ include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
 clean-files += $(objtree)/include/asm-ia64/nr-irqs.h
 
 #
-# native ivt.S and entry.S
+# native ivt.S, entry.S and fsys.S
 #
-ASM_PARAVIRT_OBJS = ivt.o entry.o
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
 define paravirtualized_native
 AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
 AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index c1625c7..788319f 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -25,6 +25,7 @@
 #include <asm/unistd.h>
 
 #include "entry.h"
+#include "paravirt_inst.h"
 
 /*
  * See Documentation/ia64/fsys.txt for details on fsyscalls.
@@ -602,7 +603,7 @@ ENTRY(fsys_fallback_syscall)
 	mov r26=ar.pfs
 END(fsys_fallback_syscall)
 	/* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	.prologue
 	.altrp b6
 	.body
@@ -640,7 +641,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	 *
 	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
 	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
-	 * PSR.I  : already turned off by the time fsys_bubble_down gets
+	 * PSR.I  : already turned off by the time paravirt_fsys_bubble_down gets
 	 *	    invoked
 	 * PSR.DFL: always 0 (kernel never turns it on)
 	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
@@ -650,7 +651,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	 * PSR.DB : don't care --- kernel never enables kernel-level
 	 *	    breakpoints
 	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
-	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+	 *          __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
 	 *          will trigger a taken branch; the taken-trap-handler then
 	 *          converts the syscall into a break-based system-call.
 	 */
@@ -741,14 +742,14 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	nop.m 0
 (p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
 	br.cond.spnt ia64_trace_syscall		// B
-END(fsys_bubble_down)
+END(paravirt_fsys_bubble_down)
 
 	.rodata
 	.align 8
-	.globl fsyscall_table
+	.globl paravirt_fsyscall_table
 
-	data8 fsys_bubble_down
-fsyscall_table:
+	data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
 	data8 fsys_ni_syscall
 	data8 0				// exit			// 1025
 	data8 0				// read
@@ -1033,4 +1034,4 @@ fsyscall_table:
 
 	// fill in zeros for the remaining entries
 	.zero:
-	.space fsyscall_table + 8*NR_syscalls - .zero, 0
+	.space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 5660069..c88f530 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 
+#include <asm/paravirt.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -169,16 +170,35 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 	ia64_srlz_i();
 }
 
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+struct pv_fsys_data pv_fsys_data __initdata = {
+	.fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+	.fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+	return pv_fsys_data.fsyscall_table;
+}
+
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+	return pv_fsys_data.fsys_bubble_down;
+}
+
 static void __init
 patch_fsyscall_table (unsigned long start, unsigned long end)
 {
-	extern unsigned long fsyscall_table[NR_syscalls];
+	u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
 	s32 *offp = (s32 *) start;
 	u64 ip;
 
 	while (offp < (s32 *) end) {
 		ip = (u64) ia64_imva((char *) offp + *offp);
-		ia64_patch_imm64(ip, (u64) fsyscall_table);
+		ia64_patch_imm64(ip, fsyscall_table);
 		ia64_fc(ip);
 		++offp;
 	}
@@ -189,7 +209,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
 static void __init
 patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 {
-	extern char fsys_bubble_down[];
+	u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
 	s32 *offp = (s32 *) start;
 	u64 ip;
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 054bcd9..23f4dcf 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -35,6 +35,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>
+#include <asm/paravirt.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
@@ -667,8 +668,8 @@ mem_init (void)
 	 * code can tell them apart.
 	 */
 	for (i = 0; i < NR_syscalls; ++i) {
-		extern unsigned long fsyscall_table[NR_syscalls];
 		extern unsigned long sys_call_table[NR_syscalls];
+		unsigned long *fsyscall_table = paravirt_get_fsyscall_table();
 
 		if (!fsyscall_table[i] || nolwsys)
 			fsyscall_table[i] = sys_call_table[i] | 1;
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 02/15] __initdata and const cannot be always a happy pair, as gcc-4.3.3 gives
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64
  Cc: Takashi Iwai, yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

  arch/ia64/xen/xen_pv_ops.c:156: error: xen_init_ops causes a section type conflict
  arch/ia64/xen/xen_pv_ops.c:340: error: xen_iosapic_ops causes a section type conflict

This patch simply removes const from data with __initdata.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 arch/ia64/xen/xen_pv_ops.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c
index 04cd123..5d491d9 100644
--- a/arch/ia64/xen/xen_pv_ops.c
+++ b/arch/ia64/xen/xen_pv_ops.c
@@ -153,7 +153,7 @@ xen_post_smp_prepare_boot_cpu(void)
 	xen_setup_vcpu_info_placement();
 }
 
-static const struct pv_init_ops xen_init_ops __initdata = {
+static struct pv_init_ops xen_init_ops __initdata = {
 	.banner = xen_banner,
 
 	.reserve_memory = xen_reserve_memory,
@@ -260,7 +260,7 @@ xen_intrin_local_irq_restore(unsigned long mask)
 		xen_rsm_i();
 }
 
-static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+static struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.fc		= xen_fc,
 	.thash		= xen_thash,
 	.get_cpuid	= xen_get_cpuid,
@@ -337,7 +337,7 @@ xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
 	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
 }
 
-static const struct pv_iosapic_ops xen_iosapic_ops __initdata = {
+static struct pv_iosapic_ops xen_iosapic_ops __initdata = {
 	.pcat_compat_init = xen_pcat_compat_init,
 	.__get_irq_chip = xen_iosapic_get_irq_chip,
 
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 01/15] ia64: remove warnings.
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization
In-Reply-To: <1229925022-31365-1-git-send-email-yamahata@valinux.co.jp>

this patch removes the following warnings.

>   CC      arch/ia64/kernel/patch.o
> /linux-2.6/arch/ia64/kernel/patch.c: In function 'ia64_patch_vtop':
> /linux-2.6/arch/ia64/kernel/patch.c:112: warning: passing argument 1 of 'paravirt_fc' makes integer from pointer without a cast
> /linux-2.6/arch/ia64/kernel/patch.c: In function 'ia64_patch_rse':
> /linux-2.6/arch/ia64/kernel/patch.c:135: warning: passing argument 1 of 'paravirt_fc' makes integer from pointer without a cast
> /linux-2.6/arch/ia64/kernel/patch.c: In function 'ia64_patch_mckinley_e9':
> /linux-2.6/arch/ia64/kernel/patch.c:166: warning: passing argument 1 of 'paravirt_fc' makes integer from pointer without a cast
> /linux-2.6/arch/ia64/kernel/patch.c:166: warning: passing argument 1 of 'paravirt_fc' makes integer from pointer without a cast
> /linux-2.6/arch/ia64/kernel/patch.c: In function 'patch_fsyscall_table':
> /linux-2.6/arch/ia64/kernel/patch.c:202: warning: passing argument 1 of 'paravirt_fc' makes integer from pointer without a cast
> /linux-2.6/arch/ia64/kernel/patch.c: In function 'patch_brl_fsys_bubble_down':
> /linux-2.6/arch/ia64/kernel/patch.c:220: warning: passing argument 1 of 'paravirt_fc' makes integer from pointer without a cast

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
---
 arch/ia64/kernel/patch.c |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index b83b2c5..5660069 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -108,7 +108,7 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
 
 		/* replace virtual address with corresponding physical address: */
 		ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
-		ia64_fc((void *) ip);
+		ia64_fc(ip);
 		++offp;
 	}
 	ia64_sync_i();
@@ -131,7 +131,7 @@ ia64_patch_rse (unsigned long start, unsigned long end)
 
 		b = (u64 *)(ip & -16);
 		b[1] &= ~0xf800000L;
-		ia64_fc((void *) ip);
+		ia64_fc(ip);
 		++offp;
 	}
 	ia64_sync_i();
@@ -162,7 +162,7 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 		wp[1] = 0x0084006880000200UL;
 		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
 		wp[3] = 0x0004000000000200UL;
-		ia64_fc(wp); ia64_fc(wp + 2);
+		ia64_fc((unsigned long)wp); ia64_fc((unsigned long)(wp + 2));
 		++offp;
 	}
 	ia64_sync_i();
@@ -179,7 +179,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
 	while (offp < (s32 *) end) {
 		ip = (u64) ia64_imva((char *) offp + *offp);
 		ia64_patch_imm64(ip, (u64) fsyscall_table);
-		ia64_fc((void *) ip);
+		ia64_fc(ip);
 		++offp;
 	}
 	ia64_sync_i();
@@ -197,7 +197,7 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
 		ip = (u64) offp + *offp;
 		ia64_patch_imm60((u64) ia64_imva((void *) ip),
 				 (u64) (fsys_bubble_down - (ip & -16)) / 16);
-		ia64_fc((void *) ip);
+		ia64_fc(ip);
 		++offp;
 	}
 	ia64_sync_i();
-- 
1.6.0.2

^ permalink raw reply related

* [PATCH 00/15] ia64/pv_ops, xen: more paravirtualization. TAKE 4
From: Isaku Yamahata @ 2008-12-22  5:50 UTC (permalink / raw)
  To: tony.luck, linux-ia64; +Cc: yamahata, xen-ia64-devel, virtualization


This patch set is intended for the next merge window. The patches are just
enhancements of the already merged patches or ia64 ports of x86
paravirt techniques, and their quality is good enough for merging.

This patch set is for more paravirtualization on ia64/xen domU.
This patch set does
- remove existing warnings
- paravirtualize fsys call (fsys.S) by multi compile
- paravirtualize gate page (gate.S) by multi compile
- support save/restore
  For this purpose, the following need to be paravirtualized:
  - ar.itc instruction
  - sched_clock()
  This is because the timer may change across save/restore.

For convenience the working full source is available from
http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
branch: ia64-pv-ops-2008dec22-xen-ia64-optimized-domu

For the status of this patch series
http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge

thanks,

Changes from take 3
- removed trivial compilation error depending on .config

Changes from take 2
- two patches to remove warnings.
- rebased to 2.6.28-rc8

Changes from take 1
- refreshed to 2.6.28-rc6
  no essential change.


Diffstat:
 arch/ia64/include/asm/native/inst.h       |   13 ++
 arch/ia64/include/asm/native/patchlist.h  |   38 +++++++
 arch/ia64/include/asm/native/pvchk_inst.h |    8 ++
 arch/ia64/include/asm/paravirt.h          |   57 ++++++++++
 arch/ia64/include/asm/timex.h             |    1 +
 arch/ia64/include/asm/xen/inst.h          |   28 +++++
 arch/ia64/include/asm/xen/interface.h     |    9 ++
 arch/ia64/include/asm/xen/minstate.h      |   11 ++-
 arch/ia64/include/asm/xen/patchlist.h     |   38 +++++++
 arch/ia64/include/asm/xen/privop.h        |    2 +
 arch/ia64/kernel/Makefile                 |   36 +-----
 arch/ia64/kernel/Makefile.gate            |   27 +++++
 arch/ia64/kernel/asm-offsets.c            |    2 +
 arch/ia64/kernel/entry.S                  |    4 +-
 arch/ia64/kernel/fsys.S                   |   35 +++---
 arch/ia64/kernel/gate.S                   |  171 +++++++++++++++--------------
 arch/ia64/kernel/gate.lds.S               |   17 ++--
 arch/ia64/kernel/head.S                   |   10 ++-
 arch/ia64/kernel/ivt.S                    |    2 +-
 arch/ia64/kernel/paravirt.c               |    1 +
 arch/ia64/kernel/paravirt_patchlist.c     |   79 +++++++++++++
 arch/ia64/kernel/paravirt_patchlist.h     |   28 +++++
 arch/ia64/kernel/patch.c                  |   48 ++++++---
 arch/ia64/kernel/time.c                   |    9 ++
 arch/ia64/kernel/vmlinux.lds.S            |    6 +
 arch/ia64/mm/init.c                       |    9 +-
 arch/ia64/scripts/pvcheck.sed             |    1 +
 arch/ia64/xen/Kconfig                     |    1 +
 arch/ia64/xen/Makefile                    |   19 +++-
 arch/ia64/xen/gate-data.S                 |    3 +
 arch/ia64/xen/time.c                      |   48 ++++++++
 arch/ia64/xen/xen_pv_ops.c                |  132 +++++++++++++++++++++-
 32 files changed, 729 insertions(+), 164 deletions(-)

^ permalink raw reply

* Re: [PATCH 2/3] virtio: indirect ring entries (VIRTIO_RING_F_INDIRECT_DESC)
From: Ingo Oeser @ 2008-12-20 11:38 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Avi Kivity, linux-kernel, virtualization
In-Reply-To: <1229620222-22216-3-git-send-email-markmc@redhat.com>

Hi Mark,

On Thursday 18 December 2008, Mark McLoughlin wrote:
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 5777196..2330c4b 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -70,6 +73,55 @@ struct vring_virtqueue
>  
>  #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
>  
> +/* Set up an indirect table of descriptors and add it to the queue. */
> +static int vring_add_indirect(struct vring_virtqueue *vq,
> +			      struct scatterlist sg[],
> +			      unsigned int out,
> +			      unsigned int in)
> +{
> +	struct vring_desc *desc;
> +	unsigned head;
> +	int i;
> +
> +	desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC);

kmalloc() returns ZERO_SIZE_PTR, if (out + in) == 0

> +	if (!desc)
> +		return vq->vring.num;
> +
> +	/* Transfer entries from the sg list into the indirect page */
> +	for (i = 0; i < out; i++) {
> +		desc[i].flags = VRING_DESC_F_NEXT;
> +		desc[i].addr = sg_phys(sg);
> +		desc[i].len = sg->length;
> +		desc[i].next = i+1;
> +		sg++;
> +	}
> +	for (; i < (out + in); i++) {
> +		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
> +		desc[i].addr = sg_phys(sg);
> +		desc[i].len = sg->length;
> +		desc[i].next = i+1;
> +		sg++;
> +	}
> +
> +	/* Last one doesn't continue. */
> +	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
> +	desc[i-1].next = 0;

So this array index can fail (be -1).
Please check and avoid within this function.


Best Regards

Ingo Oeser

^ permalink raw reply

* RE: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Rose, Gregory V @ 2008-12-18 22:42 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, linux-kernel@vger.kernel.org, horms@verge.net.au,
	kvm@vger.kernel.org, linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <200812171142.56170.jbarnes@virtuousgeek.org>

Jesse Barnes wrote:
> 
> Hm, that's not the answer I was hoping for. :)  (Was looking for,
> "Yeah we just need this bits queued and we'll send an update for
> e1000 right away." :) 
> 
> I really don't want the SR-IOV stuff to sit out another merge cycle
> though... Arg.

We will have drivers that support these API's posted to the 
lists within two or three days.  These drivers are RFC only 
and not to be pushed upstream.  More non-Xen testing needs to 
happen with the 82576 HW.

- Greg

^ permalink raw reply

* Re: [PATCH] AF_VMCHANNEL address family for guest<->host communication.
From: Evgeniy Polyakov @ 2008-12-18 12:30 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: netdev, kvm, David Miller, Anthony Liguori, virtualization
In-Reply-To: <20081217143146.GA20505@redhat.com>

Hi Gleb.

On Wed, Dec 17, 2008 at 04:31:46PM +0200, Gleb Natapov (gleb@redhat.com) wrote:
> Here it is. Sorry it is not in a patch format yet, but it gives
> general idea how it looks. The problem with connector is that 
> we need different IDX for different channels and there is no way
> to dynamically allocate them.

Looks very good. Especially liked how you used idx.val pairs to register
multiple users. Please add some comment in connector header on how you
use it and feel free to add my ack if needed.

-- 
	Evgeniy Polyakov

^ permalink raw reply

* RE: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Fischer, Anna @ 2008-12-18  6:37 UTC (permalink / raw)
  To: Zhao, Yu, Jesse Barnes
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	Chiang, Alexander, matthew@wil.cx, linux-pci@vger.kernel.org,
	rdreier@cisco.com, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org, horms@verge.net.au,
	kvm@vger.kernel.org, greg@kroah.com, mingo@elte.hu,
	yinghai@kernel.org, Helgaas, Bjorn
In-Reply-To: <4949B1E6.7030309@intel.com>

> From: Zhao, Yu [mailto:yu.zhao@intel.com]
> Sent: 18 December 2008 02:14
> To: Fischer, Anna
> Cc: Jesse Barnes; linux-pci@vger.kernel.org; Chiang, Alexander;
> Helgaas, Bjorn; grundler@parisc-linux.org; greg@kroah.com;
> mingo@elte.hu; matthew@wil.cx; randy.dunlap@oracle.com;
> rdreier@cisco.com; horms@verge.net.au; yinghai@kernel.org; linux-
> kernel@vger.kernel.org; kvm@vger.kernel.org;
> virtualization@lists.linux-foundation.org
> Subject: Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
>
> Fischer, Anna wrote:
> > I have two minor comments on this topic.
> >
> > 1) Currently the PF driver is called before the kernel initializes
> VFs and
> > their resources, and the current API does not allow the PF driver to
> > detect that easily if the allocation of the VFs and their resources
> > has succeeded or not. It would be quite useful if the PF driver gets
> > notified when the VFs have been created successfully as it might have
> > to do further device-specific work *after* IOV has been enabled.
>
> If the VF allocation fails in the PCI layer, then the SR-IOV core will
> invokes the callback again to notify the PF driver with zero VF count.
> The PF driver does not have to concern about this even the PCI layer
> code fails (and actually it's very rare).

Yes, this is good.


> And I'm not sure why the PF driver wants to do further work *after* the
> VF is allocated. Does this mean PF driver have to set up some internal
> resources related to SR-IOV/VF? If yes, I suggest the PF driver do it
> before VF allocation. The design philosophy of SR-IOV/VF is that VF is
> treated as hot-plug device, which means it should be immediately usable
> by VF driver (e.g. VF driver is pre-loaded) after it appears in the PCI
> subsystem. If that is not the purpose, then PF driver should handle it
> not depending on the SR-IOV, right?

Yes, you are right. In fact I was assuming in this case that the PF driver
might have to allocate VF specific resources before a PF <-> VF
communication can be established but this can be done before the VF PCI
device appears, so I was wrong with this. The current API is sufficient
to handle all of this, so I am withdrawing my concern here ;-)


> If you could elaborate your SR-IOV PF/VF h/w specific requirement, it
> would be help for me to answer this question :-)
>
> > 2) Configuration of SR-IOV: the current API allows to enable/disable
> > VFs from userspace via SYSFS. At the moment I am not quite clear what
> > exactly is supposed to control these capabilities. This could be
> > Linux tools or, on a virtualized system, hypervisor control tools.
>
> This depends on user application, you know, which depends on the usage
> environment (i.e. native, KVM or Xen).
>
> > One thing I am missing though is an in-kernel API for this which I
> > think might be useful. After all the PF driver controls the device,
> > and, for example, when a device error occurs (e.g. a hardware failure
> > which only the PF driver will be able to detect, not Linux), then the
> > PF driver might have to de-allocate all resources, shut down VFs and
> > reset the device, or something like that. In that case the PF driver
> > needs to have a way to notify the Linux SR-IOV code about this and
> > initiate cleaning up of VFs and their resources. At the moment, this
> > would have to go through userspace, I believe, and I think that is
> not
> > an optimal solution. Yu, do you have an opinion on how this would be
> > realized?
>
> Yes, the PF driver can use pci_iov_unregister to disable SR-IOV in case
> the fatal error occurs. This function also sends notification to user
> level through 'uevent' so user application can aware the change.

If pci_iov_unregister is accessible to kernel drivers then this is in fact
all we need. Thanks for the clarification.


I think the patchset looks very good.

Acked-by: Anna Fischer <anna.fischer@hp.com>

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Zhao, Yu @ 2008-12-18  2:39 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, linux-kernel@vger.kernel.org, Rose, Gregory V,
	horms@verge.net.au, kvm@vger.kernel.org,
	linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <200812171207.34396.jbarnes@virtuousgeek.org>

Jesse Barnes wrote:
> On Wednesday, December 17, 2008 11:51 am Greg KH wrote:
>> On Wed, Dec 17, 2008 at 11:42:54AM -0800, Jesse Barnes wrote:
>>> I really don't want the SR-IOV stuff to sit out another merge cycle
>>> though... Arg.
>> Why, is there some rush to get it in?  As there is no in-kernel users of
>> it, I don't see the problem with postponing it until someone actually
>> needs it.
> 
> Well it *does* make development of SR-IOV drivers that much harder.  As you 
> know, out of tree development is a pain.  OTOH if any changes end up being 
> required, they can be done before the code is merged.

Yes, people write to me asking for the SR-IOV patch or update every day
-- I guess they don't want to let their competitors know they are
working on it so they can't bring their questions up on the mailing list.

And I personally also have a dozen other patches related to the PCI and KVM
subsystems which depend on the SR-IOV change.

> Anyway, hopefully we won't have to worry about it because some driver will 
> come along soon that uses Yu's code. :)  If not, Yu might have to maintain a 
> separate git tree or something until the drivers are ready to be merged.

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Zhao, Yu @ 2008-12-18  2:26 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, linux-pci@vger.kernel.org, rdreier@cisco.com,
	linux-kernel@vger.kernel.org, Jesse Barnes,
	virtualization@lists.linux-foundation.org, horms@verge.net.au,
	kvm@vger.kernel.org, greg@kroah.com, mingo@elte.hu,
	yinghai@kernel.org, bjorn.helgaas@hp.com
In-Reply-To: <20081217141542.GB19967@parisc-linux.org>

Matthew Wilcox wrote:
> On Tue, Dec 16, 2008 at 03:23:53PM -0800, Jesse Barnes wrote:
>> I applied 1-9 to my linux-next branch; and at least patch #10 needs a respin, 
> 
> I still object to #2.  We should have the flexibility to have 'struct
> resource's that are not in this array in the pci_dev.  I would like to
> see the SR-IOV resources _not_ in this array (and indeed, I'd like to
> see PCI bridges keep their producer resources somewhere other than in
> this array).  I accept that there are still some problems with this, but

I understand your concern, and agree that using the array as a resource
manager is not the best way. But for now it's not possible, as you know.
We need a better resource manager for the PCI subsystem to manage the
various resources (traditional, device specific, bus related), which is
work independent of the SR-IOV change.

> patch #2 moves us further from being able to achieve this goal, not
> closer.

The array is obviously straightforward and can be easily replaced with a
more advanced resource manager in the future. So I don't think we are going
further from or closer to the goal.

Thanks,
Yu

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Zhao, Yu @ 2008-12-18  2:13 UTC (permalink / raw)
  To: Fischer, Anna
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	Chiang, Alexander, matthew@wil.cx, linux-pci@vger.kernel.org,
	rdreier@cisco.com, linux-kernel@vger.kernel.org, Jesse Barnes,
	virtualization@lists.linux-foundation.org, horms@verge.net.au,
	kvm@vger.kernel.org, greg@kroah.com, mingo@elte.hu,
	yinghai@kernel.org, Helgaas, Bjorn
In-Reply-To: <0199E0D51A61344794750DC57738F58E5E29B33E3C@GVW1118EXC.americas.hpqcorp.net>

Fischer, Anna wrote:
> I have two minor comments on this topic.
> 
> 1) Currently the PF driver is called before the kernel initializes VFs and
> their resources, and the current API does not allow the PF driver to
> detect that easily if the allocation of the VFs and their resources
> has succeeded or not. It would be quite useful if the PF driver gets
> notified when the VFs have been created successfully as it might have
> to do further device-specific work *after* IOV has been enabled.

If the VF allocation fails in the PCI layer, then the SR-IOV core will
invoke the callback again to notify the PF driver with a zero VF count.
The PF driver does not have to be concerned about this even if the PCI layer
code fails (and actually that's very rare).

And I'm not sure why the PF driver wants to do further work *after* the
VF is allocated. Does this mean the PF driver has to set up some internal
resources related to SR-IOV/VF? If yes, I suggest the PF driver do it
before VF allocation. The design philosophy of SR-IOV/VF is that a VF is
treated as a hot-plug device, which means it should be immediately usable
by the VF driver (e.g. the VF driver is pre-loaded) after it appears in the PCI
subsystem. If that is not the purpose, then the PF driver should handle it
without depending on SR-IOV, right?

If you could elaborate on your SR-IOV PF/VF h/w specific requirements, it
would help me answer this question :-)

> 2) Configuration of SR-IOV: the current API allows to enable/disable
> VFs from userspace via SYSFS. At the moment I am not quite clear what
> exactly is supposed to control these capabilities. This could be
> Linux tools or, on a virtualized system, hypervisor control tools.

This depends on user application, you know, which depends on the usage 
environment (i.e. native, KVM or Xen).

> One thing I am missing though is an in-kernel API for this which I
> think might be useful. After all the PF driver controls the device,
> and, for example, when a device error occurs (e.g. a hardware failure
> which only the PF driver will be able to detect, not Linux), then the
> PF driver might have to de-allocate all resources, shut down VFs and
> reset the device, or something like that. In that case the PF driver
> needs to have a way to notify the Linux SR-IOV code about this and
> initiate cleaning up of VFs and their resources. At the moment, this
> would have to go through userspace, I believe, and I think that is not
> an optimal solution. Yu, do you have an opinion on how this would be
> realized?

Yes, the PF driver can use pci_iov_unregister to disable SR-IOV in case
a fatal error occurs. This function also sends a notification to user
level through 'uevent' so user applications can be aware of the change.

Thanks,
Yu

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Jesse Barnes @ 2008-12-17 20:07 UTC (permalink / raw)
  To: Greg KH
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, linux-pci@vger.kernel.org,
	rdreier@cisco.com, Jike Song, linux-kernel@vger.kernel.org,
	Rose, Gregory V, horms@verge.net.au, kvm@vger.kernel.org,
	mingo@elte.hu, virtualization@lists.linux-foundation.org,
	yinghai@kernel.org, bjorn.helgaas@hp.com
In-Reply-To: <20081217195143.GA25211@kroah.com>

On Wednesday, December 17, 2008 11:51 am Greg KH wrote:
> On Wed, Dec 17, 2008 at 11:42:54AM -0800, Jesse Barnes wrote:
> > I really don't want the SR-IOV stuff to sit out another merge cycle
> > though... Arg.
>
> Why, is there some rush to get it in?  As there is no in-kernel users of
> it, I don't see the problem with postponing it until someone actually
> needs it.

Well it *does* make development of SR-IOV drivers that much harder.  As you 
know, out of tree development is a pain.  OTOH if any changes end up being 
required, they can be done before the code is merged.

Anyway, hopefully we won't have to worry about it because some driver will 
come along soon that uses Yu's code. :)  If not, Yu might have to maintain a 
separate git tree or something until the drivers are ready to be merged.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Greg KH @ 2008-12-17 19:51 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, linux-pci@vger.kernel.org,
	rdreier@cisco.com, Jike Song, linux-kernel@vger.kernel.org,
	Rose, Gregory V, horms@verge.net.au, kvm@vger.kernel.org,
	mingo@elte.hu, virtualization@lists.linux-foundation.org,
	yinghai@kernel.org, bjorn.helgaas@hp.com
In-Reply-To: <200812171142.56170.jbarnes@virtuousgeek.org>

On Wed, Dec 17, 2008 at 11:42:54AM -0800, Jesse Barnes wrote:
> 
> I really don't want the SR-IOV stuff to sit out another merge cycle though...  
> Arg.

Why, is there some rush to get it in?  As there are no in-kernel users of
it, I don't see the problem with postponing it until someone actually
needs it.

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Jesse Barnes @ 2008-12-17 19:42 UTC (permalink / raw)
  To: Rose, Gregory V
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, linux-kernel@vger.kernel.org, horms@verge.net.au,
	kvm@vger.kernel.org, linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <43F901BD926A4E43B106BF17856F07554B525A02@orsmsx508.amr.corp.intel.com>

On Wednesday, December 17, 2008 11:05 am Rose, Gregory V wrote:
> -----Original Message-----
> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org]
>
> On Wednesday, December 17, 2008 8:44 am Rose, Gregory V wrote:
> > As noted in the attached email to the netdev list, we (e1000_devel) will
> > support the API.
>
> Do you think you'll have those changes ready for 2.6.29?  Would merging
> core SR-IOV support now make that any more likely?
>
>
>
> I'm not sure about readiness for 2.6.29.  I can tell you that as soon as I
> get a Xen Dom0 kernel with these API's included it will take me less than a
> day to convert over to them from the current drivers I have that are using
> an older API from back in August.  The drivers are mostly functional, they
> have a few bugs.  I could do some quick regression testing to make sure
> that the API changes haven't broken anything and then some bug fixes to get
> everything ready for release.  Maybe two or three weeks for the major bugs.
>  I'll be out over the Christmas holidays so that puts us into middle or
> late January if I got the Xen Dom0 kernel today.  That seems unlikely but
> it gives you an idea of the time required.

Hm, that's not the answer I was hoping for. :)  (Was looking for, "Yeah we
just need these bits queued and we'll send an update for e1000 right away." :)

I really don't want the SR-IOV stuff to sit out another merge cycle though...  
Arg.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply

* RE: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Rose, Gregory V @ 2008-12-17 19:42 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Zhao, Yu, Yu, Wilfred
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, Jesse Barnes, linux-kernel@vger.kernel.org,
	horms@verge.net.au, kvm@vger.kernel.org,
	linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <49495447.5030904@goop.org>



Jeremy Fitzhardinge wrote:

> Which dom0 kernel are you using?  Is it based on my pvops-based dom0 work?

The kernel I'm currently using is an ad-hoc patchwork of changes to the 2.6.18 Xen Dom0 kernel that was available back in August.  The folks from OTC in Intel (Zhao Yu and his team) would be able to provide you more background on it as they did the work to enable MSI-X, SR-IOV and VT-d in that kernel so that my drivers would function.  I don't see Zhao Yu on the distro list for this email so I'll add him.

- Greg

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Jeremy Fitzhardinge @ 2008-12-17 19:34 UTC (permalink / raw)
  To: Rose, Gregory V
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, Jesse Barnes, linux-kernel@vger.kernel.org,
	horms@verge.net.au, kvm@vger.kernel.org,
	linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <43F901BD926A4E43B106BF17856F07554B525A02@orsmsx508.amr.corp.intel.com>

Rose, Gregory V wrote:
> -----Original Message-----
> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org] 
>
> On Wednesday, December 17, 2008 8:44 am Rose, Gregory V wrote:
>   
>> As noted in the attached email to the netdev list, we (e1000_devel) will
>> support the API.
>>     
>
> Do you think you'll have those changes ready for 2.6.29?  Would merging core
> SR-IOV support now make that any more likely?
>
>   
>
> I'm not sure about readiness for 2.6.29.  I can tell you that as soon as I get a Xen Dom0 kernel with these API's included it will take me less than a day to convert over to them from the current drivers I have that are using an older API from back in August.

Which dom0 kernel are you using?  Is it based on my pvops-based dom0 work?

    J

^ permalink raw reply

* RE: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Rose, Gregory V @ 2008-12-17 19:05 UTC (permalink / raw)
  To: Jesse Barnes
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, linux-kernel@vger.kernel.org, horms@verge.net.au,
	kvm@vger.kernel.org, linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <200812171051.36645.jbarnes@virtuousgeek.org>


-----Original Message-----
From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org] 

On Wednesday, December 17, 2008 8:44 am Rose, Gregory V wrote:
> As noted in the attached email to the netdev list, we (e1000_devel) will
> support the API.

Do you think you'll have those changes ready for 2.6.29?  Would merging core
SR-IOV support now make that any more likely?

>>>>>>>>>

I'm not sure about readiness for 2.6.29.  I can tell you that as soon as I get a Xen Dom0 kernel with these API's included it will take me less than a day to convert over to them from the current drivers I have that are using an older API from back in August.  The drivers are mostly functional, they have a few bugs.  I could do some quick regression testing to make sure that the API changes haven't broken anything and then some bug fixes to get everything ready for release.  Maybe two or three weeks for the major bugs.  I'll be out over the Christmas holidays so that puts us into middle or late January if I got the Xen Dom0 kernel today.  That seems unlikely but it gives you an idea of the time required.

- Greg

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Jesse Barnes @ 2008-12-17 18:59 UTC (permalink / raw)
  To: Fischer, Anna
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	Chiang, Alexander, matthew@wil.cx, linux-pci@vger.kernel.org,
	rdreier@cisco.com, linux-kernel@vger.kernel.org,
	virtualization@lists.linux-foundation.org, horms@verge.net.au,
	kvm@vger.kernel.org, greg@kroah.com, mingo@elte.hu,
	yinghai@kernel.org, Helgaas, Bjorn
In-Reply-To: <0199E0D51A61344794750DC57738F58E5E29B33E3C@GVW1118EXC.americas.hpqcorp.net>

On Wednesday, December 17, 2008 3:42 am Fischer, Anna wrote:
> I have two minor comments on this topic.
>
> 1) Currently the PF driver is called before the kernel initializes VFs and
> their resources, and the current API does not allow the PF driver to
> detect that easily if the allocation of the VFs and their resources
> has succeeded or not. It would be quite useful if the PF driver gets
> notified when the VFs have been created successfully as it might have
> to do further device-specific work *after* IOV has been enabled.

You're thinking of after the VFs are created the VF drivers (which may or may 
not be part of the PF driver) may not be able to communicate back to the PF 
driver that something else needs to be done (I remember seeing this in the 
earlier thread, should have included it in my post, sorry)?  I'm not sure if 
it makes sense to add an interface like that to the core until we have a feel 
for what the PF/VF drivers are going to want... Or do you have something 
specific in mind right now?  If/until we have something in the core, it seems 
like this could be done on a per PF/VF driver basis for now.

> 2) Configuration of SR-IOV: the current API allows to enable/disable
> VFs from userspace via SYSFS. At the moment I am not quite clear what
> exactly is supposed to control these capabilities. This could be
> Linux tools or, on a virtualized system, hypervisor control tools.
> One thing I am missing though is an in-kernel API for this which I
> think might be useful. After all the PF driver controls the device,
> and, for example, when a device error occurs (e.g. a hardware failure
> which only the PF driver will be able to detect, not Linux), then the
> PF driver might have to de-allocate all resources, shut down VFs and
> reset the device, or something like that. In that case the PF driver
> needs to have a way to notify the Linux SR-IOV code about this and
> initiate cleaning up of VFs and their resources. At the moment, this
> would have to go through userspace, I believe, and I think that is not
> an optimal solution. Yu, do you have an opinion on how this would be
> realized?

That's a good point, Yu?

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply

* Re: [PATCH 0/13 v7] PCI: Linux kernel SR-IOV support
From: Jesse Barnes @ 2008-12-17 18:51 UTC (permalink / raw)
  To: Rose, Gregory V
  Cc: randy.dunlap@oracle.com, grundler@parisc-linux.org,
	achiang@hp.com, matthew@wil.cx, Greg KH, rdreier@cisco.com,
	Jike Song, linux-kernel@vger.kernel.org, horms@verge.net.au,
	kvm@vger.kernel.org, linux-pci@vger.kernel.org, mingo@elte.hu,
	virtualization@lists.linux-foundation.org, yinghai@kernel.org,
	bjorn.helgaas@hp.com
In-Reply-To: <43F901BD926A4E43B106BF17856F07554B525811@orsmsx508.amr.corp.intel.com>

On Wednesday, December 17, 2008 8:44 am Rose, Gregory V wrote:
> As noted in the attached email to the netdev list, we (e1000_devel) will
> support the API.

Do you think you'll have those changes ready for 2.6.29?  Would merging core 
SR-IOV support now make that any more likely?

Thanks,
Jesse

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox