From mboxrd@z Thu Jan 1 00:00:00 1970 From: Paolo Bonzini Subject: [RFC PATCH] revive VCPUOP_register_time_memory_area Date: Thu, 03 Feb 2011 18:14:28 +0100 Message-ID: <4D4AE274.1090207@redhat.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------030300000305050803050001" Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: "xen-devel@lists.xensource.com" List-Id: xen-devel@lists.xenproject.org This is a multi-part message in MIME format. --------------030300000305050803050001 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi all, the attached patch tries to revive the support for userspace vcpu_time_info. Writing the information is done after context switch to prevent the memory corruption that led to the disabling of the hypercall. The patch also fixes the write of version_update_begin(_u.version) into the userspace vcpu_time_info. A while ago, Jeremy wrote that he was "not sure that the Xen clock algorithm can give strict enough monotonicity for usermode use". Has anybody else ever put some thought into this, and maybe shared (or not shared) his worries? The patch is mostly untested because Xen 4.1 fails to boot the (non-pvops) dom0 kernels I have around; a very similar patch _was_ tested though and worked. :) Paolo --------------030300000305050803050001 Content-Type: text/plain; name="vcpu-rfc.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="vcpu-rfc.patch" diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -994,15 +994,14 @@ arch_do_vcpu_op( break; } - /* - * XXX Disable for 4.0.0: __update_vcpu_system_time() writes to the given - * virtual address even when running in another domain's address space. - */ -#if 0 case VCPUOP_register_vcpu_time_memory_area: { struct vcpu_register_time_memory_area area; + rc = -EINVAL; + if ( v != current ) + break; + rc = -EFAULT; if ( copy_from_guest(&area, arg, 1) ) break; @@ -1017,7 +1016,6 @@ arch_do_vcpu_op( break; } -#endif case VCPUOP_get_physid: { @@ -1494,6 +1492,7 @@ void context_switch(struct vcpu *prev, s context_saved(prev); + update_vcpu_time_info_area(next); if (prev != next) update_runstate_area(next); diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -796,23 +796,14 @@ uint64_t tsc_ticks2ns(uint64_t ticks) return scale_delta(ticks, &t->tsc_scale); } -/* Explicitly OR with 1 just in case version number gets out of sync. */ -#define version_update_begin(v) (((v)+1)|1) -#define version_update_end(v) ((v)+1) - -static void __update_vcpu_system_time(struct vcpu *v, int force) +static void compute_vcpu_system_time(struct vcpu_time_info *dest, + struct vcpu *v) { struct cpu_time *t; - struct vcpu_time_info *u, _u; - XEN_GUEST_HANDLE(vcpu_time_info_t) user_u; struct domain *d = v->domain; s_time_t tsc_stamp = 0; - if ( v->vcpu_info == NULL ) - return; - t = &this_cpu(cpu_time); - u = &vcpu_info(v, time); if ( d->arch.vtsc ) { @@ -829,24 +820,39 @@ static void __update_vcpu_system_time(st tsc_stamp = t->local_tsc_stamp; } - memset(&_u, 0, sizeof(_u)); + memset(dest, 0, sizeof(*dest)); if ( d->arch.vtsc ) { - _u.tsc_timestamp = tsc_stamp; - _u.system_time = t->stime_local_stamp; - _u.tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac; - _u.tsc_shift = d->arch.vtsc_to_ns.shift; + dest->tsc_timestamp = tsc_stamp; + dest->system_time = t->stime_local_stamp; + dest->tsc_to_system_mul = d->arch.vtsc_to_ns.mul_frac; + dest->tsc_shift = d->arch.vtsc_to_ns.shift; } else { - _u.tsc_timestamp = t->local_tsc_stamp; - _u.system_time = t->stime_local_stamp; - _u.tsc_to_system_mul = t->tsc_scale.mul_frac; - _u.tsc_shift = (s8)t->tsc_scale.shift; + dest->tsc_timestamp = t->local_tsc_stamp; + dest->system_time = t->stime_local_stamp; + dest->tsc_to_system_mul = t->tsc_scale.mul_frac; + dest->tsc_shift = (s8)t->tsc_scale.shift; } if ( is_hvm_domain(d) ) - _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset; + dest->tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset; +} + +/* Explicitly OR with 1 just in case version number gets out of sync. */ +#define version_update_begin(v) (((v)+1)|1) +#define version_update_end(v) ((v)+1) + +static void __update_vcpu_system_time(struct vcpu *v, int force) +{ + struct vcpu_time_info *u, _u; + + if ( v->vcpu_info == NULL ) + return; + + u = &vcpu_info(v, time); + compute_vcpu_system_time(&_u, v); /* Don't bother unless timestamp record has changed or we are forced. */ _u.version = u->version; /* make versions match for memcmp test */ @@ -861,20 +867,33 @@ static void __update_vcpu_system_time(st wmb(); /* 3. Update guest kernel version. */ u->version = version_update_end(u->version); +} + +void update_vcpu_time_info_area(struct vcpu *v) +{ + struct vcpu_time_info _u; + XEN_GUEST_HANDLE(vcpu_time_info_t) user_u; user_u = v->arch.time_info_guest; - if ( !guest_handle_is_null(user_u) ) - { - /* 1. Update userspace version. */ - __copy_field_to_guest(user_u, &_u, version); - wmb(); - /* 2. Update all other userspavce fields. */ - __copy_to_guest(user_u, &_u, 1); - wmb(); - /* 3. Update userspace version. */ - _u.version = version_update_end(_u.version); - __copy_field_to_guest(user_u, &_u, version); - } + if ( guest_handle_is_null(user_u) ) + return; + + compute_vcpu_system_time(&_u, v); + + /* 1. Update userspace version. Using copy_field_to_guest means that V + must be the VCPU whose page tables are in use. That is, it must always + be CURRENT except when called from schedule(). See also the BUG_ON in + force_update_vcpu_system_time. */ + __copy_field_from_guest(&_u, user_u, version); + _u.version = version_update_begin(_u.version); + __copy_field_to_guest(user_u, &_u, version); + wmb(); + /* 2. Update all other userspace fields. */ + __copy_to_guest(user_u, &_u, 1); + wmb(); + /* 3. Update userspace version. */ + _u.version = version_update_end(_u.version); + __copy_field_to_guest(user_u, &_u, version); } void update_vcpu_system_time(struct vcpu *v) @@ -884,7 +903,9 @@ void update_vcpu_system_time(struct vcpu void force_update_vcpu_system_time(struct vcpu *v) { + BUG_ON(v != current); __update_vcpu_system_time(v, 1); + update_vcpu_time_info_area(v); } void update_domain_wallclock_time(struct domain *d) @@ -950,7 +971,7 @@ int cpu_frequency_change(u64 freq) set_time_scale(&t->tsc_scale, freq); local_irq_enable(); - update_vcpu_system_time(current); + force_update_vcpu_system_time(current); /* A full epoch should pass before we check for deviation. */ if ( smp_processor_id() == 0 ) @@ -1034,7 +1055,7 @@ static void local_time_calibration(void) t->stime_local_stamp = c->stime_master_stamp; t->stime_master_stamp = c->stime_master_stamp; local_irq_enable(); - update_vcpu_system_time(current); + force_update_vcpu_system_time(current); goto out; } @@ -1137,7 +1158,7 @@ static void local_time_calibration(void) t->stime_master_stamp = curr_master_stime; local_irq_enable(); - update_vcpu_system_time(current); + force_update_vcpu_system_time(current); out: if ( smp_processor_id() == 0 ) @@ -1548,7 +1569,7 @@ int time_resume(void) do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW()); - update_vcpu_system_time(current); + force_update_vcpu_system_time(current); update_domain_rtc(); diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h --- a/xen/include/asm-x86/time.h +++ b/xen/include/asm-x86/time.h @@ -69,6 +69,7 @@ void tsc_get_info(struct domain *d, uint void force_update_vcpu_system_time(struct vcpu *v); +void update_vcpu_time_info_area(struct vcpu *v); int host_tsc_is_safe(void); void cpuid_time_leaf(uint32_t sub_idx, unsigned int *eax, unsigned int *ebx, --------------030300000305050803050001 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --------------030300000305050803050001--