From: "Jan Beulich"
Subject: [PATCH, RFC] x86: IRQ affinity should track vCPU affinity
Date: Tue, 15 Jun 2010 13:28:02 +0100
Message-ID: <4C178DF202000078000067F5@vpn.id2.novell.com>
To: xen-devel@lists.xensource.com

With IRQs getting bound to the pCPU the binding vCPU happens to run on,
quite a bit of extra cross-CPU interrupt traffic can result as soon as
that vCPU moves to a different pCPU. Hence IRQ affinity should track
vCPU affinity. Likewise, when a domain re-binds an event channel
associated with a pIRQ, that IRQ's affinity should be adjusted as well.

The open issue is how to break ties for interrupts shared by multiple
domains - currently, the last request made (at any point in time) is
honored.

Signed-off-by: Jan Beulich
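To illustrate the bookkeeping this introduces: each vCPU gains a list
head (pirq_evtchn_head) for a doubly-linked list of the pIRQ-bound
event-channel ports notifying it, threaded through new u16
next_port/prev_port fields and terminated by port 0 (which the
event-channel code already uses to mean "no port"). The following
standalone model of that index-linked list is for illustration only
and is not part of the patch:

#include <stdio.h>

#define NR_PORTS 16

static struct { unsigned short next_port, prev_port; } chn[NR_PORTS];
static unsigned short head; /* stands in for v->pirq_evtchn_head */

/* mirrors link_pirq_port(): push 'port' at the head of the list */
static void link_port(unsigned short port)
{
    chn[port].prev_port = 0;
    chn[port].next_port = head;
    if ( head )
        chn[head].prev_port = port;
    head = port;
}

/* mirrors unlink_pirq_port(): splice 'port' out, fixing up neighbors */
static void unlink_port(unsigned short port)
{
    if ( chn[port].prev_port )
        chn[chn[port].prev_port].next_port = chn[port].next_port;
    else
        head = chn[port].next_port;
    if ( chn[port].next_port )
        chn[chn[port].next_port].prev_port = chn[port].prev_port;
}

int main(void)
{
    link_port(3); link_port(5); link_port(9);   /* list: 9 -> 5 -> 3 */
    unlink_port(5);                             /* list: 9 -> 3 */
    /* the walk evtchn_move_pirqs() performs over a vCPU's ports: */
    for ( unsigned short p = head; p; p = chn[p].next_port )
        printf("port %u\n", p);                 /* prints 9, then 3 */
    return 0;
}

evtchn_move_pirqs() below performs exactly this kind of walk, calling
pirq_set_affinity() for each port on the migrating vCPU's list.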
--- 2010-06-15.orig/xen/arch/ia64/xen/irq.c	2009-10-29 12:24:48.000000000 +0100
+++ 2010-06-15/xen/arch/ia64/xen/irq.c	2010-06-15 09:21:05.000000000 +0200
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
     }
 }
 
+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+    /* FIXME */
+}
+
 /*
  * Exit an interrupt context. Process softirqs if needed and possible:
  */
--- 2010-06-15.orig/xen/arch/x86/hvm/hvm.c	2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/hvm/hvm.c	2010-06-15 09:21:05.000000000 +0200
@@ -270,7 +270,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        irq_set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(desc, cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
--- 2010-06-15.orig/xen/arch/x86/irq.c	2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/arch/x86/irq.c	2010-06-15 09:21:05.000000000 +0200
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
 }
 
 /* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
 {
-    struct irq_desc *desc = irq_to_desc(irq);
-    
     if (!desc->handler->set_affinity)
         return;
 
     ASSERT(spin_is_locked(&desc->lock));
+    desc->status &= ~IRQ_MOVE_PENDING;
+    wmb();
+    cpus_copy(desc->pending_mask, *mask);
+    wmb();
     desc->status |= IRQ_MOVE_PENDING;
-    cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+    unsigned long flags;
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+    if ( !desc )
+        return;
+    irq_set_affinity(desc, mask);
+    spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 DEFINE_PER_CPU(unsigned int, irq_count);
--- 2010-06-15.orig/xen/common/event_channel.c	2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/event_channel.c	2010-06-15 09:21:05.000000000 +0200
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
 }
 
 
+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+    chn->u.pirq.prev_port = 0;
+    chn->u.pirq.next_port = v->pirq_evtchn_head;
+    if ( v->pirq_evtchn_head )
+        evtchn_from_port(v->domain, v->pirq_evtchn_head)
+            ->u.pirq.prev_port = port;
+    v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    if ( chn->u.pirq.prev_port )
+        evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+            chn->u.pirq.next_port;
+    else
+        v->pirq_evtchn_head = chn->u.pirq.next_port;
+    if ( chn->u.pirq.next_port )
+        evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+            chn->u.pirq.prev_port;
+}
+
+
 static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
+    struct vcpu   *v = d->vcpu[0];
     int            port, pirq = bind->pirq;
     long           rc;
 
@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
     chn = evtchn_from_port(d, port);
 
     d->pirq_to_evtchn[pirq] = port;
-    rc = pirq_guest_bind(d->vcpu[0], pirq, 
+    rc = pirq_guest_bind(v, pirq,
                          !!(bind->flags & BIND_PIRQ__WILL_SHARE));
     if ( rc != 0 )
     {
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
     }
 
     chn->state = ECS_PIRQ;
-    chn->u.pirq = pirq;
+    chn->u.pirq.irq = pirq;
+    link_pirq_port(port, chn, v);
 
     bind->port = port;
 
@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
         break;
 
     case ECS_PIRQ:
-        pirq_guest_unbind(d1, chn1->u.pirq);
-        d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+        pirq_guest_unbind(d1, chn1->u.pirq.irq);
+        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+        unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
         break;
 
     case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
         break;
     case ECS_PIRQ:
         status->status = EVTCHNSTAT_pirq;
-        status->u.pirq = chn->u.pirq;
+        status->u.pirq = chn->u.pirq.irq;
         break;
     case ECS_VIRQ:
         status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
         break;
     case ECS_UNBOUND:
     case ECS_INTERDOMAIN:
+        chn->notify_vcpu_id = vcpu_id;
+        break;
     case ECS_PIRQ:
+        if ( chn->notify_vcpu_id == vcpu_id )
+            break;
+        unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
         chn->notify_vcpu_id = vcpu_id;
+        pirq_set_affinity(d, chn->u.pirq.irq,
+                          cpumask_of(d->vcpu[vcpu_id]->processor));
+        link_pirq_port(port, chn, d->vcpu[vcpu_id]);
         break;
     default:
         rc = -EINVAL;
@@ -1064,6 +1100,23 @@ void evtchn_destroy_final(struct domain
 }
 
 
+void evtchn_move_pirqs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    const cpumask_t *mask = cpumask_of(v->processor);
+    unsigned int port;
+    struct evtchn *chn;
+
+    spin_lock(&d->event_lock);
+    for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+    {
+        chn = evtchn_from_port(d, port);
+        pirq_set_affinity(d, chn->u.pirq.irq, mask);
+    }
+    spin_unlock(&d->event_lock);
+}
+
+
 static void domain_dump_evtchn_info(struct domain *d)
 {
     unsigned int port;
@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
                    chn->u.interdomain.remote_port);
             break;
         case ECS_PIRQ:
-            printk(" p=%d", chn->u.pirq);
+            printk(" p=%d", chn->u.pirq.irq);
             break;
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
--- 2010-06-15.orig/xen/common/schedule.c	2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/common/schedule.c	2010-06-15 09:21:05.000000000 +0200
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d,
         cpus_setall(v->cpu_affinity);
         v->processor = new_p;
         v->sched_priv = vcpu_priv[v->vcpu_id];
+        evtchn_move_pirqs(v);
 
         new_p = cycle_cpu(new_p, c->cpu_valid);
     }
@@ -419,6 +420,9 @@ static void vcpu_migrate(struct vcpu *v)
     spin_unlock_irqrestore(
         per_cpu(schedule_data, old_cpu).schedule_lock, flags);
 
+    if ( old_cpu != new_cpu )
+        evtchn_move_pirqs(v);
+
     /* Wake on new CPU. */
     vcpu_wake(v);
 }
@@ -1094,6 +1098,9 @@ static void schedule(void)
 
     stop_timer(&prev->periodic_timer);
 
+    if ( next_slice.migrated )
+        evtchn_move_pirqs(next);
+
     /* Ensure that the domain has an up-to-date time base. */
     update_vcpu_system_time(next);
     vcpu_periodic_timer_work(next);
--- 2010-06-15.orig/xen/common/sched_credit.c	2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit.c	2010-06-15 09:21:05.000000000 +0200
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,
 
 static struct csched_vcpu *
 csched_load_balance(struct csched_private *prv, int cpu,
-    struct csched_vcpu *snext)
+    struct csched_vcpu *snext, bool_t *stolen)
 {
     struct csched_vcpu *speer;
     cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
         speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
         spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
         if ( speer != NULL )
+        {
+            *stolen = 1;
             return speer;
+        }
     }
 
  out:
@@ -1269,6 +1272,7 @@ csched_schedule(
     BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
 
     snext = __runq_elem(runq->next);
+    ret.migrated = 0;
 
     /* Tasklet work (which runs in idle VCPU context) overrides all else. */
     if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
     if ( snext->pri > CSCHED_PRI_TS_OVER )
         __runq_remove(snext);
     else
-        snext = csched_load_balance(prv, cpu, snext);
+        snext = csched_load_balance(prv, cpu, snext, &ret.migrated);
 
     /*
      * Update idlers mask if necessary. When we're idling, other CPUs
--- 2010-06-15.orig/xen/common/sched_credit2.c	2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_credit2.c	2010-06-15 09:22:13.000000000 +0200
@@ -991,10 +991,17 @@ csched_schedule(
     }
 #endif
 
+    ret.migrated = 0;
+
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         snext->start_time = now;
-        snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+        /* Safe because lock for old processor is held */
+        if ( snext->vcpu->processor != cpu )
+        {
+            snext->vcpu->processor = cpu;
+            ret.migrated = 1;
+        }
     }
 
     /*
--- 2010-06-15.orig/xen/common/sched_sedf.c	2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/common/sched_sedf.c	2010-06-15 09:21:05.000000000 +0200
@@ -875,6 +875,8 @@ static struct task_slice sedf_do_schedul
         ret.time = EXTRA_QUANTUM;
     }
 
+    ret.migrated = 0;
+
     EDOM_INFO(ret.task)->sched_start_abs = now;
     CHECK(ret.time > 0);
     ASSERT(sedf_runnable(ret.task));
--- 2010-06-15.orig/xen/include/asm-x86/irq.h	2009-12-16 09:14:13.000000000 +0100
+++ 2010-06-15/xen/include/asm-x86/irq.h	2010-06-15 09:21:05.000000000 +0200
@@ -143,7 +143,7 @@ void move_native_irq(int irq);
 
 void move_masked_irq(int irq);
 
-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
 
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
--- 2010-06-15.orig/xen/include/xen/cpumask.h	2010-05-17 08:45:28.000000000 +0200
+++ 2010-06-15/xen/include/xen/cpumask.h	2010-06-15 09:21:05.000000000 +0200
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
 }
 
 #define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
 {
 	bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
 }
--- 2010-06-15.orig/xen/include/xen/event.h	2010-06-11 11:41:35.000000000 +0200
+++ 2010-06-15/xen/include/xen/event.h	2010-06-15 09:21:05.000000000 +0200
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
 /* Unmask a local event-channel port. */
 int evtchn_unmask(unsigned int port);
 
+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
--- 2010-06-15.orig/xen/include/xen/irq.h	2009-10-29 12:24:49.000000000 +0100
+++ 2010-06-15/xen/include/xen/irq.h	2010-06-15 09:21:05.000000000 +0200
@@ -138,6 +138,7 @@ extern int pirq_guest_eoi(struct domain
 extern int pirq_guest_unmask(struct domain *d);
 extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
 extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
 extern irq_desc_t *domain_spin_lock_irq_desc(
     struct domain *d, int irq, unsigned long *pflags);
 
--- 2010-06-15.orig/xen/include/xen/sched.h	2010-06-14 08:49:36.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched.h	2010-06-15 09:21:05.000000000 +0200
@@ -61,7 +61,11 @@ struct evtchn
             u16            remote_port;
             struct domain *remote_dom;
         } interdomain; /* state == ECS_INTERDOMAIN */
-        u16 pirq;      /* state == ECS_PIRQ */
+        struct {
+            u16 irq;
+            u16 next_port;
+            u16 prev_port;
+        } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
 #ifdef FLASK_ENABLE
@@ -142,6 +146,9 @@ struct vcpu
      */
     int              poll_evtchn;
 
+    /* (over-)protected by ->domain->event_lock */
+    int              pirq_evtchn_head;
+
     unsigned long    pause_flags;
     atomic_t         pause_count;
 
--- 2010-06-15.orig/xen/include/xen/sched-if.h	2010-05-20 09:59:27.000000000 +0200
+++ 2010-06-15/xen/include/xen/sched-if.h	2010-06-15 09:21:05.000000000 +0200
@@ -79,6 +79,7 @@ static inline void vcpu_schedule_unlock(
 struct task_slice {
     struct vcpu *task;
     s_time_t     time;
+    bool_t       migrated;
 };
 
 struct scheduler {
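A note on the reordering inside irq_set_affinity(): IRQ_MOVE_PENDING is
cleared before pending_mask is rewritten and only re-set afterwards,
with wmb()s in between, so the flag is never set while the mask is
mid-update; a reader that (under the same locking discipline - note the
ASSERT on desc->lock) finds the flag set reads a mask that was written
completely before the flag went up. The sketch below is one plausible
reading of that publish pattern, not Xen code; C11 fences stand in for
wmb(), and all names are illustrative:

#include <stdatomic.h>
#include <stdio.h>

#define IRQ_MOVE_PENDING 0x1u

struct desc {
    _Atomic unsigned int status; /* stands in for desc->status */
    unsigned long pending_mask;  /* stands in for desc->pending_mask */
};

/* Writer: retract the flag, update the payload, republish. */
static void set_affinity(struct desc *d, unsigned long mask)
{
    atomic_fetch_and_explicit(&d->status, ~IRQ_MOVE_PENDING,
                              memory_order_relaxed);
    atomic_thread_fence(memory_order_release);      /* first wmb() */
    d->pending_mask = mask;
    atomic_thread_fence(memory_order_release);      /* second wmb() */
    atomic_fetch_or_explicit(&d->status, IRQ_MOVE_PENDING,
                             memory_order_relaxed);
}

/* Reader: a flag observed set implies the mask below is complete. */
static int read_pending(struct desc *d, unsigned long *mask)
{
    if ( !(atomic_load_explicit(&d->status, memory_order_relaxed)
           & IRQ_MOVE_PENDING) )
        return 0;
    atomic_thread_fence(memory_order_acquire);
    *mask = d->pending_mask;
    return 1;
}

int main(void)
{
    struct desc d = { .status = 0, .pending_mask = 0 };
    unsigned long m;

    set_affinity(&d, 0x5ul);
    if ( read_pending(&d, &m) )
        printf("pending mask: %#lx\n", m);
    return 0;
}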