All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiang Liu <jiang.liu@linux.intel.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>,
	linux-kernel@vger.kernel.org, x86@kernel.org
Subject: [Bugfix 4/5] x86/irq: Fix a race condition between vector assigning and cleanup
Date: Mon, 30 Nov 2015 16:09:29 +0800	[thread overview]
Message-ID: <1448870970-1461-4-git-send-email-jiang.liu@linux.intel.com> (raw)
In-Reply-To: <1448870970-1461-1-git-send-email-jiang.liu@linux.intel.com>

Joe Lawrence <joe.lawrence@stratus.com> reported a use-after-free
issue in the x86 IRQ management code. Please refer to the following
link for more information:
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1026840.html

Thomas pointed out that it's caused by a race condition between
__assign_irq_vector() and __send_cleanup_vector(). Based on Thomas'
draft patch, we solve this race condition by:
1) Use move_in_progress to signal that an IRQ cleanup IPI is needed
2) Use old_domain to save old CPU mask for IRQ cleanup
3) Use vector_lock to protect move_in_progress and old_domain

This bugfix also gets rid of the atomic cpumask allocation in
__send_cleanup_vector().

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
---
 arch/x86/kernel/apic/vector.c |   77 ++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 57934ef1d032..b63d6f84c0bb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -117,9 +117,9 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
 	int cpu, err;
-	unsigned int dest = d->cfg.dest_apicid;
+	unsigned int dest;
 
-	if (d->move_in_progress)
+	if (cpumask_intersects(d->old_domain, cpu_online_mask))
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -144,13 +144,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			cpumask_and(used_cpumask, d->domain, vector_cpumask);
 			err = apic->cpu_mask_to_apicid_and(mask, used_cpumask,
 							   &dest);
-			if (err)
-				break;
-			cpumask_andnot(d->old_domain, d->domain,
-				       vector_cpumask);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_copy(d->domain, used_cpumask);
+			if (!err) {
+				cpumask_andnot(d->old_domain, d->domain,
+					       vector_cpumask);
+				cpumask_copy(d->domain, used_cpumask);
+				d->cfg.dest_apicid = dest;
+			}
 			break;
 		}
 
@@ -183,14 +182,12 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (d->cfg.vector) {
+		if (d->cfg.vector)
 			cpumask_copy(d->old_domain, d->domain);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-		}
+		d->cfg.vector = vector;
+		d->cfg.dest_apicid = dest;
 		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
 		err = 0;
 		break;
@@ -198,7 +195,8 @@ next:
 
 	if (!err) {
 		/* cache destination APIC IDs into cfg->dest_apicid */
-		d->cfg.dest_apicid = dest;
+		cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+		d->move_in_progress = !cpumask_empty(d->old_domain);
 	}
 
 	return err;
@@ -230,7 +228,7 @@ static int assign_irq_vector_policy(int irq, int node,
 
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 	int cpu, vector = data->cfg.vector;
 
 	BUG_ON(!vector);
@@ -239,10 +237,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress))
-		return;
-
-	desc = irq_to_desc(irq);
 	for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 		     vector++) {
@@ -424,10 +418,13 @@ static void __setup_vector_irq(int cpu)
 		struct irq_data *idata = irq_desc_get_irq_data(desc);
 
 		data = apic_chip_data(idata);
-		if (!data || !cpumask_test_cpu(cpu, data->domain))
-			continue;
-		vector = data->cfg.vector;
-		per_cpu(vector_irq, cpu)[vector] = desc;
+		if (data) {
+			cpumask_clear_cpu(cpu, data->old_domain);
+			if (cpumask_test_cpu(cpu, data->domain)) {
+				vector = data->cfg.vector;
+				per_cpu(vector_irq, cpu)[vector] = desc;
+			}
+		}
 	}
 	/* Mark the free vectors */
 	for (vector = 0; vector < NR_VECTORS; ++vector) {
@@ -509,20 +506,17 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
+	unsigned long flags;
 
-		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	if (!data->move_in_progress)
+		goto out_unlock;
 	data->move_in_progress = 0;
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+	if (!cpumask_empty(data->old_domain))
+		apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+out_unlock:
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -566,14 +560,10 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 
 		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
+		 * Nothing to cleanup if this cpu is not set
+		 * in the old_domain mask.
 		 */
-		if (data->move_in_progress)
-			goto unlock;
-
-		if (vector == data->cfg.vector &&
-		    cpumask_test_cpu(me, data->domain))
+		if (!cpumask_test_cpu(me, data->old_domain))
 			goto unlock;
 
 		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
@@ -589,6 +579,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 		}
 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		cpumask_clear_cpu(me, data->old_domain);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
-- 
1.7.10.4


  parent reply	other threads:[~2015-11-30  8:06 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-24  0:59 irq_desc use-after-free in smp_irq_move_cleanup_interrupt Joe Lawrence
2015-11-25 18:16 ` Thomas Gleixner
2015-11-25 19:31   ` Thomas Gleixner
2015-11-25 21:12     ` Thomas Gleixner
2015-11-25 22:02       ` Joe Lawrence
2015-11-27  8:06       ` Jiang Liu
2015-11-27  8:25         ` Thomas Gleixner
2015-11-30  8:09           ` [Bugfix 1/5] x86/irq: Do not reuse struct apic_chip_data.old_domain as temporary buffer Jiang Liu
2015-11-30  8:09             ` [Bugfix 2/5] x86/irq: Enhance __assign_irq_vector() to rollback in case of failure Jiang Liu
2015-12-10 18:39               ` [tip:x86/urgent] " tip-bot for Jiang Liu
2015-11-30  8:09             ` [Bugfix 3/5] x86/irq: Fix a race window in x86_vector_free_irqs() Jiang Liu
2015-12-10 18:40               ` [tip:x86/urgent] " tip-bot for Jiang Liu
2015-11-30  8:09             ` Jiang Liu [this message]
2015-12-01 22:46               ` [Bugfix 4/5] x86/irq: Fix a race condition between vector assigning and cleanup Joe Lawrence
2015-12-08  0:29                 ` Joe Lawrence
2015-12-08 21:31                   ` Thomas Gleixner
2015-12-10 18:40               ` [tip:x86/urgent] " tip-bot for Jiang Liu
2015-11-30  8:09             ` [Bugfix 5/5] x86/irq: Trivial cleanups for x86 vector allocation code Jiang Liu
2015-12-10 18:42               ` [tip:x86/apic] " tip-bot for Jiang Liu
2015-12-10 18:39             ` [tip:x86/urgent] x86/irq: Do not reuse struct apic_chip_data.old_domain as temporary buffer tip-bot for Jiang Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1448870970-1461-4-git-send-email-jiang.liu@linux.intel.com \
    --to=jiang.liu@linux.intel.com \
    --cc=joe.lawrence@stratus.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.