AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: prevent immediate PASID reuse case
@ 2026-03-17 18:58 Eric Huang
  2026-03-18  7:37 ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Huang @ 2026-03-17 18:58 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix.Kuehling, christian.koenig, Eric Huang

PASID reuse could cause cache, TLB and interrupt issues when a new
process immediately runs into hw state left behind by a previous
process that exited with the same PASID. To prevent this, allocate
PASIDs with the same cyclic allocator the kernel uses for PIDs.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 9cab36322c16..0801c023f5a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -35,10 +35,13 @@
  * PASIDs are global address space identifiers that can be shared
  * between the GPU, an IOMMU and the driver. VMs on different devices
  * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
+ * space. Therefore PASIDs are allocated using IDR cyclic allocator
+ * (similar to kernel PID allocation) which naturally delays reuse.
+ * VMs are looked up from the PASID per amdgpu_device.
  */
-static DEFINE_IDA(amdgpu_pasid_ida);
+
+static DEFINE_IDR(amdgpu_pasid_idr);
+static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
 
 /* Helper to free pasid from a fence callback */
 struct amdgpu_pasid_cb {
@@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
  * amdgpu_pasid_alloc - Allocate a PASID
  * @bits: Maximum width of the PASID in bits, must be at least 1
  *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
+ * Uses kernel's IDR cyclic allocator (same as PID allocation).
+ * Allocates sequentially with automatic wrap-around.
  *
  * Returns a positive integer on success. Returns %-EINVAL if bits==0.
  * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
@@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
  */
 int amdgpu_pasid_alloc(unsigned int bits)
 {
-	int pasid = -EINVAL;
+	int pasid;
 
-	for (bits = min(bits, 31U); bits > 0; bits--) {
-		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
-					(1U << bits) - 1, GFP_KERNEL);
-		if (pasid != -ENOSPC)
-			break;
-	}
+	if (bits == 0)
+		return -EINVAL;
+
+	spin_lock(&amdgpu_pasid_idr_lock);
+	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
+			         1U << bits, GFP_KERNEL);
+	spin_unlock(&amdgpu_pasid_idr_lock);
 
 	if (pasid >= 0)
 		trace_amdgpu_pasid_allocated(pasid);
@@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
 void amdgpu_pasid_free(u32 pasid)
 {
 	trace_amdgpu_pasid_freed(pasid);
-	ida_free(&amdgpu_pasid_ida, pasid);
+
+	spin_lock(&amdgpu_pasid_idr_lock);
+	idr_remove(&amdgpu_pasid_idr, pasid);
+	spin_unlock(&amdgpu_pasid_idr_lock);
 }
 
 static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
 		}
 	}
 }
+
+/**
+ * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
+ *
+ * Cleanup the IDR allocator.
+ */
+void amdgpu_pasid_mgr_cleanup(void)
+{
+	spin_lock(&amdgpu_pasid_idr_lock);
+	idr_destroy(&amdgpu_pasid_idr);
+	spin_unlock(&amdgpu_pasid_idr_lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index b3649cd3af56..a57919478d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
 void amdgpu_pasid_free(u32 pasid);
 void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 			       u32 pasid);
+void amdgpu_pasid_mgr_cleanup(void);
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b89013a6aa0b..5b9bdb79efcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	xa_destroy(&adev->vm_manager.pasids);
 
 	amdgpu_vmid_mgr_fini(adev);
+	amdgpu_pasid_mgr_cleanup();
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: prevent immediate PASID reuse case
  2026-03-17 18:58 [PATCH] drm/amdgpu: prevent immediate PASID reuse case Eric Huang
@ 2026-03-18  7:37 ` Christian König
  2026-03-18 14:03   ` Eric Huang
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2026-03-18  7:37 UTC (permalink / raw)
  To: Eric Huang, amd-gfx; +Cc: Felix.Kuehling

On 3/17/26 19:58, Eric Huang wrote:
> PASID reuse could cause cache, TLBs and interrupt issues
> when process immediately runs into hw states left by previous
> process exited with the same PASID, to prevent the case, it
> uses the same allocator as kernel pid's.

The implementation looks good now, but that is still not a good justification for the change.

What potential HW state do we have which could cause problems here?

Regards,
Christian.

> 
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
>  3 files changed, 34 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 9cab36322c16..0801c023f5a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -35,10 +35,13 @@
>   * PASIDs are global address space identifiers that can be shared
>   * between the GPU, an IOMMU and the driver. VMs on different devices
>   * may use the same PASID if they share the same address
> - * space. Therefore PASIDs are allocated using a global IDA. VMs are
> - * looked up from the PASID per amdgpu_device.
> + * space. Therefore PASIDs are allocated using IDR cyclic allocator
> + * (similar to kernel PID allocation) which naturally delays reuse.
> + * VMs are looked up from the PASID per amdgpu_device.
>   */
> -static DEFINE_IDA(amdgpu_pasid_ida);
> +
> +static DEFINE_IDR(amdgpu_pasid_idr);
> +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
>  
>  /* Helper to free pasid from a fence callback */
>  struct amdgpu_pasid_cb {
> @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
>   * amdgpu_pasid_alloc - Allocate a PASID
>   * @bits: Maximum width of the PASID in bits, must be at least 1
>   *
> - * Allocates a PASID of the given width while keeping smaller PASIDs
> - * available if possible.
> + * Uses kernel's IDR cyclic allocator (same as PID allocation).
> + * Allocates sequentially with automatic wrap-around.
>   *
>   * Returns a positive integer on success. Returns %-EINVAL if bits==0.
>   * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
> @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
>   */
>  int amdgpu_pasid_alloc(unsigned int bits)
>  {
> -	int pasid = -EINVAL;
> +	int pasid;
>  
> -	for (bits = min(bits, 31U); bits > 0; bits--) {
> -		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
> -					(1U << bits) - 1, GFP_KERNEL);
> -		if (pasid != -ENOSPC)
> -			break;
> -	}
> +	if (bits == 0)
> +		return -EINVAL;
> +
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
> +			         1U << bits, GFP_KERNEL);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
>  
>  	if (pasid >= 0)
>  		trace_amdgpu_pasid_allocated(pasid);
> @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
>  void amdgpu_pasid_free(u32 pasid)
>  {
>  	trace_amdgpu_pasid_freed(pasid);
> -	ida_free(&amdgpu_pasid_ida, pasid);
> +
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	idr_remove(&amdgpu_pasid_idr, pasid);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
>  }
>  
>  static void amdgpu_pasid_free_cb(struct dma_fence *fence,
> @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
>  		}
>  	}
>  }
> +
> +/**
> + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
> + *
> + * Cleanup the IDR allocator.
> + */
> +void amdgpu_pasid_mgr_cleanup(void)
> +{
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	idr_destroy(&amdgpu_pasid_idr);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index b3649cd3af56..a57919478d3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
>  void amdgpu_pasid_free(u32 pasid);
>  void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  			       u32 pasid);
> +void amdgpu_pasid_mgr_cleanup(void);
>  
>  bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
>  			       struct amdgpu_vmid *id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b89013a6aa0b..5b9bdb79efcf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>  	xa_destroy(&adev->vm_manager.pasids);
>  
>  	amdgpu_vmid_mgr_fini(adev);
> +	amdgpu_pasid_mgr_cleanup();
>  }
>  
>  /**


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: prevent immediate PASID reuse case
  2026-03-18  7:37 ` Christian König
@ 2026-03-18 14:03   ` Eric Huang
  0 siblings, 0 replies; 8+ messages in thread
From: Eric Huang @ 2026-03-18 14:03 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Felix.Kuehling

On 2026-03-18 03:37, Christian König wrote:
> On 3/17/26 19:58, Eric Huang wrote:
>> PASID reuse could cause cache, TLBs and interrupt issues
>> when process immediately runs into hw states left by previous
>> process exited with the same PASID, to prevent the case, it
>> uses the same allocator as kernel pid's.
> The implementation looks good now, but that is still not a good justification for the change.
>
> What potential HW state do we have which could cause problems here?
We do have an issue reported by a customer,
https://ontrack-internal.amd.com/browse/SWDEV-578010, which involves
two apps: the first app intentionally overflows a buffer, which causes a
bunch of GPU page faults, and then the second app runs immediately and
gets unexpected page faults with the same pasid. We had an internal
discussion, and Felix thinks a proper solution would be in the ID manager,
to make sure pasids don't get reused while there could still be outstanding
interrupts in the IH ring with that pasid. That is the motivation for
this change.

Regards,
Eric
>
> Regards,
> Christian.
>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
>>   3 files changed, 34 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> index 9cab36322c16..0801c023f5a5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
>> @@ -35,10 +35,13 @@
>>    * PASIDs are global address space identifiers that can be shared
>>    * between the GPU, an IOMMU and the driver. VMs on different devices
>>    * may use the same PASID if they share the same address
>> - * space. Therefore PASIDs are allocated using a global IDA. VMs are
>> - * looked up from the PASID per amdgpu_device.
>> + * space. Therefore PASIDs are allocated using IDR cyclic allocator
>> + * (similar to kernel PID allocation) which naturally delays reuse.
>> + * VMs are looked up from the PASID per amdgpu_device.
>>    */
>> -static DEFINE_IDA(amdgpu_pasid_ida);
>> +
>> +static DEFINE_IDR(amdgpu_pasid_idr);
>> +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
>>   
>>   /* Helper to free pasid from a fence callback */
>>   struct amdgpu_pasid_cb {
>> @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
>>    * amdgpu_pasid_alloc - Allocate a PASID
>>    * @bits: Maximum width of the PASID in bits, must be at least 1
>>    *
>> - * Allocates a PASID of the given width while keeping smaller PASIDs
>> - * available if possible.
>> + * Uses kernel's IDR cyclic allocator (same as PID allocation).
>> + * Allocates sequentially with automatic wrap-around.
>>    *
>>    * Returns a positive integer on success. Returns %-EINVAL if bits==0.
>>    * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
>> @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
>>    */
>>   int amdgpu_pasid_alloc(unsigned int bits)
>>   {
>> -	int pasid = -EINVAL;
>> +	int pasid;
>>   
>> -	for (bits = min(bits, 31U); bits > 0; bits--) {
>> -		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
>> -					(1U << bits) - 1, GFP_KERNEL);
>> -		if (pasid != -ENOSPC)
>> -			break;
>> -	}
>> +	if (bits == 0)
>> +		return -EINVAL;
>> +
>> +	spin_lock(&amdgpu_pasid_idr_lock);
>> +	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
>> +			         1U << bits, GFP_KERNEL);
>> +	spin_unlock(&amdgpu_pasid_idr_lock);
>>   
>>   	if (pasid >= 0)
>>   		trace_amdgpu_pasid_allocated(pasid);
>> @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
>>   void amdgpu_pasid_free(u32 pasid)
>>   {
>>   	trace_amdgpu_pasid_freed(pasid);
>> -	ida_free(&amdgpu_pasid_ida, pasid);
>> +
>> +	spin_lock(&amdgpu_pasid_idr_lock);
>> +	idr_remove(&amdgpu_pasid_idr, pasid);
>> +	spin_unlock(&amdgpu_pasid_idr_lock);
>>   }
>>   
>>   static void amdgpu_pasid_free_cb(struct dma_fence *fence,
>> @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
>>   		}
>>   	}
>>   }
>> +
>> +/**
>> + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
>> + *
>> + * Cleanup the IDR allocator.
>> + */
>> +void amdgpu_pasid_mgr_cleanup(void)
>> +{
>> +	spin_lock(&amdgpu_pasid_idr_lock);
>> +	idr_destroy(&amdgpu_pasid_idr);
>> +	spin_unlock(&amdgpu_pasid_idr_lock);
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> index b3649cd3af56..a57919478d3b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>> @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
>>   void amdgpu_pasid_free(u32 pasid);
>>   void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>>   			       u32 pasid);
>> +void amdgpu_pasid_mgr_cleanup(void);
>>   
>>   bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
>>   			       struct amdgpu_vmid *id);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b89013a6aa0b..5b9bdb79efcf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>   	xa_destroy(&adev->vm_manager.pasids);
>>   
>>   	amdgpu_vmid_mgr_fini(adev);
>> +	amdgpu_pasid_mgr_cleanup();
>>   }
>>   
>>   /**


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] drm/amdgpu: prevent immediate PASID reuse case
@ 2026-03-18 14:35 Eric Huang
  2026-03-18 14:51 ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Huang @ 2026-03-18 14:35 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix.Kuehling, christian.koenig, Eric Huang

Use the IDR cyclic allocator, the same one the kernel uses for PIDs.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 9cab36322c16..0801c023f5a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -35,10 +35,13 @@
  * PASIDs are global address space identifiers that can be shared
  * between the GPU, an IOMMU and the driver. VMs on different devices
  * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
+ * space. Therefore PASIDs are allocated using IDR cyclic allocator
+ * (similar to kernel PID allocation) which naturally delays reuse.
+ * VMs are looked up from the PASID per amdgpu_device.
  */
-static DEFINE_IDA(amdgpu_pasid_ida);
+
+static DEFINE_IDR(amdgpu_pasid_idr);
+static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
 
 /* Helper to free pasid from a fence callback */
 struct amdgpu_pasid_cb {
@@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
  * amdgpu_pasid_alloc - Allocate a PASID
  * @bits: Maximum width of the PASID in bits, must be at least 1
  *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
+ * Uses kernel's IDR cyclic allocator (same as PID allocation).
+ * Allocates sequentially with automatic wrap-around.
  *
  * Returns a positive integer on success. Returns %-EINVAL if bits==0.
  * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
@@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
  */
 int amdgpu_pasid_alloc(unsigned int bits)
 {
-	int pasid = -EINVAL;
+	int pasid;
 
-	for (bits = min(bits, 31U); bits > 0; bits--) {
-		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
-					(1U << bits) - 1, GFP_KERNEL);
-		if (pasid != -ENOSPC)
-			break;
-	}
+	if (bits == 0)
+		return -EINVAL;
+
+	spin_lock(&amdgpu_pasid_idr_lock);
+	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
+			         1U << bits, GFP_KERNEL);
+	spin_unlock(&amdgpu_pasid_idr_lock);
 
 	if (pasid >= 0)
 		trace_amdgpu_pasid_allocated(pasid);
@@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
 void amdgpu_pasid_free(u32 pasid)
 {
 	trace_amdgpu_pasid_freed(pasid);
-	ida_free(&amdgpu_pasid_ida, pasid);
+
+	spin_lock(&amdgpu_pasid_idr_lock);
+	idr_remove(&amdgpu_pasid_idr, pasid);
+	spin_unlock(&amdgpu_pasid_idr_lock);
 }
 
 static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
 		}
 	}
 }
+
+/**
+ * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
+ *
+ * Cleanup the IDR allocator.
+ */
+void amdgpu_pasid_mgr_cleanup(void)
+{
+	spin_lock(&amdgpu_pasid_idr_lock);
+	idr_destroy(&amdgpu_pasid_idr);
+	spin_unlock(&amdgpu_pasid_idr_lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index b3649cd3af56..a57919478d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
 void amdgpu_pasid_free(u32 pasid);
 void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 			       u32 pasid);
+void amdgpu_pasid_mgr_cleanup(void);
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b89013a6aa0b..5b9bdb79efcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	xa_destroy(&adev->vm_manager.pasids);
 
 	amdgpu_vmid_mgr_fini(adev);
+	amdgpu_pasid_mgr_cleanup();
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: prevent immediate PASID reuse case
  2026-03-18 14:35 Eric Huang
@ 2026-03-18 14:51 ` Christian König
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2026-03-18 14:51 UTC (permalink / raw)
  To: Eric Huang, amd-gfx; +Cc: Felix.Kuehling

On 3/18/26 15:35, Eric Huang wrote:
> using idr cyclic allocator same as kernel pid's.

What I mean is that this needs more technical background, not less. E.g. something like this:

It's possible that page faults are still pending in the IH ring buffer when the process exits and frees up its PASID.

Since we don't have a good way of flushing those out, use the PASIDs in a cyclic manner.

Regards,
Christian.

> 
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
>  3 files changed, 34 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 9cab36322c16..0801c023f5a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -35,10 +35,13 @@
>   * PASIDs are global address space identifiers that can be shared
>   * between the GPU, an IOMMU and the driver. VMs on different devices
>   * may use the same PASID if they share the same address
> - * space. Therefore PASIDs are allocated using a global IDA. VMs are
> - * looked up from the PASID per amdgpu_device.
> + * space. Therefore PASIDs are allocated using IDR cyclic allocator
> + * (similar to kernel PID allocation) which naturally delays reuse.
> + * VMs are looked up from the PASID per amdgpu_device.
>   */
> -static DEFINE_IDA(amdgpu_pasid_ida);
> +
> +static DEFINE_IDR(amdgpu_pasid_idr);
> +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
>  
>  /* Helper to free pasid from a fence callback */
>  struct amdgpu_pasid_cb {
> @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
>   * amdgpu_pasid_alloc - Allocate a PASID
>   * @bits: Maximum width of the PASID in bits, must be at least 1
>   *
> - * Allocates a PASID of the given width while keeping smaller PASIDs
> - * available if possible.
> + * Uses kernel's IDR cyclic allocator (same as PID allocation).
> + * Allocates sequentially with automatic wrap-around.
>   *
>   * Returns a positive integer on success. Returns %-EINVAL if bits==0.
>   * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
> @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
>   */
>  int amdgpu_pasid_alloc(unsigned int bits)
>  {
> -	int pasid = -EINVAL;
> +	int pasid;
>  
> -	for (bits = min(bits, 31U); bits > 0; bits--) {
> -		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
> -					(1U << bits) - 1, GFP_KERNEL);
> -		if (pasid != -ENOSPC)
> -			break;
> -	}
> +	if (bits == 0)
> +		return -EINVAL;
> +
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
> +			         1U << bits, GFP_KERNEL);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
>  
>  	if (pasid >= 0)
>  		trace_amdgpu_pasid_allocated(pasid);
> @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
>  void amdgpu_pasid_free(u32 pasid)
>  {
>  	trace_amdgpu_pasid_freed(pasid);
> -	ida_free(&amdgpu_pasid_ida, pasid);
> +
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	idr_remove(&amdgpu_pasid_idr, pasid);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
>  }
>  
>  static void amdgpu_pasid_free_cb(struct dma_fence *fence,
> @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
>  		}
>  	}
>  }
> +
> +/**
> + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
> + *
> + * Cleanup the IDR allocator.
> + */
> +void amdgpu_pasid_mgr_cleanup(void)
> +{
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	idr_destroy(&amdgpu_pasid_idr);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index b3649cd3af56..a57919478d3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
>  void amdgpu_pasid_free(u32 pasid);
>  void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  			       u32 pasid);
> +void amdgpu_pasid_mgr_cleanup(void);
>  
>  bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
>  			       struct amdgpu_vmid *id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b89013a6aa0b..5b9bdb79efcf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>  	xa_destroy(&adev->vm_manager.pasids);
>  
>  	amdgpu_vmid_mgr_fini(adev);
> +	amdgpu_pasid_mgr_cleanup();
>  }
>  
>  /**


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] drm/amdgpu: prevent immediate PASID reuse case
@ 2026-03-18 15:08 Eric Huang
  2026-03-19  7:23 ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Eric Huang @ 2026-03-18 15:08 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix.Kuehling, christian.koenig, Eric Huang

PASID reuse could cause interrupt issues when a process
immediately runs into hw state left behind by a previous
process that exited with the same PASID. It's possible that
page faults are still pending in the IH ring buffer when
the process exits and frees up its PASID. To prevent this,
use the IDR cyclic allocator, the same one the kernel uses for PIDs.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 9cab36322c16..0801c023f5a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -35,10 +35,13 @@
  * PASIDs are global address space identifiers that can be shared
  * between the GPU, an IOMMU and the driver. VMs on different devices
  * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
+ * space. Therefore PASIDs are allocated using IDR cyclic allocator
+ * (similar to kernel PID allocation) which naturally delays reuse.
+ * VMs are looked up from the PASID per amdgpu_device.
  */
-static DEFINE_IDA(amdgpu_pasid_ida);
+
+static DEFINE_IDR(amdgpu_pasid_idr);
+static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
 
 /* Helper to free pasid from a fence callback */
 struct amdgpu_pasid_cb {
@@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
  * amdgpu_pasid_alloc - Allocate a PASID
  * @bits: Maximum width of the PASID in bits, must be at least 1
  *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
+ * Uses kernel's IDR cyclic allocator (same as PID allocation).
+ * Allocates sequentially with automatic wrap-around.
  *
  * Returns a positive integer on success. Returns %-EINVAL if bits==0.
  * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
@@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
  */
 int amdgpu_pasid_alloc(unsigned int bits)
 {
-	int pasid = -EINVAL;
+	int pasid;
 
-	for (bits = min(bits, 31U); bits > 0; bits--) {
-		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
-					(1U << bits) - 1, GFP_KERNEL);
-		if (pasid != -ENOSPC)
-			break;
-	}
+	if (bits == 0)
+		return -EINVAL;
+
+	spin_lock(&amdgpu_pasid_idr_lock);
+	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
+			         1U << bits, GFP_KERNEL);
+	spin_unlock(&amdgpu_pasid_idr_lock);
 
 	if (pasid >= 0)
 		trace_amdgpu_pasid_allocated(pasid);
@@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
 void amdgpu_pasid_free(u32 pasid)
 {
 	trace_amdgpu_pasid_freed(pasid);
-	ida_free(&amdgpu_pasid_ida, pasid);
+
+	spin_lock(&amdgpu_pasid_idr_lock);
+	idr_remove(&amdgpu_pasid_idr, pasid);
+	spin_unlock(&amdgpu_pasid_idr_lock);
 }
 
 static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
 		}
 	}
 }
+
+/**
+ * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
+ *
+ * Cleanup the IDR allocator.
+ */
+void amdgpu_pasid_mgr_cleanup(void)
+{
+	spin_lock(&amdgpu_pasid_idr_lock);
+	idr_destroy(&amdgpu_pasid_idr);
+	spin_unlock(&amdgpu_pasid_idr_lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index b3649cd3af56..a57919478d3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
 void amdgpu_pasid_free(u32 pasid);
 void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 			       u32 pasid);
+void amdgpu_pasid_mgr_cleanup(void);
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b89013a6aa0b..5b9bdb79efcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	xa_destroy(&adev->vm_manager.pasids);
 
 	amdgpu_vmid_mgr_fini(adev);
+	amdgpu_pasid_mgr_cleanup();
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: prevent immediate PASID reuse case
  2026-03-18 15:08 Eric Huang
@ 2026-03-19  7:23 ` Christian König
  2026-03-26 16:39   ` Leo Li
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2026-03-19  7:23 UTC (permalink / raw)
  To: Eric Huang, amd-gfx; +Cc: Felix.Kuehling

On 3/18/26 16:08, Eric Huang wrote:
> PASID reuse could cause interrupt issues when a process
> immediately runs into hw state left by a previous
> process that exited with the same PASID; it's possible that
> page faults are still pending in the IH ring buffer when
> the process exits and frees up its PASID. To prevent this
> case, use the idr cyclic allocator, the same as for kernel pids.
> 
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  1 +
>  3 files changed, 34 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 9cab36322c16..0801c023f5a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -35,10 +35,13 @@
>   * PASIDs are global address space identifiers that can be shared
>   * between the GPU, an IOMMU and the driver. VMs on different devices
>   * may use the same PASID if they share the same address
> - * space. Therefore PASIDs are allocated using a global IDA. VMs are
> - * looked up from the PASID per amdgpu_device.
> + * space. Therefore PASIDs are allocated using IDR cyclic allocator
> + * (similar to kernel PID allocation) which naturally delays reuse.
> + * VMs are looked up from the PASID per amdgpu_device.
>   */
> -static DEFINE_IDA(amdgpu_pasid_ida);
> +
> +static DEFINE_IDR(amdgpu_pasid_idr);
> +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock);
>  
>  /* Helper to free pasid from a fence callback */
>  struct amdgpu_pasid_cb {
> @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb {
>   * amdgpu_pasid_alloc - Allocate a PASID
>   * @bits: Maximum width of the PASID in bits, must be at least 1
>   *
> - * Allocates a PASID of the given width while keeping smaller PASIDs
> - * available if possible.
> + * Uses kernel's IDR cyclic allocator (same as PID allocation).
> + * Allocates sequentially with automatic wrap-around.
>   *
>   * Returns a positive integer on success. Returns %-EINVAL if bits==0.
>   * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
> @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb {
>   */
>  int amdgpu_pasid_alloc(unsigned int bits)
>  {
> -	int pasid = -EINVAL;
> +	int pasid;
>  
> -	for (bits = min(bits, 31U); bits > 0; bits--) {
> -		pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
> -					(1U << bits) - 1, GFP_KERNEL);
> -		if (pasid != -ENOSPC)
> -			break;
> -	}
> +	if (bits == 0)
> +		return -EINVAL;
> +
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
> +			         1U << bits, GFP_KERNEL);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
>  
>  	if (pasid >= 0)
>  		trace_amdgpu_pasid_allocated(pasid);
> @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits)
>  void amdgpu_pasid_free(u32 pasid)
>  {
>  	trace_amdgpu_pasid_freed(pasid);
> -	ida_free(&amdgpu_pasid_ida, pasid);
> +
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	idr_remove(&amdgpu_pasid_idr, pasid);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
>  }
>  
>  static void amdgpu_pasid_free_cb(struct dma_fence *fence,
> @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
>  		}
>  	}
>  }
> +
> +/**
> + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager
> + *
> + * Cleanup the IDR allocator.
> + */
> +void amdgpu_pasid_mgr_cleanup(void)
> +{
> +	spin_lock(&amdgpu_pasid_idr_lock);
> +	idr_destroy(&amdgpu_pasid_idr);
> +	spin_unlock(&amdgpu_pasid_idr_lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index b3649cd3af56..a57919478d3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits);
>  void amdgpu_pasid_free(u32 pasid);
>  void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  			       u32 pasid);
> +void amdgpu_pasid_mgr_cleanup(void);
>  
>  bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
>  			       struct amdgpu_vmid *id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b89013a6aa0b..5b9bdb79efcf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2864,6 +2864,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>  	xa_destroy(&adev->vm_manager.pasids);
>  
>  	amdgpu_vmid_mgr_fini(adev);
> +	amdgpu_pasid_mgr_cleanup();
>  }
>  
>  /**


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: prevent immediate PASID reuse case
  2026-03-19  7:23 ` Christian König
@ 2026-03-26 16:39   ` Leo Li
  0 siblings, 0 replies; 8+ messages in thread
From: Leo Li @ 2026-03-26 16:39 UTC (permalink / raw)
  To: Christian König, Eric Huang, amd-gfx; +Cc: Felix.Kuehling



On 2026-03-19 03:23, Christian König wrote:
> On 3/18/26 16:08, Eric Huang wrote:
>> PASID reuse could cause interrupt issues when a process
>> immediately runs into hw state left by a previous
>> process that exited with the same PASID; it's possible that
>> page faults are still pending in the IH ring buffer when
>> the process exits and frees up its PASID. To prevent this
>> case, use the idr cyclic allocator, the same as for kernel pids.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> 
> Reviewed-by: Christian König <christian.koenig@amd.com>

Hi Eric,
I'm getting a lock prover warning with this change. Should it use spin_lock_irqsave() instead?
- Leo

[    9.139038] ================================
[    9.139041] WARNING: inconsistent lock state
[    9.139044] 6.19.0-MANJARO-ASDN-minimal+ #200 Not tainted
[    9.139047] --------------------------------
[    9.139049] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
[    9.139050] swapper/11/0 [HC1[1]:SC0[0]:HE0:SE1] takes:
[    9.139054] ffffffffc1e6f2f8 (amdgpu_pasid_idr_lock){?.+.}-{3:3}, at: amdgpu_pasid_free+0x2b/0xa0 [amdgpu]
[    9.139444] {HARDIRQ-ON-W} state was registered at:
[    9.139446]   lock_acquire+0xdb/0x2e0
[    9.139456]   _raw_spin_lock+0x34/0x80
[    9.139463]   amdgpu_pasid_alloc+0x20/0xd0 [amdgpu]
[    9.139717]   amdgpu_driver_open_kms+0x9a/0x310 [amdgpu]
[    9.139953]   drm_file_alloc+0x20d/0x310
[    9.139961]   drm_client_init+0x7b/0x160
[    9.139966]   amdgpu_amdkfd_drm_client_create+0x51/0x80 [amdgpu]
[    9.140285]   amdgpu_pci_probe+0x2ac/0x480 [amdgpu]
[    9.140516]   local_pci_probe+0x42/0x90
[    9.140522]   pci_device_probe+0xd7/0x260
[    9.140525]   really_probe+0xde/0x380
[    9.140531]   __driver_probe_device+0x78/0x150
[    9.140534]   driver_probe_device+0x1f/0xa0
[    9.140536]   __driver_attach+0xcf/0x210
[    9.140538]   bus_for_each_dev+0x7b/0xd0
[    9.140541]   bus_add_driver+0x11b/0x200
[    9.140543]   driver_register+0x75/0xe0
[    9.140546]   do_one_initcall+0x5b/0x360
[    9.140551]   do_init_module+0x62/0x240
[    9.140556]   init_module_from_file+0xd3/0xf0
[    9.140558]   idempotent_init_module+0x109/0x310
[    9.140560]   __x64_sys_finit_module+0x71/0xe0
[    9.140561]   do_syscall_64+0x99/0x490
[    9.140566]   entry_SYSCALL_64_after_hwframe+0x76/0x7e
[    9.140569] irq event stamp: 84734
[    9.140571] hardirqs last  enabled at (84733): [<ffffffffb0ac1ae7>] cpuidle_enter_state+0x107/0x4e0
[    9.140576] hardirqs last disabled at (84734): [<ffffffffb0abc353>] common_interrupt+0x13/0xd0
[    9.140580] softirqs last  enabled at (84712): [<ffffffffafb37aa8>] __irq_exit_rcu+0xe8/0x160
[    9.140585] softirqs last disabled at (84703): [<ffffffffafb37aa8>] __irq_exit_rcu+0xe8/0x160
[    9.140588] 
               other info that might help us debug this:
[    9.140589]  Possible unsafe locking scenario:

[    9.140590]        CPU0
[    9.140591]        ----
[    9.140592]   lock(amdgpu_pasid_idr_lock);
[    9.140594]   <Interrupt>
[    9.140595]     lock(amdgpu_pasid_idr_lock);
[    9.140597] 
                *** DEADLOCK ***

[    9.140598] 2 locks held by swapper/11/0:
[    9.140600]  #0: ffff88c753a32900 (&ring->fence_drv.lock){-...}-{3:3}, at: dma_fence_signal+0x24/0x70
[    9.140610]  #1: ffff88c741b58ab0 (&fence->lock){-...}-{3:3}, at: dma_fence_signal+0x24/0x70
[    9.140615] 
               stack backtrace:
[    9.140619] CPU: 11 UID: 0 PID: 0 Comm: swapper/11 Not tainted 6.19.0-MANJARO-ASDN-minimal+ #200 PREEMPT(full)  456c95f30fba7b087f768dc486320b859d317b7b
[    9.140624] Hardware name: HP HP Spectre Laptop 14-fd0xxx - 5CD411LN4C/8CDD, BIOS W81 Ver. 00.46.00 05/10/2024
[    9.140627] Call Trace:
[    9.140629]  <IRQ>
[    9.140636]  dump_stack_lvl+0x6c/0xa0
[    9.140639]  print_usage_bug.part.0+0x22b/0x2c0
[    9.140642]  mark_lock+0x821/0xa00
[    9.140644]  ? find_held_lock+0x2b/0x80
[    9.140646]  ? local_clock+0x15/0x30
[    9.140651]  ? lock_release+0x1c4/0x4a0
[    9.140654]  __lock_acquire+0x10ff/0x2250
[    9.140662]  lock_acquire+0xdb/0x2e0
[    9.140664]  ? amdgpu_pasid_free+0x2b/0xa0 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.140925]  _raw_spin_lock+0x34/0x80
[    9.140927]  ? amdgpu_pasid_free+0x2b/0xa0 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.141178]  amdgpu_pasid_free+0x2b/0xa0 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.141430]  amdgpu_pasid_free_cb+0x19/0x60 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.141681]  dma_fence_signal_timestamp_locked+0x9d/0x160
[    9.141685]  dma_fence_signal+0x37/0x70
[    9.141689]  drm_sched_job_done.isra.0+0x6d/0x1b0 [gpu_sched 4c9c47cecdaeac2a93e031dbb2cb233cb20e7757]
[    9.141696]  dma_fence_signal_timestamp_locked+0x9d/0x160
[    9.141699]  dma_fence_signal+0x37/0x70
[    9.141702]  amdgpu_fence_process+0xdd/0x150 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.141941]  sdma_v6_0_process_trap_irq+0x5a/0x70 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.142225]  amdgpu_irq_dispatch+0x189/0x2a0 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.142489]  amdgpu_ih_process+0x66/0x190 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.142745]  amdgpu_irq_handler+0x23/0x60 [amdgpu a0c72c9b7ef197ec09c5986009831d93cb38f16a]
[    9.143000]  __handle_irq_event_percpu+0x95/0x2f0
[    9.143007]  handle_irq_event+0x3b/0x80
[    9.143009]  handle_edge_irq+0xf5/0x1e0
[    9.143012]  __common_interrupt+0x79/0x1a0
[    9.143016]  ? tick_irq_enter+0x77/0xb0
[    9.143020]  common_interrupt+0x9c/0xd0
[    9.143023]  </IRQ>
[    9.143023]  <TASK>
[    9.143025]  asm_common_interrupt+0x26/0x40
[    9.143027] RIP: 0010:cpuidle_enter_state+0x10d/0x4e0
[    9.143032] Code: 01 48 0f a3 05 14 e8 dc 00 0f 82 a7 02 00 00 31 ff e8 67 f1 0e ff 45 84 ff 0f 85 8b 02 00 00 e8 59 cd 22 ff fb 0f 1f 44 00 00 <45> 85 f6 0f 88 cf 01 00 00 49 63 ce 48 2b 2c 24 48 6b d1 68 48 89
[    9.143033] RSP: 0018:ffffd1304023fe80 EFLAGS: 00000286
[    9.143036] RAX: 0000000000014afd RBX: 0000000000000003 RCX: ffffffffb0ac1ae7
[    9.143037] RDX: ffff88c740b30000 RSI: ffffffffb13de1de RDI: ffffffffb13a30c3
[    9.143038] RBP: 0000000220ab5620 R08: 0000000000000000 R09: 0000000000000001
[    9.143039] R10: 000000000000000b R11: 0000000000000000 R12: ffff88c747caf000
[    9.143040] R13: ffffffffb17b2100 R14: 0000000000000003 R15: 0000000000000000
[    9.143042]  ? cpuidle_enter_state+0x107/0x4e0
[    9.143046]  cpuidle_enter+0x31/0x50
[    9.143052]  do_idle+0x1d6/0x240
[    9.143057]  cpu_startup_entry+0x29/0x30
[    9.143059]  start_secondary+0x11c/0x150
[    9.143065]  common_startup_64+0x13e/0x141
[    9.143072]  </TASK>



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-03-26 16:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-17 18:58 [PATCH] drm/amdgpu: prevent immediate PASID reuse case Eric Huang
2026-03-18  7:37 ` Christian König
2026-03-18 14:03   ` Eric Huang
  -- strict thread matches above, loose matches on Subject: below --
2026-03-18 14:35 Eric Huang
2026-03-18 14:51 ` Christian König
2026-03-18 15:08 Eric Huang
2026-03-19  7:23 ` Christian König
2026-03-26 16:39   ` Leo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox