linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1] mm/gup: remove (VM_)BUG_ONs
@ 2025-06-04 14:05 David Hildenbrand
  2025-06-04 14:22 ` Vlastimil Babka
                   ` (7 more replies)
  0 siblings, 8 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-04 14:05 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mm, David Hildenbrand, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe, John Hubbard,
	Peter Xu

Especially once we hit one of the assertions in
sanity_check_pinned_pages(), observing follow-up assertions failing
in other code can give good clues about what went wrong, so use
VM_WARN_ON_ONCE instead.

While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
well. Add one comment for the pfn_valid() check.

We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.

Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
something > 0 we're in bigger trouble. Convert the other BUG_ON's into
VM_WARN_ON_ONCE as well, they are in a similar domain "should never
happen", but more reasonable to check for during early testing.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---

Wanted to do this for a long time, but my todo list keeps growing ...

Based on mm/mm-unstable

---
 include/linux/mmdebug.h | 12 ++++++++++++
 mm/gup.c                | 41 +++++++++++++++++++----------------------
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index a0a3894900ed4..14a45979cccc9 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
 	}								\
 	unlikely(__ret_warn_once);					\
 })
+#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
+	static bool __section(".data..once") __warned;			\
+	int __ret_warn_once = !!(cond);					\
+									\
+	if (unlikely(__ret_warn_once && !__warned)) {			\
+		dump_vma(vma);						\
+		__warned = true;					\
+		WARN_ON(1);						\
+	}								\
+	unlikely(__ret_warn_once);					\
+})
 #define VM_WARN_ON_VMG(cond, vmg)		({			\
 	int __ret_warn = !!(cond);					\
 									\
@@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
 #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
diff --git a/mm/gup.c b/mm/gup.c
index e065a49842a87..3c3931fcdd820 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
 		    !folio_test_anon(folio))
 			continue;
 		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
-			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
+			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
 		else
 			/* Either a PTE-mapped or a PMD-mapped THP. */
-			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
-				       !PageAnonExclusive(page), page);
+			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
+					     !PageAnonExclusive(page), page);
 	}
 }
 
@@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
 	if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
 		return ERR_PTR(-EMLINK);
 
-	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
-			!PageAnonExclusive(page), page);
+	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
+			     !PageAnonExclusive(page), page);
 
 	ret = try_grab_folio(page_folio(page), 1, flags);
 	if (ret)
@@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 		goto out;
 	}
 
-	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
-		       !PageAnonExclusive(page), page);
+	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
+			     !PageAnonExclusive(page), page);
 
 	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
 	ret = try_grab_folio(folio, 1, flags);
@@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
 	if (unshare) {
 		fault_flags |= FAULT_FLAG_UNSHARE;
 		/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
-		VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
+		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
 	}
 
 	ret = handle_mm_fault(vma, address, fault_flags, NULL);
@@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 		}
 
 		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
-		if (!*locked) {
-			BUG_ON(ret < 0);
-			BUG_ON(ret >= nr_pages);
-		}
+		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));
 
 		if (ret > 0) {
 			nr_pages -= ret;
@@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 
 		ret = mmap_read_lock_killable(mm);
 		if (ret) {
-			BUG_ON(ret > 0);
 			if (!pages_done)
 				pages_done = ret;
 			break;
@@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 				       pages, locked);
 		if (!*locked) {
 			/* Continue to retry until we succeeded */
-			BUG_ON(ret != 0);
+			VM_WARN_ON_ONCE(ret != 0);
 			goto retry;
 		}
 		if (ret != 1) {
-			BUG_ON(ret > 1);
+			VM_WARN_ON_ONCE(ret > 1);
 			if (!pages_done)
 				pages_done = ret;
 			break;
@@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 	int gup_flags;
 	long ret;
 
-	VM_BUG_ON(!PAGE_ALIGNED(start));
-	VM_BUG_ON(!PAGE_ALIGNED(end));
-	VM_BUG_ON_VMA(start < vma->vm_start, vma);
-	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
+	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
+	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
+	VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
+	VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
 	mmap_assert_locked(mm);
 
 	/*
@@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
 	int gup_flags;
 	long ret;
 
-	VM_BUG_ON(!PAGE_ALIGNED(start));
-	VM_BUG_ON(!PAGE_ALIGNED(end));
+	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
+	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
 	mmap_assert_locked(mm);
 
 	/*
@@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 		} else if (pte_special(pte))
 			goto pte_unmap;
 
-		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		/* If it's not marked as special it must have a valid memmap. */
+		VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 
 		folio = try_grab_folio_fast(page, 1, flags);

base-commit: 2d0c297637e7d59771c1533847c666cdddc19884
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
@ 2025-06-04 14:22 ` Vlastimil Babka
  2025-06-04 14:26   ` Suren Baghdasaryan
  2025-06-04 14:48 ` Lorenzo Stoakes
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 55+ messages in thread
From: Vlastimil Babka @ 2025-06-04 14:22 UTC (permalink / raw)
  To: David Hildenbrand, linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe,
	John Hubbard, Peter Xu

On 6/4/25 16:05, David Hildenbrand wrote:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.
> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Makes sense, BUG_ONs bad.

Acked-by: Vlastimil Babka <vbabka@suse.cz>

> ---
> 
> Wanted to do this for a long time, but my todo list keeps growing ...
> 
> Based on mm/mm-unstable
> 
> ---
>  include/linux/mmdebug.h | 12 ++++++++++++
>  mm/gup.c                | 41 +++++++++++++++++++----------------------
>  2 files changed, 31 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> index a0a3894900ed4..14a45979cccc9 100644
> --- a/include/linux/mmdebug.h
> +++ b/include/linux/mmdebug.h
> @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  	}								\
>  	unlikely(__ret_warn_once);					\
>  })
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
> +	static bool __section(".data..once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(__ret_warn_once && !__warned)) {			\
> +		dump_vma(vma);						\
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})
>  #define VM_WARN_ON_VMG(cond, vmg)		({			\
>  	int __ret_warn = !!(cond);					\
>  									\
> @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> diff --git a/mm/gup.c b/mm/gup.c
> index e065a49842a87..3c3931fcdd820 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
>  		    !folio_test_anon(folio))
>  			continue;
>  		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
>  		else
>  			/* Either a PTE-mapped or a PMD-mapped THP. */
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> -				       !PageAnonExclusive(page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> +					     !PageAnonExclusive(page), page);
>  	}
>  }
>  
> @@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
>  	if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
>  		return ERR_PTR(-EMLINK);
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -			!PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	ret = try_grab_folio(page_folio(page), 1, flags);
>  	if (ret)
> @@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
>  		goto out;
>  	}
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -		       !PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
>  	ret = try_grab_folio(folio, 1, flags);
> @@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
>  	if (unshare) {
>  		fault_flags |= FAULT_FLAG_UNSHARE;
>  		/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
> -		VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
> +		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
>  	}
>  
>  	ret = handle_mm_fault(vma, address, fault_flags, NULL);
> @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  		}
>  
>  		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
> -		if (!*locked) {
> -			BUG_ON(ret < 0);
> -			BUG_ON(ret >= nr_pages);
> -		}
> +		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));
>  
>  		if (ret > 0) {
>  			nr_pages -= ret;
> @@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  
>  		ret = mmap_read_lock_killable(mm);
>  		if (ret) {
> -			BUG_ON(ret > 0);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  				       pages, locked);
>  		if (!*locked) {
>  			/* Continue to retry until we succeeded */
> -			BUG_ON(ret != 0);
> +			VM_WARN_ON_ONCE(ret != 0);
>  			goto retry;
>  		}
>  		if (ret != 1) {
> -			BUG_ON(ret > 1);
> +			VM_WARN_ON_ONCE(ret > 1);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> -	VM_BUG_ON_VMA(start < vma->vm_start, vma);
> -	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
> +	VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
>  		} else if (pte_special(pte))
>  			goto pte_unmap;
>  
> -		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> +		/* If it's not marked as special it must have a valid memmap. */
> +		VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
>  
>  		folio = try_grab_folio_fast(page, 1, flags);
> 
> base-commit: 2d0c297637e7d59771c1533847c666cdddc19884



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:22 ` Vlastimil Babka
@ 2025-06-04 14:26   ` Suren Baghdasaryan
  0 siblings, 0 replies; 55+ messages in thread
From: Suren Baghdasaryan @ 2025-06-04 14:26 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: David Hildenbrand, linux-kernel, linux-mm, Andrew Morton,
	Lorenzo Stoakes, Liam R. Howlett, Mike Rapoport, Michal Hocko,
	Jason Gunthorpe, John Hubbard, Peter Xu

On Wed, Jun 4, 2025 at 7:22 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 6/4/25 16:05, David Hildenbrand wrote:
> > Especially once we hit one of the assertions in
> > sanity_check_pinned_pages(), observing follow-up assertions failing
> > in other code can give good clues about what went wrong, so use
> > VM_WARN_ON_ONCE instead.
> >
> > While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> > well. Add one comment for the pfn_valid() check.
> >
> > We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> >
> > Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> > something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> > VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> > happen", but more reasonable to check for during early testing.
> >
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> > Cc: Vlastimil Babka <vbabka@suse.cz>
> > Cc: Mike Rapoport <rppt@kernel.org>
> > Cc: Suren Baghdasaryan <surenb@google.com>
> > Cc: Michal Hocko <mhocko@suse.com>
> > Cc: Jason Gunthorpe <jgg@ziepe.ca>
> > Cc: John Hubbard <jhubbard@nvidia.com>
> > Cc: Peter Xu <peterx@redhat.com>
> > Signed-off-by: David Hildenbrand <david@redhat.com>
>
> Makes sense, BUG_ONs bad.
>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>

Reviewed-by: Suren Baghdasaryan <surenb@google.com>

>
> > ---
> >
> > Wanted to do this for a long time, but my todo list keeps growing ...
> >
> > Based on mm/mm-unstable
> >
> > ---
> >  include/linux/mmdebug.h | 12 ++++++++++++
> >  mm/gup.c                | 41 +++++++++++++++++++----------------------
> >  2 files changed, 31 insertions(+), 22 deletions(-)
> >
> > diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> > index a0a3894900ed4..14a45979cccc9 100644
> > --- a/include/linux/mmdebug.h
> > +++ b/include/linux/mmdebug.h
> > @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
> >       }                                                               \
> >       unlikely(__ret_warn_once);                                      \
> >  })
> > +#define VM_WARN_ON_ONCE_VMA(cond, vma)               ({                      \
> > +     static bool __section(".data..once") __warned;                  \
> > +     int __ret_warn_once = !!(cond);                                 \
> > +                                                                     \
> > +     if (unlikely(__ret_warn_once && !__warned)) {                   \
> > +             dump_vma(vma);                                          \
> > +             __warned = true;                                        \
> > +             WARN_ON(1);                                             \
> > +     }                                                               \
> > +     unlikely(__ret_warn_once);                                      \
> > +})
> >  #define VM_WARN_ON_VMG(cond, vmg)            ({                      \
> >       int __ret_warn = !!(cond);                                      \
> >                                                                       \
> > @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
> >  #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
> >  #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
> >  #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> > +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
> >  #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
> >  #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
> >  #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> > diff --git a/mm/gup.c b/mm/gup.c
> > index e065a49842a87..3c3931fcdd820 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
> >                   !folio_test_anon(folio))
> >                       continue;
> >               if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> > -                     VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> > +                     VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
> >               else
> >                       /* Either a PTE-mapped or a PMD-mapped THP. */
> > -                     VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> > -                                    !PageAnonExclusive(page), page);
> > +                     VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> > +                                          !PageAnonExclusive(page), page);
> >       }
> >  }
> >
> > @@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
> >       if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
> >               return ERR_PTR(-EMLINK);
> >
> > -     VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> > -                     !PageAnonExclusive(page), page);
> > +     VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> > +                          !PageAnonExclusive(page), page);
> >
> >       ret = try_grab_folio(page_folio(page), 1, flags);
> >       if (ret)
> > @@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
> >               goto out;
> >       }
> >
> > -     VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> > -                    !PageAnonExclusive(page), page);
> > +     VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> > +                          !PageAnonExclusive(page), page);
> >
> >       /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
> >       ret = try_grab_folio(folio, 1, flags);
> > @@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
> >       if (unshare) {
> >               fault_flags |= FAULT_FLAG_UNSHARE;
> >               /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
> > -             VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
> > +             VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
> >       }
> >
> >       ret = handle_mm_fault(vma, address, fault_flags, NULL);
> > @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
> >               }
> >
> >               /* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
> > -             if (!*locked) {
> > -                     BUG_ON(ret < 0);
> > -                     BUG_ON(ret >= nr_pages);
> > -             }
> > +             VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));
> >
> >               if (ret > 0) {
> >                       nr_pages -= ret;
> > @@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
> >
> >               ret = mmap_read_lock_killable(mm);
> >               if (ret) {
> > -                     BUG_ON(ret > 0);
> >                       if (!pages_done)
> >                               pages_done = ret;
> >                       break;
> > @@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
> >                                      pages, locked);
> >               if (!*locked) {
> >                       /* Continue to retry until we succeeded */
> > -                     BUG_ON(ret != 0);
> > +                     VM_WARN_ON_ONCE(ret != 0);
> >                       goto retry;
> >               }
> >               if (ret != 1) {
> > -                     BUG_ON(ret > 1);
> > +                     VM_WARN_ON_ONCE(ret > 1);
> >                       if (!pages_done)
> >                               pages_done = ret;
> >                       break;
> > @@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
> >       int gup_flags;
> >       long ret;
> >
> > -     VM_BUG_ON(!PAGE_ALIGNED(start));
> > -     VM_BUG_ON(!PAGE_ALIGNED(end));
> > -     VM_BUG_ON_VMA(start < vma->vm_start, vma);
> > -     VM_BUG_ON_VMA(end   > vma->vm_end, vma);
> > +     VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> > +     VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> > +     VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
> > +     VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
> >       mmap_assert_locked(mm);
> >
> >       /*
> > @@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
> >       int gup_flags;
> >       long ret;
> >
> > -     VM_BUG_ON(!PAGE_ALIGNED(start));
> > -     VM_BUG_ON(!PAGE_ALIGNED(end));
> > +     VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> > +     VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> >       mmap_assert_locked(mm);
> >
> >       /*
> > @@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
> >               } else if (pte_special(pte))
> >                       goto pte_unmap;
> >
> > -             VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> > +             /* If it's not marked as special it must have a valid memmap. */
> > +             VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
> >               page = pte_page(pte);
> >
> >               folio = try_grab_folio_fast(page, 1, flags);
> >
> > base-commit: 2d0c297637e7d59771c1533847c666cdddc19884
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
  2025-06-04 14:22 ` Vlastimil Babka
@ 2025-06-04 14:48 ` Lorenzo Stoakes
  2025-06-04 14:58   ` David Hildenbrand
  2025-06-04 15:42   ` Linus Torvalds
  2025-06-04 15:14 ` Jason Gunthorpe
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-04 14:48 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Jason Gunthorpe, John Hubbard, Peter Xu, Linus Torvalds

+Linus in case he has an opinion about BUG_ON() in general...

On Wed, Jun 04, 2025 at 04:05:44PM +0200, David Hildenbrand wrote:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.

I guess the situation where you'd actually want a BUG_ON() is one where
carrying on might cause further corruption so you just want things to stop.

But usually we're already pretty screwed if the thing happened right? So
it's rare if ever that this would be legit?

Linus's point of view is that we shouldn't use them _at all_ right? So
maybe even this situation isn't one where we'd want to use one?

>
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.

Yeah VM_BUG_ON() is just _weird_. Maybe we should get rid of all of them
full stop?

>
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.

I checked the implementation vs. the other VM_WARN_ON_ONCE_*()'s and it
looks good.

I wonder if we can find a way to not duplicate this code... but one for a
follow up I think :>)

>
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.
>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

LGTM so,

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

One nit below.

> ---
>
> Wanted to do this for a long time, but my todo list keeps growing ...

Sounds familiar :) Merge window a chance to do some of these things...

>
> Based on mm/mm-unstable
>
> ---
>  include/linux/mmdebug.h | 12 ++++++++++++
>  mm/gup.c                | 41 +++++++++++++++++++----------------------
>  2 files changed, 31 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> index a0a3894900ed4..14a45979cccc9 100644
> --- a/include/linux/mmdebug.h
> +++ b/include/linux/mmdebug.h
> @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  	}								\
>  	unlikely(__ret_warn_once);					\
>  })
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
> +	static bool __section(".data..once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(__ret_warn_once && !__warned)) {			\
> +		dump_vma(vma);						\
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})

An aside, I wonder if we could somehow make this generic for various
WARN_ON_ONCE()'s?

>  #define VM_WARN_ON_VMG(cond, vmg)		({			\
>  	int __ret_warn = !!(cond);					\
>  									\
> @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> diff --git a/mm/gup.c b/mm/gup.c
> index e065a49842a87..3c3931fcdd820 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
>  		    !folio_test_anon(folio))
>  			continue;
>  		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
>  		else
>  			/* Either a PTE-mapped or a PMD-mapped THP. */
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> -				       !PageAnonExclusive(page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> +					     !PageAnonExclusive(page), page);

Nit but wouldn't VM_WARN_ON_ONCE_FOLIO() work better here?

>  	}
>  }
>
> @@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
>  	if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
>  		return ERR_PTR(-EMLINK);
>
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -			!PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>
>  	ret = try_grab_folio(page_folio(page), 1, flags);
>  	if (ret)
> @@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
>  		goto out;
>  	}
>
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -		       !PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>
>  	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
>  	ret = try_grab_folio(folio, 1, flags);
> @@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
>  	if (unshare) {
>  		fault_flags |= FAULT_FLAG_UNSHARE;
>  		/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
> -		VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
> +		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
>  	}
>
>  	ret = handle_mm_fault(vma, address, fault_flags, NULL);
> @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  		}
>
>  		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
> -		if (!*locked) {
> -			BUG_ON(ret < 0);
> -			BUG_ON(ret >= nr_pages);
> -		}
> +		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));

Yeah this is neater too :)

>
>  		if (ret > 0) {
>  			nr_pages -= ret;
> @@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>
>  		ret = mmap_read_lock_killable(mm);
>  		if (ret) {
> -			BUG_ON(ret > 0);

This is a really weird one... not sure why this was ever there...

>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  				       pages, locked);
>  		if (!*locked) {
>  			/* Continue to retry until we succeeded */
> -			BUG_ON(ret != 0);
> +			VM_WARN_ON_ONCE(ret != 0);
>  			goto retry;
>  		}
>  		if (ret != 1) {
> -			BUG_ON(ret > 1);
> +			VM_WARN_ON_ONCE(ret > 1);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
>  	int gup_flags;
>  	long ret;
>
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> -	VM_BUG_ON_VMA(start < vma->vm_start, vma);
> -	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
> +	VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
>  	mmap_assert_locked(mm);
>
>  	/*
> @@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
>  	int gup_flags;
>  	long ret;
>
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
>  	mmap_assert_locked(mm);
>
>  	/*
> @@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
>  		} else if (pte_special(pte))
>  			goto pte_unmap;
>
> -		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> +		/* If it's not marked as special it must have a valid memmap. */

Nice to add a bit of documentation here too!

> +		VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
>
>  		folio = try_grab_folio_fast(page, 1, flags);
>
> base-commit: 2d0c297637e7d59771c1533847c666cdddc19884
> --
> 2.49.0
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:48 ` Lorenzo Stoakes
@ 2025-06-04 14:58   ` David Hildenbrand
  2025-06-04 15:44     ` Lorenzo Stoakes
  2025-06-04 15:42   ` Linus Torvalds
  1 sibling, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-04 14:58 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: linux-kernel, linux-mm, Andrew Morton, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Jason Gunthorpe, John Hubbard, Peter Xu, Linus Torvalds

On 04.06.25 16:48, Lorenzo Stoakes wrote:
> +Linus in case he has an opinion about BUG_ON() in general...
> 
> On Wed, Jun 04, 2025 at 04:05:44PM +0200, David Hildenbrand wrote:
>> Especially once we hit one of the assertions in
>> sanity_check_pinned_pages(), observing follow-up assertions failing
>> in other code can give good clues about what went wrong, so use
>> VM_WARN_ON_ONCE instead.
> 
> I guess the situation where you'd actually want a BUG_ON() is one where
> carrying on might cause further corruption so you just want things to stop.

Yes. Like, serious data corruption would be avoidable.

> 
> But usually we're already pretty screwed if the thing happened right? So
> it's rare if ever that this would be legit?
> 
> Linus's point of view is that we shouldn't use them _at all_ right? So
> maybe even this situation isn't one where we'd want to use one?

I think the grey zone is actual data corruption. But one has to have a 
pretty good reason to use a BUG_ON and not a WARN_ON_ONCE() + recovery.

> 
>>
>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>> well. Add one comment for the pfn_valid() check.
> 
> Yeah VM_BUG_ON() is just _weird_. Maybe we should get rid of all of them
> full stop?

That's my thinking a well.

> 
>>
>> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> I checked the implementation vs. the other VM_WARN_ON_ONCE_*()'s and it
> looks good.
> 
> I wonder if we can find a way to not duplicate this code... but one for a
> follow up I think :>)
> 
>>
>> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
>> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
>> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
>> happen", but more reasonable to check for during early testing.
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>> Cc: Vlastimil Babka <vbabka@suse.cz>
>> Cc: Mike Rapoport <rppt@kernel.org>
>> Cc: Suren Baghdasaryan <surenb@google.com>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: Jason Gunthorpe <jgg@ziepe.ca>
>> Cc: John Hubbard <jhubbard@nvidia.com>
>> Cc: Peter Xu <peterx@redhat.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> LGTM so,
> 
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> 
> 

Thanks!

> One nit below.
> 
>> ---
>>
>> Wanted to do this for a long time, but my todo list keeps growing ...
> 
> Sounds familiar :) Merge window a chance to do some of these things...
> 
>>
>> Based on mm/mm-unstable
>>
>> ---
>>   include/linux/mmdebug.h | 12 ++++++++++++
>>   mm/gup.c                | 41 +++++++++++++++++++----------------------
>>   2 files changed, 31 insertions(+), 22 deletions(-)
>>
>> diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
>> index a0a3894900ed4..14a45979cccc9 100644
>> --- a/include/linux/mmdebug.h
>> +++ b/include/linux/mmdebug.h
>> @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>>   	}								\
>>   	unlikely(__ret_warn_once);					\
>>   })
>> +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
>> +	static bool __section(".data..once") __warned;			\
>> +	int __ret_warn_once = !!(cond);					\
>> +									\
>> +	if (unlikely(__ret_warn_once && !__warned)) {			\
>> +		dump_vma(vma);						\
>> +		__warned = true;					\
>> +		WARN_ON(1);						\
>> +	}								\
>> +	unlikely(__ret_warn_once);					\
>> +})
> 
> An aside, I wonder if we could somehow make this generic for various
> WARN_ON_ONCE()'s?

Yeah, probably. Maybe it will get .... ugly :)

> 
>>   #define VM_WARN_ON_VMG(cond, vmg)		({			\
>>   	int __ret_warn = !!(cond);					\
>>   									\
>> @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>>   #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>>   #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>>   #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
>> +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
>>   #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
>>   #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
>>   #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
>> diff --git a/mm/gup.c b/mm/gup.c
>> index e065a49842a87..3c3931fcdd820 100644
>> --- a/mm/gup.c
>> +++ b/mm/gup.c
>> @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
>>   		    !folio_test_anon(folio))
>>   			continue;
>>   		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
>> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
>> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
>>   		else
>>   			/* Either a PTE-mapped or a PMD-mapped THP. */
>> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
>> -				       !PageAnonExclusive(page), page);
>> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
>> +					     !PageAnonExclusive(page), page);
> 
> Nit but wouldn't VM_WARN_ON_ONCE_FOLIO() work better here?

No, we want the actual problematic page here, as that can give us clues 
what is going wrong.

For the small-folio case above we could use it, though.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
  2025-06-04 14:22 ` Vlastimil Babka
  2025-06-04 14:48 ` Lorenzo Stoakes
@ 2025-06-04 15:14 ` Jason Gunthorpe
  2025-06-04 16:01   ` David Hildenbrand
  2025-06-04 17:25 ` SeongJae Park
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 55+ messages in thread
From: Jason Gunthorpe @ 2025-06-04 15:14 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, John Hubbard, Peter Xu

On Wed, Jun 04, 2025 at 04:05:44PM +0200, David Hildenbrand wrote:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.
> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.

I'm fine with this, but IMHO, using ON_ONCE is wasteful here.

They have been BUG_ONs for ages, they really do never happen. If there
was ever a case to justify not using ON_ONCE this is it..

Jason


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:48 ` Lorenzo Stoakes
  2025-06-04 14:58   ` David Hildenbrand
@ 2025-06-04 15:42   ` Linus Torvalds
  2025-06-04 16:05     ` Lorenzo Stoakes
  1 sibling, 1 reply; 55+ messages in thread
From: Linus Torvalds @ 2025-06-04 15:42 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: David Hildenbrand, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe, John Hubbard,
	Peter Xu

On Wed, 4 Jun 2025 at 07:49, Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:
>
> Linus's point of view is that we shouldn't use them _at all_ right? So
> maybe even this situation isn't one where we'd want to use one?

So I think BUG_ON() basically grew from

 (a) laziness. Not a good reason.

 (b) we historically had a model where we'd just kill processes on
fatal errors, particularly page faults

That (b) in particular *used* to work quite well for recovery - a
couple of decades ago ago.  A kernel bug would print out the
backtrace, and then kill the process (literally with do_exit()) and
try to run something else.

It was wonderfully useful in that you'd get an oops, and the system
would continue, but that *thread* wouldn't continue.

And decades ago, it worked quite well, because the system was much
simpler, and the likelihood that we held any critical locks was
generally pretty low.

But then SMP happened, and at first it wasn't a huge deal: we had one
special lock, and the exit path would just clean *that* lock up, and
life continued to be good.

But that was literally over two decades ago, and none of the above
actually ever used BUG_ON(). The page fault code would literally do

        die("Oops", regs, error_code);

on a fatal page fault. A "BUG_ON()" didn't even exist back then, and
die() looked like this:

        console_verbose();
        spin_lock_irq(&die_lock);
        printk("%s: %04lx\n", str, err & 0xffff);
        show_registers(regs);

        spin_unlock_irq(&die_lock);
        do_exit(SIGSEGV);

which tried to simply serialize the error output, and then kill the process.

When it worked, it worked quite well.

(And yes, page faults are very relevant, because this is what BUG
looked like back then:

    #define BUG() *(int *)0 = 0

so it all depended on that page fault printing out the state and exiting)

But as you can well imagine, it worked increasingly badly with
increasing complexity and locking depth.

When you come from that kind of "kill the process on errors" and you
then realize that you can't really do that any more, you end up with
BUG_ON().

The BUG_ON() thing was introduced in 2.5.0, and initially came from
debug code in the block layer rewrite.

And in that particular context, it actually made sense: this was new
code that changed the block elevator, and if that code got it wrong,
you were pretty much *guaranteed* disk corruption.

But then it became a pattern. And I think that pattern is basically never good.

I really think that the *ONLY* situation where BUG() is valid is when
you absolutely *know* that corruption will happen, and you cannot
continue.

Very much *not* some kind of "this is problematic, and who knows what
corruption it might cause".  But "I *know* I can't continue without
major system because the hardware is broken sh*t".

In other words, don't use it. Ever. Unless you can explain exactly why
without any handwaving.

Cloud providers or others can do "panic-on-warn" if they want to stop
the machine at the first sign of trouble.

                  Linus


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:58   ` David Hildenbrand
@ 2025-06-04 15:44     ` Lorenzo Stoakes
  0 siblings, 0 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-04 15:44 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Jason Gunthorpe, John Hubbard, Peter Xu, Linus Torvalds

On Wed, Jun 04, 2025 at 04:58:25PM +0200, David Hildenbrand wrote:
> On 04.06.25 16:48, Lorenzo Stoakes wrote:
> > +Linus in case he has an opinion about BUG_ON() in general...
> >
> > On Wed, Jun 04, 2025 at 04:05:44PM +0200, David Hildenbrand wrote:
> > > Especially once we hit one of the assertions in
> > > sanity_check_pinned_pages(), observing follow-up assertions failing
> > > in other code can give good clues about what went wrong, so use
> > > VM_WARN_ON_ONCE instead.
> >
> > I guess the situation where you'd actually want a BUG_ON() is one where
> > carrying on might cause further corruption so you just want things to stop.
>
> Yes. Like, serious data corruption would be avoidable.

Yeah, I just wonder how often this is ever reliably for sure the case...

>
> >
> > But usually we're already pretty screwed if the thing happened right? So
> > it's rare if ever that this would be legit?
> >
> > Linus's point of view is that we shouldn't use them _at all_ right? So
> > maybe even this situation isn't one where we'd want to use one?
>
> I think the grey zone is actual data corruption. But one has to have a
> pretty good reason to use a BUG_ON and not a WARN_ON_ONCE() + recovery.

Right.

>
> >
> > >
> > > While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> > > well. Add one comment for the pfn_valid() check.
> >
> > Yeah VM_BUG_ON() is just _weird_. Maybe we should get rid of all of them
> > full stop?
>
> That's my thinking a well.

:)

>
> >
> > >
> > > We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> >
> > I checked the implementation vs. the other VM_WARN_ON_ONCE_*()'s and it
> > looks good.
> >
> > I wonder if we can find a way to not duplicate this code... but one for a
> > follow up I think :>)
> >
> > >
> > > Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> > > something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> > > VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> > > happen", but more reasonable to check for during early testing.
> > >
> > > Cc: Andrew Morton <akpm@linux-foundation.org>
> > > Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > > Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> > > Cc: Vlastimil Babka <vbabka@suse.cz>
> > > Cc: Mike Rapoport <rppt@kernel.org>
> > > Cc: Suren Baghdasaryan <surenb@google.com>
> > > Cc: Michal Hocko <mhocko@suse.com>
> > > Cc: Jason Gunthorpe <jgg@ziepe.ca>
> > > Cc: John Hubbard <jhubbard@nvidia.com>
> > > Cc: Peter Xu <peterx@redhat.com>
> > > Signed-off-by: David Hildenbrand <david@redhat.com>
> >
> > LGTM so,
> >
> > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> >
> >
>
> Thanks!
>
> > One nit below.
> >
> > > ---
> > >
> > > Wanted to do this for a long time, but my todo list keeps growing ...
> >
> > Sounds familiar :) Merge window a chance to do some of these things...
> >
> > >
> > > Based on mm/mm-unstable
> > >
> > > ---
> > >   include/linux/mmdebug.h | 12 ++++++++++++
> > >   mm/gup.c                | 41 +++++++++++++++++++----------------------
> > >   2 files changed, 31 insertions(+), 22 deletions(-)
> > >
> > > diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> > > index a0a3894900ed4..14a45979cccc9 100644
> > > --- a/include/linux/mmdebug.h
> > > +++ b/include/linux/mmdebug.h
> > > @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
> > >   	}								\
> > >   	unlikely(__ret_warn_once);					\
> > >   })
> > > +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
> > > +	static bool __section(".data..once") __warned;			\
> > > +	int __ret_warn_once = !!(cond);					\
> > > +									\
> > > +	if (unlikely(__ret_warn_once && !__warned)) {			\
> > > +		dump_vma(vma);						\
> > > +		__warned = true;					\
> > > +		WARN_ON(1);						\
> > > +	}								\
> > > +	unlikely(__ret_warn_once);					\
> > > +})
> >
> > An aside, I wonder if we could somehow make this generic for various
> > WARN_ON_ONCE()'s?
>
> Yeah, probably. Maybe it will get .... ugly :)
>
> >
> > >   #define VM_WARN_ON_VMG(cond, vmg)		({			\
> > >   	int __ret_warn = !!(cond);					\
> > >   									\
> > > @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
> > >   #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
> > >   #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
> > >   #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> > > +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
> > >   #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
> > >   #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
> > >   #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> > > diff --git a/mm/gup.c b/mm/gup.c
> > > index e065a49842a87..3c3931fcdd820 100644
> > > --- a/mm/gup.c
> > > +++ b/mm/gup.c
> > > @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
> > >   		    !folio_test_anon(folio))
> > >   			continue;
> > >   		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> > > -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> > > +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
> > >   		else
> > >   			/* Either a PTE-mapped or a PMD-mapped THP. */
> > > -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> > > -				       !PageAnonExclusive(page), page);
> > > +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> > > +					     !PageAnonExclusive(page), page);
> >
> > Nit but wouldn't VM_WARN_ON_ONCE_FOLIO() work better here?
>
> No, we want the actual problematic page here, as that can give us clues what
> is going wrong.

Ah yeah... didn't notice we're checking both folio and
page... PageAnonExclusive() seems to be a weird beast:

	/*
	 * HugeTLB stores this information on the head page; THP keeps it per
	 * page
	 */

But anyway I'm digressing :)

>
> For the small-folio case above we could use it, though.

Ack, no big deal though.

>
> --
> Cheers,
>
> David / dhildenb
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 15:14 ` Jason Gunthorpe
@ 2025-06-04 16:01   ` David Hildenbrand
  0 siblings, 0 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-04 16:01 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, John Hubbard, Peter Xu

On 04.06.25 17:14, Jason Gunthorpe wrote:
> On Wed, Jun 04, 2025 at 04:05:44PM +0200, David Hildenbrand wrote:
>> Especially once we hit one of the assertions in
>> sanity_check_pinned_pages(), observing follow-up assertions failing
>> in other code can give good clues about what went wrong, so use
>> VM_WARN_ON_ONCE instead.
>>
>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>> well. Add one comment for the pfn_valid() check.
>>
>> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> I'm fine with this, but IMHO, using ON_ONCE is wasteful here.
> 
> They have been BUG_ONs for ages, they really do never happen. If there
> was ever a case to justify not using ON_ONCE this is it..

Well, history told us that if one of them happens, we get a whole flood.

... which is usually not particularly helpful when trying to extract 
information from a syslog :)

DEBUG_VM is already "wasteful" ... ;)

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 15:42   ` Linus Torvalds
@ 2025-06-04 16:05     ` Lorenzo Stoakes
  0 siblings, 0 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-04 16:05 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: David Hildenbrand, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe, John Hubbard,
	Peter Xu

On Wed, Jun 04, 2025 at 08:42:30AM -0700, Linus Torvalds wrote:
[snip]

> I really think that the *ONLY* situation where BUG() is valid is when
> you absolutely *know* that corruption will happen, and you cannot
> continue.
>
> Very much *not* some kind of "this is problematic, and who knows what
> corruption it might cause".  But "I *know* I can't continue without
> major system because the hardware is broken sh*t".
>
> In other words, don't use it. Ever. Unless you can explain exactly why
> without any handwaving.

Thanks, this aligns with my understanding.

This does make VM_BUG_ON_xxx() look even more silly :) so I think we definitely
need to get rid of that...

'Absolutely definitely corruption but we only when CONFIG_DEBUG_VM is set' is
you know, insane.

>
> Cloud providers or others can do "panic-on-warn" if they want to stop
> the machine at the first sign of trouble.
>
>                   Linus

Yeah, I have seen people object to WARN_ON()'s because of this though 'hey
some people might panic here!!'. My view on that is - right, they can,
that's fine, they asked for it :)

Cheers, Lorenzo


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
                   ` (2 preceding siblings ...)
  2025-06-04 15:14 ` Jason Gunthorpe
@ 2025-06-04 17:25 ` SeongJae Park
  2025-06-04 19:12 ` Liam R. Howlett
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 55+ messages in thread
From: SeongJae Park @ 2025-06-04 17:25 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: SeongJae Park, linux-kernel, linux-mm, Andrew Morton,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe, John Hubbard,
	Peter Xu

On Wed,  4 Jun 2025 16:05:44 +0200 David Hildenbrand <david@redhat.com> wrote:

> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.
> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Acked-by: SeongJae Park <sj@kernel.org>


Thanks,
SJ

[...]


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
                   ` (3 preceding siblings ...)
  2025-06-04 17:25 ` SeongJae Park
@ 2025-06-04 19:12 ` Liam R. Howlett
  2025-06-04 19:16   ` David Hildenbrand
  2025-06-05  1:07 ` John Hubbard
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 55+ messages in thread
From: Liam R. Howlett @ 2025-06-04 19:12 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Jason Gunthorpe, John Hubbard, Peter Xu

* David Hildenbrand <david@redhat.com> [250604 10:06]:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.
> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

seems okay, besides the one nit.

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
> 
> Wanted to do this for a long time, but my todo list keeps growing ...
> 
> Based on mm/mm-unstable
> 
> ---
>  include/linux/mmdebug.h | 12 ++++++++++++
>  mm/gup.c                | 41 +++++++++++++++++++----------------------
>  2 files changed, 31 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> index a0a3894900ed4..14a45979cccc9 100644
> --- a/include/linux/mmdebug.h
> +++ b/include/linux/mmdebug.h
> @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  	}								\
>  	unlikely(__ret_warn_once);					\
>  })
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
> +	static bool __section(".data..once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(__ret_warn_once && !__warned)) {			\
> +		dump_vma(vma);						\
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})
>  #define VM_WARN_ON_VMG(cond, vmg)		({			\
>  	int __ret_warn = !!(cond);					\
>  									\
> @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> diff --git a/mm/gup.c b/mm/gup.c
> index e065a49842a87..3c3931fcdd820 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
>  		    !folio_test_anon(folio))
>  			continue;
>  		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
>  		else
>  			/* Either a PTE-mapped or a PMD-mapped THP. */
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> -				       !PageAnonExclusive(page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> +					     !PageAnonExclusive(page), page);
>  	}
>  }
>  
> @@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
>  	if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
>  		return ERR_PTR(-EMLINK);
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -			!PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	ret = try_grab_folio(page_folio(page), 1, flags);
>  	if (ret)
> @@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
>  		goto out;
>  	}
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -		       !PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
>  	ret = try_grab_folio(folio, 1, flags);
> @@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
>  	if (unshare) {
>  		fault_flags |= FAULT_FLAG_UNSHARE;
>  		/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
> -		VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
> +		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
>  	}
>  
>  	ret = handle_mm_fault(vma, address, fault_flags, NULL);
> @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  		}
>  
>  		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
> -		if (!*locked) {
> -			BUG_ON(ret < 0);
> -			BUG_ON(ret >= nr_pages);
> -		}
> +		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));

nit, we are losing accuracy on the value of ret here.  I doubt it makes
much of a difference though.

>  
>  		if (ret > 0) {
>  			nr_pages -= ret;
> @@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  
>  		ret = mmap_read_lock_killable(mm);
>  		if (ret) {
> -			BUG_ON(ret > 0);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  				       pages, locked);
>  		if (!*locked) {
>  			/* Continue to retry until we succeeded */
> -			BUG_ON(ret != 0);
> +			VM_WARN_ON_ONCE(ret != 0);
>  			goto retry;
>  		}
>  		if (ret != 1) {
> -			BUG_ON(ret > 1);
> +			VM_WARN_ON_ONCE(ret > 1);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> -	VM_BUG_ON_VMA(start < vma->vm_start, vma);
> -	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
> +	VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
>  		} else if (pte_special(pte))
>  			goto pte_unmap;
>  
> -		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> +		/* If it's not marked as special it must have a valid memmap. */
> +		VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
>  
>  		folio = try_grab_folio_fast(page, 1, flags);
> 
> base-commit: 2d0c297637e7d59771c1533847c666cdddc19884
> -- 
> 2.49.0
> 


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 19:12 ` Liam R. Howlett
@ 2025-06-04 19:16   ` David Hildenbrand
  0 siblings, 0 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-04 19:16 UTC (permalink / raw)
  To: Liam R. Howlett, linux-kernel, linux-mm, Andrew Morton,
	Lorenzo Stoakes, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe, John Hubbard,
	Peter Xu

On 04.06.25 21:12, Liam R. Howlett wrote:
> * David Hildenbrand <david@redhat.com> [250604 10:06]:
>> Especially once we hit one of the assertions in
>> sanity_check_pinned_pages(), observing follow-up assertions failing
>> in other code can give good clues about what went wrong, so use
>> VM_WARN_ON_ONCE instead.
>>
>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>> well. Add one comment for the pfn_valid() check.
>>
>> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
>>
>> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
>> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
>> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
>> happen", but more reasonable to check for during early testing.
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>> Cc: Vlastimil Babka <vbabka@suse.cz>
>> Cc: Mike Rapoport <rppt@kernel.org>
>> Cc: Suren Baghdasaryan <surenb@google.com>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: Jason Gunthorpe <jgg@ziepe.ca>
>> Cc: John Hubbard <jhubbard@nvidia.com>
>> Cc: Peter Xu <peterx@redhat.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> seems okay, besides the one nit.
> 
> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

[...]

>>   
>>   	ret = handle_mm_fault(vma, address, fault_flags, NULL);
>> @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>>   		}
>>   
>>   		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
>> -		if (!*locked) {
>> -			BUG_ON(ret < 0);
>> -			BUG_ON(ret >= nr_pages);
>> -		}
>> +		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));
> 
> nit, we are losing accuracy on the value of ret here.  I doubt it makes
> much of a difference though.

Yeah I doubt this will matter.

Thanks!

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
                   ` (4 preceding siblings ...)
  2025-06-04 19:12 ` Liam R. Howlett
@ 2025-06-05  1:07 ` John Hubbard
  2025-06-05  5:37   ` Vlastimil Babka
  2025-06-05  7:10 ` Michal Hocko
  2025-06-06  8:12 ` David Hildenbrand
  7 siblings, 1 reply; 55+ messages in thread
From: John Hubbard @ 2025-06-05  1:07 UTC (permalink / raw)
  To: David Hildenbrand, linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Jason Gunthorpe, Peter Xu

On 6/4/25 7:05 AM, David Hildenbrand wrote:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.

It would be a nice touch to add Linus' notes here, with the BUG() history
and all. It answers a FAQ about BUG vs. WARN* that is really nice
to have in the commit log.

This looks great.

Looking ahead, should we also *delete* VM_BUG_ON*() items? (Not as part 
of this patch, of course.)

In any case, 

Reviewed-by: John Hubbard <jhubbard@nvidia.com>

thanks,
-- 
John Hubbard

> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
> 
> Wanted to do this for a long time, but my todo list keeps growing ...
> 
> Based on mm/mm-unstable
> 
> ---
>  include/linux/mmdebug.h | 12 ++++++++++++
>  mm/gup.c                | 41 +++++++++++++++++++----------------------
>  2 files changed, 31 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> index a0a3894900ed4..14a45979cccc9 100644
> --- a/include/linux/mmdebug.h
> +++ b/include/linux/mmdebug.h
> @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  	}								\
>  	unlikely(__ret_warn_once);					\
>  })
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
> +	static bool __section(".data..once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(__ret_warn_once && !__warned)) {			\
> +		dump_vma(vma);						\
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})
>  #define VM_WARN_ON_VMG(cond, vmg)		({			\
>  	int __ret_warn = !!(cond);					\
>  									\
> @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> diff --git a/mm/gup.c b/mm/gup.c
> index e065a49842a87..3c3931fcdd820 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
>  		    !folio_test_anon(folio))
>  			continue;
>  		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
>  		else
>  			/* Either a PTE-mapped or a PMD-mapped THP. */
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> -				       !PageAnonExclusive(page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> +					     !PageAnonExclusive(page), page);
>  	}
>  }
>  
> @@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
>  	if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
>  		return ERR_PTR(-EMLINK);
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -			!PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	ret = try_grab_folio(page_folio(page), 1, flags);
>  	if (ret)
> @@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
>  		goto out;
>  	}
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -		       !PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
>  	ret = try_grab_folio(folio, 1, flags);
> @@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
>  	if (unshare) {
>  		fault_flags |= FAULT_FLAG_UNSHARE;
>  		/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
> -		VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
> +		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
>  	}
>  
>  	ret = handle_mm_fault(vma, address, fault_flags, NULL);
> @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  		}
>  
>  		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
> -		if (!*locked) {
> -			BUG_ON(ret < 0);
> -			BUG_ON(ret >= nr_pages);
> -		}
> +		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));
>  
>  		if (ret > 0) {
>  			nr_pages -= ret;
> @@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  
>  		ret = mmap_read_lock_killable(mm);
>  		if (ret) {
> -			BUG_ON(ret > 0);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  				       pages, locked);
>  		if (!*locked) {
>  			/* Continue to retry until we succeeded */
> -			BUG_ON(ret != 0);
> +			VM_WARN_ON_ONCE(ret != 0);
>  			goto retry;
>  		}
>  		if (ret != 1) {
> -			BUG_ON(ret > 1);
> +			VM_WARN_ON_ONCE(ret > 1);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> -	VM_BUG_ON_VMA(start < vma->vm_start, vma);
> -	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
> +	VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
>  		} else if (pte_special(pte))
>  			goto pte_unmap;
>  
> -		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> +		/* If it's not marked as special it must have a valid memmap. */
> +		VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
>  
>  		folio = try_grab_folio_fast(page, 1, flags);
> 
> base-commit: 2d0c297637e7d59771c1533847c666cdddc19884




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-05  1:07 ` John Hubbard
@ 2025-06-05  5:37   ` Vlastimil Babka
  2025-06-05  6:08     ` David Hildenbrand
  0 siblings, 1 reply; 55+ messages in thread
From: Vlastimil Babka @ 2025-06-05  5:37 UTC (permalink / raw)
  To: John Hubbard, David Hildenbrand, linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe,
	Peter Xu

On 6/5/25 03:07, John Hubbard wrote:
> On 6/4/25 7:05 AM, David Hildenbrand wrote:
>> Especially once we hit one of the assertions in
>> sanity_check_pinned_pages(), observing follow-up assertions failing
>> in other code can give good clues about what went wrong, so use
>> VM_WARN_ON_ONCE instead.
>> 
>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>> well. Add one comment for the pfn_valid() check.
> 
> It would be a nice touch to add Linus' notes here, with the BUG() history
> and all. It answers a FAQ about BUG vs. WARN* that is really nice
> to have in the commit log.

Perhaps then rather put it somewhere appropriate in Documentation/process/
than a random commit log?

> This looks great.
> 
> Looking ahead, should we also *delete* VM_BUG_ON*() items? (Not as part 
> of this patch, of course.)
> 
> In any case, 
> 
> Reviewed-by: John Hubbard <jhubbard@nvidia.com>
> 
> thanks,



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-05  5:37   ` Vlastimil Babka
@ 2025-06-05  6:08     ` David Hildenbrand
  2025-06-05  8:48       ` Vlastimil Babka
  0 siblings, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-05  6:08 UTC (permalink / raw)
  To: Vlastimil Babka, John Hubbard, linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe,
	Peter Xu

On 05.06.25 07:37, Vlastimil Babka wrote:
> On 6/5/25 03:07, John Hubbard wrote:
>> On 6/4/25 7:05 AM, David Hildenbrand wrote:
>>> Especially once we hit one of the assertions in
>>> sanity_check_pinned_pages(), observing follow-up assertions failing
>>> in other code can give good clues about what went wrong, so use
>>> VM_WARN_ON_ONCE instead.
>>>
>>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>>> well. Add one comment for the pfn_valid() check.
>>
>> It would be a nice touch to add Linus' notes here, with the BUG() history
>> and all. It answers a FAQ about BUG vs. WARN* that is really nice
>> to have in the commit log.
> 
> Perhaps then rather put it somewhere appropriate in Documentation/process/
> than a random commit log?

I mean, I documented most of that already in coding-style.rst. :)

The full BUG history is not in there, but not sure if that is really required if ...
we're not supposed to use it.

Is there anything important missing?


commit 1cfd9d7e43d5a1cf739d1420b10b1e65feb02f88
Author: David Hildenbrand <david@redhat.com>
Date:   Fri Sep 23 13:34:24 2022 +0200

     coding-style.rst: document BUG() and WARN() rules ("do not crash the kernel")
     
     Linus notes [1] that the introduction of new code that uses VM_BUG_ON()
     is just as bad as BUG_ON(), because it will crash the kernel on
     distributions that enable CONFIG_DEBUG_VM (like Fedora):
     
         VM_BUG_ON() has the exact same semantics as BUG_ON. It is literally
         no different, the only difference is "we can make the code smaller
         because these are less important". [2]
     
     This resulted in a more generic discussion about usage of BUG() and
     friends. While there might be corner cases that still deserve a BUG_ON(),
     most BUG_ON() cases should simply use WARN_ON_ONCE() and implement a
     recovery path if reasonable:
     
         The only possible case where BUG_ON can validly be used is "I have
         some fundamental data corruption and cannot possibly return an
         error". [2]
     
     As a very good approximation is the general rule:
     
         "absolutely no new BUG_ON() calls _ever_" [2]
     
     ... not even if something really shouldn't ever happen and is merely for
     documenting that an invariant always has to hold. However, there are sill
     exceptions where BUG_ON() may be used:
     
         If you have a "this is major internal corruption, there's no way we can
         continue", then BUG_ON() is appropriate. [3]
     
     There is only one good BUG_ON():
     
         Now, that said, there is one very valid sub-form of BUG_ON():
         BUILD_BUG_ON() is absolutely 100% fine. [2]
     
     While WARN will also crash the machine with panic_on_warn set, that's
     exactly to be expected:
     
         So we have two very different cases: the "virtual machine with good
         logging where a dead machine is fine" - use 'panic_on_warn'. And
         the actual real hardware with real drivers, running real loads by
         users. [4]
     
     The basic idea is that warnings will similarly get reported by users
     and be found during testing. However, in contrast to a BUG(), there is a
     way to actually influence the expected behavior (e.g., panic_on_warn)
     and to eventually keep the machine alive to extract some debug info.
     
     Ingo notes that not all WARN_ON_ONCE cases need recovery. If we don't ever
     expect this code to trigger in any case, recovery code is not really
     helpful.
     
         I'd prefer to keep all these warnings 'simple' - i.e. no attempted
         recovery & control flow, unless we ever expect these to trigger.
         [5]
     
     There have been different rules floating around that were never properly
     documented. Let's try to clarify.
     
     [1] https://lkml.kernel.org/r/CAHk-=wiEAH+ojSpAgx_Ep=NKPWHU8AdO3V56BXcCsU97oYJ1EA@mail.gmail.com
     [2] https://lore.kernel.org/r/CAHk-=wg40EAZofO16Eviaj7mfqDhZ2gVEbvfsMf6gYzspRjYvw@mail.gmail.com
     [3] https://lkml.kernel.org/r/CAHk-=wit-DmhMfQErY29JSPjFgebx_Ld+pnerc4J2Ag990WwAA@mail.gmail.com
     [4] https://lore.kernel.org/r/CAHk-=wgF7K2gSSpy=m_=K3Nov4zaceUX9puQf1TjkTJLA2XC_g@mail.gmail.com
     [5] https://lore.kernel.org/r/YwIW+mVeZoTOxn%2F4@gmail.com
     


-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
                   ` (5 preceding siblings ...)
  2025-06-05  1:07 ` John Hubbard
@ 2025-06-05  7:10 ` Michal Hocko
  2025-06-06  8:10   ` David Hildenbrand
  2025-06-06  8:12 ` David Hildenbrand
  7 siblings, 1 reply; 55+ messages in thread
From: Michal Hocko @ 2025-06-05  7:10 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Wed 04-06-25 16:05:44, David Hildenbrand wrote:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.
> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.

The patch itself makes sense and I think it is good time to revisit old
discussion [1] and finally drop VM_BUG_ON altogether and replace it by
VM_WARN_ON which could be still a useful debugging aid.

[1] https://lore.kernel.org/all/c9abf109-80f2-88f5-4aae-d6fd4a30bcd3@google.com/T/#u
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
> 
> Wanted to do this for a long time, but my todo list keeps growing ...
> 
> Based on mm/mm-unstable
> 
> ---
>  include/linux/mmdebug.h | 12 ++++++++++++
>  mm/gup.c                | 41 +++++++++++++++++++----------------------
>  2 files changed, 31 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
> index a0a3894900ed4..14a45979cccc9 100644
> --- a/include/linux/mmdebug.h
> +++ b/include/linux/mmdebug.h
> @@ -89,6 +89,17 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  	}								\
>  	unlikely(__ret_warn_once);					\
>  })
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)		({			\
> +	static bool __section(".data..once") __warned;			\
> +	int __ret_warn_once = !!(cond);					\
> +									\
> +	if (unlikely(__ret_warn_once && !__warned)) {			\
> +		dump_vma(vma);						\
> +		__warned = true;					\
> +		WARN_ON(1);						\
> +	}								\
> +	unlikely(__ret_warn_once);					\
> +})
>  #define VM_WARN_ON_VMG(cond, vmg)		({			\
>  	int __ret_warn = !!(cond);					\
>  									\
> @@ -115,6 +126,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi);
>  #define VM_WARN_ON_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_ONCE_MM(cond, mm)  BUILD_BUG_ON_INVALID(cond)
> +#define VM_WARN_ON_ONCE_VMA(cond, vma)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ON_VMG(cond, vmg)  BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
>  #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
> diff --git a/mm/gup.c b/mm/gup.c
> index e065a49842a87..3c3931fcdd820 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -64,11 +64,11 @@ static inline void sanity_check_pinned_pages(struct page **pages,
>  		    !folio_test_anon(folio))
>  			continue;
>  		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
>  		else
>  			/* Either a PTE-mapped or a PMD-mapped THP. */
> -			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
> -				       !PageAnonExclusive(page), page);
> +			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
> +					     !PageAnonExclusive(page), page);
>  	}
>  }
>  
> @@ -760,8 +760,8 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
>  	if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
>  		return ERR_PTR(-EMLINK);
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -			!PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	ret = try_grab_folio(page_folio(page), 1, flags);
>  	if (ret)
> @@ -899,8 +899,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
>  		goto out;
>  	}
>  
> -	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> -		       !PageAnonExclusive(page), page);
> +	VM_WARN_ON_ONCE_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
> +			     !PageAnonExclusive(page), page);
>  
>  	/* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */
>  	ret = try_grab_folio(folio, 1, flags);
> @@ -1180,7 +1180,7 @@ static int faultin_page(struct vm_area_struct *vma,
>  	if (unshare) {
>  		fault_flags |= FAULT_FLAG_UNSHARE;
>  		/* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */
> -		VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
> +		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_WRITE);
>  	}
>  
>  	ret = handle_mm_fault(vma, address, fault_flags, NULL);
> @@ -1760,10 +1760,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  		}
>  
>  		/* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */
> -		if (!*locked) {
> -			BUG_ON(ret < 0);
> -			BUG_ON(ret >= nr_pages);
> -		}
> +		VM_WARN_ON_ONCE(!*locked && (ret < 0 || ret >= nr_pages));
>  
>  		if (ret > 0) {
>  			nr_pages -= ret;
> @@ -1808,7 +1805,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  
>  		ret = mmap_read_lock_killable(mm);
>  		if (ret) {
> -			BUG_ON(ret > 0);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1819,11 +1815,11 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
>  				       pages, locked);
>  		if (!*locked) {
>  			/* Continue to retry until we succeeded */
> -			BUG_ON(ret != 0);
> +			VM_WARN_ON_ONCE(ret != 0);
>  			goto retry;
>  		}
>  		if (ret != 1) {
> -			BUG_ON(ret > 1);
> +			VM_WARN_ON_ONCE(ret > 1);
>  			if (!pages_done)
>  				pages_done = ret;
>  			break;
> @@ -1885,10 +1881,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> -	VM_BUG_ON_VMA(start < vma->vm_start, vma);
> -	VM_BUG_ON_VMA(end   > vma->vm_end, vma);
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE_VMA(start < vma->vm_start, vma);
> +	VM_WARN_ON_ONCE_VMA(end   > vma->vm_end, vma);
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -1957,8 +1953,8 @@ long faultin_page_range(struct mm_struct *mm, unsigned long start,
>  	int gup_flags;
>  	long ret;
>  
> -	VM_BUG_ON(!PAGE_ALIGNED(start));
> -	VM_BUG_ON(!PAGE_ALIGNED(end));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(start));
> +	VM_WARN_ON_ONCE(!PAGE_ALIGNED(end));
>  	mmap_assert_locked(mm);
>  
>  	/*
> @@ -2908,7 +2904,8 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
>  		} else if (pte_special(pte))
>  			goto pte_unmap;
>  
> -		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> +		/* If it's not marked as special it must have a valid memmap. */
> +		VM_WARN_ON_ONCE(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
>  
>  		folio = try_grab_folio_fast(page, 1, flags);
> 
> base-commit: 2d0c297637e7d59771c1533847c666cdddc19884
> -- 
> 2.49.0

-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-05  6:08     ` David Hildenbrand
@ 2025-06-05  8:48       ` Vlastimil Babka
  2025-06-05 12:29         ` David Hildenbrand
  0 siblings, 1 reply; 55+ messages in thread
From: Vlastimil Babka @ 2025-06-05  8:48 UTC (permalink / raw)
  To: David Hildenbrand, John Hubbard, linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe,
	Peter Xu

On 6/5/25 08:08, David Hildenbrand wrote:
> On 05.06.25 07:37, Vlastimil Babka wrote:
>> On 6/5/25 03:07, John Hubbard wrote:
>>> On 6/4/25 7:05 AM, David Hildenbrand wrote:
>>>> Especially once we hit one of the assertions in
>>>> sanity_check_pinned_pages(), observing follow-up assertions failing
>>>> in other code can give good clues about what went wrong, so use
>>>> VM_WARN_ON_ONCE instead.
>>>>
>>>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>>>> well. Add one comment for the pfn_valid() check.
>>>
>>> It would be a nice touch to add Linus' notes here, with the BUG() history
>>> and all. It answers a FAQ about BUG vs. WARN* that is really nice
>>> to have in the commit log.
>> 
>> Perhaps then rather put it somewhere appropriate in Documentation/process/
>> than a random commit log?
> 
> I mean, I documented most of that already in coding-style.rst. :)

Thanks for the reminder, looks good to me :)

> The full BUG history is not in there, but not sure if that is really required if ...
> we're not supposed to use it.

We could put links to the history excursion email (and appropriate older
emails you link in the commit log below) to the References appendix of the
coding-style file, but it's not that critical.

> Is there anything important missing?
> 
> 
> commit 1cfd9d7e43d5a1cf739d1420b10b1e65feb02f88
> Author: David Hildenbrand <david@redhat.com>
> Date:   Fri Sep 23 13:34:24 2022 +0200
> 
>      coding-style.rst: document BUG() and WARN() rules ("do not crash the kernel")
>      
>      Linus notes [1] that the introduction of new code that uses VM_BUG_ON()
>      is just as bad as BUG_ON(), because it will crash the kernel on
>      distributions that enable CONFIG_DEBUG_VM (like Fedora):
>      
>          VM_BUG_ON() has the exact same semantics as BUG_ON. It is literally
>          no different, the only difference is "we can make the code smaller
>          because these are less important". [2]
>      
>      This resulted in a more generic discussion about usage of BUG() and
>      friends. While there might be corner cases that still deserve a BUG_ON(),
>      most BUG_ON() cases should simply use WARN_ON_ONCE() and implement a
>      recovery path if reasonable:
>      
>          The only possible case where BUG_ON can validly be used is "I have
>          some fundamental data corruption and cannot possibly return an
>          error". [2]
>      
>      As a very good approximation is the general rule:
>      
>          "absolutely no new BUG_ON() calls _ever_" [2]
>      
>      ... not even if something really shouldn't ever happen and is merely for
>      documenting that an invariant always has to hold. However, there are sill
>      exceptions where BUG_ON() may be used:
>      
>          If you have a "this is major internal corruption, there's no way we can
>          continue", then BUG_ON() is appropriate. [3]
>      
>      There is only one good BUG_ON():
>      
>          Now, that said, there is one very valid sub-form of BUG_ON():
>          BUILD_BUG_ON() is absolutely 100% fine. [2]
>      
>      While WARN will also crash the machine with panic_on_warn set, that's
>      exactly to be expected:
>      
>          So we have two very different cases: the "virtual machine with good
>          logging where a dead machine is fine" - use 'panic_on_warn'. And
>          the actual real hardware with real drivers, running real loads by
>          users. [4]
>      
>      The basic idea is that warnings will similarly get reported by users
>      and be found during testing. However, in contrast to a BUG(), there is a
>      way to actually influence the expected behavior (e.g., panic_on_warn)
>      and to eventually keep the machine alive to extract some debug info.
>      
>      Ingo notes that not all WARN_ON_ONCE cases need recovery. If we don't ever
>      expect this code to trigger in any case, recovery code is not really
>      helpful.
>      
>          I'd prefer to keep all these warnings 'simple' - i.e. no attempted
>          recovery & control flow, unless we ever expect these to trigger.
>          [5]
>      
>      There have been different rules floating around that were never properly
>      documented. Let's try to clarify.
>      
>      [1] https://lkml.kernel.org/r/CAHk-=wiEAH+ojSpAgx_Ep=NKPWHU8AdO3V56BXcCsU97oYJ1EA@mail.gmail.com
>      [2] https://lore.kernel.org/r/CAHk-=wg40EAZofO16Eviaj7mfqDhZ2gVEbvfsMf6gYzspRjYvw@mail.gmail.com
>      [3] https://lkml.kernel.org/r/CAHk-=wit-DmhMfQErY29JSPjFgebx_Ld+pnerc4J2Ag990WwAA@mail.gmail.com
>      [4] https://lore.kernel.org/r/CAHk-=wgF7K2gSSpy=m_=K3Nov4zaceUX9puQf1TjkTJLA2XC_g@mail.gmail.com
>      [5] https://lore.kernel.org/r/YwIW+mVeZoTOxn%2F4@gmail.com
>      
> 
> 



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-05  8:48       ` Vlastimil Babka
@ 2025-06-05 12:29         ` David Hildenbrand
  0 siblings, 0 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-05 12:29 UTC (permalink / raw)
  To: Vlastimil Babka, John Hubbard, linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jason Gunthorpe,
	Peter Xu

On 05.06.25 10:48, Vlastimil Babka wrote:
> On 6/5/25 08:08, David Hildenbrand wrote:
>> On 05.06.25 07:37, Vlastimil Babka wrote:
>>> On 6/5/25 03:07, John Hubbard wrote:
>>>> On 6/4/25 7:05 AM, David Hildenbrand wrote:
>>>>> Especially once we hit one of the assertions in
>>>>> sanity_check_pinned_pages(), observing follow-up assertions failing
>>>>> in other code can give good clues about what went wrong, so use
>>>>> VM_WARN_ON_ONCE instead.
>>>>>
>>>>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>>>>> well. Add one comment for the pfn_valid() check.
>>>>
>>>> It would be a nice touch to add Linus' notes here, with the BUG() history
>>>> and all. It answers a FAQ about BUG vs. WARN* that is really nice
>>>> to have in the commit log.
>>>
>>> Perhaps then rather put it somewhere appropriate in Documentation/process/
>>> than a random commit log?
>>
>> I mean, I documented most of that already in coding-style.rst. :)
> 
> Thanks for the reminder, looks good to me :)
> 
>> The full BUG history is not in there, but not sure if that is really required if ...
>> we're not supposed to use it.
> 
> We could put links to the history excursion email (and appropriate older
> emails you link in the commit log below) to the References appendix of the
> coding-style file, but it's not that critical.

Not sure if it's really of value. I mean, whoever questions the rules 
(for whatever reason ;) ) should probably look at the git log ...

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-05  7:10 ` Michal Hocko
@ 2025-06-06  8:10   ` David Hildenbrand
  2025-06-06  8:31     ` Michal Hocko
  0 siblings, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06  8:10 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On 05.06.25 09:10, Michal Hocko wrote:
> On Wed 04-06-25 16:05:44, David Hildenbrand wrote:
>> Especially once we hit one of the assertions in
>> sanity_check_pinned_pages(), observing follow-up assertions failing
>> in other code can give good clues about what went wrong, so use
>> VM_WARN_ON_ONCE instead.
>>
>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>> well. Add one comment for the pfn_valid() check.
>>
>> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
>>
>> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
>> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
>> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
>> happen", but more reasonable to check for during early testing.
> 
> The patch itself makes sense and I think it is good time to revisit old
> discussion [1] and finally drop VM_BUG_ON altogether and replace it by
> VM_WARN_ON which could be still a useful debugging aid.

Yes. I think we should check all cases if they are really relevant (and 
are clear), and also handle BUG_ON on the way.

... essentially what I did in this patch :)

Thanks!

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
                   ` (6 preceding siblings ...)
  2025-06-05  7:10 ` Michal Hocko
@ 2025-06-06  8:12 ` David Hildenbrand
  7 siblings, 0 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-mm, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Jason Gunthorpe, John Hubbard, Peter Xu

On 04.06.25 16:05, David Hildenbrand wrote:
> Especially once we hit one of the assertions in
> sanity_check_pinned_pages(), observing follow-up assertions failing
> in other code can give good clues about what went wrong, so use
> VM_WARN_ON_ONCE instead.
> 
> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> well. Add one comment for the pfn_valid() check.
> 
> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> 
> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> happen", but more reasonable to check for during early testing.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Mike Rapoport <rppt@kernel.org>
> Cc: Suren Baghdasaryan <surenb@google.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Peter Xu <peterx@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
> 
> Wanted to do this for a long time, but my todo list keeps growing ...
> 
> Based on mm/mm-unstable

Thanks everybody for the review -- that must be one of my patches with
most RB's/ACK's/feedback :)

The following fixup on top to use the _FOLIO variant where possible
(thanks Lorenzo).

 From c3b2567f169f10f96b94d61369d8492e3095f187 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 6 Jun 2025 10:08:20 +0200
Subject: [PATCH] fixup

Signed-off-by: David Hildenbrand <david@redhat.com>
---
  mm/gup.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/gup.c b/mm/gup.c
index 3c3931fcdd820..25fd0d895e453 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -64,7 +64,7 @@ static inline void sanity_check_pinned_pages(struct page **pages,
  		    !folio_test_anon(folio))
  			continue;
  		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
-			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page), page);
+			VM_WARN_ON_ONCE_FOLIO(!PageAnonExclusive(&folio->page), folio);
  		else
  			/* Either a PTE-mapped or a PMD-mapped THP. */
  			VM_WARN_ON_ONCE_PAGE(!PageAnonExclusive(&folio->page) &&
-- 
2.49.0


-- 
Cheers,

David / dhildenb



^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06  8:10   ` David Hildenbrand
@ 2025-06-06  8:31     ` Michal Hocko
  2025-06-06  9:01       ` David Hildenbrand
  0 siblings, 1 reply; 55+ messages in thread
From: Michal Hocko @ 2025-06-06  8:31 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri 06-06-25 10:10:14, David Hildenbrand wrote:
> On 05.06.25 09:10, Michal Hocko wrote:
> > On Wed 04-06-25 16:05:44, David Hildenbrand wrote:
> > > Especially once we hit one of the assertions in
> > > sanity_check_pinned_pages(), observing follow-up assertions failing
> > > in other code can give good clues about what went wrong, so use
> > > VM_WARN_ON_ONCE instead.
> > > 
> > > While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
> > > well. Add one comment for the pfn_valid() check.
> > > 
> > > We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
> > > 
> > > Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
> > > something > 0 we're in bigger trouble. Convert the other BUG_ON's into
> > > VM_WARN_ON_ONCE as well, they are in a similar domain "should never
> > > happen", but more reasonable to check for during early testing.
> > 
> > The patch itself makes sense and I think it is good time to revisit old
> > discussion [1] and finally drop VM_BUG_ON altogether and replace it by
> > VM_WARN_ON which could be still a useful debugging aid.
> 
> Yes. I think we should check all cases if they are really relevant (and are
> clear), and also handle BUG_ON on the way.

There are more than 600 VM_BUG_ON instances. I am not sure it is
feasible to review single one of them. Turning them into VM_WARN_ON
should be reasonably safe as they are not enabled in production
environment anyway so we cannot really rely on those. Having them in
WARN form would be still useful for debugging and those that really need
a crash dump while debugging can achieve the same result.

So while I agree that many of them could be dropped or made more clear
those could be dealt with after a mass move. An advantage of this would
be that we can drop VM_BUG_ON* and stop new instances from being added.

Just my 2cents
-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06  8:31     ` Michal Hocko
@ 2025-06-06  9:01       ` David Hildenbrand
  2025-06-06 10:13         ` Michal Hocko
  0 siblings, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06  9:01 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On 06.06.25 10:31, Michal Hocko wrote:
> On Fri 06-06-25 10:10:14, David Hildenbrand wrote:
>> On 05.06.25 09:10, Michal Hocko wrote:
>>> On Wed 04-06-25 16:05:44, David Hildenbrand wrote:
>>>> Especially once we hit one of the assertions in
>>>> sanity_check_pinned_pages(), observing follow-up assertions failing
>>>> in other code can give good clues about what went wrong, so use
>>>> VM_WARN_ON_ONCE instead.
>>>>
>>>> While at it, let's just convert all VM_BUG_ON to VM_WARN_ON_ONCE as
>>>> well. Add one comment for the pfn_valid() check.
>>>>
>>>> We have to introduce VM_WARN_ON_ONCE_VMA() to make that fly.
>>>>
>>>> Drop the BUG_ON after mmap_read_lock_killable(), if that ever returns
>>>> something > 0 we're in bigger trouble. Convert the other BUG_ON's into
>>>> VM_WARN_ON_ONCE as well, they are in a similar domain "should never
>>>> happen", but more reasonable to check for during early testing.
>>>
>>> The patch itself makes sense and I think it is good time to revisit old
>>> discussion [1] and finally drop VM_BUG_ON altogether and replace it by
>>> VM_WARN_ON which could be still a useful debugging aid.
>>
>> Yes. I think we should check all cases if they are really relevant (and are
>> clear), and also handle BUG_ON on the way.
> 
> There are more than 600 VM_BUG_ON instances.I am not sure it is
> feasible to review single one of them.

I'm sure we would be done if we would have started back when Dave R. 
sent his patch ... :)

It's not that many files, and many checks in a file are often of a 
similar kind.

Like many "folio_test_locked" checks.

> Turning them into VM_WARN_ON
> should be reasonably safe as they are not enabled in production
> environment anyway so we cannot really rely on those. Having them in
> WARN form would be still useful for debugging and those that really need
> a crash dump while debugging can achieve the same result.

One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...

VM_BUG_ON is essentially a "once" thing so far, but then, we don't 
continue ... so probably most should be _ONCE.

> 
> So while I agree that many of them could be dropped or made more clear
> those could be dealt with after a mass move. An advantage of this would
> be that we can drop VM_BUG_ON* and stop new instances from being added.

As a first step we could probably just #define them to go to the 
VM_WARN_ON_* variants and see what happens.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06  9:01       ` David Hildenbrand
@ 2025-06-06 10:13         ` Michal Hocko
  2025-06-06 10:19           ` Lorenzo Stoakes
  2025-06-06 10:27           ` David Hildenbrand
  0 siblings, 2 replies; 55+ messages in thread
From: Michal Hocko @ 2025-06-06 10:13 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
> On 06.06.25 10:31, Michal Hocko wrote:
[...]
> > Turning them into VM_WARN_ON
> > should be reasonably safe as they are not enabled in production
> > environment anyway so we cannot really rely on those. Having them in
> > WARN form would be still useful for debugging and those that really need
> > a crash dump while debugging can achieve the same result.
> 
> One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...

*WARN_ONCE ha a very limited use from code paths which are generally
shared by many callers. You just see one and then nothing. Some time ago
we have discussed an option to have _ONCE per call trace but I haven't
see any follow up.

Anyway starting without _ONCE seems like safer option because we are not
losing potentially useful debugging information. Afterall this is
debugging only thing. But no strong position on my side.

> VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
> ... so probably most should be _ONCE.
> 
> > 
> > So while I agree that many of them could be dropped or made more clear
> > those could be dealt with after a mass move. An advantage of this would
> > be that we can drop VM_BUG_ON* and stop new instances from being added.
> 
> As a first step we could probably just #define them to go to the
> VM_WARN_ON_* variants and see what happens.

You meand VM_BUG_ON expand to VM_WARN_ON by default?

-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 10:13         ` Michal Hocko
@ 2025-06-06 10:19           ` Lorenzo Stoakes
  2025-06-06 10:28             ` David Hildenbrand
  2025-06-06 10:28             ` Michal Hocko
  2025-06-06 10:27           ` David Hildenbrand
  1 sibling, 2 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 10:19 UTC (permalink / raw)
  To: Michal Hocko
  Cc: David Hildenbrand, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
> > On 06.06.25 10:31, Michal Hocko wrote:
> [...]
> > > Turning them into VM_WARN_ON
> > > should be reasonably safe as they are not enabled in production
> > > environment anyway so we cannot really rely on those. Having them in
> > > WARN form would be still useful for debugging and those that really need
> > > a crash dump while debugging can achieve the same result.
> >
> > One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...
>
> *WARN_ONCE ha a very limited use from code paths which are generally
> shared by many callers. You just see one and then nothing. Some time ago
> we have discussed an option to have _ONCE per call trace but I haven't
> see any follow up.
>
> Anyway starting without _ONCE seems like safer option because we are not
> losing potentially useful debugging information. Afterall this is
> debugging only thing. But no strong position on my side.
>
> > VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
> > ... so probably most should be _ONCE.
> >
> > >
> > > So while I agree that many of them could be dropped or made more clear
> > > those could be dealt with after a mass move. An advantage of this would
> > > be that we can drop VM_BUG_ON* and stop new instances from being added.
> >
> > As a first step we could probably just #define them to go to the
> > VM_WARN_ON_* variants and see what happens.
>
> You meand VM_BUG_ON expand to VM_WARN_ON by default?

Sorry to interject in the conversation, but to boldly throw my two English pence
into the mix:

As the "king of churn" (TM) you'll not be surprised to hear that I'm in favour
of us just doing a big patch and convert all VM_BUG_ON() -> VM_WARN_ON_ONCE()
and remove VM_BUG_ON*().

Pull the band-aid off... I think better than a #define if this indeed what you
meant David.

But of course, you'd expect me to have this opinion ;)

>
> --
> Michal Hocko
> SUSE Labs

Cheers, Lorenzo


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 10:13         ` Michal Hocko
  2025-06-06 10:19           ` Lorenzo Stoakes
@ 2025-06-06 10:27           ` David Hildenbrand
  1 sibling, 0 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 10:27 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-kernel, linux-mm, Andrew Morton, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On 06.06.25 12:13, Michal Hocko wrote:
> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>> On 06.06.25 10:31, Michal Hocko wrote:
> [...]
>>> Turning them into VM_WARN_ON
>>> should be reasonably safe as they are not enabled in production
>>> environment anyway so we cannot really rely on those. Having them in
>>> WARN form would be still useful for debugging and those that really need
>>> a crash dump while debugging can achieve the same result.
>>
>> One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...
> 
> *WARN_ONCE ha a very limited use from code paths which are generally
> shared by many callers. You just see one and then nothing. Some time ago
> we have discussed an option to have _ONCE per call trace but I haven't
> see any follow up.

While true, getting a flood of the same events is absolutely not helpful 
in my experience.

Usually one cares about a single instance of any events, even if 
triggered by various code paths.

Sure, there are some other cases ... which brings me back of my original 
point of doing it stepwise.

> 
> Anyway starting without _ONCE seems like safer option because we are not
> losing potentially useful debugging information. Afterall this is
> debugging only thing. But no strong position on my side.

Yeah.

> 
>> VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
>> ... so probably most should be _ONCE.
>>
>>>
>>> So while I agree that many of them could be dropped or made more clear
>>> those could be dealt with after a mass move. An advantage of this would
>>> be that we can drop VM_BUG_ON* and stop new instances from being added.
>>
>> As a first step we could probably just #define them to go to the
>> VM_WARN_ON_* variants and see what happens.
> 
> You meand VM_BUG_ON expand to VM_WARN_ON by default?

That's one approach with little churn, that still allows us easily to go 
through them and re-evaluate them later as we actually remove them.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 10:19           ` Lorenzo Stoakes
@ 2025-06-06 10:28             ` David Hildenbrand
  2025-06-06 11:04               ` Lorenzo Stoakes
  2025-06-06 10:28             ` Michal Hocko
  1 sibling, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 10:28 UTC (permalink / raw)
  To: Lorenzo Stoakes, Michal Hocko
  Cc: linux-kernel, linux-mm, Andrew Morton, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan,
	Jason Gunthorpe, John Hubbard, Peter Xu

On 06.06.25 12:19, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>>> On 06.06.25 10:31, Michal Hocko wrote:
>> [...]
>>>> Turning them into VM_WARN_ON
>>>> should be reasonably safe as they are not enabled in production
>>>> environment anyway so we cannot really rely on those. Having them in
>>>> WARN form would be still useful for debugging and those that really need
>>>> a crash dump while debugging can achieve the same result.
>>>
>>> One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...
>>
>> *WARN_ONCE ha a very limited use from code paths which are generally
>> shared by many callers. You just see one and then nothing. Some time ago
>> we have discussed an option to have _ONCE per call trace but I haven't
>> see any follow up.
>>
>> Anyway starting without _ONCE seems like safer option because we are not
>> losing potentially useful debugging information. Afterall this is
>> debugging only thing. But no strong position on my side.
>>
>>> VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
>>> ... so probably most should be _ONCE.
>>>
>>>>
>>>> So while I agree that many of them could be dropped or made more clear
>>>> those could be dealt with after a mass move. An advantage of this would
>>>> be that we can drop VM_BUG_ON* and stop new instances from being added.
>>>
>>> As a first step we could probably just #define them to go to the
>>> VM_WARN_ON_* variants and see what happens.
>>
>> You meand VM_BUG_ON expand to VM_WARN_ON by default?
> 
> Sorry to interject in the conversation, but to boldly throw my two English pence
> into the mix:
> 
> As the "king of churn" (TM) you'll not be surprised to hear that I'm in favour
> of us just doing a big patch and convert all VM_BUG_ON() -> VM_WARN_ON_ONCE()
> and remove VM_BUG_ON*().
> 
> Pull the band-aid off... I think better than a #define if this indeed what you
> meant David.
> 
> But of course, you'd expect me to have this opinion ;)

See my reply to Michal regarding keeping VM_BUG_ON() until we actually 
decided what the right cleanup is.

I don't have a very strong opinion on any of this ... as long as 
VM_BUG_ON() goes away one way or the other.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 10:19           ` Lorenzo Stoakes
  2025-06-06 10:28             ` David Hildenbrand
@ 2025-06-06 10:28             ` Michal Hocko
  1 sibling, 0 replies; 55+ messages in thread
From: Michal Hocko @ 2025-06-06 10:28 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: David Hildenbrand, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri 06-06-25 11:19:28, Lorenzo Stoakes wrote:
> As the "king of churn" (TM) you'll not be surprised to hear that I'm in favour
> of us just doing a big patch and convert all VM_BUG_ON() -> VM_WARN_ON_ONCE()
> and remove VM_BUG_ON*().

Yes please! I really think this is the most viable way to get rid of
VM_BUG_ON which I believe is a generally agreed way to go.

Thanks and let there be a force with your boldness.
-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 10:28             ` David Hildenbrand
@ 2025-06-06 11:04               ` Lorenzo Stoakes
  2025-06-06 11:44                 ` David Hildenbrand
  2025-06-06 17:57                 ` John Hubbard
  0 siblings, 2 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 11:04 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
> On 06.06.25 12:19, Lorenzo Stoakes wrote:
> > On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
> > > On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
> > > > On 06.06.25 10:31, Michal Hocko wrote:
> > > [...]
> > > > > Turning them into VM_WARN_ON
> > > > > should be reasonably safe as they are not enabled in production
> > > > > environment anyway so we cannot really rely on those. Having them in
> > > > > WARN form would be still useful for debugging and those that really need
> > > > > a crash dump while debugging can achieve the same result.
> > > >
> > > > One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...
> > >
> > > *WARN_ONCE ha a very limited use from code paths which are generally
> > > shared by many callers. You just see one and then nothing. Some time ago
> > > we have discussed an option to have _ONCE per call trace but I haven't
> > > see any follow up.
> > >
> > > Anyway starting without _ONCE seems like safer option because we are not
> > > losing potentially useful debugging information. Afterall this is
> > > debugging only thing. But no strong position on my side.
> > >
> > > > VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
> > > > ... so probably most should be _ONCE.
> > > >
> > > > >
> > > > > So while I agree that many of them could be dropped or made more clear
> > > > > those could be dealt with after a mass move. An advantage of this would
> > > > > be that we can drop VM_BUG_ON* and stop new instances from being added.
> > > >
> > > > As a first step we could probably just #define them to go to the
> > > > VM_WARN_ON_* variants and see what happens.
> > >
> > > You meand VM_BUG_ON expand to VM_WARN_ON by default?
> >
> > Sorry to interject in the conversation, but to boldly throw my two English pence
> > into the mix:
> >
> > As the "king of churn" (TM) you'll not be surprised to hear that I'm in favour
> > of us just doing a big patch and convert all VM_BUG_ON() -> VM_WARN_ON_ONCE()
> > and remove VM_BUG_ON*().
> >
> > Pull the band-aid off... I think better than a #define if this indeed what you
> > meant David.
> >
> > But of course, you'd expect me to have this opinion ;)
>
> See my reply to Michal regarding keeping VM_BUG_ON() until we actually
> decided what the right cleanup is.

Sure, but to me the concept of VM_BUG_ON() is surely fundamentally broken - if
BUG_ON() means 'stop everything we're going to corrupt' then it makes no sense
to add a '...but only if CONFIG_DEBUG_VM is set' in there.

So to me the only assessment needed is 'do we want to warn on this or not?'.

And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
we will get flooded with useless information.

I think this being debug code gives us a lot of leeway here.

After the big change, we can always revisit individual cases and see if the
warning is valid at all.

>
> I don't have a very strong opinion on any of this ... as long as VM_BUG_ON()
> goes away one way or the other.

Am happy to come up with the churn-meister version of this patch and take the
heat :P

>
> --
> Cheers,
>
> David / dhildenb
>

Cheers, Lorenzo


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 11:04               ` Lorenzo Stoakes
@ 2025-06-06 11:44                 ` David Hildenbrand
  2025-06-06 11:56                   ` Michal Hocko
  2025-06-06 12:12                   ` Lorenzo Stoakes
  2025-06-06 17:57                 ` John Hubbard
  1 sibling, 2 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 11:44 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On 06.06.25 13:04, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
>> On 06.06.25 12:19, Lorenzo Stoakes wrote:
>>> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
>>>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>>>>> On 06.06.25 10:31, Michal Hocko wrote:
>>>> [...]
>>>>>> Turning them into VM_WARN_ON
>>>>>> should be reasonably safe as they are not enabled in production
>>>>>> environment anyway so we cannot really rely on those. Having them in
>>>>>> WARN form would be still useful for debugging and those that really need
>>>>>> a crash dump while debugging can achieve the same result.
>>>>>
>>>>> One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...
>>>>
>>>> *WARN_ONCE ha a very limited use from code paths which are generally
>>>> shared by many callers. You just see one and then nothing. Some time ago
>>>> we have discussed an option to have _ONCE per call trace but I haven't
>>>> see any follow up.
>>>>
>>>> Anyway starting without _ONCE seems like safer option because we are not
>>>> losing potentially useful debugging information. Afterall this is
>>>> debugging only thing. But no strong position on my side.
>>>>
>>>>> VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
>>>>> ... so probably most should be _ONCE.
>>>>>
>>>>>>
>>>>>> So while I agree that many of them could be dropped or made more clear
>>>>>> those could be dealt with after a mass move. An advantage of this would
>>>>>> be that we can drop VM_BUG_ON* and stop new instances from being added.
>>>>>
>>>>> As a first step we could probably just #define them to go to the
>>>>> VM_WARN_ON_* variants and see what happens.
>>>>
>>>> You meand VM_BUG_ON expand to VM_WARN_ON by default?
>>>
>>> Sorry to interject in the conversation, but to boldly throw my two English pence
>>> into the mix:
>>>
>>> As the "king of churn" (TM) you'll not be surprised to hear that I'm in favour
>>> of us just doing a big patch and convert all VM_BUG_ON() -> VM_WARN_ON_ONCE()
>>> and remove VM_BUG_ON*().
>>>
>>> Pull the band-aid off... I think better than a #define if this indeed what you
>>> meant David.
>>>
>>> But of course, you'd expect me to have this opinion ;)
>>
>> See my reply to Michal regarding keeping VM_BUG_ON() until we actually
>> decided what the right cleanup is.
> 
> Sure, but to me the concept of VM_BUG_ON() is surely fundamentally broken - if
> BUG_ON() means 'stop everything we're going to corrupt' then it makes no sense
> to add a '...but only if CONFIG_DEBUG_VM is set' in there.
> 
> So to me the only assessment needed is 'do we want to warn on this or not?'.

Well, when done carefully, it would be when reworking a VM_BUG_ON:

(a) Should this really only be checked with DEBUG_VM or should this
     actually be a WARN_ON_ONCE() + recovery
(b) Does this check even still make sense in current code, or were we
     just extra careful initially.
(c) Do we even understand why it is checked or should we add a comment.
(d) Should we use one of the _PAGE / _FOLIO / _VMA etc. variants instead
     or even add new ones.

One could argue that the same is true for any other VM_WARN_ON ... but 
my point from the beginning was that if we're already touching them, why 
not spend some extra time and do it properly ..

... but yeah, 600 instances are a bit much.

:)

But yeah, VM_BUG_ON is easier to replace than BUG_ON, because ... not 
even Fedora is running with CONFIG_DEBUG_VM anymore, so these checks are 
mostly only there in actual debug kernels.

So agreed, let's move forward with a simple conversion.

> 
> And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
> we will get flooded with useless information.
> 
> I think this being debug code gives us a lot of leeway here.
> 
> After the big change, we can always revisit individual cases and see if the
> warning is valid at all.
> 
>>
>> I don't have a very strong opinion on any of this ... as long as VM_BUG_ON()
>> goes away one way or the other.
> 
> Am happy to come up with the churn-meister version of this patch and take the
> heat :P

I assume with "this patch" you mean "a patch that gets rid of VM_BUG_ON 
completely", because I want this patch here (that started the 
discussion) to go in first.

Fascinating how you are always looking for work :P

Thanks!

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 11:44                 ` David Hildenbrand
@ 2025-06-06 11:56                   ` Michal Hocko
  2025-06-06 12:12                   ` Lorenzo Stoakes
  1 sibling, 0 replies; 55+ messages in thread
From: Michal Hocko @ 2025-06-06 11:56 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Lorenzo Stoakes, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri 06-06-25 13:44:12, David Hildenbrand wrote:
> On 06.06.25 13:04, Lorenzo Stoakes wrote:
> > On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
> > > On 06.06.25 12:19, Lorenzo Stoakes wrote:
> > > > On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
> > > > > On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
> > > > > > On 06.06.25 10:31, Michal Hocko wrote:
> > > > > [...]
> > > > > > > Turning them into VM_WARN_ON
> > > > > > > should be reasonably safe as they are not enabled in production
> > > > > > > environment anyway so we cannot really rely on those. Having them in
> > > > > > > WARN form would be still useful for debugging and those that really need
> > > > > > > a crash dump while debugging can achieve the same result.
> > > > > > 
> > > > > > One question is if we should be VM_WARN_ON vs. VM_WARN_ON_ONCE ...
> > > > > 
> > > > > *WARN_ONCE ha a very limited use from code paths which are generally
> > > > > shared by many callers. You just see one and then nothing. Some time ago
> > > > > we have discussed an option to have _ONCE per call trace but I haven't
> > > > > see any follow up.
> > > > > 
> > > > > Anyway starting without _ONCE seems like safer option because we are not
> > > > > losing potentially useful debugging information. Afterall this is
> > > > > debugging only thing. But no strong position on my side.
> > > > > 
> > > > > > VM_BUG_ON is essentially a "once" thing so far, but then, we don't continue
> > > > > > ... so probably most should be _ONCE.
> > > > > > 
> > > > > > > 
> > > > > > > So while I agree that many of them could be dropped or made more clear
> > > > > > > those could be dealt with after a mass move. An advantage of this would
> > > > > > > be that we can drop VM_BUG_ON* and stop new instances from being added.
> > > > > > 
> > > > > > As a first step we could probably just #define them to go to the
> > > > > > VM_WARN_ON_* variants and see what happens.
> > > > > 
> > > > > You meand VM_BUG_ON expand to VM_WARN_ON by default?
> > > > 
> > > > Sorry to interject in the conversation, but to boldly throw my two English pence
> > > > into the mix:
> > > > 
> > > > As the "king of churn" (TM) you'll not be surprised to hear that I'm in favour
> > > > of us just doing a big patch and convert all VM_BUG_ON() -> VM_WARN_ON_ONCE()
> > > > and remove VM_BUG_ON*().
> > > > 
> > > > Pull the band-aid off... I think better than a #define if this indeed what you
> > > > meant David.
> > > > 
> > > > But of course, you'd expect me to have this opinion ;)
> > > 
> > > See my reply to Michal regarding keeping VM_BUG_ON() until we actually
> > > decided what the right cleanup is.
> > 
> > Sure, but to me the concept of VM_BUG_ON() is surely fundamentally broken - if
> > BUG_ON() means 'stop everything we're going to corrupt' then it makes no sense
> > to add a '...but only if CONFIG_DEBUG_VM is set' in there.
> > 
> > So to me the only assessment needed is 'do we want to warn on this or not?'.
> 
> Well, when done carefully, it would be when reworking a VM_BUG_ON:
> 
> (a) Should this really only be checked with DEBUG_VM or should this
>     actually be a WARN_ON_ONCE() + recovery
> (b) Does this check even still make sense in current code, or were we
>     just extra careful initially.
> (c) Do we even understand why it is checked or should we add a comment.
> (d) Should we use one of the _PAGE / _FOLIO / _VMA etc. variants instead
>     or even add new ones.

This is surelly a very responsible approach and I salute to that. But
then the reality hits...
 
> One could argue that the same is true for any other VM_WARN_ON ... but my
> point from the beginning was that if we're already touching them, why not
> spend some extra time and do it properly ..
> 
> ... but yeah, 600 instances are a bit much.

... exactly this. And I believe that the same could be achieved post
factum while having the ugly VM_BUG on gone.

> So agreed, let's move forward with a simple conversion.

Good call IMHO.

[...]
> > Am happy to come up with the churn-meister version of this patch and take the
> > heat :P
> 
> I assume with "this patch" you mean "a patch that gets rid of VM_BUG_ON
> completely", because I want this patch here (that started the discussion) to
> go in first.

Yes this makes sense.

> Fascinating how you are always looking for work :P

Thanks to both of you!
-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 11:44                 ` David Hildenbrand
  2025-06-06 11:56                   ` Michal Hocko
@ 2025-06-06 12:12                   ` Lorenzo Stoakes
  2025-06-06 12:17                     ` David Hildenbrand
  1 sibling, 1 reply; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 12:12 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On Fri, Jun 06, 2025 at 01:44:12PM +0200, David Hildenbrand wrote:
[snip]
> I assume with "this patch" you mean "a patch that gets rid of VM_BUG_ON
> completely", because I want this patch here (that started the discussion) to
> go in first.

Yes, and sure.

I can wait until this is merged upstream and take a look afterwards?

>
> Fascinating how you are always looking for work :P

I mean I have an insane amount of work to do this cycle anyway... but the Call
of Churnthulu is loud to me ;)

>
> Thanks!
>
> --
> Cheers,
>
> David / dhildenb
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 12:12                   ` Lorenzo Stoakes
@ 2025-06-06 12:17                     ` David Hildenbrand
  0 siblings, 0 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 12:17 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, John Hubbard, Peter Xu

On 06.06.25 14:12, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 01:44:12PM +0200, David Hildenbrand wrote:
> [snip]
>> I assume with "this patch" you mean "a patch that gets rid of VM_BUG_ON
>> completely", because I want this patch here (that started the discussion) to
>> go in first.
> 
> Yes, and sure.
> 
> I can wait until this is merged upstream and take a look afterwards?

Or base it on mm-new for now.

I'm afraid there will be a lot of competing churn moving/reworking (or 
god forbid, adding) VM_BUG*, and not all goes through Andrew's tree

... so maybe doing stuff per subsystem might make it easier, not sure 
what will be best here.

> 
>>
>> Fascinating how you are always looking for work :P
> 
> I mean I have an insane amount of work to do this cycle anyway... but the Call
> of Churnthulu is loud to me ;)

:D

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 11:04               ` Lorenzo Stoakes
  2025-06-06 11:44                 ` David Hildenbrand
@ 2025-06-06 17:57                 ` John Hubbard
  2025-06-06 18:06                   ` Lorenzo Stoakes
  1 sibling, 1 reply; 55+ messages in thread
From: John Hubbard @ 2025-06-06 17:57 UTC (permalink / raw)
  To: Lorenzo Stoakes, David Hildenbrand
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

On 6/6/25 4:04 AM, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
>> On 06.06.25 12:19, Lorenzo Stoakes wrote:
>>> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
>>>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>>>>> On 06.06.25 10:31, Michal Hocko wrote:
>>>> [...]
> So to me the only assessment needed is 'do we want to warn on this or not?'.
> 
> And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
> we will get flooded with useless information.
> 

As yet another victim of such WARN_ON() floods at times, I've followed
this thread with great interest. And after reflecting on it a bit, I believe
that, surprisingly enough, WARN_ON() is a better replacement for VM_BUG_ON()
than WARN_ON_ONCE(), because:

* The only time you'll be flooded with WARN_ON() messages is when *two*
things happen at once:

    a) Something that used to completely crash the machine (a VM_BUG_ON
       condition) happens, and

    b) You're in a loop and it keeps on happening. Yes, in -mm, that does
       happen a lot (per-page loops, for example), but still.

* It's *so* easy to miss a WARN_ON_ONCE(). We don't want that, not for a
critical failure case that used to be a VM_BUG_ON().


thanks,
-- 
John Hubbard



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 17:57                 ` John Hubbard
@ 2025-06-06 18:06                   ` Lorenzo Stoakes
  2025-06-06 18:15                     ` David Hildenbrand
  0 siblings, 1 reply; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 18:06 UTC (permalink / raw)
  To: John Hubbard
  Cc: David Hildenbrand, Michal Hocko, linux-kernel, linux-mm,
	Andrew Morton, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

On Fri, Jun 06, 2025 at 10:57:44AM -0700, John Hubbard wrote:
> On 6/6/25 4:04 AM, Lorenzo Stoakes wrote:
> > On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
> >> On 06.06.25 12:19, Lorenzo Stoakes wrote:
> >>> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
> >>>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
> >>>>> On 06.06.25 10:31, Michal Hocko wrote:
> >>>> [...]
> > So to me the only assessment needed is 'do we want to warn on this or not?'.
> >
> > And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
> > we will get flooded with useless information.
> >
>
> As yet another victim of such WARN_ON() floods at times, I've followed
> this thread with great interest. And after reflecting on it a bit, I believe
> that, surprisingly enough, WARN_ON() is a better replacement for VM_BUG_ON()
> than WARN_ON_ONCE(), because:

Right, these shouldn't be happening _at all_.

I'm easy on this point, I'd say in that case VM_WARN_ON() is the most
_conservative_ approach, since these are things that must not happen, and
so it's not unreasonable to fail to repress repetitions of the 'impossible'
:)

But I get the general point about ...WARN_ON_ONCE() avoiding floods.

David, what do you think?

>
> * The only time you'll be flooded with WARN_ON() messages is when *two*
> things happen at once:
>
>     a) Something that used to completely crash the machine (a VM_BUG_ON
>        condition) happens, and
>
>     b) You're in a loop and it keeps on happening. Yes, in -mm, that does
>        happen a lot (per-page loops, for example), but still.
>
> * It's *so* easy to miss a WARN_ON_ONCE(). We don't want that, not for a
> critical failure case that used to be a VM_BUG_ON().

However, I do dispute this point - warnings are pretty easy to pick up from
my point of view unless your dmesg is absolutely rammed, and if you're
concerned you can panic_on_warn right?

I treat any warning that I see for instance in a test run on qemu as a
'must fix' problem, let alone if observed on an actual hardware system.

Are you thinking of scenarios for instance where you have a lot of debug
output in dmesg and thus these fly by, and when you retry the operation it
won't show again and thus missed that way?

But of course this won't do much to help you should the operation be one
you happen to only perform once however! :)

>
>
> thanks,
> --
> John Hubbard
>

Cheers, Lorenzo


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:06                   ` Lorenzo Stoakes
@ 2025-06-06 18:15                     ` David Hildenbrand
  2025-06-06 18:21                       ` John Hubbard
  0 siblings, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 18:15 UTC (permalink / raw)
  To: Lorenzo Stoakes, John Hubbard
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

On 06.06.25 20:06, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 10:57:44AM -0700, John Hubbard wrote:
>> On 6/6/25 4:04 AM, Lorenzo Stoakes wrote:
>>> On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
>>>> On 06.06.25 12:19, Lorenzo Stoakes wrote:
>>>>> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
>>>>>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>>>>>>> On 06.06.25 10:31, Michal Hocko wrote:
>>>>>> [...]
>>> So to me the only assessment needed is 'do we want to warn on this or not?'.
>>>
>>> And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
>>> we will get flooded with useless information.
>>>
>>
>> As yet another victim of such WARN_ON() floods at times, I've followed
>> this thread with great interest. And after reflecting on it a bit, I believe
>> that, surprisingly enough, WARN_ON() is a better replacement for VM_BUG_ON()
>> than WARN_ON_ONCE(), because:
> 
> Right, these shouldn't be happening _at all_.
 > > I'm easy on this point, I'd say in that case VM_WARN_ON() is the most
> _conservative_ approach, since these are things that must not happen, and
> so it's not unreasonable to fail to repress repetitions of the 'impossible'
> :)
> 
> But I get the general point about ...WARN_ON_ONCE() avoiding floods.
> 
> David, what do you think?

Well, in this patch here I deliberately want _ONCE for the unpin sanity 
checks. Because if they start happening (IOW, now after 5 years observed 
for the first time?) I *absolutely don't* want to get flooded and 
*really* figure out what is going on by seeing what else failed.

And crashing on VM_BUG_ON() and not observing anything else was also not 
particularly helpful :)

Because ... they shouldn't be happening ...

(well, it goes back to my initial point about requiring individual 
decisions etc ...)

Not sure what's best now in the general case, in the end I don't care 
that much.

Roll a dice? ;)

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:15                     ` David Hildenbrand
@ 2025-06-06 18:21                       ` John Hubbard
  2025-06-06 18:23                         ` David Hildenbrand
  0 siblings, 1 reply; 55+ messages in thread
From: John Hubbard @ 2025-06-06 18:21 UTC (permalink / raw)
  To: David Hildenbrand, Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu



On 6/6/25 11:15 AM, David Hildenbrand wrote:
> On 06.06.25 20:06, Lorenzo Stoakes wrote:
>> On Fri, Jun 06, 2025 at 10:57:44AM -0700, John Hubbard wrote:
>>> On 6/6/25 4:04 AM, Lorenzo Stoakes wrote:
>>>> On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
>>>>> On 06.06.25 12:19, Lorenzo Stoakes wrote:
>>>>>> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
>>>>>>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>>>>>>>> On 06.06.25 10:31, Michal Hocko wrote:
>>>>>>> [...]
>>>> So to me the only assessment needed is 'do we want to warn on this or not?'.
>>>>
>>>> And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
>>>> we will get flooded with useless information.
>>>>
>>>
>>> As yet another victim of such WARN_ON() floods at times, I've followed
>>> this thread with great interest. And after reflecting on it a bit, I believe
>>> that, surprisingly enough, WARN_ON() is a better replacement for VM_BUG_ON()
>>> than WARN_ON_ONCE(), because:
>>
>> Right, these shouldn't be happening _at all_.
>  > > I'm easy on this point, I'd say in that case VM_WARN_ON() is the most
>> _conservative_ approach, since these are things that must not happen, and
>> so it's not unreasonable to fail to repress repetitions of the 'impossible'
>> :)
>>
>> But I get the general point about ...WARN_ON_ONCE() avoiding floods.
>>
>> David, what do you think?
> 
> Well, in this patch here I deliberately want _ONCE for the unpin sanity 
> checks. Because if they start happening (IOW, now after 5 years observed 
> for the first time?) I *absolutely don't* want to get flooded and 
> *really* figure out what is going on by seeing what else failed.
> 
> And crashing on VM_BUG_ON() and not observing anything else was also not 
> particularly helpful :)
> 
> Because ... they shouldn't be happening ...
> 
> (well, it goes back to my initial point about requiring individual 
> decisions etc ...)
> 
> Not sure what's best now in the general case, in the end I don't care 
> that much.
> 
> Roll a dice? ;)

One last data point: I've often logged onto systems that were running
long enough that the dmesg had long since rolled over. And this makes
the WARN_ON_ONCE() items disappear.



> 

thanks,
-- 
John Hubbard



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:21                       ` John Hubbard
@ 2025-06-06 18:23                         ` David Hildenbrand
  2025-06-06 18:31                           ` John Hubbard
                                             ` (2 more replies)
  0 siblings, 3 replies; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 18:23 UTC (permalink / raw)
  To: John Hubbard, Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

On 06.06.25 20:21, John Hubbard wrote:
> 
> 
> On 6/6/25 11:15 AM, David Hildenbrand wrote:
>> On 06.06.25 20:06, Lorenzo Stoakes wrote:
>>> On Fri, Jun 06, 2025 at 10:57:44AM -0700, John Hubbard wrote:
>>>> On 6/6/25 4:04 AM, Lorenzo Stoakes wrote:
>>>>> On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
>>>>>> On 06.06.25 12:19, Lorenzo Stoakes wrote:
>>>>>>> On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
>>>>>>>> On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
>>>>>>>>> On 06.06.25 10:31, Michal Hocko wrote:
>>>>>>>> [...]
>>>>> So to me the only assessment needed is 'do we want to warn on this or not?'.
>>>>>
>>>>> And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
>>>>> we will get flooded with useless information.
>>>>>
>>>>
>>>> As yet another victim of such WARN_ON() floods at times, I've followed
>>>> this thread with great interest. And after reflecting on it a bit, I believe
>>>> that, surprisingly enough, WARN_ON() is a better replacement for VM_BUG_ON()
>>>> than WARN_ON_ONCE(), because:
>>>
>>> Right, these shouldn't be happening _at all_.
>>   > > I'm easy on this point, I'd say in that case VM_WARN_ON() is the most
>>> _conservative_ approach, since these are things that must not happen, and
>>> so it's not unreasonable to fail to repress repetitions of the 'impossible'
>>> :)
>>>
>>> But I get the general point about ...WARN_ON_ONCE() avoiding floods.
>>>
>>> David, what do you think?
>>
>> Well, in this patch here I deliberately want _ONCE for the unpin sanity
>> checks. Because if they start happening (IOW, now after 5 years observed
>> for the first time?) I *absolutely don't* want to get flooded and
>> *really* figure out what is going on by seeing what else failed.
>>
>> And crashing on VM_BUG_ON() and not observing anything else was also not
>> particularly helpful :)
>>
>> Because ... they shouldn't be happening ...
>>
>> (well, it goes back to my initial point about requiring individual
>> decisions etc ...)
>>
>> Not sure what's best now in the general case, in the end I don't care
>> that much.
>>
>> Roll a dice? ;)
> 
> One last data point: I've often logged onto systems that were running
> long enough that the dmesg had long since rolled over. And this makes
> the WARN_ON_ONCE() items disappear.

I think what would be *really* helpful would be quick access to the very 
first warning that triggered. At least that's what I usually dig for ... :)

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:23                         ` David Hildenbrand
@ 2025-06-06 18:31                           ` John Hubbard
  2025-06-06 18:36                             ` David Hildenbrand
  2025-06-06 18:34                           ` Lorenzo Stoakes
  2025-06-06 18:42                           ` Jason Gunthorpe
  2 siblings, 1 reply; 55+ messages in thread
From: John Hubbard @ 2025-06-06 18:31 UTC (permalink / raw)
  To: David Hildenbrand, Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

...
>>>> David, what do you think?
>>>
>>> Well, in this patch here I deliberately want _ONCE for the unpin sanity
>>> checks. Because if they start happening (IOW, now after 5 years observed
>>> for the first time?) I *absolutely don't* want to get flooded and
>>> *really* figure out what is going on by seeing what else failed.
>>>
>>> And crashing on VM_BUG_ON() and not observing anything else was also not
>>> particularly helpful :)
>>>
>>> Because ... they shouldn't be happening ...
>>>
>>> (well, it goes back to my initial point about requiring individual
>>> decisions etc ...)
>>>
>>> Not sure what's best now in the general case, in the end I don't care
>>> that much.
>>>
>>> Roll a dice? ;)
>>
>> One last data point: I've often logged onto systems that were running
>> long enough that the dmesg had long since rolled over. And this makes
>> the WARN_ON_ONCE() items disappear.
> 
> I think what would be *really* helpful would be quick access to the very 
> first warning that triggered. At least that's what I usually dig for ... :)
> 

These two use cases cannot be simultaneously solved, at least not
perfectly. Fortunately, since a VM_BUG_ON() case cannot ever happen,
either choice will work. hahaha :)

thanks,
-- 
John Hubbard



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:23                         ` David Hildenbrand
  2025-06-06 18:31                           ` John Hubbard
@ 2025-06-06 18:34                           ` Lorenzo Stoakes
  2025-06-06 18:42                           ` Jason Gunthorpe
  2 siblings, 0 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 18:34 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: John Hubbard, Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

Overall,

Since David and I are somewhat indifferent on this point and you would very
much prefer a VM_WARN_ON() - when I drop the churnageddon I'll go with
VM_WARN_ON() :)

And we can obvious adjust case-by-case after that if needed (probably none
of these ever trigger tbh).

I think the general feeling in the room re: VM_BUG_ON() is 'kill it with
fire I don't care how' :P

And you know, it's understandable...

On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> On 06.06.25 20:21, John Hubbard wrote:
> >
> >
> > On 6/6/25 11:15 AM, David Hildenbrand wrote:
> > > On 06.06.25 20:06, Lorenzo Stoakes wrote:
> > > > On Fri, Jun 06, 2025 at 10:57:44AM -0700, John Hubbard wrote:
> > > > > On 6/6/25 4:04 AM, Lorenzo Stoakes wrote:
> > > > > > On Fri, Jun 06, 2025 at 12:28:28PM +0200, David Hildenbrand wrote:
> > > > > > > On 06.06.25 12:19, Lorenzo Stoakes wrote:
> > > > > > > > On Fri, Jun 06, 2025 at 12:13:27PM +0200, Michal Hocko wrote:
> > > > > > > > > On Fri 06-06-25 11:01:18, David Hildenbrand wrote:
> > > > > > > > > > On 06.06.25 10:31, Michal Hocko wrote:
> > > > > > > > > [...]
> > > > > > So to me the only assessment needed is 'do we want to warn on this or not?'.
> > > > > >
> > > > > > And as you say, really WARN_ON_ONCE() seems appropriate, because nearly always
> > > > > > we will get flooded with useless information.
> > > > > >
> > > > >
> > > > > As yet another victim of such WARN_ON() floods at times, I've followed
> > > > > this thread with great interest. And after reflecting on it a bit, I believe
> > > > > that, surprisingly enough, WARN_ON() is a better replacement for VM_BUG_ON()
> > > > > than WARN_ON_ONCE(), because:
> > > >
> > > > Right, these shouldn't be happening _at all_.
> > >   > > I'm easy on this point, I'd say in that case VM_WARN_ON() is the most
> > > > _conservative_ approach, since these are things that must not happen, and
> > > > so it's not unreasonable to fail to repress repetitions of the 'impossible'
> > > > :)
> > > >
> > > > But I get the general point about ...WARN_ON_ONCE() avoiding floods.
> > > >
> > > > David, what do you think?
> > >
> > > Well, in this patch here I deliberately want _ONCE for the unpin sanity
> > > checks. Because if they start happening (IOW, now after 5 years observed
> > > for the first time?) I *absolutely don't* want to get flooded and
> > > *really* figure out what is going on by seeing what else failed.
> > >
> > > And crashing on VM_BUG_ON() and not observing anything else was also not
> > > particularly helpful :)
> > >
> > > Because ... they shouldn't be happening ...
> > >
> > > (well, it goes back to my initial point about requiring individual
> > > decisions etc ...)
> > >
> > > Not sure what's best now in the general case, in the end I don't care
> > > that much.
> > >
> > > Roll a dice? ;)
> >
> > One last data point: I've often logged onto systems that were running
> > long enough that the dmesg had long since rolled over. And this makes
> > the WARN_ON_ONCE() items disappear.
>
> I think what would be *really* helpful would be quick access to the very
> first warning that triggered. At least that's what I usually dig for ... :)

YES!

I wonder if there's some systemd thingy that does this somehow...

>
> --
> Cheers,
>
> David / dhildenb
>


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:31                           ` John Hubbard
@ 2025-06-06 18:36                             ` David Hildenbrand
  2025-06-06 18:39                               ` John Hubbard
  0 siblings, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-06 18:36 UTC (permalink / raw)
  To: John Hubbard, Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

On 06.06.25 20:31, John Hubbard wrote:
> ...
>>>>> David, what do you think?
>>>>
>>>> Well, in this patch here I deliberately want _ONCE for the unpin sanity
>>>> checks. Because if they start happening (IOW, now after 5 years observed
>>>> for the first time?) I *absolutely don't* want to get flooded and
>>>> *really* figure out what is going on by seeing what else failed.
>>>>
>>>> And crashing on VM_BUG_ON() and not observing anything else was also not
>>>> particularly helpful :)
>>>>
>>>> Because ... they shouldn't be happening ...
>>>>
>>>> (well, it goes back to my initial point about requiring individual
>>>> decisions etc ...)
>>>>
>>>> Not sure what's best now in the general case, in the end I don't care
>>>> that much.
>>>>
>>>> Roll a dice? ;)
>>>
>>> One last data point: I've often logged onto systems that were running
>>> long enough that the dmesg had long since rolled over. And this makes
>>> the WARN_ON_ONCE() items disappear.
>>
>> I think what would be *really* helpful would be quick access to the very
>> first warning that triggered. At least that's what I usually dig for ... :)
>>
> 
> These two use cases cannot be simultaneously solved, at least not
> perfectly.

I guess we'd have to store the first WARN data separately away, just 
like when tainting the kernel or so. Not sure.

Fortunately, since a VM_BUG_ON() case cannot ever happen,

I mean, also WARN and friends are for things that should never happen ... :)

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:36                             ` David Hildenbrand
@ 2025-06-06 18:39                               ` John Hubbard
  0 siblings, 0 replies; 55+ messages in thread
From: John Hubbard @ 2025-06-06 18:39 UTC (permalink / raw)
  To: David Hildenbrand, Lorenzo Stoakes
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Jason Gunthorpe, Peter Xu

On 6/6/25 11:36 AM, David Hildenbrand wrote:
> On 06.06.25 20:31, John Hubbard wrote:
>> ...
>>> I think what would be *really* helpful would be quick access to the very
>>> first warning that triggered. At least that's what I usually dig 
>>> for ... :)
>>>
>>
>> These two use cases cannot be simultaneously solved, at least not
>> perfectly.
> 
> I guess we'd have to store the first WARN data separately away, just 
> like when tainting the kernel or so. Not sure.
> 

Tainting seems like a great building block. I like this idea, because
it does seem to mitigate the missing WARN_ON_ONCE(), especially if we
can capture the whole thing, backtrace and all.

thanks,
-- 
John Hubbard



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:23                         ` David Hildenbrand
  2025-06-06 18:31                           ` John Hubbard
  2025-06-06 18:34                           ` Lorenzo Stoakes
@ 2025-06-06 18:42                           ` Jason Gunthorpe
  2025-06-06 18:46                             ` Lorenzo Stoakes
  2 siblings, 1 reply; 55+ messages in thread
From: Jason Gunthorpe @ 2025-06-06 18:42 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: John Hubbard, Lorenzo Stoakes, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> > One last data point: I've often logged onto systems that were running
> > long enough that the dmesg had long since rolled over. And this makes
> > the WARN_ON_ONCE() items disappear.
> 
> I think what would be *really* helpful would be quick access to the very
> first warning that triggered. At least that's what I usually dig for ... :)

That's basically my point, it doesn't make sense to expose two APIs to
developers with a choice like this. The WARN_ON infrastructure should
deal with it consistently, maybe even configurable by the admin.

Keeping the first warn in a buffer is definately a good option.

Otherwise how is the patch author supposed to decide which API to
call in each case?

Jason


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:42                           ` Jason Gunthorpe
@ 2025-06-06 18:46                             ` Lorenzo Stoakes
  2025-06-06 19:03                               ` Lorenzo Stoakes
  0 siblings, 1 reply; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 18:46 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: David Hildenbrand, John Hubbard, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Fri, Jun 06, 2025 at 03:42:12PM -0300, Jason Gunthorpe wrote:
> On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> > > One last data point: I've often logged onto systems that were running
> > > long enough that the dmesg had long since rolled over. And this makes
> > > the WARN_ON_ONCE() items disappear.
> >
> > I think what would be *really* helpful would be quick access to the very
> > first warning that triggered. At least that's what I usually dig for ... :)
>
> That's basically my point, it doesn't make sense to expose two APIs to
> developers with a choice like this. The WARN_ON infrastructure should
> deal with it consistently, maybe even configurable by the admin.
>
> Keeping the first warn in a buffer is definately a good option.
>
> Otherwise how is the patch author supposed to decide which API to
> call in each case?
>
> Jason

To clarify - are we talking the first instance of a specific warning, or
the first warning in general?


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 18:46                             ` Lorenzo Stoakes
@ 2025-06-06 19:03                               ` Lorenzo Stoakes
  2025-06-07 13:42                                 ` Jason Gunthorpe
  0 siblings, 1 reply; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-06 19:03 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: David Hildenbrand, John Hubbard, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Fri, Jun 06, 2025 at 07:46:52PM +0100, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 03:42:12PM -0300, Jason Gunthorpe wrote:
> > On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> > > > One last data point: I've often logged onto systems that were running
> > > > long enough that the dmesg had long since rolled over. And this makes
> > > > the WARN_ON_ONCE() items disappear.
> > >
> > > I think what would be *really* helpful would be quick access to the very
> > > first warning that triggered. At least that's what I usually dig for ... :)
> >
> > That's basically my point, it doesn't make sense to expose two APIs to
> > developers with a choice like this. The WARN_ON infrastructure should
> > deal with it consistently, maybe even configurable by the admin.
> >
> > Keeping the first warn in a buffer is definately a good option.
> >
> > Otherwise how is the patch author supposed to decide which API to
> > call in each case?
> >
> > Jason
>
> To clarify - are we talking the first instance of a specific warning, or
> the first warning in general?

OK sorry I'm being dumb, it is -per warning- reading the thread :P

So I guess you would have the macro establish a static buffer for each instance,
and then some interface for gathering those up and outputting them?

Always output to dmesg, but only populate if not already warned...

Or maybe rather than a static buffer (as those will add up quick and you'd have
to figure out how much space to take) it could be some dynamicly growing
kmalloc()'d thing because how often do you expect a warning, but guess you'd
have to be careful to ensure you're safely allocating given warning can be in
any context...

Or maybe another circular buffer just for this... hm yeah I guess that'd be the
most sane.

And I guess we'd not want a new interface for this like WARN_ON_ONCE_STORED()
because that'd be... weird and how would anyone think to use that and nearly all
cases wouldn't.

So I guess you'd want to convert WARN_ON() to use this, and maybe WARN_ON_ONCE()
to also use it but just not output if already seen.

It is quite a nice idea. Could be a debug feature though as would we want it for
prod?

It's late here and my brain is fried so if this is incoherent forgive me :))


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-06 19:03                               ` Lorenzo Stoakes
@ 2025-06-07 13:42                                 ` Jason Gunthorpe
  2025-06-07 13:53                                   ` Lorenzo Stoakes
  0 siblings, 1 reply; 55+ messages in thread
From: Jason Gunthorpe @ 2025-06-07 13:42 UTC (permalink / raw)
  To: Lorenzo Stoakes
  Cc: David Hildenbrand, John Hubbard, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Fri, Jun 06, 2025 at 08:03:15PM +0100, Lorenzo Stoakes wrote:
> On Fri, Jun 06, 2025 at 07:46:52PM +0100, Lorenzo Stoakes wrote:
> > On Fri, Jun 06, 2025 at 03:42:12PM -0300, Jason Gunthorpe wrote:
> > > On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> > > > > One last data point: I've often logged onto systems that were running
> > > > > long enough that the dmesg had long since rolled over. And this makes
> > > > > the WARN_ON_ONCE() items disappear.
> > > >
> > > > I think what would be *really* helpful would be quick access to the very
> > > > first warning that triggered. At least that's what I usually dig for ... :)
> > >
> > > That's basically my point, it doesn't make sense to expose two APIs to
> > > developers with a choice like this. The WARN_ON infrastructure should
> > > deal with it consistently, maybe even configurable by the admin.
> > >
> > > Keeping the first warn in a buffer is definately a good option.
> > >
> > > Otherwise how is the patch author supposed to decide which API to
> > > call in each case?
> > >
> > > Jason
> >
> > To clarify - are we talking the first instance of a specific warning, or
> > the first warning in general?
> 
> OK sorry I'm being dumb, it is -per warning- reading the thread :P
> 
> So I guess you would have the macro establish a static buffer for each instance,
> and then some interface for gathering those up and outputting them?

Honestly, that seems unnecessary, just a single buffer for the first
global warning. Maybe with a 1 min rate limit for replacement or
something.

The kernel doesn't run around spewing warnings as a general rule.

> And I guess we'd not want a new interface for this like WARN_ON_ONCE_STORED()
> because that'd be... weird and how would anyone think to use that and nearly all
> cases wouldn't.

No! Delete WARN_ON_ONCE and say the new global mechanism is good
enough to capture the first WARN_ON, everyone always uses it always
and then nobody needs to think about this anymore when writing new
code.

Jason


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-07 13:42                                 ` Jason Gunthorpe
@ 2025-06-07 13:53                                   ` Lorenzo Stoakes
  2025-06-07 18:00                                     ` John Hubbard
  0 siblings, 1 reply; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-07 13:53 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: David Hildenbrand, John Hubbard, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Sat, Jun 07, 2025 at 10:42:14AM -0300, Jason Gunthorpe wrote:
> On Fri, Jun 06, 2025 at 08:03:15PM +0100, Lorenzo Stoakes wrote:
> > On Fri, Jun 06, 2025 at 07:46:52PM +0100, Lorenzo Stoakes wrote:
> > > On Fri, Jun 06, 2025 at 03:42:12PM -0300, Jason Gunthorpe wrote:
> > > > On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> > > > > > One last data point: I've often logged onto systems that were running
> > > > > > long enough that the dmesg had long since rolled over. And this makes
> > > > > > the WARN_ON_ONCE() items disappear.
> > > > >
> > > > > I think what would be *really* helpful would be quick access to the very
> > > > > first warning that triggered. At least that's what I usually dig for ... :)
> > > >
> > > > That's basically my point, it doesn't make sense to expose two APIs to
> > > > developers with a choice like this. The WARN_ON infrastructure should
> > > > deal with it consistently, maybe even configurable by the admin.
> > > >
> > > > Keeping the first warn in a buffer is definately a good option.
> > > >
> > > > Otherwise how is the patch author supposed to decide which API to
> > > > call in each case?
> > > >
> > > > Jason
> > >
> > > To clarify - are we talking the first instance of a specific warning, or
> > > the first warning in general?
> >
> > OK sorry I'm being dumb, it is -per warning- reading the thread :P
> >
> > So I guess you would have the macro establish a static buffer for each instance,
> > and then some interface for gathering those up and outputting them?
>
> Honestly, that seems unnecessary, just a single buffer for the first
> global warning. Maybe with a 1 min rate limit for replacement or
> something.
>
> The kernel doesn't run around spewing warnings as a general rule.

Well, if you have WARN_ON()'s you tend to get that in loops if one goes.

But then in that case obviously you only usually truly care about the first.

>
> > And I guess we'd not want a new interface for this like WARN_ON_ONCE_STORED()
> > because that'd be... weird and how would anyone think to use that and nearly all
> > cases wouldn't.
>
> No! Delete WARN_ON_ONCE and say the new global mechanism is good
> enough to capture the first WARN_ON, everyone always uses it always
> and then nobody needs to think about this anymore when writing new
> code.
>
> Jason

Well that is simpler :)

I have encountered situations where I've had more than one and needed 2nd+
but it is rare as you say.

My late night incoherent babbling yesterday was perhaps because I
misunderstood David/John as to what they encountered in the past... maybe
they can clarify...

I do find myself grepping dmesg to find the first warning and it's
_annoying_ to do so, so this would be handy.

But I'm not sure it'd justify getting rid of WARN_ON_ONCE() when you are in
a loop or something and now your dmesg is going to go to hell, still useful
not to do that, esp. if you know there's no value to further warnings


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-07 13:53                                   ` Lorenzo Stoakes
@ 2025-06-07 18:00                                     ` John Hubbard
  2025-06-09  9:57                                       ` Vlastimil Babka
  2025-06-11  9:32                                       ` David Hildenbrand
  0 siblings, 2 replies; 55+ messages in thread
From: John Hubbard @ 2025-06-07 18:00 UTC (permalink / raw)
  To: Lorenzo Stoakes, Jason Gunthorpe
  Cc: David Hildenbrand, Michal Hocko, linux-kernel, linux-mm,
	Andrew Morton, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Peter Xu

On 6/7/25 6:53 AM, Lorenzo Stoakes wrote:
> On Sat, Jun 07, 2025 at 10:42:14AM -0300, Jason Gunthorpe wrote:
>> On Fri, Jun 06, 2025 at 08:03:15PM +0100, Lorenzo Stoakes wrote:
>>> On Fri, Jun 06, 2025 at 07:46:52PM +0100, Lorenzo Stoakes wrote:
>>>> On Fri, Jun 06, 2025 at 03:42:12PM -0300, Jason Gunthorpe wrote:
>>>>> On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
...
>>> And I guess we'd not want a new interface for this like WARN_ON_ONCE_STORED()
>>> because that'd be... weird and how would anyone think to use that and nearly all
>>> cases wouldn't.
>>
>> No! Delete WARN_ON_ONCE and say the new global mechanism is good
>> enough to capture the first WARN_ON, everyone always uses it always
>> and then nobody needs to think about this anymore when writing new
>> code.
Interesting. This would cause WARN_ON*() to act similarly to lockdep, in
that the first instance is the only one that works.

That works, but if-and-only-if the system is kept completely WARN-free.
However, I have a mitigation for that, below.

> 
> Well that is simpler :)
> 
> I have encountered situations where I've had more than one and needed 2nd+
> but it is rare as you say.
> 
> My late night incoherent babbling yesterday was perhaps because I
> misunderstood David/John as to what they encountered in the past... maybe
> they can clarify...

I've debugged lots of production systems, often these were large HPC 
clusters and supercomputers. I've seen:

a) Long up-times, with (of course!) relatively small dmesg buffer sizes,
so that early logs are long gone. This means that WARN_ON_ONCE() is
quite often gone (overwritten). This is common.

The worst part is that if you go to reproduce a problem, you don't
see the next warning in the logs!! This is devastating, especially if
the site makes it hard to ask for a system reboot. (If you have
~20,000 nodes in the cluster, a reboot is not a small affair.)

b) WARN_ON() loops that fill up the dmesg buffer immediately, which
of course also overwrites anything leading up to the first WARN_ON.

However, (b) is rare on production kernels--it's more commonly
self-inflicted, if I make a mistake and write WARN_ON() when I should
have written WARN_ON_ONCE(), in a custom build to explore a problem.

c) Other printk-based noise in a loop, occasionally this also wraps
the dmesg buffer. Especially if a customer has done a bit of their
own "special" logging code.


> 
> I do find myself grepping dmesg to find the first warning and it's
> _annoying_ to do so, so this would be handy.
> 
> But I'm not sure it'd justify getting rid of WARN_ON_ONCE() when you are in
> a loop or something and now your dmesg is going to go to hell, still useful
> not to do that, esp. if you know there's no value to further warnings

In all cases, the ideal outcome is a dmesg buffer that includes (let's
fantasize for a moment):

1) the earliest dmesg output (showing any oddness with system
configuration, and what hardware was brought online, etc), plus

2) the first problem that is logged, plus

3) ...sometimes two or even three things happen in order to get to the
problem, so (again, fantasizing!) really the first *three* warnings
would be good to have.

So this is just a 3-element array of global warnings--not another
circular buffer, but a little larger than a TAINT or lockdep type
of capture.

thanks,
-- 
John Hubbard



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-07 18:00                                     ` John Hubbard
@ 2025-06-09  9:57                                       ` Vlastimil Babka
  2025-07-24 10:54                                         ` Vlastimil Babka
  2025-06-11  9:32                                       ` David Hildenbrand
  1 sibling, 1 reply; 55+ messages in thread
From: Vlastimil Babka @ 2025-06-09  9:57 UTC (permalink / raw)
  To: John Hubbard, Lorenzo Stoakes, Jason Gunthorpe
  Cc: David Hildenbrand, Michal Hocko, linux-kernel, linux-mm,
	Andrew Morton, Liam R. Howlett, Mike Rapoport, Suren Baghdasaryan,
	Peter Xu

On 6/7/25 8:00 PM, John Hubbard wrote:
> On 6/7/25 6:53 AM, Lorenzo Stoakes wrote:
>>
>> Well that is simpler :)
>>
>> I have encountered situations where I've had more than one and needed
>> 2nd+
>> but it is rare as you say.
>>
>> My late night incoherent babbling yesterday was perhaps because I
>> misunderstood David/John as to what they encountered in the past... maybe
>> they can clarify...
> 
> I've debugged lots of production systems, often these were large HPC
> clusters and supercomputers. I've seen:
> 
> a) Long up-times, with (of course!) relatively small dmesg buffer sizes,
> so that early logs are long gone. This means that WARN_ON_ONCE() is
> quite often gone (overwritten). This is common.

There's no e.g. journald storing them permanently? I think trying to
hard in the kernel to provide this "recall first warning" if userspace
can handle this, is suboptimal. I think there are two main scenarios:

- the warning is indeed not fatal - userspace can likely save it
- it's (part of) something fatal - the system will crash before it
disappears from the ring buffer

> The worst part is that if you go to reproduce a problem, you don't
> see the next warning in the logs!! This is devastating, especially if
> the site makes it hard to ask for a system reboot. (If you have
> ~20,000 nodes in the cluster, a reboot is not a small affair.)

Assuming you know how to reproduce the problem... I wonder if it would
help if there was a way (sysctl?) to re-arm all the _ONCE warnings. It
shouldn't be that hard hopefully?




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-07 18:00                                     ` John Hubbard
  2025-06-09  9:57                                       ` Vlastimil Babka
@ 2025-06-11  9:32                                       ` David Hildenbrand
  2025-06-11 12:03                                         ` Jason Gunthorpe
  1 sibling, 1 reply; 55+ messages in thread
From: David Hildenbrand @ 2025-06-11  9:32 UTC (permalink / raw)
  To: John Hubbard, Lorenzo Stoakes, Jason Gunthorpe
  Cc: Michal Hocko, linux-kernel, linux-mm, Andrew Morton,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Peter Xu

On 07.06.25 20:00, John Hubbard wrote:
> On 6/7/25 6:53 AM, Lorenzo Stoakes wrote:
>> On Sat, Jun 07, 2025 at 10:42:14AM -0300, Jason Gunthorpe wrote:
>>> On Fri, Jun 06, 2025 at 08:03:15PM +0100, Lorenzo Stoakes wrote:
>>>> On Fri, Jun 06, 2025 at 07:46:52PM +0100, Lorenzo Stoakes wrote:
>>>>> On Fri, Jun 06, 2025 at 03:42:12PM -0300, Jason Gunthorpe wrote:
>>>>>> On Fri, Jun 06, 2025 at 08:23:25PM +0200, David Hildenbrand wrote:
> ...
>>>> And I guess we'd not want a new interface for this like WARN_ON_ONCE_STORED()
>>>> because that'd be... weird and how would anyone think to use that and nearly all
>>>> cases wouldn't.
>>>
>>> No! Delete WARN_ON_ONCE and say the new global mechanism is good
>>> enough to capture the first WARN_ON, everyone always uses it always
>>> and then nobody needs to think about this anymore when writing new
>>> code.
> Interesting. This would cause WARN_ON*() to act similarly to lockdep, in
> that the first instance is the only one that works.
> 
> That works, but if-and-only-if the system is kept completely WARN-free.
> However, I have a mitigation for that, below.
> 
>>
>> Well that is simpler :)
>>
>> I have encountered situations where I've had more than one and needed 2nd+
>> but it is rare as you say.
>>
>> My late night incoherent babbling yesterday was perhaps because I
>> misunderstood David/John as to what they encountered in the past... maybe
>> they can clarify...
> 
> I've debugged lots of production systems, often these were large HPC
> clusters and supercomputers. I've seen:
> 
> a) Long up-times, with (of course!) relatively small dmesg buffer sizes,
> so that early logs are long gone. This means that WARN_ON_ONCE() is
> quite often gone (overwritten). This is common.
> 
> The worst part is that if you go to reproduce a problem, you don't
> see the next warning in the logs!! This is devastating, especially if
> the site makes it hard to ask for a system reboot. (If you have
> ~20,000 nodes in the cluster, a reboot is not a small affair.)
> 
> b) WARN_ON() loops that fill up the dmesg buffer immediately, which
> of course also overwrites anything leading up to the first WARN_ON.
> 
> However, (b) is rare on production kernels--it's more commonly
> self-inflicted, if I make a mistake and write WARN_ON() when I should
> have written WARN_ON_ONCE(), in a custom build to explore a problem.
> 
> c) Other printk-based noise in a loop, occasionally this also wraps
> the dmesg buffer. Especially if a customer has done a bit of their
> own "special" logging code.
> 
> 
>>
>> I do find myself grepping dmesg to find the first warning and it's
>> _annoying_ to do so, so this would be handy.
>>
>> But I'm not sure it'd justify getting rid of WARN_ON_ONCE() when you are in
>> a loop or something and now your dmesg is going to go to hell, still useful
>> not to do that, esp. if you know there's no value to further warnings
> 
> In all cases, the ideal outcome is a dmesg buffer that includes (let's
> fantasize for a moment):
> 
> 1) the earliest dmesg output (showing any oddness with system
> configuration, and what hardware was brought online, etc), plus
> 
> 2) the first problem that is logged, plus
> 
> 3) ...sometimes two or even three things happen in order to get to the
> problem, so (again, fantasizing!) really the first *three* warnings
> would be good to have.
> 
> So this is just a 3-element array of global warnings--not another
> circular buffer, but a little larger than a TAINT or lockdep type
> of capture.

Can't we do something very simple to get started: remove VM_WARN_ON_ONCE 
and implement VM_WARN_ON as something that uses a single global variable 
to print up to *magic value* 10 warnings. Then we'll simply stop 
printing any further warnings.

Races when updating that global variable? Who cars if we end up printing 11.

That is, have a global limit over all of the VM_WARN.

Later, we can do more advanced stuff, such as storing them in some other 
buffer to persist them etc.

-- 
Cheers,

David / dhildenb



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-11  9:32                                       ` David Hildenbrand
@ 2025-06-11 12:03                                         ` Jason Gunthorpe
  2025-06-11 12:06                                           ` Lorenzo Stoakes
  0 siblings, 1 reply; 55+ messages in thread
From: Jason Gunthorpe @ 2025-06-11 12:03 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: John Hubbard, Lorenzo Stoakes, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Wed, Jun 11, 2025 at 11:32:07AM +0200, David Hildenbrand wrote:

> Later, we can do more advanced stuff, such as storing them in some other
> buffer to persist them etc.

Yeah, you should go ahead with this series, and hopefully someone will
be inspired to separately do some work on the WARN framework..

Jason


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-11 12:03                                         ` Jason Gunthorpe
@ 2025-06-11 12:06                                           ` Lorenzo Stoakes
  0 siblings, 0 replies; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-06-11 12:06 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: David Hildenbrand, John Hubbard, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Vlastimil Babka,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Wed, Jun 11, 2025 at 09:03:24AM -0300, Jason Gunthorpe wrote:
> On Wed, Jun 11, 2025 at 11:32:07AM +0200, David Hildenbrand wrote:
>
> > Later, we can do more advanced stuff, such as storing them in some other
> > buffer to persist them etc.
>
> Yeah, you should go ahead with this series, and hopefully someone will
> be inspired to separately do some work on the WARN framework..

+1 fwiw :)

Big believer in doing the practical iterative 'next step'...

If I get time + nobody else is looking at it, I'll look at doing the big churny
BUG_ON() change also!

>
> Jason


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-06-09  9:57                                       ` Vlastimil Babka
@ 2025-07-24 10:54                                         ` Vlastimil Babka
  2025-07-24 10:56                                           ` Lorenzo Stoakes
  0 siblings, 1 reply; 55+ messages in thread
From: Vlastimil Babka @ 2025-07-24 10:54 UTC (permalink / raw)
  To: John Hubbard, Lorenzo Stoakes, Jason Gunthorpe
  Cc: David Hildenbrand, Michal Hocko, linux-kernel, linux-mm,
	Andrew Morton, Liam R. Howlett, Mike Rapoport, Suren Baghdasaryan,
	Peter Xu

On 6/9/25 11:57, Vlastimil Babka wrote:
> On 6/7/25 8:00 PM, John Hubbard wrote:
>> On 6/7/25 6:53 AM, Lorenzo Stoakes wrote:
>> The worst part is that if you go to reproduce a problem, you don't
>> see the next warning in the logs!! This is devastating, especially if
>> the site makes it hard to ask for a system reboot. (If you have
>> ~20,000 nodes in the cluster, a reboot is not a small affair.)
> 
> Assuming you know how to reproduce the problem... I wonder if it would
> help if there was a way (sysctl?) to re-arm all the _ONCE warnings. It
> shouldn't be that hard hopefully?

Oh hey it already exists, since 2017

echo 1 > /sys/kernel/debug/clear_warn_once


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-07-24 10:54                                         ` Vlastimil Babka
@ 2025-07-24 10:56                                           ` Lorenzo Stoakes
  2025-07-24 17:27                                             ` John Hubbard
  0 siblings, 1 reply; 55+ messages in thread
From: Lorenzo Stoakes @ 2025-07-24 10:56 UTC (permalink / raw)
  To: Vlastimil Babka
  Cc: John Hubbard, Jason Gunthorpe, David Hildenbrand, Michal Hocko,
	linux-kernel, linux-mm, Andrew Morton, Liam R. Howlett,
	Mike Rapoport, Suren Baghdasaryan, Peter Xu

On Thu, Jul 24, 2025 at 12:54:26PM +0200, Vlastimil Babka wrote:
> On 6/9/25 11:57, Vlastimil Babka wrote:
> > On 6/7/25 8:00 PM, John Hubbard wrote:
> >> On 6/7/25 6:53 AM, Lorenzo Stoakes wrote:
> >> The worst part is that if you go to reproduce a problem, you don't
> >> see the next warning in the logs!! This is devastating, especially if
> >> the site makes it hard to ask for a system reboot. (If you have
> >> ~20,000 nodes in the cluster, a reboot is not a small affair.)
> >
> > Assuming you know how to reproduce the problem... I wonder if it would
> > help if there was a way (sysctl?) to re-arm all the _ONCE warnings. It
> > shouldn't be that hard hopefully?
>
> Oh hey it already exists, since 2017
>
> echo 1 > /sys/kernel/debug/clear_warn_once

Ohhh! Nice!


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH v1] mm/gup: remove (VM_)BUG_ONs
  2025-07-24 10:56                                           ` Lorenzo Stoakes
@ 2025-07-24 17:27                                             ` John Hubbard
  0 siblings, 0 replies; 55+ messages in thread
From: John Hubbard @ 2025-07-24 17:27 UTC (permalink / raw)
  To: Lorenzo Stoakes, Vlastimil Babka
  Cc: Jason Gunthorpe, David Hildenbrand, Michal Hocko, linux-kernel,
	linux-mm, Andrew Morton, Liam R. Howlett, Mike Rapoport,
	Suren Baghdasaryan, Peter Xu

On 7/24/25 3:56 AM, Lorenzo Stoakes wrote:
> On Thu, Jul 24, 2025 at 12:54:26PM +0200, Vlastimil Babka wrote:
>> On 6/9/25 11:57, Vlastimil Babka wrote:
>>> On 6/7/25 8:00 PM, John Hubbard wrote:
>>>> On 6/7/25 6:53 AM, Lorenzo Stoakes wrote:
>>>> The worst part is that if you go to reproduce a problem, you don't
>>>> see the next warning in the logs!! This is devastating, especially if
>>>> the site makes it hard to ask for a system reboot. (If you have
>>>> ~20,000 nodes in the cluster, a reboot is not a small affair.)
>>>
>>> Assuming you know how to reproduce the problem... I wonder if it would
>>> help if there was a way (sysctl?) to re-arm all the _ONCE warnings. It
>>> shouldn't be that hard hopefully?
>>
>> Oh hey it already exists, since 2017
>>
>> echo 1 > /sys/kernel/debug/clear_warn_once
> 
> Ohhh! Nice!

WOW, how did I go all this time without knowing about that? It's just
what I always wanted! :)

thanks,
-- 
John Hubbard



^ permalink raw reply	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2025-07-24 17:27 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-04 14:05 [PATCH v1] mm/gup: remove (VM_)BUG_ONs David Hildenbrand
2025-06-04 14:22 ` Vlastimil Babka
2025-06-04 14:26   ` Suren Baghdasaryan
2025-06-04 14:48 ` Lorenzo Stoakes
2025-06-04 14:58   ` David Hildenbrand
2025-06-04 15:44     ` Lorenzo Stoakes
2025-06-04 15:42   ` Linus Torvalds
2025-06-04 16:05     ` Lorenzo Stoakes
2025-06-04 15:14 ` Jason Gunthorpe
2025-06-04 16:01   ` David Hildenbrand
2025-06-04 17:25 ` SeongJae Park
2025-06-04 19:12 ` Liam R. Howlett
2025-06-04 19:16   ` David Hildenbrand
2025-06-05  1:07 ` John Hubbard
2025-06-05  5:37   ` Vlastimil Babka
2025-06-05  6:08     ` David Hildenbrand
2025-06-05  8:48       ` Vlastimil Babka
2025-06-05 12:29         ` David Hildenbrand
2025-06-05  7:10 ` Michal Hocko
2025-06-06  8:10   ` David Hildenbrand
2025-06-06  8:31     ` Michal Hocko
2025-06-06  9:01       ` David Hildenbrand
2025-06-06 10:13         ` Michal Hocko
2025-06-06 10:19           ` Lorenzo Stoakes
2025-06-06 10:28             ` David Hildenbrand
2025-06-06 11:04               ` Lorenzo Stoakes
2025-06-06 11:44                 ` David Hildenbrand
2025-06-06 11:56                   ` Michal Hocko
2025-06-06 12:12                   ` Lorenzo Stoakes
2025-06-06 12:17                     ` David Hildenbrand
2025-06-06 17:57                 ` John Hubbard
2025-06-06 18:06                   ` Lorenzo Stoakes
2025-06-06 18:15                     ` David Hildenbrand
2025-06-06 18:21                       ` John Hubbard
2025-06-06 18:23                         ` David Hildenbrand
2025-06-06 18:31                           ` John Hubbard
2025-06-06 18:36                             ` David Hildenbrand
2025-06-06 18:39                               ` John Hubbard
2025-06-06 18:34                           ` Lorenzo Stoakes
2025-06-06 18:42                           ` Jason Gunthorpe
2025-06-06 18:46                             ` Lorenzo Stoakes
2025-06-06 19:03                               ` Lorenzo Stoakes
2025-06-07 13:42                                 ` Jason Gunthorpe
2025-06-07 13:53                                   ` Lorenzo Stoakes
2025-06-07 18:00                                     ` John Hubbard
2025-06-09  9:57                                       ` Vlastimil Babka
2025-07-24 10:54                                         ` Vlastimil Babka
2025-07-24 10:56                                           ` Lorenzo Stoakes
2025-07-24 17:27                                             ` John Hubbard
2025-06-11  9:32                                       ` David Hildenbrand
2025-06-11 12:03                                         ` Jason Gunthorpe
2025-06-11 12:06                                           ` Lorenzo Stoakes
2025-06-06 10:28             ` Michal Hocko
2025-06-06 10:27           ` David Hildenbrand
2025-06-06  8:12 ` David Hildenbrand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).