Linux Documentation

Linux Documentation
 help / color / mirror / Atom feed

* [PATCH 6/6] hugetlb: pass hugetlb reservation ranges in base-page indices
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>

hugetlb_reserve_pages() consumes indices in hugepage granularity although
some callers naturally compute offsets in PAGE_SIZE units.

Teach the reservation helpers to accept base-page index ranges and
convert to hugepage indices internally before operating on the
reservation map. This keeps the internal representation unchanged while
making the API contract more uniform for callers.

Update hugetlbfs and memfd call sites to pass base-page indices, and
adjust the documentation to describe the new calling convention. Add
alignment warnings in hugetlb_reserve_pages() to catch invalid ranges
early.

No functional change intended, apart from the new alignment warnings.

Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 Documentation/mm/hugetlbfs_reserv.rst | 12 +++++------
 fs/hugetlbfs/inode.c                  | 29 ++++++++++++---------------
 mm/hugetlb.c                          | 26 ++++++++++++++++--------
 mm/memfd.c                            |  9 +++++----
 4 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/Documentation/mm/hugetlbfs_reserv.rst b/Documentation/mm/hugetlbfs_reserv.rst
index a49115db18c7..60a52b28f0b4 100644
--- a/Documentation/mm/hugetlbfs_reserv.rst
+++ b/Documentation/mm/hugetlbfs_reserv.rst
@@ -112,8 +112,8 @@ flag was specified in either the shmget() or mmap() call.  If NORESERVE
 was specified, then this routine returns immediately as no reservations
 are desired.
 
-The arguments 'from' and 'to' are huge page indices into the mapping or
-underlying file.  For shmget(), 'from' is always 0 and 'to' corresponds to
+The arguments 'from' and 'to' are base page indices into the mapping or
+underlying file. For shmget(), 'from' is always 0 and 'to' corresponds to
 the length of the segment/mapping.  For mmap(), the offset argument could
 be used to specify the offset into the underlying file.  In such a case,
 the 'from' and 'to' arguments have been adjusted by this offset.
@@ -136,10 +136,10 @@ to indicate this VMA owns the reservations.
 
 The reservation map is consulted to determine how many huge page reservations
 are needed for the current mapping/segment.  For private mappings, this is
-always the value (to - from).  However, for shared mappings it is possible that
-some reservations may already exist within the range (to - from).  See the
-section :ref:`Reservation Map Modifications <resv_map_modifications>`
-for details on how this is accomplished.
+always the number of huge pages covered by the range [from, to).  However,
+for shared mappings it is possible that some reservations may already exist
+within the range [from, to).  See the section :ref:`Reservation Map Modifications
+<resv_map_modifications>` for details on how this is accomplished.
 
 The mapping may be associated with a subpool.  If so, the subpool is consulted
 to ensure there is sufficient space for the mapping.  It is possible that the
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a72d46ff7980..ec05ed30b70f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -157,10 +157,8 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
 	if (inode->i_flags & S_PRIVATE)
 		vma_flags_set(&vma_flags, VMA_NORESERVE_BIT);
 
-	if (hugetlb_reserve_pages(inode,
-			desc->pgoff >> huge_page_order(h),
-			len >> huge_page_shift(h), desc,
-			vma_flags) < 0)
+	if (hugetlb_reserve_pages(inode, desc->pgoff, len >> PAGE_SHIFT, desc,
+				  vma_flags) < 0)
 		goto out;
 
 	ret = 0;
@@ -408,8 +406,8 @@ static void hugetlb_unmap_file_folio(struct hstate *h,
 	unsigned long v_end;
 	pgoff_t start, end;
 
-	start = index * pages_per_huge_page(h);
-	end = (index + 1) * pages_per_huge_page(h);
+	start = index;
+	end = start + pages_per_huge_page(h);
 
 	i_mmap_lock_write(mapping);
 retry:
@@ -518,6 +516,8 @@ static void remove_inode_single_folio(struct hstate *h, struct inode *inode,
 		struct address_space *mapping, struct folio *folio,
 		pgoff_t index, bool truncate_op)
 {
+	pgoff_t next_index;
+
 	/*
 	 * If folio is mapped, it was faulted in after being
 	 * unmapped in caller or hugetlb_vmdelete_list() skips
@@ -540,8 +540,9 @@ static void remove_inode_single_folio(struct hstate *h, struct inode *inode,
 	VM_BUG_ON_FOLIO(folio_test_hugetlb_restore_reserve(folio), folio);
 	hugetlb_delete_from_page_cache(folio);
 	if (!truncate_op) {
+		next_index = index + pages_per_huge_page(h);
 		if (unlikely(hugetlb_unreserve_pages(inode, index,
-							index + 1, 1)))
+						     next_index, 1)))
 			hugetlb_fix_reserve_counts(inode);
 	}
 
@@ -575,7 +576,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	struct address_space *mapping = &inode->i_data;
 	const pgoff_t end = lend >> PAGE_SHIFT;
 	struct folio_batch fbatch;
-	pgoff_t next, idx;
+	pgoff_t next;
 	int i, freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
@@ -592,9 +593,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			/*
 			 * Remove folio that was part of folio_batch.
 			 */
-			idx = folio->index >> huge_page_order(h);
 			remove_inode_single_folio(h, inode, mapping, folio,
-						  idx, truncate_op);
+						  folio->index, truncate_op);
 			freed++;
 
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -604,9 +604,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	}
 
 	if (truncate_op)
-		(void)hugetlb_unreserve_pages(inode,
-				lstart >> huge_page_shift(h),
-				LONG_MAX, freed);
+		(void)hugetlb_unreserve_pages(inode, lstart >> PAGE_SHIFT,
+					      LONG_MAX, freed);
 }
 
 static void hugetlbfs_evict_inode(struct inode *inode)
@@ -1561,9 +1560,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	inode->i_size = size;
 	clear_nlink(inode);
 
-	if (hugetlb_reserve_pages(inode, 0,
-			size >> huge_page_shift(hstate_inode(inode)), NULL,
-			acctflag) < 0)
+	if (hugetlb_reserve_pages(inode, 0, size >> PAGE_SHIFT, NULL, acctflag) < 0)
 		file = ERR_PTR(-ENOMEM);
 	else
 		file = alloc_file_pseudo(inode, mnt, name, O_RDWR,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 47ef41b6fb2e..eb4ab5bd0c9f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6532,10 +6532,11 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 }
 
 /*
- * Update the reservation map for the range [from, to].
+ * Update the reservation map for the range [from, to) where 'from' and 'to'
+ * are base-page indices that are expected to be huge page aligned.
  *
- * Returns the number of entries that would be added to the reservation map
- * associated with the range [from, to].  This number is greater or equal to
+ * Returns the number of huge pages that would be added to the reservation map
+ * associated with the range [from, to).  This number is greater or equal to
  * zero. -EINVAL or -ENOMEM is returned in case of any errors.
  */
 
@@ -6550,6 +6551,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	struct resv_map *resv_map;
 	struct hugetlb_cgroup *h_cg = NULL;
 	long gbl_reserve, regions_needed = 0;
+	long from_idx, to_idx;
 	int err;
 
 	/* This should never happen */
@@ -6558,6 +6560,12 @@ long hugetlb_reserve_pages(struct inode *inode,
 		return -EINVAL;
 	}
 
+	VM_WARN_ON(!IS_ALIGNED(from, 1UL << huge_page_order(h)));
+	VM_WARN_ON(!IS_ALIGNED(to,   1UL << huge_page_order(h)));
+
+	from_idx = from >> huge_page_order(h);
+	to_idx = to >> huge_page_order(h);
+
 	/*
 	 * Only apply hugepage reservation if asked. At fault time, an
 	 * attempt will be made for VM_NORESERVE to allocate a page
@@ -6580,7 +6588,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 		 */
 		resv_map = inode_resv_map(inode);
 
-		chg = region_chg(resv_map, from, to, &regions_needed);
+		chg = region_chg(resv_map, from_idx, to_idx, &regions_needed);
 	} else {
 		/* Private mapping. */
 		resv_map = resv_map_alloc();
@@ -6589,7 +6597,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 			goto out_err;
 		}
 
-		chg = to - from;
+		chg = to_idx - from_idx;
 
 		set_vma_desc_resv_map(desc, resv_map);
 		set_vma_desc_resv_flags(desc, HPAGE_RESV_OWNER);
@@ -6644,7 +6652,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 	 * else has to be done for private mappings here
 	 */
 	if (!desc || vma_desc_test(desc, VMA_MAYSHARE_BIT)) {
-		add = region_add(resv_map, from, to, regions_needed, h, h_cg);
+		add = region_add(resv_map, from_idx, to_idx, regions_needed, h, h_cg);
 
 		if (unlikely(add < 0)) {
 			hugetlb_acct_memory(h, -gbl_reserve);
@@ -6712,7 +6720,7 @@ long hugetlb_reserve_pages(struct inode *inode,
 		 * region_add failed or didn't run.
 		 */
 		if (chg >= 0 && add < 0)
-			region_abort(resv_map, from, to, regions_needed);
+			region_abort(resv_map, from_idx, to_idx, regions_needed);
 	if (desc && is_vma_desc_resv_set(desc, HPAGE_RESV_OWNER)) {
 		kref_put(&resv_map->refs, resv_map_release);
 		set_vma_desc_resv_map(desc, NULL);
@@ -6728,13 +6736,15 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 	long chg = 0;
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	long gbl_reserve;
+	long start_idx = start >> huge_page_order(h);
+	long end_idx = end >> huge_page_order(h);
 
 	/*
 	 * Since this routine can be called in the evict inode path for all
 	 * hugetlbfs inodes, resv_map could be NULL.
 	 */
 	if (resv_map) {
-		chg = region_del(resv_map, start, end);
+		chg = region_del(resv_map, start_idx, end_idx);
 		/*
 		 * region_del() can fail in the rare case where a region
 		 * must be split and another region descriptor can not be
diff --git a/mm/memfd.c b/mm/memfd.c
index 56c8833c4195..59c174c7533c 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -80,14 +80,15 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t index)
 		struct inode *inode = file_inode(memfd);
 		struct hstate *h = hstate_file(memfd);
 		long nr_resv;
-		pgoff_t idx;
+		pgoff_t next_index;
 		int err = -ENOMEM;
 
 		gfp_mask = htlb_alloc_mask(h);
 		gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
-		idx = index >> huge_page_order(h);
+		next_index = index + pages_per_huge_page(h); 
 
-		nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS);
+		nr_resv = hugetlb_reserve_pages(inode, index, next_index, NULL,
+						EMPTY_VMA_FLAGS);
 		if (nr_resv < 0)
 			return ERR_PTR(nr_resv);
 
@@ -137,7 +138,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t index)
 		}
 err_unresv:
 		if (nr_resv > 0)
-			hugetlb_unreserve_pages(inode, idx, idx + 1, 0);
+			hugetlb_unreserve_pages(inode, index, next_index, 0);
 		return ERR_PTR(err);
 	}
 #endif
-- 
2.43.5


^ permalink raw reply related

* [PATCH 3/6] hugetlb: make hugetlb_fault_mutex_hash() take PAGE_SIZE index
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>

hugetlb_fault_mutex_hash() is used to serialize faults and page cache
operations on the same hugetlb file offset. The helper currently expects
its index argument in hugetlb page granularity, so callers have to
open-code conversions from the PAGE_SIZE-based indices commonly used
by the rest of the MM helpers.

Change hugetlb_fault_mutex_hash() to take a PAGE_SIZE-based index
instead, and perform the hugetlb-granularity conversion inside the helper.
Update all callers accordingly.

This makes the helper interface consistent with filemap_get_folio()
and linear_page_index(), while preserving the same lock selection for
a given hugetlb file offset.

Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 fs/hugetlbfs/inode.c | 19 ++++++++++---------
 mm/hugetlb.c         | 28 +++++++++++++++++++---------
 mm/memfd.c           | 11 ++++++-----
 mm/userfaultfd.c     |  7 +++----
 4 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cf79fb830377..e24e9bf54e14 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -575,7 +575,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	struct address_space *mapping = &inode->i_data;
 	const pgoff_t end = lend >> PAGE_SHIFT;
 	struct folio_batch fbatch;
-	pgoff_t next, index;
+	pgoff_t next, idx;
 	int i, freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
@@ -586,15 +586,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			struct folio *folio = fbatch.folios[i];
 			u32 hash = 0;
 
-			index = folio->index >> huge_page_order(h);
-			hash = hugetlb_fault_mutex_hash(mapping, index);
+			hash = hugetlb_fault_mutex_hash(mapping, folio->index);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 			/*
 			 * Remove folio that was part of folio_batch.
 			 */
+			idx = folio->index >> huge_page_order(h);
 			remove_inode_single_folio(h, inode, mapping, folio,
-						  index, truncate_op);
+						  idx, truncate_op);
 			freed++;
 
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -734,7 +734,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	struct mm_struct *mm = current->mm;
 	loff_t hpage_size = huge_page_size(h);
 	unsigned long hpage_shift = huge_page_shift(h);
-	pgoff_t start, index, end;
+	pgoff_t start, end, idx, index;
 	int error;
 	u32 hash;
 
@@ -774,7 +774,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
 	pseudo_vma.vm_file = file;
 
-	for (index = start; index < end; index++) {
+	for (idx = start; idx < end; idx++) {
 		/*
 		 * This is supposed to be the vaddr where the page is being
 		 * faulted in, but we have no vaddr here.
@@ -794,14 +794,15 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		}
 
 		/* addr is the offset within the file (zero based) */
-		addr = index * hpage_size;
+		addr = idx * hpage_size;
 
 		/* mutex taken here, fault path and hole punch */
+		index = idx << huge_page_order(h);
 		hash = hugetlb_fault_mutex_hash(mapping, index);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		/* See if already present in mapping to avoid alloc/free */
-		folio = filemap_get_folio(mapping, index << huge_page_order(h));
+		folio = filemap_get_folio(mapping, index);
 		if (!IS_ERR(folio)) {
 			folio_put(folio);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -824,7 +825,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		}
 		folio_zero_user(folio, addr);
 		__folio_mark_uptodate(folio);
-		error = hugetlb_add_to_page_cache(folio, mapping, index);
+		error = hugetlb_add_to_page_cache(folio, mapping, idx);
 		if (unlikely(error)) {
 			restore_reserve_on_error(h, &pseudo_vma, addr, folio);
 			folio_put(folio);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38b39eaf46cc..9d5ae1f87850 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5515,7 +5515,7 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
 		 */
 		if (cow_from_owner) {
 			struct address_space *mapping = vma->vm_file->f_mapping;
-			pgoff_t idx;
+			pgoff_t index;
 			u32 hash;
 
 			folio_put(old_folio);
@@ -5528,8 +5528,9 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
 			 *
 			 * Reacquire both after unmap operation.
 			 */
-			idx = vma_hugecache_offset(h, vma, vmf->address);
-			hash = hugetlb_fault_mutex_hash(mapping, idx);
+			index = linear_page_index(vma, vmf->address);
+			hash = hugetlb_fault_mutex_hash(mapping, index);
+
 			hugetlb_vma_unlock_read(vma);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
@@ -5664,6 +5665,10 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
 						  unsigned long reason)
 {
 	u32 hash;
+	pgoff_t index;
+
+	index = linear_page_index((const struct vm_area_struct *)vmf, vmf->address);
+	hash = hugetlb_fault_mutex_hash(mapping, index);
 
 	/*
 	 * vma_lock and hugetlb_fault_mutex must be dropped before handling
@@ -5671,7 +5676,6 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
 	 * userfault, any vma operation should be careful from here.
 	 */
 	hugetlb_vma_unlock_read(vmf->vma);
-	hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 	return handle_userfault(vmf, reason);
 }
@@ -5696,7 +5700,8 @@ static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm, unsigned
 static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 			struct vm_fault *vmf)
 {
-	u32 hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
+	u32 hash;
+	pgoff_t index;
 	bool new_folio, new_anon_folio = false;
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -5707,6 +5712,8 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	unsigned long size;
 	pte_t new_pte;
 
+	index = vmf->pgoff << huge_page_order(h);
+	hash = hugetlb_fault_mutex_hash(mapping, index);
 	/*
 	 * Currently, we are forced to kill the process in the event the
 	 * original mapper has unmapped pages from the child due to a failed
@@ -5920,13 +5927,14 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 }
 
 #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
+/* 'index' is expected to be in PAGE_SIZE granularity */
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t index)
 {
 	unsigned long key[2];
 	u32 hash;
 
 	key[0] = (unsigned long) mapping;
-	key[1] = idx;
+	key[1] = index >> huge_page_order(hstate_inode(mapping->host)); 
 
 	hash = jhash2((u32 *)&key, sizeof(key)/(sizeof(u32)), 0);
 
@@ -5937,7 +5945,7 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
  * For uniprocessor systems we always use a single mutex, so just
  * return 0 and avoid the hashing overhead.
  */
-u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t index)
 {
 	return 0;
 }
@@ -5952,6 +5960,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct address_space *mapping;
 	bool need_wait_lock = false;
+	pgoff_t index;
 	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address & huge_page_mask(h),
@@ -5972,8 +5981,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
+	index = linear_page_index(vma, vmf.address);
 	mapping = vma->vm_file->f_mapping;
-	hash = hugetlb_fault_mutex_hash(mapping, vmf.pgoff);
+	hash = hugetlb_fault_mutex_hash(mapping, index);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	/*
diff --git a/mm/memfd.c b/mm/memfd.c
index fb425f4e315f..911ff8220d05 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -64,7 +64,7 @@ static void memfd_tag_pins(struct xa_state *xas)
  * (memfd_pin_folios()) cannot find a folio in the page cache at a given
  * index in the mapping.
  */
-struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
+struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t index)
 {
 #ifdef CONFIG_HUGETLB_PAGE
 	struct folio *folio;
@@ -79,12 +79,13 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
 		 */
 		struct inode *inode = file_inode(memfd);
 		struct hstate *h = hstate_file(memfd);
-		int err = -ENOMEM;
 		long nr_resv;
+		pgoff_t idx;
+		int err = -ENOMEM;
 
 		gfp_mask = htlb_alloc_mask(h);
 		gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
-		idx >>= huge_page_order(h);
+		idx = index >> huge_page_order(h);
 
 		nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL, EMPTY_VMA_FLAGS);
 		if (nr_resv < 0)
@@ -116,7 +117,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
 			 * races with concurrent allocations, as required by all other
 			 * callers of hugetlb_add_to_page_cache().
 			 */
-			hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx);
+			hash = hugetlb_fault_mutex_hash(memfd->f_mapping, index);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 			err = hugetlb_add_to_page_cache(folio,
@@ -140,7 +141,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
 		return ERR_PTR(err);
 	}
 #endif
-	return shmem_read_folio(memfd->f_mapping, idx);
+	return shmem_read_folio(memfd->f_mapping, index);
 }
 
 /*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index c053aa4389b6..9482b25d3d84 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -504,7 +504,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 	long copied;
 	struct folio *folio;
 	unsigned long vma_hpagesize;
-	pgoff_t idx;
+	pgoff_t index;
 	u32 hash;
 	struct address_space *mapping;
 
@@ -573,10 +573,9 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 		 * in the case of shared pmds.  fault mutex prevents
 		 * races with other faulting threads.
 		 */
-		idx = linear_page_index(dst_vma, dst_addr);
-		idx >>= huge_page_order(hstate_vma(dst_vma));
+		index = linear_page_index(dst_vma, dst_addr);
 		mapping = dst_vma->vm_file->f_mapping;
-		hash = hugetlb_fault_mutex_hash(mapping, idx);
+		hash = hugetlb_fault_mutex_hash(mapping, index);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 		hugetlb_vma_lock_read(dst_vma);
 
-- 
2.43.5


^ permalink raw reply related

* [PATCH 2/6] hugetlb: remove the hugetlb_linear_page_index() helper
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>

hugetlb_linear_page_index() is just linear_page_index() converted from
base-page units to hugetlb page units.

Open-code that conversion at its remaining call site in
mfill_atomic_hugetlb() and drop the helper.

No functional change intended.

Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 include/linux/hugetlb.h | 17 -----------------
 mm/userfaultfd.c        |  3 ++-
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c64c6e5e50f5..71691a2b6855 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -792,23 +792,6 @@ static inline unsigned huge_page_shift(struct hstate *h)
 	return h->order + PAGE_SHIFT;
 }
 
-/**
- * hugetlb_linear_page_index() - linear_page_index() but in hugetlb
- *				 page size granularity.
- * @vma: the hugetlb VMA
- * @address: the virtual address within the VMA
- *
- * Return: the page offset within the mapping in huge page units.
- */
-static inline pgoff_t hugetlb_linear_page_index(struct vm_area_struct *vma,
-		unsigned long address)
-{
-	struct hstate *h = hstate_vma(vma);
-
-	return ((address - vma->vm_start) >> huge_page_shift(h)) +
-		(vma->vm_pgoff >> huge_page_order(h));
-}
-
 static inline bool order_is_gigantic(unsigned int order)
 {
 	return order > MAX_PAGE_ORDER;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 2c565c7134b6..c053aa4389b6 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -573,7 +573,8 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 		 * in the case of shared pmds.  fault mutex prevents
 		 * races with other faulting threads.
 		 */
-		idx = hugetlb_linear_page_index(dst_vma, dst_addr);
+		idx = linear_page_index(dst_vma, dst_addr);
+		idx >>= huge_page_order(hstate_vma(dst_vma));
 		mapping = dst_vma->vm_file->f_mapping;
 		hash = hugetlb_fault_mutex_hash(mapping, idx);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
-- 
2.43.5


^ permalink raw reply related

* [PATCH 5/6] hugetlb: make hugetlb_add_to_page_cache() use PAGE_SIZE-based index
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>

hugetlb_add_to_page_cache() currently takes a parameter named 'idx',
but internally converts it from hugetlb page units into PAGE_SIZE-based
page-cache index units before calling __filemap_add_folio().

Make hugetlb_add_to_page_cache() take a PAGE_SIZE-based index directly
and update its callers accordingly.  This removes the internal shift,
keeps the index units consistent with filemap_lock_folio() and
__filemap_add_folio(), and simplifies the surrounding code.

Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 fs/hugetlbfs/inode.c    |  2 +-
 include/linux/hugetlb.h |  2 +-
 mm/hugetlb.c            | 21 ++++++++-------------
 mm/memfd.c              |  2 +-
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e24e9bf54e14..a72d46ff7980 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -825,7 +825,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		}
 		folio_zero_user(folio, addr);
 		__folio_mark_uptodate(folio);
-		error = hugetlb_add_to_page_cache(folio, mapping, idx);
+		error = hugetlb_add_to_page_cache(folio, mapping, index);
 		if (unlikely(error)) {
 			restore_reserve_on_error(h, &pseudo_vma, addr, folio);
 			folio_put(folio);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 71691a2b6855..a51a5e12859c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -713,7 +713,7 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
 					  nodemask_t *nmask, gfp_t gfp_mask);
 
 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
-			pgoff_t idx);
+			pgoff_t index);
 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address, struct folio *folio);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 138e5ecf818e..47ef41b6fb2e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5625,15 +5625,14 @@ bool hugetlbfs_pagecache_present(struct hstate *h,
 }
 
 int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
-			   pgoff_t idx)
+			   pgoff_t index)
 {
 	struct inode *inode = mapping->host;
 	struct hstate *h = hstate_inode(inode);
 	int err;
 
-	idx <<= huge_page_order(h);
 	__folio_set_locked(folio);
-	err = __filemap_add_folio(mapping, folio, idx, GFP_KERNEL, NULL);
+	err = __filemap_add_folio(mapping, folio, index, GFP_KERNEL, NULL);
 
 	if (unlikely(err)) {
 		__folio_clear_locked(folio);
@@ -5724,7 +5723,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	 * before we get page_table_lock.
 	 */
 	new_folio = false;
-	folio = filemap_lock_folio(mapping, vmf->pgoff << huge_page_order(h));
+	folio = filemap_lock_folio(mapping, index);
 	if (IS_ERR(folio)) {
 		size = i_size_read(mapping->host) >> huge_page_shift(h);
 		if (vmf->pgoff >= size)
@@ -5788,8 +5787,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 		new_folio = true;
 
 		if (vma->vm_flags & VM_MAYSHARE) {
-			int err = hugetlb_add_to_page_cache(folio, mapping,
-							vmf->pgoff);
+			int err = hugetlb_add_to_page_cache(folio, mapping, index);
 			if (err) {
 				/*
 				 * err can't be -EEXIST which implies someone
@@ -6173,7 +6171,6 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 			     uffd_flags_t flags,
 			     struct folio **foliop)
 {
-	pgoff_t idx;
 	spinlock_t *ptl;
 	struct folio *folio;
 	pte_t _dst_pte, dst_ptep;
@@ -6183,13 +6180,11 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 	struct mm_struct *dst_mm = dst_vma->vm_mm;
 	bool wp_enabled = (flags & MFILL_ATOMIC_WP);
 	int vm_shared = dst_vma->vm_flags & VM_SHARED;
+	pgoff_t index = linear_page_index(dst_vma, dst_addr);
 	struct address_space *mapping = dst_vma->vm_file->f_mapping;
 	bool is_continue = uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE);
 	int ret = -ENOMEM;
 
-	idx = linear_page_index(dst_vma, dst_addr);
-	idx >>= huge_page_order(h);
-
 	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
 		ptl = huge_pte_lock(h, dst_mm, dst_pte);
 
@@ -6211,7 +6206,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 
 	if (is_continue) {
 		ret = -EFAULT;
-		folio = filemap_lock_folio(mapping, idx << huge_page_order(h));
+		folio = filemap_lock_folio(mapping, index);
 		if (IS_ERR(folio))
 			goto out;
 		folio_in_pagecache = true;
@@ -6307,7 +6302,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 	/* Add shared, newly allocated pages to the page cache. */
 	if (vm_shared && !is_continue) {
 		ret = -EFAULT;
-		if (idx >= (i_size_read(mapping->host) >> huge_page_shift(h)))
+		if (index >= (i_size_read(mapping->host) >> PAGE_SHIFT))
 			goto out_release_nounlock;
 
 		/*
@@ -6316,7 +6311,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 		 * hugetlb_fault_mutex_table that here must be hold by
 		 * the caller.
 		 */
-		ret = hugetlb_add_to_page_cache(folio, mapping, idx);
+		ret = hugetlb_add_to_page_cache(folio, mapping, index);
 		if (ret)
 			goto out_release_nounlock;
 		folio_in_pagecache = true;
diff --git a/mm/memfd.c b/mm/memfd.c
index 911ff8220d05..56c8833c4195 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -122,7 +122,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t index)
 
 			err = hugetlb_add_to_page_cache(folio,
 							memfd->f_mapping,
-							idx);
+							index);
 
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
-- 
2.43.5


^ permalink raw reply related

* [PATCH 1/6] hugetlb: open-code hugetlb folio lookup index conversion
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>

This patch removes `filemap_lock_hugetlb_folio()` and open-codes
the index conversion at each call site, making it explicit when
hugetlb code is translating a hugepage index into the base-page index
expected by `filemap_lock_folio()`.  As part of that cleanup,
it also uses a base-page index directly in `hugetlbfs_zero_partial_page()`,
where the byte offset is already page-granular. Overall, the change
makes the indexing model more obvious at the call sites and avoids
hiding the huge-index to base-index conversion inside a helper.

Suggested-by: David Hildenbrand <david@kernel.org>
Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 fs/hugetlbfs/inode.c    | 20 ++++++++++----------
 include/linux/hugetlb.h | 12 ------------
 mm/hugetlb.c            |  4 ++--
 3 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cd6b22f6e2b1..cf79fb830377 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -242,9 +242,9 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct hstate *h = hstate_file(file);
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned long index = iocb->ki_pos >> huge_page_shift(h);
+	unsigned long idx = iocb->ki_pos >> huge_page_shift(h);
 	unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
-	unsigned long end_index;
+	unsigned long end_idx;
 	loff_t isize;
 	ssize_t retval = 0;
 
@@ -257,10 +257,10 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		isize = i_size_read(inode);
 		if (!isize)
 			break;
-		end_index = (isize - 1) >> huge_page_shift(h);
-		if (index > end_index)
+		end_idx = (isize - 1) >> huge_page_shift(h);
+		if (idx > end_idx)
 			break;
-		if (index == end_index) {
+		if (idx == end_idx) {
 			nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
 			if (nr <= offset)
 				break;
@@ -268,7 +268,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		nr = nr - offset;
 
 		/* Find the folio */
-		folio = filemap_lock_hugetlb_folio(h, mapping, index);
+		folio = filemap_lock_folio(mapping, idx << huge_page_order(h));
 		if (IS_ERR(folio)) {
 			/*
 			 * We have a HOLE, zero out the user-buffer for the
@@ -307,10 +307,10 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 				retval = -EFAULT;
 			break;
 		}
-		index += offset >> huge_page_shift(h);
+		idx += offset >> huge_page_shift(h);
 		offset &= ~huge_page_mask(h);
 	}
-	iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
+	iocb->ki_pos = ((loff_t)idx << huge_page_shift(h)) + offset;
 	return retval;
 }
 
@@ -652,10 +652,10 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
 					loff_t start,
 					loff_t end)
 {
-	pgoff_t idx = start >> huge_page_shift(h);
+	pgoff_t index = start >> PAGE_SHIFT; 
 	struct folio *folio;
 
-	folio = filemap_lock_hugetlb_folio(h, mapping, idx);
+	folio = filemap_lock_folio(mapping, index);
 	if (IS_ERR(folio))
 		return;
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 9c098a02a09e..c64c6e5e50f5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -829,12 +829,6 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
 	return huge_page_size(h) / 512;
 }
 
-static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
-				struct address_space *mapping, pgoff_t idx)
-{
-	return filemap_lock_folio(mapping, idx << huge_page_order(h));
-}
-
 #include <asm/hugetlb.h>
 
 #ifndef is_hugepage_only_range
@@ -1106,12 +1100,6 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
 	return NULL;
 }
 
-static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
-				struct address_space *mapping, pgoff_t idx)
-{
-	return NULL;
-}
-
 static inline int isolate_or_dissolve_huge_folio(struct folio *folio,
 						struct list_head *list)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a786034ac95c..38b39eaf46cc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5724,7 +5724,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	 * before we get page_table_lock.
 	 */
 	new_folio = false;
-	folio = filemap_lock_hugetlb_folio(h, mapping, vmf->pgoff);
+	folio = filemap_lock_folio(mapping, vmf->pgoff << huge_page_order(h));
 	if (IS_ERR(folio)) {
 		size = i_size_read(mapping->host) >> huge_page_shift(h);
 		if (vmf->pgoff >= size)
@@ -6208,7 +6208,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 
 	if (is_continue) {
 		ret = -EFAULT;
-		folio = filemap_lock_hugetlb_folio(h, mapping, idx);
+		folio = filemap_lock_folio(mapping, idx << huge_page_order(h));
 		if (IS_ERR(folio))
 			goto out;
 		folio_in_pagecache = true;
-- 
2.43.5


^ permalink raw reply related

* [PATCH 4/6] hugetlb: drop vma_hugecache_offset() in favor of linear_page_index()
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel
In-Reply-To: <20260409234158.837786-1-jane.chu@oracle.com>

vma_hugecache_offset() converts a hugetlb VMA address into a mapping
offset in hugepage units. While the helper is small, its name is not very
clear, and the resulting code is harder to follow than using the common MM
helper directly.

Use linear_page_index() instead, with an explicit conversion from
PAGE_SIZE units to hugepage units at each call site, and remove
vma_hugecache_offset().

This makes the code a bit more direct and avoids a hugetlb-specific helper
whose behavior is already expressible with existing MM primitives.

Signed-off-by: Jane Chu <jane.chu@oracle.com>
---
 mm/hugetlb.c | 49 +++++++++++++++++++++----------------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9d5ae1f87850..138e5ecf818e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1006,17 +1006,6 @@ static long region_count(struct resv_map *resv, long f, long t)
 	return chg;
 }
 
-/*
- * Convert the address within this vma to the page offset within
- * the mapping, huge page units here.
- */
-static pgoff_t vma_hugecache_offset(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long address)
-{
-	return ((address - vma->vm_start) >> huge_page_shift(h)) +
-			(vma->vm_pgoff >> huge_page_order(h));
-}
-
 /*
  * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
  * bits of the reservation map pointer, which are always clear due to
@@ -2465,7 +2454,9 @@ static long __vma_reservation_common(struct hstate *h,
 	if (!resv)
 		return 1;
 
-	idx = vma_hugecache_offset(h, vma, addr);
+	idx = linear_page_index(vma, addr);
+	idx >>= huge_page_order(h);
+
 	switch (mode) {
 	case VMA_NEEDS_RESV:
 		ret = region_chg(resv, idx, idx + 1, &dummy_out_regions_needed);
@@ -4718,8 +4709,10 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 	if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER))
 		return;
 
-	start = vma_hugecache_offset(h, vma, vma->vm_start);
-	end = vma_hugecache_offset(h, vma, vma->vm_end);
+	start = linear_page_index(vma, vma->vm_start); 
+	start >>= huge_page_order(h);
+	end = linear_page_index(vma, vma->vm_end); 
+	end >>= huge_page_order(h);
 
 	reserve = (end - start) - region_count(resv, start, end);
 	hugetlb_cgroup_uncharge_counter(resv, start, end);
@@ -5960,14 +5953,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct address_space *mapping;
 	bool need_wait_lock = false;
-	pgoff_t index;
+	pgoff_t index = linear_page_index(vma, address & huge_page_mask(h));
 	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address & huge_page_mask(h),
 		.real_address = address,
 		.flags = flags,
-		.pgoff = vma_hugecache_offset(h, vma,
-				address & huge_page_mask(h)),
+		.pgoff = index >> huge_page_order(h),
 		/* TODO: Track hugetlb faults using vm_fault */
 
 		/*
@@ -5981,7 +5973,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
-	index = linear_page_index(vma, vmf.address);
 	mapping = vma->vm_file->f_mapping;
 	hash = hugetlb_fault_mutex_hash(mapping, index);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -6182,20 +6173,22 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 			     uffd_flags_t flags,
 			     struct folio **foliop)
 {
-	struct mm_struct *dst_mm = dst_vma->vm_mm;
-	bool is_continue = uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE);
-	bool wp_enabled = (flags & MFILL_ATOMIC_WP);
+	pgoff_t idx;
+	spinlock_t *ptl;
+	struct folio *folio;
+	pte_t _dst_pte, dst_ptep;
+	bool folio_in_pagecache = false;
 	struct hstate *h = hstate_vma(dst_vma);
-	struct address_space *mapping = dst_vma->vm_file->f_mapping;
-	pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
 	unsigned long size = huge_page_size(h);
+	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	bool wp_enabled = (flags & MFILL_ATOMIC_WP);
 	int vm_shared = dst_vma->vm_flags & VM_SHARED;
-	pte_t _dst_pte;
-	spinlock_t *ptl;
+	struct address_space *mapping = dst_vma->vm_file->f_mapping;
+	bool is_continue = uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE);
 	int ret = -ENOMEM;
-	struct folio *folio;
-	bool folio_in_pagecache = false;
-	pte_t dst_ptep;
+
+	idx = linear_page_index(dst_vma, dst_addr);
+	idx >>= huge_page_order(h);
 
 	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
 		ptl = huge_pte_lock(h, dst_mm, dst_pte);
-- 
2.43.5


^ permalink raw reply related

* [PATCH 0/6] hugetlb: normalize exported interfaces to use base-page indices
From: Jane Chu @ 2026-04-09 23:41 UTC (permalink / raw)
  To: akpm, david, muchun.song, osalvador
  Cc: lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb, mhocko,
	corbet, skhan, hughd, baolin.wang, peterx, linux-mm, linux-doc,
	linux-kernel

This series stems from a discussion with David. [1]
The series makes a small cleanup to a few hugetlb interfaces used
outside the subsystem by standardizing them on base-page indices.
Hopefully this makes the interface semantics a bit more coherent with
the rest of mm, while the internal hugetlb code continues to use hugepage
indices where that remains the more natural fit.

It is based off mm-stable, 3/30/2026, b2c31180b9d6.

[1] https://lore.kernel.org/linux-mm/9ec9edd1-0f4c-4da2-ae78-0e7b251a9e25@kernel.org/

Jane Chu (6):
  hugetlb: open-code hugetlb folio lookup index conversion
  hugetlb: remove the hugetlb_linear_page_index() helper
  hugetlb: make hugetlb_fault_mutex_hash() take PAGE_SIZE index
  hugetlb: drop vma_hugecache_offset() in favor of linear_page_index()
  hugetlb: make hugetlb_add_to_page_cache() use PAGE_SIZE-based index
  hugetlb: pass hugetlb reservation ranges in base-page indices

 Documentation/mm/hugetlbfs_reserv.rst |  12 +--
 fs/hugetlbfs/inode.c                  |  60 +++++++-------
 include/linux/hugetlb.h               |  31 +------
 mm/hugetlb.c                          | 112 ++++++++++++++------------
 mm/memfd.c                            |  18 +++--
 mm/userfaultfd.c                      |   6 +-
 6 files changed, 109 insertions(+), 130 deletions(-)

-- 
2.43.5


^ permalink raw reply

* Re: [PATCH v2 00/16] fs,x86/resctrl: Add kernel-mode (e.g., PLZA) support to the resctrl subsystem
From: Moger, Babu @ 2026-04-09 23:42 UTC (permalink / raw)
  To: Reinette Chatre, Babu Moger, corbet@lwn.net, tony.luck@intel.com,
	Dave.Martin@arm.com, james.morse@arm.com, tglx@kernel.org,
	mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com
  Cc: skhan@linuxfoundation.org, x86@kernel.org, hpa@zytor.com,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	vschneid@redhat.com, kas@kernel.org, rick.p.edgecombe@intel.com,
	akpm@linux-foundation.org, pmladek@suse.com,
	rdunlap@infradead.org, dapeng1.mi@linux.intel.com,
	kees@kernel.org, elver@google.com, paulmck@kernel.org,
	lirongqing@baidu.com, safinaskar@gmail.com, fvdl@google.com,
	seanjc@google.com, pawan.kumar.gupta@linux.intel.com,
	xin@zytor.com, tiala@microsoft.com, chang.seok.bae@intel.com,
	Lendacky, Thomas, elena.reshetova@intel.com,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-coco@lists.linux.dev, kvm@vger.kernel.org,
	eranian@google.com, peternewman@google.com
In-Reply-To: <973067bf-6e6c-446a-a81a-713840d701a9@intel.com>

Hi Reinette,

On 4/9/2026 3:50 PM, Reinette Chatre wrote:
> Hi Babu,
> 
> On 4/9/26 11:05 AM, Moger, Babu wrote:
>> On 4/9/2026 12:26 PM, Reinette Chatre wrote:
>>> On 4/9/26 10:19 AM, Moger, Babu wrote:
>>>> On 4/8/2026 6:41 PM, Reinette Chatre wrote:
>>>
>>>>> When the user switches to either "global_assign_ctrl_inherit_mon_per_cpu" or
>>>>> "global_assign_ctrl_assign_mon_per_cpu" then "info/kernel_mode_assignment" is created
>>>>> (or made visible to user space) and is expected to point to default group.
>>>>> User can change the group using "info/kernel_mode_assignment" at this point.
>>>>>
>>>>> If the current scenario is below ...
>>>>>       # cat info/kernel_mode
>>>>>       [global_assign_ctrl_inherit_mon_per_cpu]
>>>>>       inherit_ctrl_and_mon
>>>>>       global_assign_ctrl_assign_mon_per_cpu
>>>>>
>>>>> ... then "info/kernel_mode_assignment" will exist but what it should contain if
>>>>> user switches mode at this point may be up for discussion.
>>>>>
>>>>> option 1)
>>>>> When user switches mode to "global_assign_ctrl_assign_mon_per_cpu" then
>>>>> the resource group in "info/kernel_mode_assignment" is reset to the
>>>>> default group and all CPUs PLZA state reset to match. The kernel_mode_cpus
>>>>> and kernel_mode_cpuslist files become visible in default resource group
>>>>> and they contain "all online CPUs".
>>>>>
>>>>> option 2)
>>>>> When user switches mode to "global_assign_ctrl_assign_mon_per_cpu" then
>>>>> the resource group in "info/kernel_mode_assignment" is kept and all
>>>>> CPUs PLZA state set to match it while also keeping the current
>>>>> values of that resource group's kernel_mode_cpus and kernel_mode_cpuslist
>>>>> files.
>>>>>
>>>>> I am leaning towards "option 1" to keep it consistent with a switch from
>>>>> "inherit_ctrl_and_mon" and being deterministic about how a mode is started with
>>>>
>>>> Yes. The "option 1" seems appropriate.
>>>>
>>>>> a clean slate. What are your thoughts? What would be use case where a user would
>>>>> want to switch between "global_assign_ctrl_inherit_mon_per_cpu" and
>>>>> "global_assign_ctrl_assign_mon_per_cpu" to just switch rmid_en on and off?
>>>>
>>>>
>>>> This is a bit tricky.
>>>>
>>>> Currently, our requirement is to have a CTRL_MON group for
>>>> global_assign_ctrl_inherit_mon_per_cpu. In this scenario, we use the
>>>> group’s CLOSID for PLZA configuration, and RMID is not used (rmid_en
>>>> = 0) when setting up PLZA.
>>>>
>>>> Our requirement is also to have a CTRL_MON/MON group for
>>>> global_assign_ctrl_assign_mon_per_cpu. In this case as well, the
>>>> group’s CLOSID and RMID (rmid_en = 1) are both used to configure PLZA.
>>>
>>> ah, right. Good catch.
>>>
>>>>
>>>> Actually, we should not allow these changes from
>>>> global_assign_ctrl_inherit_mon_per_cpu  to
>>>> global_assign_ctrl_assign_mon_per_cpu or vice versa.
>>>
>>> resctrl could allow it but as part of the switch it resets the "kernel mode group" to
>>> be the default group every time? This would be the "option 1" above.
>>
>> Other options.
>>
>> Allow global_assign_ctrl_inherit_mon_per_cpu -> global_assign_ctrl_assign_mon_per_cpu. As part of the switch, reset the "kernel mode group" to the default group.
>>
>> Allow global_assign_ctrl_assign_mon_per_cpu -> global_assign_ctrl_inherit_mon_per_cpu. In this case switch
>> to CTRL_MON/MON -> CTRL_MON.
>>
> 
> ok. Could you please return the courtesy of providing feedback on the
> suggestion you are responding to and also include the motivation why your
> suggestion is the better option?

Yea. Sure.

We need to allow the switch between the modes. Otherwise the only way to
reset is to remount the resctrl filesystem. That is not a good option.

Allow global_assign_ctrl_inherit_mon_per_cpu -> 
global_assign_ctrl_assign_mon_per_cpu. As part of the switch, reset the 
"kernel mode group" to the default group.

This option is the same as the one you suggested.

Allow global_assign_ctrl_assign_mon_per_cpu -> 
global_assign_ctrl_inherit_mon_per_cpu. In this case switch
to CTRL_MON/MON -> CTRL_MON. This option basically disables monitoring
(rmid_en=0). It is less disruptive. The move is from the child group to
the parent group.

Thanks
Babu




^ permalink raw reply

* [PATCH] docs: escape ** glob pattern in MAINTAINERS descriptions
From: Matteo Croce @ 2026-04-09 22:31 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Jonathan Corbet
  Cc: linux-doc, linux-kernel, Matteo Croce

From: Matteo Croce <teknoraver@meta.com>

Escape '**' in the MAINTAINERS descriptions section to prevent
reStructuredText from interpreting it as bold/strong inline markup,
which causes a warning when running 'make htmldocs'.

Fixes: 420849332f9f ("get_maintainer: add ** glob pattern support")
Signed-off-by: Matteo Croce <teknoraver@meta.com>
---
 Documentation/sphinx/maintainers_include.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/sphinx/maintainers_include.py b/Documentation/sphinx/maintainers_include.py
index 519ad18685b2..54f34f47c9ee 100755
--- a/Documentation/sphinx/maintainers_include.py
+++ b/Documentation/sphinx/maintainers_include.py
@@ -89,7 +89,8 @@ class MaintainersInclude(Include):
             output = None
             if descriptions:
                 # Escape the escapes in preformatted text.
-                output = "| %s" % (line.replace("\\", "\\\\"))
+                output = "| %s" % (line.replace("\\", "\\\\")
+                                        .replace("**", "\\**"))
                 # Look for and record field letter to field name mappings:
                 #   R: Designated *reviewer*: FullName <address@domain>
                 m = re.search(r"\s(\S):\s", line)
-- 
2.50.1


^ permalink raw reply related

* Re: [PATCH v12 19/25] drm/connector: Register color format property on HDMI connectors
From: Dmitry Baryshkov @ 2026-04-09 22:09 UTC (permalink / raw)
  To: Nicolas Frattaroli
  Cc: Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
	Christian König, David Airlie, Simona Vetter,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Laurent Pinchart,
	Jonas Karlman, Jernej Skrabec, Sandy Huang, Heiko Stübner,
	Andy Yan, Jani Nikula, Rodrigo Vivi, Joonas Lahtinen,
	Tvrtko Ursulin, Dmitry Baryshkov, Sascha Hauer, Rob Herring,
	Jonathan Corbet, Shuah Khan, kernel, amd-gfx, dri-devel,
	linux-kernel, linux-arm-kernel, linux-rockchip, intel-gfx,
	intel-xe, linux-doc
In-Reply-To: <20260409-color-format-v12-19-ce84e1817a27@collabora.com>

On Thu, Apr 09, 2026 at 05:45:09PM +0200, Nicolas Frattaroli wrote:
> The drmm_connector_hdmi_init function can figure out what DRM color
> formats are supported by a particular connector based on the supported
> HDMI format bitmask that's passed in.
> 
> Use it to register the drm color format property.
> 
> Reviewed-by: Maxime Ripard <mripard@kernel.org>
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
>  drivers/gpu/drm/drm_connector.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>


-- 
With best wishes
Dmitry

^ permalink raw reply

* Re: [PATCH v12 05/25] drm/atomic-helper: Add HDMI bridge output bus formats helper
From: Dmitry Baryshkov @ 2026-04-09 22:09 UTC (permalink / raw)
  To: Nicolas Frattaroli
  Cc: Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
	Christian König, David Airlie, Simona Vetter,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Laurent Pinchart,
	Jonas Karlman, Jernej Skrabec, Sandy Huang, Heiko Stübner,
	Andy Yan, Jani Nikula, Rodrigo Vivi, Joonas Lahtinen,
	Tvrtko Ursulin, Dmitry Baryshkov, Sascha Hauer, Rob Herring,
	Jonathan Corbet, Shuah Khan, kernel, amd-gfx, dri-devel,
	linux-kernel, linux-arm-kernel, linux-rockchip, intel-gfx,
	intel-xe, linux-doc
In-Reply-To: <20260409-color-format-v12-5-ce84e1817a27@collabora.com>

On Thu, Apr 09, 2026 at 05:44:55PM +0200, Nicolas Frattaroli wrote:
> The drm_bridge_funcs atomic_get_output_bus_fmts operation should be the
> same for likely every HDMI connector bridge, unless such an HDMI
> connector bridge has some special hardware restrictions that I cannot
> envision yet.
> 
> To avoid code duplication and standardize on a set of media bus formats
> that the HDMI output color formats translate to, add a common helper
> function that implements this operation to the drm bridge helpers.
> 
> The function returns a list of output bus formats based on the HDMI
> bridge's current output bits-per-component, and its bitmask of supported
> color formats.
> 
> To guard against future expansion of DRM_OUTPUT_COLOR_FORMAT outgrowing
> the hweight8 call, add a BUILD_BUG_ON statement where it's used that
> checks for DRM_OUTPUT_COLOR_FORMAT_COUNT. The justification for not
> using hweight32 in all cases is that not all ISAs have a popcount
> instruction, and will benefit from a smaller/faster software
> implementation that doesn't have to operate across all bits.
> 
> The justification for not defining an hweight_color depending on the
> value of DRM_OUTPUT_COLOR_FORMAT_COUNT is that this count enum value is
> only known at compile time, not at preprocessor time.
> 
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
>  drivers/gpu/drm/drm_atomic_helper.c | 81 +++++++++++++++++++++++++++++++++++++
>  include/drm/drm_atomic_helper.h     |  7 ++++
>  2 files changed, 88 insertions(+)
> 

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>


-- 
With best wishes
Dmitry

^ permalink raw reply

* Re: [PATCH v12 04/25] drm/bridge: Act on the DRM color format property
From: Dmitry Baryshkov @ 2026-04-09 22:08 UTC (permalink / raw)
  To: Nicolas Frattaroli
  Cc: Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
	Christian König, David Airlie, Simona Vetter,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Laurent Pinchart,
	Jonas Karlman, Jernej Skrabec, Sandy Huang, Heiko Stübner,
	Andy Yan, Jani Nikula, Rodrigo Vivi, Joonas Lahtinen,
	Tvrtko Ursulin, Dmitry Baryshkov, Sascha Hauer, Rob Herring,
	Jonathan Corbet, Shuah Khan, kernel, amd-gfx, dri-devel,
	linux-kernel, linux-arm-kernel, linux-rockchip, intel-gfx,
	intel-xe, linux-doc
In-Reply-To: <20260409-color-format-v12-4-ce84e1817a27@collabora.com>

On Thu, Apr 09, 2026 at 05:44:54PM +0200, Nicolas Frattaroli wrote:
> The new DRM color format property allows userspace to request a specific
> color format on a connector. In turn, this fills the connector state's
> color_format member to switch color formats.
> 
> Make drm_bridges consider the color_format set in the connector state
> during the atomic bridge check. For bridges that represent HDMI bridges,
> rely on whatever format the HDMI logic set. Reject any output bus
> formats that do not correspond to the requested color format.
> 
> Non-HDMI last bridges with DRM_CONNECTOR_COLOR_FORMAT_AUTO set will end
> up choosing the first output format that functions to make a whole
> recursive bridge chain format selection succeed.
> 
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
>  drivers/gpu/drm/drm_bridge.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 88 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
> index ba80bebb5685..7c1516864d96 100644
> --- a/drivers/gpu/drm/drm_bridge.c
> +++ b/drivers/gpu/drm/drm_bridge.c
> @@ -1150,6 +1150,47 @@ static int select_bus_fmt_recursive(struct drm_bridge *first_bridge,
>  	return ret;
>  }
>  
> +static bool __pure bus_format_is_color_fmt(u32 bus_fmt, enum drm_connector_color_format fmt)
> +{
> +	if (fmt == DRM_CONNECTOR_COLOR_FORMAT_AUTO)
> +		return true;
> +
> +	switch (bus_fmt) {
> +	case MEDIA_BUS_FMT_FIXED:
> +		return true;
> +	case MEDIA_BUS_FMT_RGB888_1X24:
> +	case MEDIA_BUS_FMT_RGB101010_1X30:
> +	case MEDIA_BUS_FMT_RGB121212_1X36:
> +	case MEDIA_BUS_FMT_RGB161616_1X48:
> +		return fmt == DRM_CONNECTOR_COLOR_FORMAT_RGB444;
> +	case MEDIA_BUS_FMT_YUV8_1X24:
> +	case MEDIA_BUS_FMT_YUV10_1X30:
> +	case MEDIA_BUS_FMT_YUV12_1X36:
> +	case MEDIA_BUS_FMT_YUV16_1X48:
> +		return fmt == DRM_CONNECTOR_COLOR_FORMAT_YCBCR444;
> +	case MEDIA_BUS_FMT_UYVY8_1X16:
> +	case MEDIA_BUS_FMT_VYUY8_1X16:
> +	case MEDIA_BUS_FMT_YUYV8_1X16:
> +	case MEDIA_BUS_FMT_YVYU8_1X16:
> +	case MEDIA_BUS_FMT_UYVY10_1X20:
> +	case MEDIA_BUS_FMT_YUYV10_1X20:
> +	case MEDIA_BUS_FMT_VYUY10_1X20:
> +	case MEDIA_BUS_FMT_YVYU10_1X20:
> +	case MEDIA_BUS_FMT_UYVY12_1X24:
> +	case MEDIA_BUS_FMT_VYUY12_1X24:
> +	case MEDIA_BUS_FMT_YUYV12_1X24:
> +	case MEDIA_BUS_FMT_YVYU12_1X24:
> +		return fmt == DRM_CONNECTOR_COLOR_FORMAT_YCBCR422;
> +	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
> +	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
> +	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
> +	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
> +		return fmt == DRM_CONNECTOR_COLOR_FORMAT_YCBCR420;
> +	default:
> +		return false;
> +	}
> +}
> +
>  /*
>   * This function is called by &drm_atomic_bridge_chain_check() just before
>   * calling &drm_bridge_funcs.atomic_check() on all elements of the chain.
> @@ -1193,6 +1234,7 @@ drm_atomic_bridge_chain_select_bus_fmts(struct drm_bridge *bridge,
>  	struct drm_encoder *encoder = bridge->encoder;
>  	struct drm_bridge_state *last_bridge_state;
>  	unsigned int i, num_out_bus_fmts = 0;
> +	enum drm_connector_color_format fmt;
>  	u32 *out_bus_fmts;
>  	int ret = 0;
>  
> @@ -1234,13 +1276,58 @@ drm_atomic_bridge_chain_select_bus_fmts(struct drm_bridge *bridge,
>  			out_bus_fmts[0] = MEDIA_BUS_FMT_FIXED;
>  	}
>  
> +	/*
> +	 * On HDMI connectors, use the output format chosen by whatever does the
> +	 * HDMI logic. For everyone else, just trust that the bridge out_bus_fmts
> +	 * are sorted by preference for %DRM_CONNECTOR_COLOR_FORMAT_AUTO, as
> +	 * bus_format_is_color_fmt() always returns true for AUTO.
> +	 */
> +	if (last_bridge->type == DRM_MODE_CONNECTOR_HDMIA) {

I still think this is misplaced (and misidentified). Consider HDMI
bridge being routed to the DVI-D connector. The last bridge would have
different type, but the HDMI-specific logic must still be applied. The
bridge must use RGB444, but it must be handled in a generic way.

Or the other way around, a DVI bridge being routed through the HDMI
connector (thinking about PandaBoard here). The combo should not go
through the HDMI-specific color format selection although the last
bridge in the chain is the HDMI-A bridge.

I think all these cases should be handled by the connector, which knows
if there is an OP_HDMI bridge in the chain or not.

> +		drm_dbg_kms(last_bridge->dev,
> +			    "HDMI bridge requests format %s\n",
> +			    drm_hdmi_connector_get_output_format_name(
> +				    conn_state->hdmi.output_format));
> +		switch (conn_state->hdmi.output_format) {
> +		case DRM_OUTPUT_COLOR_FORMAT_RGB444:
> +			fmt = DRM_CONNECTOR_COLOR_FORMAT_RGB444;
> +			break;
> +		case DRM_OUTPUT_COLOR_FORMAT_YCBCR444:
> +			fmt = DRM_CONNECTOR_COLOR_FORMAT_YCBCR444;
> +			break;
> +		case DRM_OUTPUT_COLOR_FORMAT_YCBCR422:
> +			fmt = DRM_CONNECTOR_COLOR_FORMAT_YCBCR422;
> +			break;
> +		case DRM_OUTPUT_COLOR_FORMAT_YCBCR420:
> +			fmt = DRM_CONNECTOR_COLOR_FORMAT_YCBCR420;
> +			break;
> +		default:
> +			ret = -EINVAL;
> +			goto out_free_bus_fmts;
> +		}
> +	} else {
> +		fmt = conn_state->color_format;
> +		drm_dbg_kms(last_bridge->dev, "Non-HDMI bridge requests format %d\n", fmt);
> +	}
> +
>  	for (i = 0; i < num_out_bus_fmts; i++) {
> +		if (!bus_format_is_color_fmt(out_bus_fmts[i], fmt)) {
> +			drm_dbg_kms(last_bridge->dev,
> +				    "Skipping bus format 0x%04x as it doesn't match format %d\n",
> +				    out_bus_fmts[i], fmt);
> +			ret = -ENOTSUPP;
> +			continue;
> +		}
>  		ret = select_bus_fmt_recursive(bridge, last_bridge, crtc_state,
>  					       conn_state, out_bus_fmts[i]);
> -		if (ret != -ENOTSUPP)
> +		if (ret != -ENOTSUPP) {
> +			drm_dbg_kms(last_bridge->dev,
> +				    "Found bridge chain ending with bus format 0x%04x\n",
> +				    out_bus_fmts[i]);
>  			break;
> +		}
>  	}
>  
> +out_free_bus_fmts:
>  	kfree(out_bus_fmts);
>  
>  	return ret;
> 
> -- 
> 2.53.0
> 

-- 
With best wishes
Dmitry

^ permalink raw reply

* [PATCH 3/4] Documentation: gpu: todo: fix typo 'themsevles' -> 'themselves'
From: Francisco Maestre @ 2026-04-09 22:05 UTC (permalink / raw)
  To: airlied, simona; +Cc: dri-devel, linux-doc, linux-kernel, Francisco Maestre

Fix a spelling mistake in the panel-simple/panel-edp TODO section.

Assisted-by: Claude:claude-opus-4-5
Signed-off-by: Francisco Maestre <francisco@maestretorreblanca.com>
---
 Documentation/gpu/todo.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 520da44a0..e37113478 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -456,7 +456,7 @@ be turned into a WARN_ON() or somehow made louder.
 At the moment, we expect that we may still encounter the warnings in the
 drm_panel core when using panel-simple and panel-edp. Since those panel
 drivers are used with a lot of different DRM modeset drivers they still
-make an extra effort to disable/unprepare the panel themsevles at shutdown
+make an extra effort to disable/unprepare the panel themselves at shutdown
 time. Specifically we could still encounter those warnings if the panel
 driver gets shutdown() _before_ the DRM modeset driver and the DRM modeset
 driver properly calls drm_atomic_helper_shutdown() in its own shutdown()
-- 
2.50.1 (Apple Git-155)


^ permalink raw reply related

* Re: [PATCH v4 9/9] Documentation: ABI: Add sysfs ABI documentation for DDR training data
From: Jeff Hugo @ 2026-04-09 21:30 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-9-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> Add ABI documentation for the DDR training data sysfs attribute exposed by
> the sahara MHI driver.

Sahara

Also, this patch should be squashed with the previous patch since that 
is the one that adds this sysfs entry.

^ permalink raw reply

* Re: [PATCH v4 7/9] bus: mhi: Capture DDR training data using command mode
From: Jeff Hugo @ 2026-04-09 21:27 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-7-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> During early boot, devices may perform DDR training and produce training
> data that can be reused on subsequent boots to reduce initialization
> time. The sahara protocol provides a command mode flow to transfer this

Sahara

> training data to the host, but the driver currently does not handle
> command mode and drops the training payload.
> 
> Add Sahara command mode support to retrieve DDR training data from the
> device. When the device enters command mode and sends CMD_READY, query
> the support command list and request DDR training data using EXECUTE and
> EXECUTE_DATA. Allocate receive buffers based on the reported response
> size and copy the raw payload directly from the MHI DL completion
> callback.
> 
> Store the captured training data in controller-scoped memory using devres,
> so it remains available after sahara channel teardown. Also distinguish

Sahara

> raw payload completion from control packets in the DL callback, avoiding
> misinterpretation of training data as protocol messages, and requeue
> the RX buffer after switching back to IMAGE_TX_PENDING to allow the
> boot flow to continue.
> 
> Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
> ---
>   drivers/bus/mhi/sahara/sahara.c | 328 +++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 320 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/bus/mhi/sahara/sahara.c b/drivers/bus/mhi/sahara/sahara.c
> index 0a0f578aaa47ab2c4ca0765666b392fb9936ddd5..c88f1220199ac4373d3552167870c19a0d5f23b9 100644
> --- a/drivers/bus/mhi/sahara/sahara.c
> +++ b/drivers/bus/mhi/sahara/sahara.c
> @@ -5,11 +5,14 @@
>    */
>   
>   #include <linux/devcoredump.h>
> +#include <linux/device.h>
> +#include <linux/device/devres.h>
>   #include <linux/firmware.h>
>   #include <linux/limits.h>
>   #include <linux/mhi.h>
>   #include <linux/minmax.h>
>   #include <linux/mod_devicetable.h>
> +#include <linux/mutex.h>
>   #include <linux/overflow.h>
>   #include <linux/sahara.h>
>   #include <linux/types.h>
> @@ -60,8 +63,16 @@
>   #define SAHARA_RESET_LENGTH		0x8
>   #define SAHARA_MEM_DEBUG64_LENGTH	0x18
>   #define SAHARA_MEM_READ64_LENGTH	0x18
> -
> +#define SAHARA_COMMAND_READY_LENGTH	0x8
> +#define SAHARA_COMMAND_EXEC_RESP_LENGTH	0x10
> +#define SAHARA_COMMAND_EXECUTE_LENGTH	0xc
> +#define SAHARA_COMMAND_EXEC_DATA_LENGTH	0xc
> +#define SAHARA_SWITCH_MODE_LENGTH	0xc
> +
> +#define SAHARA_EXEC_CMD_GET_COMMAND_ID_LIST	0x8
> +#define SAHARA_EXEC_CMD_GET_TRAINING_DATA	0x9
>   #define SAHARA_DDR_TRAINING_IMG_ID	34

Why is the indentation of this line messed up?

> +#define SAHARA_NUM_CMD_BUF		SAHARA_NUM_TX_BUF
>   
>   struct sahara_packet {
>   	__le32 cmd;
> @@ -97,6 +108,19 @@ struct sahara_packet {
>   			__le64 memory_address;
>   			__le64 memory_length;
>   		} memory_read64;
> +		struct {
> +			__le32 client_command;
> +		} command_execute;
> +		struct {
> +			__le32 client_command;
> +			__le32 response_length;
> +		} command_execute_resp;
> +		struct {
> +			__le32 client_command;
> +		} command_exec_data;
> +		struct {
> +			__le32 mode;
> +		} mode_switch;
>   	};
>   };
>   
> @@ -163,6 +187,7 @@ struct sahara_context {
>   	struct work_struct		fw_work;
>   	struct work_struct		dump_work;
>   	struct work_struct		read_data_work;
> +	struct work_struct		cmd_work;
>   	struct mhi_device		*mhi_dev;
>   	const char * const		*image_table;
>   	u32				table_size;
> @@ -183,6 +208,24 @@ struct sahara_context {
>   	bool				is_mem_dump_mode;
>   	bool				non_streaming;
>   	const char			*fw_folder;
> +	bool				is_cmd_mode;
> +	bool				receiving_trng_data;

You already spell out "receiving", spell out "training".  I don't recall 
seeing "trng" before so it seems like a really uncommon shortform.

> +	size_t				trng_size;
> +	size_t				trng_rcvd;
> +	u32				trng_nbuf;
> +	char				*cmd_buff[SAHARA_NUM_CMD_BUF];
> +};

^ permalink raw reply

* Re: [PATCH v4 6/9] bus: mhi: Load DDR training data using per-device serial number
From: Jeff Hugo @ 2026-04-09 21:23 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-6-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> Devices may provide device-specific DDR training data that can be reused
> across boot to avoid retraining and reduce boot time. The Sahara driver
> currently always falls back to the default DDR training image, even when
> per-device training data is available.
> 
> Extend the firmware loading logic to first attempt loading a per-device
> DDR training image using the device serial number. If the serial-specific
> image is not present, fallback to the existing default image, preserving
> current behavior.
> 
> This change enables DDR training data reuse when available while keeping
> the existing training flow unchanged for devices without saved data.
> 
> Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
> ---
>   drivers/bus/mhi/sahara/sahara.c | 47 ++++++++++++++++++++++++++++++++---------
>   1 file changed, 37 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/bus/mhi/sahara/sahara.c b/drivers/bus/mhi/sahara/sahara.c
> index 4ea14c57774f51a778289d7409372a6ab21fea60..0a0f578aaa47ab2c4ca0765666b392fb9936ddd5 100644
> --- a/drivers/bus/mhi/sahara/sahara.c
> +++ b/drivers/bus/mhi/sahara/sahara.c
> @@ -61,6 +61,8 @@
>   #define SAHARA_MEM_DEBUG64_LENGTH	0x18
>   #define SAHARA_MEM_READ64_LENGTH	0x18
>   
> +#define SAHARA_DDR_TRAINING_IMG_ID	34
> +
>   struct sahara_packet {
>   	__le32 cmd;
>   	__le32 length;
> @@ -365,16 +367,41 @@ static int sahara_find_image(struct sahara_context *context, u32 image_id)
>   		return 0;
>   	}
>   
> -	/*
> -	 * This image might be optional. The device may continue without it.
> -	 * Only the device knows. Suppress error messages that could suggest an
> -	 * a problem when we were actually able to continue.
> -	 */
> -	ret = sahara_request_fw(context, context->image_table[image_id]);
> -	if (ret) {
> -		dev_dbg(&context->mhi_dev->dev, "request for image id %d / file %s failed %d\n",
> -			image_id, context->image_table[image_id], ret);
> -		return ret;
> +	/* DDR training special case: Try per-serial number file first */
> +	if (image_id == SAHARA_DDR_TRAINING_IMG_ID && context->fw_folder) {
> +		u32 serial_num = context->mhi_dev->mhi_cntrl->serial_number;
> +
> +		fw_path = kasprintf(GFP_KERNEL,
> +				    "qcom/%s/mdmddr_0x%x.mbn",
> +				    context->fw_folder, serial_num);
> +		if (!fw_path)
> +			return -ENOMEM;
> +
> +		ret = sahara_request_fw(context, fw_path);
> +		kfree(fw_path);
> +
> +		if (ret) {
> +			ret = sahara_request_fw(context, context->image_table[image_id]);
> +			if (ret) {
> +				dev_dbg(&context->mhi_dev->dev,
> +					"request for image id %d / file %s failed %d\n",
> +					image_id, context->image_table[image_id], ret);
> +			}
> +			return ret;
> +		}

This is entirely redundant with the else in the next line. I don't 
understand why id 34 could be reserved for training data, but also be a 
valid image if the training data was not found.

Just have an if that looks for the training data, followed by an if that
does a normal lookup when no image was found.

> +	} else {
> +		/*
> +		 * This image might be optional. The device may continue without it.
> +		 * Only the device knows. Suppress error messages that could suggest an
> +		 * a problem when we were actually able to continue.
> +		 */
> +		ret = sahara_request_fw(context, context->image_table[image_id]);
> +		if (ret) {
> +			dev_dbg(&context->mhi_dev->dev,
> +				"request for image id %d / file %s failed %d\n",
> +				image_id, context->image_table[image_id], ret);
> +			return ret;
> +		}
>   	}
>   
>   	context->active_image_id = image_id;
> 


^ permalink raw reply

* Re: [PATCH v4 5/9] bus: mhi: Add QDU100 variant and image_id firmware fallback
From: Jeff Hugo @ 2026-04-09 21:14 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-5-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> The Sahara driver currently selects a firmware image table based on the
> attached device, but it does not recognize QDU100 devices that expose the
> protocol on the SAHARA MHI channel. As a result, the host cannot associate
> QDU100 devices with the correct firmware namespace during image transfer.
> 
> Extend the probe-time variant selection to match the SAHARA MHI channel
> and associate it with the QDU100 firmware folder. Add an image_id based
> firmware lookup fallback for cases where an image does not have an explicit
> table entry. This allows required images to be provisioned by the platform
> without requiring device specific client drivers or additional registration
> mechanisms.
> 
> This change only affects devices matched on the SAHARA channel and does not
> change behavior for existing AIC100 and AIC200 devices.
> 
> Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
> ---
>   drivers/bus/mhi/sahara/sahara.c | 77 ++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 72 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/bus/mhi/sahara/sahara.c b/drivers/bus/mhi/sahara/sahara.c
> index 8f1c0d72066c0cf80c09d78bfc51df2e482133b9..4ea14c57774f51a778289d7409372a6ab21fea60 100644
> --- a/drivers/bus/mhi/sahara/sahara.c
> +++ b/drivers/bus/mhi/sahara/sahara.c
> @@ -234,6 +234,36 @@ static const char * const aic200_image_table[] = {
>   	[78] = "qcom/aic200/pvs.bin",
>   };
>   
> +static const char * const qdu100_image_table[] = {
> +	[5] = "qcom/qdu100/uefi.elf",
> +	[8] = "qcom/qdu100/qdsp6sw.mbn",
> +	[16] = "qcom/qdu100/efs1.bin",
> +	[17] = "qcom/qdu100/efs2.bin",
> +	[20] = "qcom/qdu100/efs3.bin",
> +	[23] = "qcom/qdu100/aop.mbn",
> +	[25] = "qcom/qdu100/tz.mbn",
> +	[29] = "qcom/qdu100/zeros_1sector.bin",
> +	[33] = "qcom/qdu100/hypvm.mbn",
> +	[34] = "qcom/qdu100/mdmddr.mbn",
> +	[36] = "qcom/qdu100/multi_image_qti.mbn",
> +	[37] = "qcom/qdu100/multi_image.mbn",
> +	[38] = "qcom/qdu100/xbl_config.elf",
> +	[39] = "qcom/qdu100/abl_userdebug.elf",
> +	[40] = "qcom/qdu100/zeros_1sector.bin",
> +	[41] = "qcom/qdu100/devcfg.mbn",
> +	[42] = "qcom/qdu100/zeros_1sector.bin",
> +	[45] = "qcom/qdu100/tools_l.elf",
> +	[46] = "qcom/qdu100/Quantum.elf",
> +	[47] = "qcom/qdu100/quest.elf",
> +	[48] = "qcom/qdu100/xbl_ramdump.elf",
> +	[49] = "qcom/qdu100/shrm.elf",
> +	[50] = "qcom/qdu100/cpucp.elf",
> +	[51] = "qcom/qdu100/aop_devcfg.mbn",
> +	[52] = "qcom/qdu100/fw_csm_gsi_3.0.elf",
> +	[53] = "qcom/qdu100/qdsp6sw_dtbs.elf",
> +	[54] = "qcom/qdu100/qupv3fw.elf",
> +};
> +
>   static const struct sahara_variant sahara_variants[] = {
>   	{
>   		.match = "AIC100",
> @@ -250,6 +280,14 @@ static const struct sahara_variant sahara_variants[] = {
>   		.table_size = ARRAY_SIZE(aic200_image_table),
>   		.fw_folder = "aic200",
>   		.non_streaming = false,
> +	},
> +	{
> +		.match = "SAHARA",
> +		.match_is_chan = true,

There are a half dozen non-QDU100 devices in pci_generic.c which expose
"SAHARA" channels. Considering those as valid use cases, I don't see this
mechanism working. Everything non-AIC is now going to be treated as QDU100.

> +		.image_table = qdu100_image_table,
> +		.table_size = ARRAY_SIZE(qdu100_image_table),
> +		.fw_folder = "qdu100",
> +		.non_streaming = false,
>   	}
>   };
>   
> @@ -278,8 +316,21 @@ static const struct sahara_variant *sahara_select_variant(struct mhi_device *mhi
>   	return NULL;
>   }
>   
> +static int sahara_request_fw(struct sahara_context *context, const char *path)
> +{
> +	int ret;
> +
> +	ret = firmware_request_nowarn(&context->firmware, path,
> +				      &context->mhi_dev->dev);
> +	if (ret)
> +		dev_dbg(&context->mhi_dev->dev,
> +			"Request for file %s failed %d\n", path, ret);
> +	return ret;
> +}

This is a pointless change. Drop it.

> +
>   static int sahara_find_image(struct sahara_context *context, u32 image_id)
>   {
> +	char *fw_path;
>   	int ret;
>   
>   	if (image_id == context->active_image_id)
> @@ -292,8 +343,26 @@ static int sahara_find_image(struct sahara_context *context, u32 image_id)
>   	}
>   
>   	if (image_id >= context->table_size || !context->image_table[image_id]) {
> -		dev_err(&context->mhi_dev->dev, "request for unknown image: %d\n", image_id);
> -		return -EINVAL;
> +		if (!context->fw_folder) {
> +			dev_err(&context->mhi_dev->dev,
> +				"Request for unknown image: %u (no fw folder)\n", image_id);
> +			return -EINVAL;
> +		}
> +
> +		fw_path = kasprintf(GFP_KERNEL, "qcom/%s/image_%u.elf",
> +				    context->fw_folder, image_id);
> +		if (!fw_path)
> +			return -ENOMEM;
> +
> +		ret = sahara_request_fw(context, fw_path);
> +		kfree(fw_path);
> +		if (ret) {
> +			dev_err(&context->mhi_dev->dev,
> +				"request for unknown image: %d\n", image_id);
> +			return -EINVAL;
> +		}
> +		context->active_image_id = image_id;
> +		return 0;

I don't see a use case for this fw path search functionality, and I think
it breaks the documented firmware loading guidelines.

>   	}
>   
>   	/*
> @@ -301,9 +370,7 @@ static int sahara_find_image(struct sahara_context *context, u32 image_id)
>   	 * Only the device knows. Suppress error messages that could suggest an
>   	 * a problem when we were actually able to continue.
>   	 */
> -	ret = firmware_request_nowarn(&context->firmware,
> -				      context->image_table[image_id],
> -				      &context->mhi_dev->dev);
> +	ret = sahara_request_fw(context, context->image_table[image_id]);
>   	if (ret) {
>   		dev_dbg(&context->mhi_dev->dev, "request for image id %d / file %s failed %d\n",
>   			image_id, context->image_table[image_id], ret);
> 


^ permalink raw reply

* Re: [PATCH v9 02/10] x86/bhi: Make clear_bhb_loop() effective on newer CPUs
From: Jim Mattson @ 2026-04-09 21:06 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Pawan Gupta, x86, Jon Kohler, Nikolay Borisov, H. Peter Anvin,
	Josh Poimboeuf, David Kaplan, Sean Christopherson,
	Borislav Petkov, Dave Hansen, Peter Zijlstra, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, KP Singh, Jiri Olsa,
	David S. Miller, David Laight, Andy Lutomirski, Thomas Gleixner,
	Ingo Molnar, David Ahern, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, John Fastabend, Stanislav Fomichev,
	Hao Luo, Paolo Bonzini, Jonathan Corbet, linux-kernel, kvm,
	Asit Mallick, Tao Zhang, bpf, netdev, linux-doc, chao.gao
In-Reply-To: <410df9f6-69ec-483f-9009-0a9b8c9162a9@intel.com>

On Thu, Apr 9, 2026 at 1:36 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 4/7/26 17:47, Jim Mattson wrote:
> > On Tue, Apr 7, 2026 at 4:41 PM Dave Hansen <dave.hansen@intel.com> wrote:
> >> On 4/7/26 16:27, Jim Mattson wrote:
> >>> What is your proposed BHI_DIS_S override mechanism, then?
> >> Let me make sure I get this right. The desire is to:
> >>
> >> 1. Have hypervisors lie to guests about the CPU they are running on (for
> >>    the benefit of large/diverse migration pools)
> >> 2. Have guests be allowed to boot with BHI_DIS_S for performance
> >> 3. Have apps in those guests that care about security to opt back in to
> >>    BHI_DIS_S for themselves?
> > I just want guests on heterogeneous migration pools to properly
> > protect themselves from native BHI when running on host kernels at
> > least as far back as Linux v6.6.
> >
> > To that end, I would be satisfied with using the longer BHB clearing
> > sequence when HYPERVISOR is true and BHI_CTRL is false.
>
> If the guests can't get mitigation information from model/family because
> the hypervisor is lying (or may lie), then it's on the hypervisor to
> figure it out.
>
> I'm not sure we want to just assume that all hypervisors are going to
> lie all the time about this.

Without any information, that is exactly what we must assume. There is
precedent for this.

In vulnerable_to_its():

        /*
         * If a VMM did not expose ITS_NO, assume that a guest could
         * be running on a vulnerable hardware or may migrate to such
         * hardware.
         */
        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
                return true;


In cpu_set_bug_bits():

        /*
         * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
         * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
         * attacks.  When virtualized, eIBRS could be hidden, assume vulnerable.
         */
        if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
            (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
             boot_cpu_has(X86_FEATURE_HYPERVISOR)))
                setup_force_cpu_bug(X86_BUG_BHI);

...and...

        if (c->x86_vendor == X86_VENDOR_AMD) {
                if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
                    !cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
                        if (cpu_matches(cpu_vuln_blacklist, TSA) ||
                            /* Enable bug on Zen guests to allow for
live migration. */
                            (cpu_has(c, X86_FEATURE_HYPERVISOR) &&
cpu_has(c, X86_FEATURE_ZEN)))
                                setup_force_cpu_bug(X86_BUG_TSA);
                }
        }


In check_null_seg_clears_base():

        /*
         * CPUID bit above wasn't set. If this kernel is still running
         * as a HV guest, then the HV has decided not to advertize
         * that CPUID bit for whatever reason. For example, one
         * member of the migration pool might be vulnerable. Which
         * means, the bug is present: set the BUG flag and return.
         */
        if (cpu_has(c, X86_FEATURE_HYPERVISOR)) {
                set_cpu_bug(c, X86_BUG_NULL_SEG);
                return;
        }

The hypervisor could provide more information so that the guest can
determine when it's safe to use the short sequence, but that's just
icing on the cake. The default, out-of-the-box configuration must be
safe.

^ permalink raw reply

* [PATCH v3 0/1] leds: Introduce the multi_max_intensity sysfs attribute
From: Armin Wolf @ 2026-04-09 21:06 UTC (permalink / raw)
  To: lee, pavel
  Cc: linux-kernel, corbet, skhan, linux-leds, linux-doc,
	jacek.anaszewski, pobrn, m.tretter, wse

This patch series was born out of a mailing list thread [1] where
I asked how to properly model an RGB LED as a multicolor LED. Said
LED has some exotic properties:

1. 5 global brightness levels.
2. 50 intensity levels for each R/G/B color components.

The current sysfs interface mandates that the maximum intensity value
for each color component should be the same as the maximum global
brightness. This makes sense for LEDs that only emulate global
brightness using led_mc_calc_color_components(), but causes problems
for LEDs that perform global brightness control in hardware.

Faking a maximum global brightness of 50 will not work in this case,
as the hardware can change the global brightness on its own. Userspace
applications might also prefer to know the true maximum brightness
value.

Because of this I decided to add a new sysfs attribute called
"multi_max_intensity". This attribute is similar to the
"max_brightness" sysfs attribute, except that it targets the intensity
values inside the "multi_intensity" sysfs attribute. I also decided to
cap intensity values coming from userspace to said maximum intensity
values to relieve drivers from doing it themselves. This was already
proposed in an unrelated patch [2] and might break some misbehaving
userspace applications that do not respect max_brightness.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/led-class-multicolor.h>

/*
 * Blocking brightness callback of the test LED.
 *
 * Walks all color components of the multicolor device and fails with -EIO
 * as soon as one of them carries an intensity above 30. Returns 0 when no
 * component exceeds that value. The requested brightness is not inspected.
 */
static int test_brightness_set_blocking(struct led_classdev *led_cdev,
					enum led_brightness brightness)
{
	struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(led_cdev);
	int idx = 0;

	while (idx < mc_cdev->num_colors) {
		if (mc_cdev->subled_info[idx].intensity > 30)
			return -EIO;

		idx++;
	}

	return 0;
}

/*
 * Color components (red, green, blue) of the test LED, on channels 1-3.
 * max_intensity is left at 0 for every component, which according to the
 * patch description makes the multicolor LED core use the global maximum
 * brightness as the per-component intensity limit.
 */
static struct mc_subled subleds[] = {
	{
		.color_index = LED_COLOR_ID_RED,
		.max_intensity = 0,
		.channel = 1,
	},
	{
		.color_index = LED_COLOR_ID_GREEN,
		.max_intensity = 0,
		.channel = 2,
	},
	{
		.color_index = LED_COLOR_ID_BLUE,
		.max_intensity = 0,
		.channel = 3,
	},
};

/*
 * Test multicolor LED class device: maximum global brightness of 255,
 * backed by the subleds array, with test_brightness_set_blocking() as its
 * blocking brightness callback.
 */
static struct led_classdev_mc led_mc_cdev = {
	.led_cdev = {
		.max_brightness = 255,
		.color = LED_COLOR_ID_MULTI,
		.flags = LED_CORE_SUSPENDRESUME | LED_REJECT_NAME_CONFLICT,
		.brightness_set_blocking = test_brightness_set_blocking,
	},
	.num_colors = ARRAY_SIZE(subleds),
	.subled_info = subleds,
};

/*
 * Module init: registers the test multicolor LED class device, passing NULL
 * as the first argument of led_classdev_multicolor_register_ext() and a
 * fixed device name and default label as init data.
 *
 * Returns whatever led_classdev_multicolor_register_ext() returns.
 */
static int __init test_init(void)
{
	struct led_init_data init_data = {
		.devname_mandatory = true,
		.default_label = "multicolor:" LED_FUNCTION_KBD_BACKLIGHT,
		.devicename = "test-led",
	};
	int ret;

	ret = led_classdev_multicolor_register_ext(NULL, &led_mc_cdev, &init_data);

	return ret;
}
module_init(test_init);

/* Module exit: unregisters the test multicolor LED class device. */
static void __exit test_exit(void)
{
	led_classdev_multicolor_unregister(&led_mc_cdev);
}
module_exit(test_exit);

MODULE_AUTHOR("Armin Wolf <W_Armin@gmx.de>");
MODULE_DESCRIPTION("Multicolor LED test device");
MODULE_LICENSE("GPL");

[1] https://lore.kernel.org/linux-leds/2d91a44e-fce2-42dc-b529-133ab4a191f0@gmx.de/
[2] https://lore.kernel.org/linux-leds/20260123-leds-multicolor-limit-intensity-v1-1-b37761c2fdfd@pengutronix.de/

Changes since v2:
- add Reviewed-by tags
- fix spelling mistake

Changes since v1:
- use sysfs_emit_at()
- fix documentation issues

Changes since RFC:
- rework documentation
- drop useless defines
- reduce amount of driver code churn

Armin Wolf (1):
  leds: Introduce the multi_max_intensity sysfs attribute

 .../ABI/testing/sysfs-class-led-multicolor    | 19 ++++++--
 Documentation/leds/leds-class-multicolor.rst  | 21 ++++++++-
 drivers/leds/led-class-multicolor.c           | 47 ++++++++++++++++++-
 drivers/leds/leds-lp50xx.c                    |  1 +
 drivers/leds/rgb/leds-ncp5623.c               |  4 +-
 include/linux/led-class-multicolor.h          | 30 +++++++++++-
 6 files changed, 113 insertions(+), 9 deletions(-)

-- 
2.39.5

^ permalink raw reply

* [PATCH v3 1/1] leds: Introduce the multi_max_intensity sysfs attribute
From: Armin Wolf @ 2026-04-09 21:06 UTC (permalink / raw)
  To: lee, pavel
  Cc: linux-kernel, corbet, skhan, linux-leds, linux-doc,
	jacek.anaszewski, pobrn, m.tretter, wse
In-Reply-To: <20260409210629.9934-1-W_Armin@gmx.de>

Some multicolor LEDs support global brightness control in hardware,
meaning that the maximum intensity of the color components is not
connected to the maximum global brightness. Such LEDs cannot be
described properly by the current multicolor LED class interface,
because it assumes that the maximum intensity of each color component
is described by the maximum global brightness of the LED.

Fix this by introducing a new sysfs attribute called
"multi_max_intensity" holding the maximum intensity values for the
color components of a multicolor LED class device. Drivers can use
the new max_intensity field inside struct mc_subled to tell the
multicolor LED class code about those values. Intensity values written
by userspace applications will be limited to this maximum value.

Drivers for multicolor LEDs that do not support global brightness
control in hardware might still want to use the maximum global LED
brightness supplied via devicetree as the maximum intensity of each
individual color component. Such drivers should set max_intensity
to 0 so that the multicolor LED core can act accordingly.

The lp50xx and ncp5623 LED drivers already use hardware-based control
for the global LED brightness. Modify those drivers to correctly
initialize .max_intensity to avoid being limited to the maximum global
brightness supplied via devicetree.

Reviewed-by: Werner Sembach <wse@tuxedocomputers.com>
Reviewed-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
---
 .../ABI/testing/sysfs-class-led-multicolor    | 19 ++++++--
 Documentation/leds/leds-class-multicolor.rst  | 21 ++++++++-
 drivers/leds/led-class-multicolor.c           | 47 ++++++++++++++++++-
 drivers/leds/leds-lp50xx.c                    |  1 +
 drivers/leds/rgb/leds-ncp5623.c               |  4 +-
 include/linux/led-class-multicolor.h          | 30 +++++++++++-
 6 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-led-multicolor b/Documentation/ABI/testing/sysfs-class-led-multicolor
index 16fc827b10cb..197da3e775b4 100644
--- a/Documentation/ABI/testing/sysfs-class-led-multicolor
+++ b/Documentation/ABI/testing/sysfs-class-led-multicolor
@@ -16,9 +16,22 @@ Date:		March 2020
 KernelVersion:	5.9
 Contact:	Dan Murphy <dmurphy@ti.com>
 Description:	read/write
-		This file contains array of integers. Order of components is
-		described by the multi_index array. The maximum intensity should
-		not exceed /sys/class/leds/<led>/max_brightness.
+		This file contains an array of integers. The order of components
+		is described by the multi_index array. The maximum intensity value
+		supported by each color component is described by the multi_max_intensity
+		file. Writing intensity values larger than the maximum value of a
+		given color component will result in those values being clamped.
+
+		For additional details please refer to
+		Documentation/leds/leds-class-multicolor.rst.
+
+What:		/sys/class/leds/<led>/multi_max_intensity
+Date:		March 2026
+KernelVersion:	7.1
+Contact:	Armin Wolf <W_Armin@gmx.de>
+Description:	read
+		This file contains an array of integers describing the maximum
+		intensity value for each intensity component.
 
 		For additional details please refer to
 		Documentation/leds/leds-class-multicolor.rst.
diff --git a/Documentation/leds/leds-class-multicolor.rst b/Documentation/leds/leds-class-multicolor.rst
index c6b47b4093c4..68340644f80b 100644
--- a/Documentation/leds/leds-class-multicolor.rst
+++ b/Documentation/leds/leds-class-multicolor.rst
@@ -25,10 +25,14 @@ color name to indexed value.
 The ``multi_index`` file is an array that contains the string list of the colors as
 they are defined in each ``multi_*`` array file.
 
-The ``multi_intensity`` is an array that can be read or written to for the
+The ``multi_intensity`` file is an array that can be read or written to for the
 individual color intensities.  All elements within this array must be written in
 order for the color LED intensities to be updated.
 
+The ``multi_max_intensity`` file is an array that contains the maximum intensity
+value supported by each color intensity. Intensity values above this will be
+automatically clamped into the supported range.
+
 Directory Layout Example
 ========================
 .. code-block:: console
@@ -38,6 +42,7 @@ Directory Layout Example
     -r--r--r--    1 root     root          4096 Oct 19 16:16 max_brightness
     -r--r--r--    1 root     root          4096 Oct 19 16:16 multi_index
     -rw-r--r--    1 root     root          4096 Oct 19 16:16 multi_intensity
+    -r--r--r--    1 root     root          4096 Oct 19 16:16 multi_max_intensity
 
 ..
 
@@ -104,3 +109,17 @@ the color LED group.
     128
 
 ..
+
+Writing intensity values larger than the maximum specified in ``multi_max_intensity``
+will result in those values being clamped into the supported range.
+
+.. code-block:: console
+
+   # cat /sys/class/leds/multicolor:status/multi_max_intensity
+   255 255 255
+
+   # echo 512 512 512 > /sys/class/leds/multicolor:status/multi_intensity
+   # cat /sys/class/leds/multicolor:status/multi_intensity
+   255 255 255
+
+..
diff --git a/drivers/leds/led-class-multicolor.c b/drivers/leds/led-class-multicolor.c
index 6b671f3f9c61..8d763b1ae76f 100644
--- a/drivers/leds/led-class-multicolor.c
+++ b/drivers/leds/led-class-multicolor.c
@@ -7,10 +7,28 @@
 #include <linux/init.h>
 #include <linux/led-class-multicolor.h>
 #include <linux/math.h>
+#include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+static unsigned int led_mc_get_max_intensity(struct led_classdev_mc *mcled_cdev, size_t index)
+{
+	unsigned int max_intensity;
+
+	/* The maximum global brightness value might still be changed by
+	 * led_classdev_register_ext() using devicetree properties. This
+	 * prevents us from changing subled_info[X].max_intensity when
+	 * registering a multicolor LED class device, so we have to do
+	 * this during runtime.
+	 */
+	max_intensity = mcled_cdev->subled_info[index].max_intensity;
+	if (max_intensity)
+		return max_intensity;
+
+	return mcled_cdev->led_cdev.max_brightness;
+}
+
 int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
 				 enum led_brightness brightness)
 {
@@ -27,6 +45,27 @@ int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
 }
 EXPORT_SYMBOL_GPL(led_mc_calc_color_components);
 
+static ssize_t multi_max_intensity_show(struct device *dev,
+					struct device_attribute *intensity_attr, char *buf)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct led_classdev_mc *mcled_cdev = lcdev_to_mccdev(led_cdev);
+	unsigned int max_intensity;
+	int len = 0;
+	int i;
+
+	for (i = 0; i < mcled_cdev->num_colors; i++) {
+		max_intensity = led_mc_get_max_intensity(mcled_cdev, i);
+		len += sysfs_emit_at(buf, len, "%u", max_intensity);
+		if (i < mcled_cdev->num_colors - 1)
+			len += sprintf(buf + len, " ");
+	}
+
+	buf[len++] = '\n';
+	return len;
+}
+static DEVICE_ATTR_RO(multi_max_intensity);
+
 static ssize_t multi_intensity_store(struct device *dev,
 				struct device_attribute *intensity_attr,
 				const char *buf, size_t size)
@@ -35,6 +74,7 @@ static ssize_t multi_intensity_store(struct device *dev,
 	struct led_classdev_mc *mcled_cdev = lcdev_to_mccdev(led_cdev);
 	int nrchars, offset = 0;
 	unsigned int intensity_value[LED_COLOR_ID_MAX];
+	unsigned int max_intensity;
 	int i;
 	ssize_t ret;
 
@@ -56,8 +96,10 @@ static ssize_t multi_intensity_store(struct device *dev,
 		goto err_out;
 	}
 
-	for (i = 0; i < mcled_cdev->num_colors; i++)
-		mcled_cdev->subled_info[i].intensity = intensity_value[i];
+	for (i = 0; i < mcled_cdev->num_colors; i++) {
+		max_intensity = led_mc_get_max_intensity(mcled_cdev, i);
+		mcled_cdev->subled_info[i].intensity = min(intensity_value[i], max_intensity);
+	}
 
 	if (!test_bit(LED_BLINK_SW, &led_cdev->work_flags))
 		led_set_brightness(led_cdev, led_cdev->brightness);
@@ -111,6 +153,7 @@ static ssize_t multi_index_show(struct device *dev,
 static DEVICE_ATTR_RO(multi_index);
 
 static struct attribute *led_multicolor_attrs[] = {
+	&dev_attr_multi_max_intensity.attr,
 	&dev_attr_multi_intensity.attr,
 	&dev_attr_multi_index.attr,
 	NULL,
diff --git a/drivers/leds/leds-lp50xx.c b/drivers/leds/leds-lp50xx.c
index e2a9c8592953..69c3550f1a31 100644
--- a/drivers/leds/leds-lp50xx.c
+++ b/drivers/leds/leds-lp50xx.c
@@ -525,6 +525,7 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
 			}
 
 			mc_led_info[multi_index].color_index = color_id;
+			mc_led_info[multi_index].max_intensity = 255;
 			num_colors++;
 		}
 
diff --git a/drivers/leds/rgb/leds-ncp5623.c b/drivers/leds/rgb/leds-ncp5623.c
index 85d6be6fff2b..f2528f06507d 100644
--- a/drivers/leds/rgb/leds-ncp5623.c
+++ b/drivers/leds/rgb/leds-ncp5623.c
@@ -56,8 +56,7 @@ static int ncp5623_brightness_set(struct led_classdev *cdev,
 	for (int i = 0; i < mc_cdev->num_colors; i++) {
 		ret = ncp5623_write(ncp->client,
 				    NCP5623_PWM_REG(mc_cdev->subled_info[i].channel),
-				    min(mc_cdev->subled_info[i].intensity,
-					NCP5623_MAX_BRIGHTNESS));
+				    mc_cdev->subled_info[i].intensity);
 		if (ret)
 			return ret;
 	}
@@ -190,6 +189,7 @@ static int ncp5623_probe(struct i2c_client *client)
 			goto release_led_node;
 
 		subled_info[ncp->mc_dev.num_colors].channel = reg;
+		subled_info[ncp->mc_dev.num_colors].max_intensity = NCP5623_MAX_BRIGHTNESS;
 		subled_info[ncp->mc_dev.num_colors++].color_index = color_index;
 	}
 
diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h
index db9f34c6736e..45469388bb1a 100644
--- a/include/linux/led-class-multicolor.h
+++ b/include/linux/led-class-multicolor.h
@@ -9,10 +9,31 @@
 #include <linux/leds.h>
 #include <dt-bindings/leds/common.h>
 
+/**
+ * struct mc_subled - Color component description.
+ * @color_index: Color ID.
+ * @brightness: Scaled intensity.
+ * @intensity: Current intensity.
+ * @max_intensity: Maximum supported intensity value.
+ * @channel: Channel index.
+ *
+ * Describes a color component of a multicolor LED. Many multicolor LEDs
+ * do not support global brightness control in hardware, so they use
+ * the brightness field in connection with led_mc_calc_color_components()
+ * to perform the intensity scaling in software.
+ * Such drivers should set max_intensity to 0 to signal the multicolor LED core
+ * that the maximum global brightness of the LED class device should be used for
+ * limiting incoming intensity values.
+ *
+ * Multicolor LEDs that do support global brightness control in hardware
+ * should instead set max_intensity to the maximum intensity value supported
+ * by the hardware for a given color component.
+ */
 struct mc_subled {
 	unsigned int color_index;
 	unsigned int brightness;
 	unsigned int intensity;
+	unsigned int max_intensity;
 	unsigned int channel;
 };
 
@@ -53,7 +74,14 @@ int led_classdev_multicolor_register_ext(struct device *parent,
  */
 void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev);
 
-/* Calculate brightness for the monochrome LED cluster */
+/**
+ * led_mc_calc_color_components() - Calculates component brightness values of a LED cluster.
+ * @mcled_cdev: Multicolor LED class device of the LED cluster.
+ * @brightness: Global brightness of the LED cluster.
+ *
+ * Calculates the brightness values for each color component of a monochrome LED cluster,
+ * see Documentation/leds/leds-class-multicolor.rst for details.
+ */
 int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
 				 enum led_brightness brightness);
 
-- 
2.39.5


^ permalink raw reply related

* Re: [PATCH v4 4/9] bus: mhi: Centralize firmware image table selection at probe time
From: Jeff Hugo @ 2026-04-09 20:52 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-4-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> The Sahara driver currently selects firmware image tables using
> scattered, device specific conditionals in the probe path, making the
> logic harder to  follow and extend.

"to follow" (remove extra space)

> Refactor firmware image table selection into a single, explicit probe-time
> mechanism by introducing a variant table that captures device matching,
> firmware image tables, firmware folder names, and streaming behavior in
> one place.
> 
> This centralizes device specific decisions, simplifies the probe logic,
> and avoids ad-hoc conditionals while preserving the existing behavior for
> all supported AIC devices.

It would probably be useful to mention this is in preparation for adding 
QDU100 support, otherwise this reads like it is just change for the sake 
of change.

> Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
> ---
>   drivers/bus/mhi/sahara/sahara.c | 66 ++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 58 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/bus/mhi/sahara/sahara.c b/drivers/bus/mhi/sahara/sahara.c
> index e3499977e7c6b53bc624a8eb00d0636f2ea63307..8f1c0d72066c0cf80c09d78bfc51df2e482133b9 100644
> --- a/drivers/bus/mhi/sahara/sahara.c
> +++ b/drivers/bus/mhi/sahara/sahara.c
> @@ -180,6 +180,16 @@ struct sahara_context {
>   	u32				read_data_length;
>   	bool				is_mem_dump_mode;
>   	bool				non_streaming;
> +	const char			*fw_folder;
> +};
> +
> +struct sahara_variant {
> +	const char *match;
> +	bool match_is_chan;

This is dead code, add it later on when it gets used.

> +	const char * const *image_table;
> +	size_t table_size;
> +	const char *fw_folder;

This is dead code, add it later on when it gets used.

> +	bool non_streaming;

Please run pahole on this structure. With the interleaving of types for 
every other member, I'm expecting there would be quite a bit of compiler 
added padding.



^ permalink raw reply

* Re: [PATCH v2 00/16] fs,x86/resctrl: Add kernel-mode (e.g., PLZA) support to the resctrl subsystem
From: Reinette Chatre @ 2026-04-09 20:50 UTC (permalink / raw)
  To: Moger, Babu, Babu Moger, corbet@lwn.net, tony.luck@intel.com,
	Dave.Martin@arm.com, james.morse@arm.com, tglx@kernel.org,
	mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com
  Cc: skhan@linuxfoundation.org, x86@kernel.org, hpa@zytor.com,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
	rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
	vschneid@redhat.com, kas@kernel.org, rick.p.edgecombe@intel.com,
	akpm@linux-foundation.org, pmladek@suse.com,
	rdunlap@infradead.org, dapeng1.mi@linux.intel.com,
	kees@kernel.org, elver@google.com, paulmck@kernel.org,
	lirongqing@baidu.com, safinaskar@gmail.com, fvdl@google.com,
	seanjc@google.com, pawan.kumar.gupta@linux.intel.com,
	xin@zytor.com, tiala@microsoft.com, chang.seok.bae@intel.com,
	Lendacky, Thomas, elena.reshetova@intel.com,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-coco@lists.linux.dev, kvm@vger.kernel.org,
	eranian@google.com, peternewman@google.com
In-Reply-To: <73c46024-4cf2-4f03-9268-d4378825fa87@amd.com>

Hi Babu,

On 4/9/26 11:05 AM, Moger, Babu wrote:
> On 4/9/2026 12:26 PM, Reinette Chatre wrote:
>> On 4/9/26 10:19 AM, Moger, Babu wrote:
>>> On 4/8/2026 6:41 PM, Reinette Chatre wrote:
>>
>>>> When the user switches to either "global_assign_ctrl_inherit_mon_per_cpu" or
>>>> "global_assign_ctrl_assign_mon_per_cpu" then "info/kernel_mode_assignment" is created
>>>> (or made visible to user space) and is expected to point to default group.
>>>> User can change the group using "info/kernel_mode_assignment" at this point.
>>>>
>>>> If the current scenario is below ...
>>>>      # cat info/kernel_mode
>>>>      [global_assign_ctrl_inherit_mon_per_cpu]
>>>>      inherit_ctrl_and_mon
>>>>      global_assign_ctrl_assign_mon_per_cpu
>>>>
>>>> ... then "info/kernel_mode_assignment" will exist but what it should contain if
>>>> user switches mode at this point may be up for discussion.
>>>>
>>>> option 1)
>>>> When user switches mode to "global_assign_ctrl_assign_mon_per_cpu" then
>>>> the resource group in "info/kernel_mode_assignment" is reset to the
>>>> default group and all CPUs PLZA state reset to match. The kernel_mode_cpus
>>>> and kernel_mode_cpuslist files become visible in default resource group
>>>> and they contain "all online CPUs".
>>>>
>>>> option 2)
>>>> When user switches mode to "global_assign_ctrl_assign_mon_per_cpu" then
>>>> the resource group in "info/kernel_mode_assignment" is kept and all
>>>> CPUs PLZA state set to match it while also keeping the current
>>>> values of that resource group's kernel_mode_cpus and kernel_mode_cpuslist
>>>> files.
>>>>
>>>> I am leaning towards "option 1" to keep it consistent with a switch from
>>>> "inherit_ctrl_and_mon" and being deterministic about how a mode is started with
>>>
>>> Yes. The "option 1" seems appropriate.
>>>
>>>> a clean slate. What are your thoughts? What would be use case where a user would
>>>> want to switch between "global_assign_ctrl_inherit_mon_per_cpu" and
>>>> "global_assign_ctrl_assign_mon_per_cpu" to just switch rmid_en on and off?
>>>
>>>
>>> This is a bit tricky.
>>>
>>> Currently, our requirement is to have a CTRL_MON group for
>>> global_assign_ctrl_inherit_mon_per_cpu. In this scenario, we use the
>>> group’s CLOSID for PLZA configuration, and RMID is not used (rmid_en
>>> = 0) when setting up PLZA.
>>>
>>> Our requirement is also to have a CTRL_MON/MON group for
>>> global_assign_ctrl_assign_mon_per_cpu. In this case as well, the
>>> group’s CLOSID and RMID (rmid_en = 1) are both used to configure PLZA.
>>
>> ah, right. Good catch.
>>
>>>
>>> Actually, we should not allow these changes from
>>> global_assign_ctrl_inherit_mon_per_cpu  to
>>> global_assign_ctrl_assign_mon_per_cpu or vice versa.
>>
>> resctrl could allow it but as part of the switch it resets the "kernel mode group" to
>> be the default group every time? This would be the "option 1" above.
> 
> Other options.
> 
> Allow global_assign_ctrl_inherit_mon_per_cpu -> global_assign_ctrl_assign_mon_per_cpu. As part of the switch, reset the "kernel mode group" to the default group.
> 
> Allow global_assign_ctrl_assign_mon_per_cpu -> global_assign_ctrl_inherit_mon_per_cpu. In this case switch
> to CTRL_MON/MON -> CTRL_MON.
> 

ok. Could you please return the courtesy of providing feedback on the
suggestion you are responding to and also include the motivation why your
suggestion is the better option? 

Reinette

^ permalink raw reply

* Re: [PATCH v9 02/10] x86/bhi: Make clear_bhb_loop() effective on newer CPUs
From: Dave Hansen @ 2026-04-09 20:36 UTC (permalink / raw)
  To: Jim Mattson
  Cc: Pawan Gupta, x86, Jon Kohler, Nikolay Borisov, H. Peter Anvin,
	Josh Poimboeuf, David Kaplan, Sean Christopherson,
	Borislav Petkov, Dave Hansen, Peter Zijlstra, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko, KP Singh, Jiri Olsa,
	David S. Miller, David Laight, Andy Lutomirski, Thomas Gleixner,
	Ingo Molnar, David Ahern, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, John Fastabend, Stanislav Fomichev,
	Hao Luo, Paolo Bonzini, Jonathan Corbet, linux-kernel, kvm,
	Asit Mallick, Tao Zhang, bpf, netdev, linux-doc, chao.gao
In-Reply-To: <CALMp9eRfNsghM_RnDXOs=SJYObfPa5A1aOVDZno_zJ=XotfmRw@mail.gmail.com>

On 4/7/26 17:47, Jim Mattson wrote:
> On Tue, Apr 7, 2026 at 4:41 PM Dave Hansen <dave.hansen@intel.com> wrote:
>> On 4/7/26 16:27, Jim Mattson wrote:
>>> What is your proposed BHI_DIS_S override mechanism, then?
>> Let me make sure I get this right. The desire is to:
>>
>> 1. Have hypervisors lie to guests about the CPU they are running on (for
>>    the benefit of large/diverse migration pools)
>> 2. Have guests be allowed to boot with BHI_DIS_S for performance
>> 3. Have apps in those guests that care about security to opt back in to
>>    BHI_DIS_S for themselves?
> I just want guests on heterogeneous migration pools to properly
> protect themselves from native BHI when running on host kernels at
> least as far back as Linux v6.6.
> 
> To that end, I would be satisfied with using the longer BHB clearing
> sequence when HYPERVISOR is true and BHI_CTRL is false.

If the guests can't get mitigation information from model/family because
the hypervisor is lying (or may lie), then it's on the hypervisor to
figure it out.

I'm not sure we want to just assume that all hypervisors are going to
lie all the time about this.

I kinda think we should just let Pawan's series move forward and then we
can debate the lying hypervisor problem once the series is settled.

^ permalink raw reply

* Re: [PATCH v4 3/9] bus: mhi: Match devices exposing the protocol on the SAHARA channel
From: Jeff Hugo @ 2026-04-09 20:23 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-3-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> Some Qualcomm devices expose the Sahara protocol on a generic SAHARA MHI
> channel rather than a QAIC specific channel name. As a result, the sahara

"Sahara"

> driver does not currently bind to such devices and never probes.
> 
> Extend the MHI device ID match table to also match the SAHARA channel
> name. This allows the Sahara protocol driver to bind to devices that
> expose the protocol directly on a standard sahara MHI channel.

"Sahara"

> 
> Signed-off-by: Kishore Batta <kishore.batta@oss.qualcomm.com>
> ---
>   drivers/bus/mhi/sahara/sahara.c | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/bus/mhi/sahara/sahara.c b/drivers/bus/mhi/sahara/sahara.c
> index 8ff7b6425ac5423ef8f32117151dca10397686a8..e3499977e7c6b53bc624a8eb00d0636f2ea63307 100644
> --- a/drivers/bus/mhi/sahara/sahara.c
> +++ b/drivers/bus/mhi/sahara/sahara.c
> @@ -911,8 +911,10 @@ static void sahara_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result
>   
>   static const struct mhi_device_id sahara_mhi_match_table[] = {
>   	{ .chan = "QAIC_SAHARA", },
> +	{ .chan = "SAHARA"},

This doesn't work and breaks bisect. At this point Sahara will bind to 
QDU100, ath12k, or something else but not know how to drive one of those 
devices.

Just add this when you add QDU100 support, which looks to be patch 5.

>   	{},
>   };
> +MODULE_DEVICE_TABLE(mhi, sahara_mhi_match_table);
>   
>   static struct mhi_driver sahara_mhi_driver = {
>   	.id_table = sahara_mhi_match_table,
> 


^ permalink raw reply

* Re: [PATCH v4 2/9] bus: mhi: Move sahara protocol driver under drivers/bus/mhi
From: Jeff Hugo @ 2026-04-09 20:20 UTC (permalink / raw)
  To: Kishore Batta, Jonathan Corbet, Shuah Khan, Carl Vanderlip,
	Oded Gabbay, Manivannan Sadhasivam, andersson
  Cc: linux-doc, linux-kernel, linux-arm-msm, dri-devel, mhi
In-Reply-To: <20260319-sahara_protocol_new_v2-v4-2-47ad79308762@oss.qualcomm.com>

On 3/19/2026 12:31 AM, Kishore Batta wrote:
> The Sahara protocol driver is currently located under the QAIC
> accelerator subsystem even though the protocol itself is transported over the
> MHI bus and is used by multiple Qualcomm flashless devices.
> 
> Relocate the Sahara protocol driver to drivers/bus/mhi and register it as
> an independent MHI protocol driver. This avoids treating Sahara as QAIC
> specific and makes it available for reuse by other MHI based devices.
> 
> As part of this move, introduce a dedicated Kconfig and Makefile under the
> MHI subsystem and expose the sahara interface via a common header.

I don't think this belongs under MHI. Mani needs to confirm that he 
agrees with the concept of moving this there.

The Sahara protocol as defined by the spec does not require MHI. We know 
that there are Sahara implementations over USB. I don't see a dependency 
or relationship to MHI other than the current in-kernel implementation 
uses MHI, but there are plenty of things that use MHI (qaic, mhi-net, 
ath12k, etc) which are not a part of the MHI bus.

The implementation presented in this series is not well integrated into 
MHI, which also suggests to me that it doesn't belong there. The 
Documentation is not integrated with MHI (which I mentioned over on that 
patch) and I see the header file (sahara.h) is also not integrated.

> diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
> index 63fb8c7b4abcbe4f1b76c32106f4e8b9ea5e2c8e..76cc8086825e7949ed756d51fcb56a08f392d228 100644
> --- a/drivers/accel/qaic/qaic_drv.c
> +++ b/drivers/accel/qaic/qaic_drv.c
> @@ -15,6 +15,7 @@
>   #include <linux/msi.h>
>   #include <linux/mutex.h>
>   #include <linux/pci.h>
> +#include <linux/sahara.h>

What do we need this for? register()/unregister() get removed.

>   #include <linux/spinlock.h>
>   #include <linux/workqueue.h>
>   #include <linux/wait.h>
> @@ -32,7 +33,6 @@
>   #include "qaic_ras.h"
>   #include "qaic_ssr.h"
>   #include "qaic_timesync.h"
> -#include "sahara.h"
>   
>   MODULE_IMPORT_NS("DMA_BUF");
>   
> @@ -782,18 +782,12 @@ static int __init qaic_init(void)
>   	ret = pci_register_driver(&qaic_pci_driver);
>   	if (ret) {
>   		pr_debug("qaic: pci_register_driver failed %d\n", ret);
> -		return ret;
> +		goto free_pci;

This is wrong, and there should not be a change here.

>   
>   	ret = mhi_driver_register(&qaic_mhi_driver);
>   	if (ret) {
>   		pr_debug("qaic: mhi_driver_register failed %d\n", ret);
> -		goto free_pci;
> -	}
> -
> -	ret = sahara_register();
> -	if (ret) {
> -		pr_debug("qaic: sahara_register failed %d\n", ret);
>   		goto free_mhi;

This is also wrong

>   
> @@ -847,7 +841,6 @@ static void __exit qaic_exit(void)
>   	qaic_ras_unregister();
>   	qaic_bootlog_unregister();
>   	qaic_timesync_deinit();
> -	sahara_unregister();
>   	mhi_driver_unregister(&qaic_mhi_driver);
>   	pci_unregister_driver(&qaic_pci_driver);
>   }
> diff --git a/drivers/bus/mhi/Kconfig b/drivers/bus/mhi/Kconfig
> index b39a11e6c624ba00349cca22d74bd876020590ab..4acedb886adccc6f76f69c241d53106da59b491f 100644
> --- a/drivers/bus/mhi/Kconfig
> +++ b/drivers/bus/mhi/Kconfig
> @@ -7,3 +7,4 @@
>   
>   source "drivers/bus/mhi/host/Kconfig"
>   source "drivers/bus/mhi/ep/Kconfig"
> +source "drivers/bus/mhi/sahara/Kconfig"
> diff --git a/drivers/bus/mhi/Makefile b/drivers/bus/mhi/Makefile
> index 354204b0ef3ae4030469a24a659f32429d592aef..e4af535e1bb1bc9481fae60d7eb347700d2e874c 100644
> --- a/drivers/bus/mhi/Makefile
> +++ b/drivers/bus/mhi/Makefile
> @@ -3,3 +3,6 @@ obj-$(CONFIG_MHI_BUS) += host/
>   
>   # Endpoint MHI stack
>   obj-$(CONFIG_MHI_BUS_EP) += ep/
> +
> +# Sahara MHI protocol
> +obj-$(CONFIG_MHI_SAHARA) += sahara/
> diff --git a/drivers/bus/mhi/sahara/Kconfig b/drivers/bus/mhi/sahara/Kconfig
> new file mode 100644
> index 0000000000000000000000000000000000000000..3f1caf6acd979a4af68aaf0e250aa54762e8cda5
> --- /dev/null
> +++ b/drivers/bus/mhi/sahara/Kconfig
> @@ -0,0 +1,18 @@
> +config MHI_SAHARA
> +	tristate
> +	depends on MHI_BUS
> +	select FW_LOADER_COMPRESS

Why are we selecting this? I don't see anyone else doing this. Sahara 
should work with and without firmware compression.

> +	select FW_LOADER_COMPRESS_XZ
> +	select FW_LOADER_COMPRESS_ZSTD
> +	help
> +	  Enable support for the Sahara protocol transported over the MHI bus.
> +
> +	  The Sahara protocol is used to transfer firmware images, retrieve
> +	  memory dumps and exchange command mode DDR calibration data between
> +	  host and device. This driver is not tied to a specific SoC and may be
> +	  used by multiple MHI based devices.
> +
> +	  If unsure, say N.
> +
> +	  To compile this driver as a module, choose M here: the module will be
> +	  called mhi_sahara.
> diff --git a/drivers/bus/mhi/sahara/Makefile b/drivers/bus/mhi/sahara/Makefile
> new file mode 100644
> index 0000000000000000000000000000000000000000..fc02a25935011cbd7138ea8f24b88cf5b032a4ce
> --- /dev/null
> +++ b/drivers/bus/mhi/sahara/Makefile
> @@ -0,0 +1,2 @@
> +obj-$(CONFIG_MHI_SAHARA) += mhi_sahara.o
> +mhi_sahara-y := sahara.o
> diff --git a/drivers/accel/qaic/sahara.c b/drivers/bus/mhi/sahara/sahara.c
> similarity index 99%
> rename from drivers/accel/qaic/sahara.c
> rename to drivers/bus/mhi/sahara/sahara.c
> index fd3c3b2d1fd3bb698809e6ca669128e2dce06613..8ff7b6425ac5423ef8f32117151dca10397686a8 100644
> --- a/drivers/accel/qaic/sahara.c
> +++ b/drivers/bus/mhi/sahara/sahara.c
> @@ -1,6 +1,8 @@
> -// SPDX-License-Identifier: GPL-2.0-only
> -
> -/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
> + *
> + */

What makes you think that changing the copyright markings is appropriate
when moving a file?

Furthermore, I wrote this code from scratch based on the spec document 
and therefore know beyond a doubt that this file did not exist prior to 
2024, so what you are changing the markings to is completely invalid.

Also the SPDX marking you are using is long deprecated and should not be 
used.

>   #include <linux/devcoredump.h>
>   #include <linux/firmware.h>
> @@ -9,12 +11,11 @@
>   #include <linux/minmax.h>
>   #include <linux/mod_devicetable.h>
>   #include <linux/overflow.h>
> +#include <linux/sahara.h>
>   #include <linux/types.h>
>   #include <linux/vmalloc.h>
>   #include <linux/workqueue.h>
>   
> -#include "sahara.h"
> -
>   #define SAHARA_HELLO_CMD		0x1  /* Min protocol version 1.0 */
>   #define SAHARA_HELLO_RESP_CMD		0x2  /* Min protocol version 1.0 */
>   #define SAHARA_READ_DATA_CMD		0x3  /* Min protocol version 1.0 */
> @@ -928,8 +929,13 @@ int sahara_register(void)
>   {
>   	return mhi_driver_register(&sahara_mhi_driver);
>   }
> +module_init(sahara_register);
>   
>   void sahara_unregister(void)
>   {
>   	mhi_driver_unregister(&sahara_mhi_driver);
>   }
> +module_exit(sahara_unregister);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("Qualcomm Sahara MHI protocol driver");
> diff --git a/drivers/accel/qaic/sahara.h b/include/linux/sahara.h
> similarity index 100%
> rename from drivers/accel/qaic/sahara.h
> rename to include/linux/sahara.h
> 


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox