* Re: [PATCH] mm/rmap: use huge_ptep_get() in try_to_unmap_one()
From: Dev Jain @ 2026-06-25 6:59 UTC (permalink / raw)
To: kernel test robot, akpm, david, ljs
Cc: llvm, oe-kbuild-all, riel, liam, vbabka, harry, jannh, kas,
linux-mm, linux-kernel, ryan.roberts, anshuman.khandual, stable
In-Reply-To: <202606251341.jfIr1D7m-lkp@intel.com>
On 25/06/26 11:15 am, kernel test robot wrote:
> Hi Dev,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on akpm-mm/mm-everything]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Dev-Jain/mm-rmap-use-huge_ptep_get-in-try_to_unmap_one/20260625-123050
> base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link: https://lore.kernel.org/r/20260625042853.2752898-1-dev.jain%40arm.com
> patch subject: [PATCH] mm/rmap: use huge_ptep_get() in try_to_unmap_one()
> config: hexagon-allnoconfig (https://download.01.org/0day-ci/archive/20260625/202606251341.jfIr1D7m-lkp@intel.com/config)
> compiler: clang version 23.0.0git (https://github.com/llvm/llvm-project 6cc609bb250b21b47fc7d394b4019101e9983597)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260625/202606251341.jfIr1D7m-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202606251341.jfIr1D7m-lkp@intel.com/
>
> All errors (new ones prefixed by >>):
>
>>> mm/rmap.c:2100:13: error: call to undeclared function 'huge_ptep_get'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
> 2100 | pteval = huge_ptep_get(mm, address, pvmw.pte);
> | ^
>>> mm/rmap.c:2100:11: error: assigning to 'pte_t' from incompatible type 'int'
> 2100 | pteval = huge_ptep_get(mm, address, pvmw.pte);
> | ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 2 errors generated.
Weird that I need a stub. This should do:
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2abaf99321e90..4661f88eee55b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1261,6 +1261,16 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}
+static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+#ifdef CONFIG_MMU
+ return ptep_get(ptep);
+#else
+ return *ptep;
+#endif
+}
+
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
diff --git a/mm/rmap.c b/mm/rmap.c
index 1c77d5dc06e9f..aa8a254efaecc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2095,11 +2095,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_FOLIO(!pvmw.pte, folio);
- /*
- * Handle PFN swap PTEs, such as device-exclusive ones, that
- * actually map pages.
- */
- pteval = ptep_get(pvmw.pte);
+ address = pvmw.address;
+ if (folio_test_hugetlb(folio)) {
+ pteval = huge_ptep_get(mm, address, pvmw.pte);
+ } else {
+ /*
+ * Handle PFN swap PTEs, such as device-exclusive ones,
+ * that actually map pages.
+ */
+ pteval = ptep_get(pvmw.pte);
+ }
if (likely(pte_present(pteval))) {
pfn = pte_pfn(pteval);
} else {
@@ -2110,7 +2115,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
}
subpage = folio_page(folio, pfn - folio_pfn(folio));
- address = pvmw.address;
anon_exclusive = folio_test_anon(folio) &&
PageAnonExclusive(subpage);
^ permalink raw reply related
* [PATCH v2 net 3/3] net: udp_tunnel: use atomic bitops for missed bitmap
From: Eric Dumazet @ 2026-06-25 6:59 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Yue Sun, Stanislav Fomichev, netdev, eric.dumazet,
Eric Dumazet
In-Reply-To: <20260625065938.654652-1-edumazet@google.com>
The 'missed' bitmap in struct udp_tunnel_nic can be accessed
concurrently:
- Writes (__set_bit) happen in the port add path (add_port), which
holds the RTNL lock.
- Reads (checking if missed is non-zero) happen in the reset path
(reset_ntf) via __udp_tunnel_nic_device_sync(), which holds
utn->lock but does not hold RTNL after the blamed commit.
This setup creates a data race between concurrent writes and reads
on different CPUs. Fix this by using atomic set_bit() for writes,
READ_ONCE() for the fast-path read, and WRITE_ONCE() for clearing
the bitmap.
Fixes: 1ead7501094c ("udp_tunnel: remove rtnl_lock dependency")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/udp_tunnel_nic.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 840be5d79fc0ac3142049dcb9f1105a5844da9ae..9a567a87635caaf76f5b88029a7f28a65c795efc 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -147,7 +147,7 @@ udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
const struct udp_tunnel_nic_table_info *table;
unsigned int i, j;
- if (!utn->missed)
+ if (!READ_ONCE(utn->missed))
return false;
for (i = 0; i < utn->n_tables; i++) {
@@ -353,7 +353,7 @@ udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
if (!udp_tunnel_nic_entry_is_free(entry) &&
entry->port == ti->port &&
entry->type != ti->type) {
- __set_bit(i, &utn->missed);
+ set_bit(i, &utn->missed);
return true;
}
}
@@ -488,7 +488,7 @@ udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
* are no devices currently which have multiple tables accepting
* the same tunnel type, and false positives are okay.
*/
- __set_bit(i, &utn->missed);
+ set_bit(i, &utn->missed);
}
return false;
@@ -718,7 +718,7 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
for (i = 0; i < utn->n_tables; i++)
for (j = 0; j < info->tables[i].n_entries; j++)
udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
- utn->missed = 0;
+ WRITE_ONCE(utn->missed, 0);
clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
if (!info->shared) {
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related
* [PATCH v2 net 2/3] net: udp_tunnel: convert state flags to atomic bitops
From: Eric Dumazet @ 2026-06-25 6:59 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Yue Sun, Stanislav Fomichev, netdev, eric.dumazet,
Eric Dumazet
In-Reply-To: <20260625065938.654652-1-edumazet@google.com>
The state flags of struct udp_tunnel_nic (need_sync, need_replay,
work_pending) are currently bitfields sharing a single byte.
These flags can be modified concurrently from different contexts:
- RTNL-locked paths (like add_port/del_port) write to need_sync and
work_pending.
- The RTNL-less reset path (reset_ntf, used by netdevsim) writes to
need_sync and need_replay under utn->lock.
Since they share a byte, concurrent writes are compiled into non-atomic
Read-Modify-Write (RMW) operations that can corrupt each other. For
example, a write to need_replay in reset_ntf can overwrite and clear
work_pending, defeating the double-queueing prevention and causing UAF.
Fix this by converting these state flags to atomic bitops, ensuring
safe concurrent writes across RTNL-locked and RTNL-less paths.
Fixes: 1ead7501094c ("udp_tunnel: remove rtnl_lock dependency")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/udp_tunnel_nic.c | 43 ++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 3b32a0afa9798d3c416d9ae570e6d529f70e6697..840be5d79fc0ac3142049dcb9f1105a5844da9ae 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -30,9 +30,7 @@ struct udp_tunnel_nic_table_entry {
* @work: async work for talking to hardware from process context
* @dev: netdev pointer
* @lock: protects all fields
- * @need_sync: at least one port start changed
- * @need_replay: space was freed, we need a replay of all ports
- * @work_pending: @work is currently scheduled
+ * @flags: sync, replay, pending flags
* @n_tables: number of tables under @entries
* @missed: bitmap of tables which overflown
* @entries: table of tables of ports currently offloaded
@@ -44,9 +42,10 @@ struct udp_tunnel_nic {
struct mutex lock;
- u8 need_sync:1;
- u8 need_replay:1;
- u8 work_pending:1;
+ unsigned long flags;
+#define UDP_TUNNEL_NIC_NEED_SYNC 0
+#define UDP_TUNNEL_NIC_NEED_REPLAY 1
+#define UDP_TUNNEL_NIC_WORK_PENDING 2
unsigned int n_tables;
unsigned long missed;
@@ -116,7 +115,7 @@ udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
unsigned int flag)
{
entry->flags |= flag;
- utn->need_sync = 1;
+ set_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
}
static void
@@ -283,7 +282,7 @@ udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
static void
__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
- if (!utn->need_sync)
+ if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
return;
if (dev->udp_tunnel_nic_info->sync_table)
@@ -291,21 +290,27 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
else
udp_tunnel_nic_device_sync_by_port(dev, utn);
- utn->need_sync = 0;
+ clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
/* Can't replay directly here, in case we come from the tunnel driver's
* notification - trying to replay may deadlock inside tunnel driver.
*/
- utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
+ if (udp_tunnel_nic_should_replay(dev, utn))
+ set_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
+ else
+ clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
}
static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
- if (!utn->need_sync || utn->work_pending)
+ if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
+ return;
+
+ if (test_bit(UDP_TUNNEL_NIC_WORK_PENDING, &utn->flags))
return;
queue_work(udp_tunnel_nic_workqueue, &utn->work);
- utn->work_pending = 1;
+ set_bit(UDP_TUNNEL_NIC_WORK_PENDING, &utn->flags);
}
static bool
@@ -552,7 +557,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
mutex_lock(&utn->lock);
- utn->need_sync = false;
+ clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
for (i = 0; i < utn->n_tables; i++)
for (j = 0; j < info->tables[i].n_entries; j++) {
struct udp_tunnel_nic_table_entry *entry;
@@ -696,8 +701,8 @@ udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
for (i = 0; i < utn->n_tables; i++)
memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
sizeof(**utn->entries)));
- WARN_ON(utn->need_sync);
- utn->need_replay = 0;
+ WARN_ON(test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags));
+ clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
}
static void
@@ -714,7 +719,7 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
for (j = 0; j < info->tables[i].n_entries; j++)
udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
utn->missed = 0;
- utn->need_replay = 0;
+ clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
if (!info->shared) {
udp_tunnel_get_rx_info(dev);
@@ -736,10 +741,10 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
rtnl_lock();
mutex_lock(&utn->lock);
- utn->work_pending = 0;
+ clear_bit(UDP_TUNNEL_NIC_WORK_PENDING, &utn->flags);
__udp_tunnel_nic_device_sync(utn->dev, utn);
- if (utn->need_replay)
+ if (test_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags))
udp_tunnel_nic_replay(utn->dev, utn);
mutex_unlock(&utn->lock);
@@ -904,7 +909,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
/* Wait for the work to be done using the state, netdev core will
* retry unregister until we give up our reference on this device.
*/
- if (utn->work_pending)
+ if (test_bit(UDP_TUNNEL_NIC_WORK_PENDING, &utn->flags))
return;
udp_tunnel_nic_free(utn);
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related
* [PATCH v2 net 1/3] net: udp_tunnel: prevent double queueing in udp_tunnel_nic_device_sync
From: Eric Dumazet @ 2026-06-25 6:59 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Yue Sun, Stanislav Fomichev, netdev, eric.dumazet,
Eric Dumazet
In-Reply-To: <20260625065938.654652-1-edumazet@google.com>
Yue Sun reported a use-after-free and debugobjects warning in
udp_tunnel_nic_device_sync_work() during concurrent device operations.
The workqueue core clears the internal pending bit before invoking the
worker. At that point, a concurrent thread can queue the work again.
When the already running worker eventually clears the work_pending flag
to 0, it mistakenly clears the flag for the newly queued instance.
udp_tunnel_nic_unregister() then observes work_pending as 0 and frees
the structure while the second work item is still active in the queue,
leading to UAF.
Fix this by returning early in udp_tunnel_nic_device_sync() if
work_pending is already set, preventing redundant work queueing.
Fixes: cc4e3835eff4 ("udp_tunnel: add central NIC RX port offload infrastructure")
Reported-by: Yue Sun <samsun1006219@gmail.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/udp_tunnel_nic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 9944ed923ddfd10f9adf6ad788c0740daeaf2adb..3b32a0afa9798d3c416d9ae570e6d529f70e6697 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -301,7 +301,7 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
- if (!utn->need_sync)
+ if (!utn->need_sync || utn->work_pending)
return;
queue_work(udp_tunnel_nic_workqueue, &utn->work);
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply related
* [PATCH v2 net 0/3] net: udp_tunnel: fix races and use-after-free
From: Eric Dumazet @ 2026-06-25 6:59 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Yue Sun, Stanislav Fomichev, netdev, eric.dumazet,
Eric Dumazet
Yue Sun reported a use-after-free and debugobjects warning in
udp_tunnel_nic_device_sync_work() when concurrently creating and
destroying netdevsim and geneve devices.
This series resolves the UAF and the underlying data races that
make the fix vulnerable.
The core issue is a workqueue re-queue race combined with data races
introduced by the lock-splitting in commit 1ead7501094c ("udp_tunnel:
remove rtnl_lock dependency"). That commit allowed the device reset
path (reset_ntf) to run without holding the RTNL lock (using only
utn->lock), while the port addition paths (add_port) still run under
RTNL without acquiring utn->lock.
This series fixes these issues in three steps:
1. Patch 1 (Jakub's fix) addresses the UAF by preventing double-queueing
of the sync work. If work_pending is already set, we return early
in device_sync(), blocking a second work item from entering the
queue while the first is blocked on RTNL.
2. Patch 2 converts the state flags (need_sync, need_replay, work_pending)
from bitfields to atomic bitops. Because these flags share a single
byte, concurrent RMW writes from the RTNL-locked path and the RTNL-less
reset path corrupt the byte. This corruption could clear work_pending,
defeating the UAF fix.
3. Patch 3 fixes a similar data race on the 'missed' bitmap. Writes
(__set_bit) happen under RTNL, while reads (should_replay) happen
under utn->lock without RTNL. We convert this to use atomic set_bit(),
READ_ONCE() for the fast-path read, and WRITE_ONCE() for clearing.
Reported-by: Yue Sun <samsun1006219@gmail.com>
Eric Dumazet (3):
net: udp_tunnel: prevent double queueing in udp_tunnel_nic_device_sync
net: udp_tunnel: convert state flags to atomic bitops
net: udp_tunnel: use atomic bitops for missed bitmap
net/ipv4/udp_tunnel_nic.c | 51 +++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 23 deletions(-)
--
2.55.0.rc0.799.gd6f94ed593-goog
^ permalink raw reply
* Re: [PATCH v8 33/46] KVM: selftests: Test conversion precision in guest_memfd
From: Fuad Tabba @ 2026-06-25 6:57 UTC (permalink / raw)
To: ackerleytng
Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-33-9d2959357853@google.com>
On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> The existing guest_memfd conversion tests only use single-page memory
> regions. This provides no coverage for multi-page guest_memfd objects,
> specifically whether KVM correctly handles the page index for conversion
> operations. An incorrect implementation could, for example, always operate
> on the first page regardless of the index provided.
>
> Add a new test case to verify that conversions between private and shared
> memory correctly target the specified page within a multi-page guest_memfd.
>
> This test also verifies the precision of memory conversions by converting a
> single page and then iterating through all other pages to ensure they remain in
> their original state.
>
> To support this test, add a new GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED
> macro that handles setting up and tearing down the VM for each page
> iteration. The teardown logic is adjusted to prevent a double-free in this
> new scenario.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> ---
> .../kvm/x86/guest_memfd_conversions_test.c | 66 ++++++++++++++++++++++
> 1 file changed, 66 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index 5b070d3374eae..8e17d5c08aeb8 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -61,8 +61,13 @@ static void gmem_conversions_do_setup(test_data_t *t, int nr_pages,
>
> static void gmem_conversions_do_teardown(test_data_t *t)
> {
> + /* Use NULL to avoid second free in FIXTURE_TEARDOWN (multipage tests). */
> + if (!t->vcpu)
> + return;
> +
> /* No need to close gmem_fd, it's owned by the VM structure. */
> kvm_vm_free(t->vcpu->vm);
> + t->vcpu = NULL;
> }
>
> FIXTURE_TEARDOWN(gmem_conversions)
> @@ -101,6 +106,29 @@ static void __gmem_conversions_##test(test_data_t *t, int nr_pages) \
> #define GMEM_CONVERSION_TEST_INIT_SHARED(test) \
> __GMEM_CONVERSION_TEST_INIT_SHARED(test, 1)
>
> +/*
> + * Repeats test over nr_pages in a guest_memfd of size nr_pages, providing each
> + * test iteration with test_page, the index of the page under test in
> + * guest_memfd. test_page takes values 0..(nr_pages - 1) inclusive.
> + */
> +#define GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(test, __nr_pages) \
> +static void __gmem_conversions_multipage_##test(test_data_t *t, int nr_pages, \
> + const int test_page); \
> + \
> +TEST_F(gmem_conversions, test) \
> +{ \
> + const u64 flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED; \
> + int i; \
> + \
> + for (i = 0; i < __nr_pages; ++i) { \
> + gmem_conversions_do_setup(self, __nr_pages, flags); \
> + __gmem_conversions_multipage_##test(self, __nr_pages, i); \
> + gmem_conversions_do_teardown(self); \
> + } \
> +} \
> +static void __gmem_conversions_multipage_##test(test_data_t *t, int nr_pages, \
> + const int test_page)
> +
> struct guest_check_data {
> void *mem;
> char expected_val;
> @@ -199,6 +227,44 @@ GMEM_CONVERSION_TEST_INIT_SHARED(init_shared)
> test_convert_to_shared(t, 0, 'C', 'D', 'E');
> }
>
> +GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(indexing, 4)
> +{
> + int i;
> +
> + /* Get a char that varies with both i and n. */
> +#define combine(x, n) ((x << 4) + (n))
> +#define i_(n) (combine(i, n))
> +#define t_(n) (combine(test_page, n))
> +
> + /*
> + * Start with the highest index, to catch any errors when, perhaps, the
> + * first page is returned even for the last index.
> + */
> + for (i = nr_pages - 1; i >= 0; --i)
> + test_shared(t, i, 0, i_(0), i_(2));
> +
> + test_convert_to_private(t, test_page, t_(2), t_(3));
> +
> + for (i = 0; i < nr_pages; ++i) {
> + if (i == test_page)
> + test_private(t, test_page, t_(3), t_(4));
> + else
> + test_shared(t, i, i_(2), i_(3), i_(4));
> + }
> +
> + test_convert_to_shared(t, test_page, t_(4), t_(5), t_(6));
> +
> + for (i = 0; i < nr_pages; ++i) {
> + char expected = i == test_page ? t_(6) : i_(4);
> +
> + test_shared(t, i, expected, i_(7), i_(8));
> + }
> +
> +#undef t_
> +#undef i_
> +#undef combine
> +}
> +
> int main(int argc, char *argv[])
> {
> TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>
^ permalink raw reply
* Re: [PATCH v3 1/2] iio: imu: inv_icm42600: reorder includes for buffer module
From: Andy Shevchenko @ 2026-06-25 6:57 UTC (permalink / raw)
To: jean-baptiste.maneyrol
Cc: Jonathan Cameron, David Lechner, Nuno Sá, Andy Shevchenko,
linux-iio, linux-kernel
In-Reply-To: <ajzQ_FUr--jTxYWr@ashevche-desk.local>
On Thu, Jun 25, 2026 at 09:56:01AM +0300, Andy Shevchenko wrote:
> On Thu, Jun 25, 2026 at 09:49:47AM +0300, Andy Shevchenko wrote:
> > On Wed, Jun 24, 2026 at 06:21:18PM +0200, Jean-Baptiste Maneyrol via B4 Relay wrote:
> >
> > > Reorder includes following rules and delete unneeded kernel.h.
> >
> > Actually it's needed as 'proxy' header. If you want to get rid of it, it should
> > be another patch to replace that with the used headers following IWYU
> > principle.
> >
> > Suggested-by: Andy Shevchenko <andriy.shevchenko@intel.com>
>
> Also, for consistency's sake, this should be done in all files in that
> folder. I see the same issue(s) in the _gyro and _accel and I assume the rest
> *.c and *.h also might be updated.
For simplicity, just sort each group of headers in all files. The IWYU
can be applied later on as it's not in the scope of your series.
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* [PATCH v2] firmware: sysfb: Mark CONFIG_SYSFB_SIMPLEFB as deprecated
From: Thomas Zimmermann @ 2026-06-25 6:57 UTC (permalink / raw)
To: javierm, julianbraha, sima, airlied
Cc: dri-devel, linux-fbdev, linux-kernel, sashiko-reviews,
Thomas Zimmermann
Mark CONFIG_SYSFB_SIMPLEFB as deprecated. Enabling it allows
simpledrm and simplefb to run on EFI/VESA framebuffers. Doing this
is discouraged in favor of using efidrm and vesadrm.
v2:
- resolve conflicting help texts (Sashiko)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
---
drivers/firmware/Kconfig | 32 ++++++++++----------------------
drivers/gpu/drm/sysfb/Kconfig | 7 ++-----
2 files changed, 12 insertions(+), 27 deletions(-)
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index bbd2155d8483..637e3bb5549e 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -184,32 +184,20 @@ config SYSFB
select SCREEN_INFO
config SYSFB_SIMPLEFB
- bool "Mark VGA/VBE/EFI FB as generic system framebuffer"
+ bool "Mark VGA/VBE/EFI FB as generic system framebuffer (deprecated)"
depends on X86 || EFI
select SYSFB
help
- Firmwares often provide initial graphics framebuffers so the BIOS,
+ Firmware often provides initial graphics framebuffers so the BIOS,
bootloader or kernel can show basic video-output during boot for
- user-guidance and debugging. Historically, x86 used the VESA BIOS
- Extensions and EFI-framebuffers for this, which are mostly limited
- to x86 BIOS or EFI systems.
- This option, if enabled, marks VGA/VBE/EFI framebuffers as generic
- framebuffers so the new generic system-framebuffer drivers can be
- used instead. If the framebuffer is not compatible with the generic
- modes, it is advertised as fallback platform framebuffer so legacy
- drivers like efifb, vesafb and uvesafb can pick it up.
- If this option is not selected, all system framebuffers are always
- marked as fallback platform framebuffers as usual.
-
- Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will
- not be able to pick up generic system framebuffers if this option
- is selected. You are highly encouraged to enable simplefb as
- replacement if you select this option. simplefb can correctly deal
- with generic system framebuffers. But you should still keep vesafb
- and others enabled as fallback if a system framebuffer is
- incompatible with simplefb.
-
- If unsure, say Y.
+ user-guidance and debugging.
+
+ This option, if enabled, marks VBE/EFI framebuffers as system
+ framebuffers so the generic simpledrm driver can be used.
+
+ This option is deprecated and will be removed in the near future. If
+ unsure, say N and select efidrm, vesadrm instead. The dedicated DRM
+ drivers provide the same functionality plus additional features.
config TH1520_AON_PROTOCOL
tristate "Always-On firmware protocol"
diff --git a/drivers/gpu/drm/sysfb/Kconfig b/drivers/gpu/drm/sysfb/Kconfig
index 2559ead6cf1f..f7e48178885e 100644
--- a/drivers/gpu/drm/sysfb/Kconfig
+++ b/drivers/gpu/drm/sysfb/Kconfig
@@ -67,11 +67,8 @@ config DRM_SIMPLEDRM
This driver assumes that the display hardware has been initialized
by the firmware or bootloader before the kernel boots. Scanout
- buffer, size, and display format must be provided via device tree,
- UEFI, VESA, etc.
-
- On x86 BIOS or UEFI systems, you should also select SYSFB_SIMPLEFB
- to use UEFI and VESA framebuffers.
+ buffer, size, and display format must be provided via device tree's
+ simple-framebuffer node.
config DRM_VESADRM
tristate "VESA framebuffer driver"
--
2.54.0
^ permalink raw reply related
* Re: [PATCH] gpu: nova-core: falcon: store bar and dev in falcon
From: Alexandre Courbot @ 2026-06-25 6:56 UTC (permalink / raw)
To: Tim Kovalenko
Cc: Danilo Krummrich, Alice Ryhl, David Airlie, Simona Vetter,
Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
nova-gpu, dri-devel, linux-kernel, linux-riscv
In-Reply-To: <20260624-drm-bar-refactor-v1-1-7062899163c5@proton.me>
Hi Tim, thanks for the patch!
On Thu Jun 25, 2026 at 12:51 AM JST, Tim Kovalenko wrote:
> Store the bound device and `BAR0` mapping in `Falcon` instead of passing
> them through every `Falcon` operation. This simplifies the `Falcon` API and
> removes repeated `dev`/`bar` plumbing from reset, load, boot, mailbox, DMA,
> and GSP/FSP-specific Falcon helpers.
>
> Add a named helper for configuring Falcon FBIF transaction slots for
> physical coherent system memory, avoiding direct `BAR0` access from the
> FWSEC bootloader path without exposing BAR0 publicly.
>
> Future work / questions:
Questions are not something we want to appear in the final git log, so
please move such comments to after the `---` mark. If you use b4, this
is where the cover letter text will be placed on single-patch series.
>
> - Focused only on the Falcon for this patch - more to follow.
>
> - Not sure about the FalconHal and if I should modify the Trait
> - Could be part of the next patch
> - But it could definitively be simplified
Every HAL method takes the `Falcon` as a parameter, so it is able
to access its `bar` and `dev`. I think it makes sense to remove the
parameters there as well when possible.
>
> - Also, how far should the refactor go and to what extent should we add new
> methods to avoid passing `bar` or `dev` but also not exposing them as
> pub. For `dev`, there's a lot of `dev_err` usage that requires us to
> pass it as a param.
> - I've created the `set_fbif_transcfg_phys_sysmem` method to
> address such issue and remove some code duplication but that
> method could be made a bit more generic.
If you add new methods, these should come as separate patches, and be
thoroughly thought through - `set_fbif_transcfg_phys_sysmem` seems a bit too
ad-hoc to me. It looks like the proper fix for this access to
`NV_PFALCON_FBIF_TRANSCFG` is more something like moving it to
`Falcon::pio_load` or something, but this is a separate problem.
Let's keep the patch focused on `Falcon` itself, and keep the `bar`
argument of `FwsecFirmwareWithBl::run` until we figure out the right way
to address this. We don't need to clean up everything in one go.
>
> - Also, is there a reason why we are not passing the `GspBootContext`
> when we need stuff like `bar`, `dev` and both falcons?
`GspBootContext` was recently introduced, and yes we want to use it
more, but there are plans to make it generic and until we know what it
will look like I would like to limit its use. So for this patch, better
to keep things simple and mechanical.
>
> Reported-by: Alexandre Courbot <acourbot@nvidia.com>
This is not a bug, so `Suggested-by:` is more accurate I think.
> Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/Storing.20driver-bound.20references.20into.20sub-devices/near/599137882
This can be `Link:`, as this patch by itself doesn't fix them all yet.
The patch itself clearly makes things more readable, and looks like what
I had in mind - modulo the addition of new methods that I would prefer
to see taken care of separately. If you can send a v2 that focuses
strictly on falcon and does not introduce new methods, I think that
would be perfect.
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply
* [PATCH] sched: set TIF_NEED_RESCHED before calling __trace_set_need_resched()
From: Sechang Lim @ 2026-06-25 6:54 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
rostedt, bsegall, mgorman, vschneid, kprateek.nayak, gmonaco
set_tsk_need_resched() tests TIF_NEED_RESCHED, calls
__trace_set_need_resched() if the flag is clear, then sets it via
set_tsk_thread_flag(). A BPF raw_tp program attached to
sched_set_need_resched executes synchronously inside __bpf_trace_run().
On return, __bpf_trace_run() drops the RCU lock with
rcu_read_unlock_migrate(), which on the preempt-or-BH-disabled path
calls set_need_resched_current() -> set_tsk_need_resched() again.
set_tsk_thread_flag() follows the tracepoint call, so every re-entrant
frame sees TIF_NEED_RESCHED clear and calls __trace_set_need_resched()
again:
BUG: TASK stack guard page was hit at ffffc9001224ff98
Oops: stack guard page: 0000 [#1] SMP KASAN PTI
RIP: 0010:__bpf_trace_sched_set_need_resched_tp+0x1c/0x190
Call Trace:
trace_sched_set_need_resched_tp+0x110/0x130
set_tsk_need_resched include/linux/sched.h:2076
set_need_resched_current include/linux/sched.h:2094
rcu_read_unlock_special+0x43a/0x440
__rcu_read_unlock+0x9e/0x120
rcu_read_unlock_migrate+0xa9/0x240
__bpf_trace_run+0x131/0x180
bpf_trace_run3+0x333/0x430
__bpf_trace_sched_set_need_resched_tp+0x13a/0x190
trace_sched_set_need_resched_tp+0x110/0x130
set_tsk_need_resched include/linux/sched.h:2076
...
Replace the separate test_tsk_thread_flag() + set_tsk_thread_flag() pair
with test_and_set_tsk_thread_flag().
Fixes: adcc3bfa8806 ("sched: Adapt sched tracepoints for RV task model")
Signed-off-by: Sechang Lim <rhkrqnwk98@gmail.com>
---
include/linux/sched.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ee06cba5c6f5..c9efd08dae92 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2071,10 +2071,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
static inline void set_tsk_need_resched(struct task_struct *tsk)
{
- if (tracepoint_enabled(sched_set_need_resched_tp) &&
- !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+ if (!test_and_set_tsk_thread_flag(tsk, TIF_NEED_RESCHED) &&
+ tracepoint_enabled(sched_set_need_resched_tp))
__trace_set_need_resched(tsk, TIF_NEED_RESCHED);
- set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}
static inline void clear_tsk_need_resched(struct task_struct *tsk)
--
2.43.0
^ permalink raw reply related
* Re: [PATCH] gpu: nova-core: falcon: store bar and dev in falcon
From: Alexandre Courbot @ 2026-06-25 6:56 UTC (permalink / raw)
To: Tim Kovalenko
Cc: Danilo Krummrich, Alice Ryhl, David Airlie, Simona Vetter,
Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
nova-gpu, dri-devel, linux-kernel, linux-riscv
In-Reply-To: <20260624-drm-bar-refactor-v1-1-7062899163c5@proton.me>
Hi Tim, thanks for the patch!
On Thu Jun 25, 2026 at 12:51 AM JST, Tim Kovalenko wrote:
> Store the bound device and `BAR0` mapping in `Falcon` instead of passing
> them through every `Falcon` operation. This simplifies the `Falcon` API and
> removes repeated `dev`/`bar` plumbing from reset, load, boot, mailbox, DMA,
> and GSP/FSP-specific Falcon helpers.
>
> Add a named helper for configuring Falcon FBIF transaction slots for
> physical coherent system memory, avoiding direct `BAR0` access from the
> FWSEC bootloader path without exposing BAR0 publicly.
>
> Future work / questions:
Questions are not something we want to appear in the final git log, so
please move such comments to after the `---` mark. If you use b4, this
is where the cover letter text will be placed on single-patch series.
>
> - Focused only on the Falcon for this patch - more to follow.
>
> - Not sure about the FalconHal and if I should modify the Trait
> - Could be part of the next patch
> - But it could definitively be simplified
Every HAL method takes the `Falcon` as a parameter, so it is able
to access its `bar` and `dev`. I think it makes sense to remove the
parameters there as well when possible.
>
> - Also, how far should the refactor go and to what extent should we add new
> methods to avoid passing `bar` or `dev` but also not exposing them as
> pub. For `dev`, there's a lot of `dev_err` usage that requires us to
> pass it as a param.
> - I've created the `set_fbif_transcfg_phys_sysmem` method to
> address such issue and remove some code duplication but that
> method could be made a bit more generic.
If you add new methods, these should come as separate patches, and be
thoroughly thought through - `set_fbif_transcfg_phys_sysmem` seems a bit too
ad-hoc to me. It looks like the proper fix for this access to
`NV_PFALCON_FBIF_TRANSCFG` is more something like moving it to
`Falcon::pio_load` or something, but this is a separate problem.
Let's keep the patch focused on `Falcon` itself, and keep the `bar`
argument of `FwsecFirmwareWithBl::run` until we figure out the right way
to address this. We don't need to clean up everything in one go.
>
> - Also, is there a reason why we are not passing the `GspBootContext`
> when we need stuff like `bar`, `dev` and both falcons?
`GspBootContext` was recently introduced, and yes we want to use it
more, but there are plans to make it generic and until we know what it
will look like I would like to limit its use. So for this patch, better
to keep things simple and mechanical.
>
> Reported-by: Alexandre Courbot <acourbot@nvidia.com>
This is not a bug, so `Suggested-by:` is more accurate I think.
> Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/Storing.20driver-bound.20references.20into.20sub-devices/near/599137882
This can be `Link:`, as this patch by itself doesn't fix them all yet.
The patch itself clearly makes things more readable, and looks like what
I had in mind - modulo the addition of new methods that I would prefer
to see taken care of separately. If you can send a v2 that focuses
strictly on falcon and does not introduce new methods, I think that
would be perfect.
^ permalink raw reply
* Re: [PATCH v3 1/2] iio: imu: inv_icm42600: reorder includes for buffer module
From: Andy Shevchenko @ 2026-06-25 6:55 UTC (permalink / raw)
To: jean-baptiste.maneyrol
Cc: Jonathan Cameron, David Lechner, Nuno Sá, Andy Shevchenko,
linux-iio, linux-kernel
In-Reply-To: <ajzPhi3ymeIBYHRr@ashevche-desk.local>
On Thu, Jun 25, 2026 at 09:49:47AM +0300, Andy Shevchenko wrote:
> On Wed, Jun 24, 2026 at 06:21:18PM +0200, Jean-Baptiste Maneyrol via B4 Relay wrote:
>
> > Reorder includes following rules and delete unneeded kernel.h.
>
> Actually it's needed as 'proxy' header. If you want to get rid of it, it should
> be another patch to replace that with the used headers following IWYU
> principle.
>
> Suggested-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Also, for consistency's sake, this should be done in all files in that
folder. I see the same issue(s) in the _gyro and _accel files, and I assume the
rest of the *.c and *.h files might need to be updated as well.
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* Re: [PATCH v3 0/2] doc: clarify review replies and reroll timing
From: Weijie Yuan @ 2026-06-25 6:54 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, gitster
In-Reply-To: <ajvDuUiDsmyf5LnX@pks.im>
On Wed, Jun 24, 2026 at 01:47:05PM +0200, Patrick Steinhardt wrote:
> On Sun, Jun 21, 2026 at 04:04:36PM +0800, Weijie Yuan wrote:
> > Changes in v3:
> >
> > - Reworked the substantial-rework case. Instead of suggesting that
> > authors send a new version sooner, the text now advises authors not
> > to rush out an updated version before reviewing the larger changes
> > carefully. It recommends replying to the review that prompted the
> > rewrite, saying that a substantial rework is planned, and pointing
> > out which parts of the current series will become obsolete.
> >
> > - Dropped the advice that a topic close to being accepted may justify
> > a quicker reroll.
> >
> > - Removed "how close the topic is to being accepted" from the short
> > reroll-timing guidance in Documentation/SubmittingPatches.
> >
> > - Updated the commit message of patch 2 accordingly.
>
> I'm happy with this version, thanks!
>
> Patrick
Thank you very much for your review and guidance!
^ permalink raw reply
* Re: [PATCH v3 2/2] iio: imu: inv_icm42600: add buffer hwfifo watermark attributes
From: Andy Shevchenko @ 2026-06-25 6:54 UTC (permalink / raw)
To: jean-baptiste.maneyrol
Cc: Jonathan Cameron, David Lechner, Nuno Sá, Andy Shevchenko,
linux-iio, linux-kernel
In-Reply-To: <20260624-inv-icm42600-add-buffer-hwfifo_attributes-v3-2-5d9a4c662f50@tdk.com>
On Wed, Jun 24, 2026 at 06:21:19PM +0200, Jean-Baptiste Maneyrol via B4 Relay wrote:
> Add hwfifo_watermark/min/max/enabled buffer attributes.
> Hardware FIFO is always enabled and used.
These attributes are already documented and being used by a few drivers.
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* Re: [PATCH v2 2/2] doc: advise batching patch rerolls
From: Weijie Yuan @ 2026-06-25 6:53 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: Junio C Hamano, git
In-Reply-To: <ajvDrjk-bTvYaQtU@pks.im>
On Wed, Jun 24, 2026 at 01:46:54PM +0200, Patrick Steinhardt wrote:
> > But here I think Patrick's original intention is: If your series is
> > *close* to be accepted, (while I'm not sure what the precise definition
> > of this "close to be accepted" is; does it mean: commented by Junio with
> > "Looks good", or reviewed by the community/core contributors with "Makes
> > sense"?) and this time there happens to be a small issue, you can
> > re-roll quickly to make your series more "sturdy" to wait for
> > maintainer's final examination and further merges.
> >
> > So, I think the situation you are describing here is that this version
> > of the patch has already been declared by the *author* to be the final
> > version. (i.e. waiting for Junio to do the last exam)
>
> My "close to be accepted" feeling is when you've had multiple rounds of
> design discussions already, everyone is on the same page, and all you
> got on the last review round is a couple of typo fixes.
>
> But all of this is highly subjective, so it'll always depend and it
> won't be easy to codify all of that. Nor is that necessary, I guess. We
> really only want to provide some rough guidance.
Agreed, thanks!
> > Therefore, I do not think the two situations conflict with each other,
> > or are directly related. One concerns a patch that is already close to
> > receiving the maintainer's final verdict, where a minor issue is
> > discovered and the author quickly rerolls it. The other concerns an
> > author who, without realizing that some issues remain unresolved, rushes
> > to send what they believe to be the final version and then waits for the
> > maintainer to review it.
> >
> > For the latter case, I think it would be better to add a sentence along
> > the lines of: "Before sending a new version/the final version, check
> > once more whether there are any unresolved issues," if the existing
> > documentation does not already make this clear.
>
> I think that should mostly be clear with our documentation. And
> eventually, we should also expect people to have some common sense :)
Agreed.
> > That said, I am not familiar with how patch discussions have played out
> > in the past, so please directly point out any mistakes in my
> > understanding. I have to admit that, by this point in writing the
> > message, I have become a little tangled up in my own reasoning.
>
> I guess that's kind of expected, mostly because many of these things are
> highly subjective and will depend on the situation. The guidance does
> not have to be perfect, you'll probably be able to find counterexamples
> for many of the cases.
Yes, setting the rules too strictly may actually reduce the flexibility of
our project.
Thanks!
^ permalink raw reply
* [PATCH v3 0/2] arm64: dts: socfpga: agilex72: Add initial device tree
From: muhammad.nazim.amirul.nazle.asmade @ 2026-06-25 6:53 UTC (permalink / raw)
To: dinguyen; +Cc: robh, krzk+dt, conor+dt, devicetree, linux-kernel
From: Nazim Amirul <muhammad.nazim.amirul.nazle.asmade@altera.com>
This series introduces basic device tree support for the Intel/Altera
Agilex72 SoCFPGA platform, which is a new SoC featuring a heterogeneous
CPU cluster (Cortex-A520 and Cortex-A720 cores).
Patch 1 adds the new compatible strings for Agilex72 to the arm/altera
DT bindings documentation.
Patch 2 introduces the initial DTSI and board-level DTS for the Agilex72
SoCDK. The DTSI covers the core SoC nodes: CPUs, GIC-v3 interrupt
controller with ITS, ARM architectural timer, PSCI, SMMU-v3, OCRAM, and
two UART serial controllers backed by a fixed-clock placeholder. The clock
manager driver for this platform is not yet upstream, so a fixed-clock
at 125 MHz is used as an interim solution for the UART clock, matching
the hardware-confirmed LSP_SP_CLK frequency.
Changes in v3:
- Add UART serial console (uart0, uart1) with fixed-clock placeholder at 125 MHz
- Add aliases and chosen nodes in board DTS for serial console
Changes in v2:
- Applied relevant feedback from Shahsiko's review
- Re-add arm,armv8-timer node which is mandatory for kernel boot
- Rename platform from agilex7-gen2 to agilex72
Nazim Amirul (2):
dt-bindings: arm: altera: Add Agilex72 SoCFPGA compatible strings
arm64: dts: socfpga: agilex72: Add initial device tree
.../devicetree/bindings/arm/altera.yaml | 6 +
arch/arm64/boot/dts/intel/Makefile | 1 +
.../boot/dts/intel/socfpga_agilex72.dtsi | 156 ++++++++++++++++++
.../boot/dts/intel/socfpga_agilex72_socdk.dts | 27 +++
4 files changed, 190 insertions(+)
create mode 100644 arch/arm64/boot/dts/intel/socfpga_agilex72.dtsi
create mode 100644 arch/arm64/boot/dts/intel/socfpga_agilex72_socdk.dts
--
2.43.7
^ permalink raw reply
* [PATCH v3 1/2] dt-bindings: arm: altera: Add Agilex72 SoCFPGA compatible strings
From: muhammad.nazim.amirul.nazle.asmade @ 2026-06-25 6:53 UTC (permalink / raw)
To: dinguyen; +Cc: robh, krzk+dt, conor+dt, devicetree, linux-kernel
In-Reply-To: <20260625065329.20274-1-muhammad.nazim.amirul.nazle.asmade@altera.com>
From: Nazim Amirul <muhammad.nazim.amirul.nazle.asmade@altera.com>
Add the SoC and board compatible strings for the Intel SoCFPGA
Agilex72 platform.
Signed-off-by: Nazim Amirul <muhammad.nazim.amirul.nazle.asmade@altera.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
---
Changes in v3:
- no changes
---
Documentation/devicetree/bindings/arm/altera.yaml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/Documentation/devicetree/bindings/arm/altera.yaml b/Documentation/devicetree/bindings/arm/altera.yaml
index 4b096e52243e..cc03fb437a9a 100644
--- a/Documentation/devicetree/bindings/arm/altera.yaml
+++ b/Documentation/devicetree/bindings/arm/altera.yaml
@@ -115,6 +115,12 @@ properties:
- intel,socfpga-agilex5-socdk-nand
- const: intel,socfpga-agilex5
+ - description: Agilex72 boards
+ items:
+ - enum:
+ - intel,socfpga-agilex72-socdk
+ - const: intel,socfpga-agilex72
+
- description: Agilex7m boards
items:
- enum:
--
2.43.7
^ permalink raw reply related
* [PATCH v3 2/2] arm64: dts: socfpga: agilex72: Add initial device tree
From: muhammad.nazim.amirul.nazle.asmade @ 2026-06-25 6:53 UTC (permalink / raw)
To: dinguyen; +Cc: robh, krzk+dt, conor+dt, devicetree, linux-kernel
In-Reply-To: <20260625065329.20274-1-muhammad.nazim.amirul.nazle.asmade@altera.com>
From: Nazim Amirul <muhammad.nazim.amirul.nazle.asmade@altera.com>
Add initial device tree support for the Intel SoCFPGA Agilex72
platform. This introduces the SoC DTSI and the SoCDK board DTS as
the first upstream submission for this platform.
The Agilex72 SoC features a heterogeneous CPU cluster with
Cortex-A520 and Cortex-A720 cores, and includes an SMMU v3 for
memory management.
Signed-off-by: Nazim Amirul <muhammad.nazim.amirul.nazle.asmade@altera.com>
---
Changes in v3:
- Add UART serial console (uart0, uart1) with fixed-clock placeholder at 125 MHz
- Add aliases and chosen nodes in board DTS for serial console
Changes in v2:
- Re-add arm,armv8-timer node which is mandatory for kernel boot
- Rename platform from agilex7-gen2 to agilex72
---
arch/arm64/boot/dts/intel/Makefile | 1 +
.../boot/dts/intel/socfpga_agilex72.dtsi | 156 ++++++++++++++++++
.../boot/dts/intel/socfpga_agilex72_socdk.dts | 27 +++
3 files changed, 184 insertions(+)
create mode 100644 arch/arm64/boot/dts/intel/socfpga_agilex72.dtsi
create mode 100644 arch/arm64/boot/dts/intel/socfpga_agilex72_socdk.dts
diff --git a/arch/arm64/boot/dts/intel/Makefile b/arch/arm64/boot/dts/intel/Makefile
index 088a03b89c99..270c70fdf084 100644
--- a/arch/arm64/boot/dts/intel/Makefile
+++ b/arch/arm64/boot/dts/intel/Makefile
@@ -8,6 +8,7 @@ dtb-$(CONFIG_ARCH_INTEL_SOCFPGA) += socfpga_agilex_n6000.dtb \
socfpga_agilex5_socdk_013b.dtb \
socfpga_agilex5_socdk_modular.dtb \
socfpga_agilex5_socdk_nand.dtb \
+ socfpga_agilex72_socdk.dtb \
socfpga_agilex7m_socdk.dtb \
socfpga_n5x_socdk.dtb
dtb-$(CONFIG_ARCH_KEEMBAY) += keembay-evm.dtb
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex72.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex72.dtsi
new file mode 100644
index 000000000000..c29c2afcaab7
--- /dev/null
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex72.dtsi
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Altera Corporation
+ */
+/dts-v1/;
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+ compatible = "intel,socfpga-agilex72";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ atf_reserved: atf@80000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x0 0x80000000 0x0 0x100000>;
+ alignment = <0x1000>;
+ no-map;
+ };
+
+ service_reserved: svcbuffer@80100000 {
+ compatible = "shared-dma-pool";
+ reg = <0x0 0x80100000 0x0 0xf00000>;
+ alignment = <0x1000>;
+ no-map;
+ };
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ compatible = "arm,cortex-a520";
+ device_type = "cpu";
+ enable-method = "psci";
+ reg = <0x0>;
+ };
+
+ cpu1: cpu@100 {
+ compatible = "arm,cortex-a520";
+ device_type = "cpu";
+ enable-method = "psci";
+ reg = <0x100>;
+ };
+
+ cpu2: cpu@200 {
+ compatible = "arm,cortex-a720";
+ device_type = "cpu";
+ enable-method = "psci";
+ reg = <0x200>;
+ };
+
+ cpu3: cpu@300 {
+ compatible = "arm,cortex-a720";
+ device_type = "cpu";
+ enable-method = "psci";
+ reg = <0x300>;
+ };
+ };
+
+ clocks {
+ uart_clk: uart-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <125000000>;
+ };
+ };
+
+ psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupt-parent = <&intc>;
+ interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+ <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ intc: interrupt-controller@7000000 {
+ compatible = "arm,gic-v3";
+ reg = <0x0 0x7000000 0x0 0x10000>,
+ <0x0 0x7080000 0x0 0x100000>;
+ ranges;
+ #interrupt-cells = <3>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-controller;
+ #redistributor-regions = <1>;
+ redistributor-stride = <0x0 0x40000>;
+
+ its: msi-controller@7040000 {
+ compatible = "arm,gic-v3-its";
+ reg = <0x0 0x7040000 0x0 0x20000>;
+ msi-controller;
+ #msi-cells = <1>;
+ };
+ };
+
+ soc: soc@0 {
+ compatible = "simple-bus";
+ ranges = <0 0 0 0xffffffff>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "soc";
+ interrupt-parent = <&intc>;
+
+ smmu: iommu@c100000 {
+ compatible = "arm,smmu-v3";
+ reg = <0x0c100000 0x30000>;
+ interrupts = <GIC_SPI 134 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 129 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 132 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "eventq", "gerror", "priq";
+ dma-coherent;
+ #iommu-cells = <1>;
+ };
+
+ ocram: sram@0 {
+ compatible = "mmio-sram";
+ reg = <0x00000000 0x80000>;
+ ranges = <0 0 0x80000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ };
+
+ uart0: serial@9038000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x9038000 0x100>;
+ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ clocks = <&uart_clk>;
+ status = "disabled";
+ };
+
+ uart1: serial@9039000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x9039000 0x100>;
+ interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ clocks = <&uart_clk>;
+ status = "disabled";
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex72_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex72_socdk.dts
new file mode 100644
index 000000000000..998f19f492b3
--- /dev/null
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex72_socdk.dts
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Altera Corporation
+ */
+#include "socfpga_agilex72.dtsi"
+
+/ {
+ model = "Altera SoCFPGA Agilex72 SoCDK";
+ compatible = "intel,socfpga-agilex72-socdk", "intel,socfpga-agilex72";
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x0 0x80000000>;
+ };
+};
+
+&uart0 {
+ status = "okay";
+};
--
2.43.7
^ permalink raw reply related
* Re: [PATCH 0/3] iio: light: al3xxx: add missing REGMAP_I2C to Kconfig entries
From: Joshua Crofts @ 2026-06-25 6:53 UTC (permalink / raw)
To: Andy Shevchenko
Cc: Jonathan Cameron, David Lechner, Nuno Sá, Andy Shevchenko,
Svyatoslav Ryhel, David Heidelberg, linux-iio, linux-kernel
In-Reply-To: <ajzNmy3Vhh_Zl9Rs@ashevche-desk.local>
On Thu, 25 Jun 2026 09:41:31 +0300
Andy Shevchenko <andriy.shevchenko@intel.com> wrote:
> On Thu, Jun 25, 2026 at 07:20:42AM +0200, Joshua Crofts wrote:
> > This series adds REGMAP_I2C support to three AL3xxx ambient light
> > sensors that were previously missing this dependency, causing build
> > failures.
>
> There are two problems with the commit message:
> - SELECT versus select (see the comment against patch 1)
> - you mentioned build failures but haven't provided any evidence, please
> provide a reasonable number of lines of build output to prove that
>
Sure, I could elaborate a bit more.
Just run `make allnoconfig`, then run `make menuconfig` and select
IIO, I2C and any AL3xxx sensor; `make .` will then fail with errors such as
drivers/iio/light/al3010.c: In function ‘al3010_probe’:
drivers/iio/light/al3010.c:185:24: error: implicit declaration of function ‘devm_regmap_init_i2c’ [-Wimplicit-function-declaration]
185 | data->regmap = devm_regmap_init_i2c(client, &al3010_regmap_config);
| ^~~~~~~~~~~~~~~~~~~~
drivers/iio/light/al3010.c:185:22: error: assignment to ‘struct regmap *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion]
185 | data->regmap = devm_regmap_init_i2c(client, &al3010_regmap_config);
| ^
drivers/iio/light/al3010.c: At top level:
drivers/iio/light/al3010.c:48:35: error: storage size of ‘al3010_regmap_config’ isn’t known
48 | static const struct regmap_config al3010_regmap_config = {
| ^~~~~~~~~~~~~~~~~~~~
Hopefully this is enough.
--
Kind regards
CJD
^ permalink raw reply
* [PATCH v2] sanity.bbclass: warn on cargo config files outside the build tree
From: Hemanth.KumarMD @ 2026-06-25 6:52 UTC (permalink / raw)
To: openembedded-core; +Cc: Randy.MacLeod, Sundeep.Kokkonda, Hemanth.KumarMD
From: Hemanth Kumar M D <Hemanth.KumarMD@windriver.com>
Cargo walks from CWD up to the filesystem root merging every
.cargo/config[.toml] it finds. Any such file above BASE_WORKDIR is
silently picked up and can override Yocto's linker, registry or
compiler settings, leading to build failures.
Until cargo provides a proper fix upstream, add a warning so users
get a clear diagnostic instead of a build error.
Upstream meta-issue: https://github.com/rust-lang/cargo/issues/9769
[YOCTO #15637]
Signed-off-by: Hemanth Kumar M D <Hemanth.KumarMD@windriver.com>
---
meta/classes-global/sanity.bbclass | 33 ++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/meta/classes-global/sanity.bbclass b/meta/classes-global/sanity.bbclass
index bdfa7f059d..5c5cf311da 100644
--- a/meta/classes-global/sanity.bbclass
+++ b/meta/classes-global/sanity.bbclass
@@ -854,6 +854,38 @@ def sanity_check_locale(d):
except locale.Error:
raise_sanity_error("Your system needs to support the en_US.UTF-8 locale.", d)
+def check_cargo_config(d):
+ # Cargo merges .cargo/config[.toml] from every directory between CWD and
+ # the filesystem root. Warn for anything found in ancestor directories
+ # above BASE_WORKDIR that Cargo would pick up silently.
+ import os
+
+ base_workdir = d.getVar('BASE_WORKDIR')
+ ancestor = os.path.dirname(base_workdir)
+ found = []
+ last_ancestor = None
+ while True:
+ for name in ('config', 'config.toml'):
+ cfg = os.path.join(ancestor, '.cargo', name)
+ if os.path.exists(cfg):
+ found.append(cfg)
+ last_ancestor = ancestor
+ break
+ parent = os.path.dirname(ancestor)
+ if parent == ancestor:
+ break
+ ancestor = parent
+
+ if found:
+ bb.warn("Cargo config file(s) found at %s which is/are outside the build "
+ "directory. Cargo will silently apply their settings during the "
+ "rust/cargo build and can override Yocto's settings like linker, "
+ "registry or compiler settings causing build failures. You can "
+ "either remove these file(s) or move your build directory outside "
+ "of %s to fix this. "
+ "See https://bugzilla.yoctoproject.org/show_bug.cgi?id=15637 for more details."
+ % (', '.join(found), last_ancestor))
+
def check_sanity_everybuild(status, d):
import os, stat
# Sanity tests which test the users environment so need to run at each build (or are so cheap
@@ -873,6 +905,7 @@ def check_sanity_everybuild(status, d):
status.addresult('Bitbake version %s is required and version %s was found\n' % (minversion, bb.__version__))
sanity_check_locale(d)
+ check_cargo_config(d)
paths = d.getVar('PATH').split(":")
if "." in paths or "./" in paths or "" in paths:
--
2.49.0
^ permalink raw reply related
* [paulmckrcu:dev.2026.06.16a 56/59] powerpc64-linux-gnu-ld: warning: discarding dynamic section .glink
From: kernel test robot @ 2026-06-25 6:51 UTC (permalink / raw)
To: oe-kbuild; +Cc: lkp
::::::
:::::: Manual check reason: "low confidence bisect report"
::::::
BCC: lkp@intel.com
CC: oe-kbuild-all@lists.linux.dev
TO: "Paul E. McKenney" <paulmck@kernel.org>
tree: https://github.com/paulmckrcu/linux dev.2026.06.16a
head: 47ca6a8aca98b11c9cb2a1a25597f94af559524f
commit: 678e86682304b42825f4af62b8b954c4e14874b8 [56/59] rcu: Make rcu_gp_cleanup() account for ->dqs_blkd_tasks
:::::: branch date: 13 hours ago
:::::: commit date: 21 hours ago
config: powerpc64-allnoconfig-bpf (https://download.01.org/0day-ci/archive/20260625/202606250855.VAVrrdSV-lkp@intel.com/config)
compiler: powerpc64-linux-gnu-gcc (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260625/202606250855.VAVrrdSV-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/r/202606250855.VAVrrdSV-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> powerpc64-linux-gnu-ld: warning: discarding dynamic section .glink
>> powerpc64-linux-gnu-ld: warning: discarding dynamic section .plt
powerpc64-linux-gnu-ld: linkage table error against `rcu_preempt_blocked_readers_cgp_ndqs'
powerpc64-linux-gnu-ld: stubs don't match calculated size
powerpc64-linux-gnu-ld: can not build stubs: bad value
powerpc64-linux-gnu-ld: kernel/rcu/tree.o: in function `rcu_gp_cleanup':
kernel/rcu/tree.c:2223:(.text+0x10fb8): undefined reference to `rcu_preempt_blocked_readers_cgp_ndqs'
kernel/rcu/tree.c:2223:(.text+0x10fb8): call to `rcu_preempt_blocked_readers_cgp_ndqs' lacks nop, can't restore toc; (plt call stub)
powerpc64-linux-gnu-ld: final link failed: bad value
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply
* [PATCH v2] mm: annotate data-race in cpu_needs_drain() and need_mlock_drain()
From: Xuewen Wang @ 2026-06-25 6:51 UTC (permalink / raw)
To: akpm, liam, ljs, vbabka, jannh, pfalcato, chrisl, kasong,
shikemeng, nphamcs, baoquan.he, baohua, youngjun.park, qi.zheng,
shakeel.butt, axelrasmussen, yuanchu, weixugc, david
Cc: linux-mm, linux-kernel, Xuewen Wang
KCSAN reports a data-race when cpu_needs_drain() reads another CPU's
per-cpu folio_batch->nr without locking, while the owning CPU writes
to it via folio_batch_add(). The same race exists in need_mlock_drain()
which is called from cpu_needs_drain().
Reading a slightly stale value is harmless -- cpu_needs_drain() only
decides whether to schedule a drain, and the next iteration of
__lru_add_drain_all() will re-check.
All other callers of folio_batch_count() either use stack variables or
access their own CPU's per-cpu data where no race exists, so
data_race() is added at the call sites rather than in
folio_batch_count() itself to avoid suppressing KCSAN warnings for
future callers that may have real bugs.
Signed-off-by: Xuewen Wang <wangxuewen@kylinos.cn>
---
Changes in v2:
- Use data_race() instead of READ_ONCE() in folio_batch_count(), as
suggested by Lorenzo. READ_ONCE() is unnecessary for a single-byte
read and imposes overhead on all callers, most of which have no race.
- Move the annotation from folio_batch_count() to the actual call sites
(cpu_needs_drain() and need_mlock_drain()) where the cross-CPU race
occurs, rather than affecting all callers.
- Also annotate need_mlock_drain(), which has the same cross-CPU race.
- Add comments explaining why the data race is safe.
v1:
https://lore.kernel.org/all/20260624092606.1083449-1-wangxuewen@kylinos.cn/
---
mm/mlock.c | 2 +-
mm/swap.c | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index 8c227fefa2df..fbdb5018e2c3 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -232,7 +232,7 @@ void mlock_drain_remote(int cpu)
bool need_mlock_drain(int cpu)
{
- return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu));
+ return data_race(folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu)));
}
/**
diff --git a/mm/swap.c b/mm/swap.c
index 588f50d8f1a8..d046428caed6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -828,12 +828,12 @@ static bool cpu_needs_drain(unsigned int cpu)
struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu);
/* Check these in order of likelihood that they're not zero */
- return folio_batch_count(&fbatches->lru_add) ||
- folio_batch_count(&fbatches->lru_move_tail) ||
- folio_batch_count(&fbatches->lru_deactivate_file) ||
- folio_batch_count(&fbatches->lru_deactivate) ||
- folio_batch_count(&fbatches->lru_lazyfree) ||
- folio_batch_count(&fbatches->lru_activate) ||
+ return data_race(folio_batch_count(&fbatches->lru_add)) ||
+ data_race(folio_batch_count(&fbatches->lru_move_tail)) ||
+ data_race(folio_batch_count(&fbatches->lru_deactivate_file)) ||
+ data_race(folio_batch_count(&fbatches->lru_deactivate)) ||
+ data_race(folio_batch_count(&fbatches->lru_lazyfree)) ||
+ data_race(folio_batch_count(&fbatches->lru_activate)) ||
need_mlock_drain(cpu) ||
has_bh_in_lru(cpu, NULL);
}
--
2.25.1
^ permalink raw reply related
* Re: [PATCH v4] mm: assert exclusive nid/zonenum bits at the page/folio access sites
From: kernel test robot @ 2026-06-25 6:50 UTC (permalink / raw)
To: Hui Zhu, Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
Suren Baghdasaryan, Michal Hocko, Kairui Song, Qi Zheng,
Shakeel Butt, Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu,
linux-kernel
Cc: llvm, oe-kbuild-all, Linux Memory Management List, Hui Zhu
In-Reply-To: <20260625053958.918738-1-hui.zhu@linux.dev>
Hi Hui,
kernel test robot noticed the following build warnings:
[auto build test WARNING on akpm-mm/mm-everything]
url: https://github.com/intel-lab-lkp/linux/commits/Hui-Zhu/mm-assert-exclusive-nid-zonenum-bits-at-the-page-folio-access-sites/20260625-134106
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20260625053958.918738-1-hui.zhu%40linux.dev
patch subject: [PATCH v4] mm: assert exclusive nid/zonenum bits at the page/folio access sites
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20260625/202606251454.M74ab4Rw-lkp@intel.com/config)
compiler: clang version 23.0.0git (https://github.com/llvm/llvm-project 6cc609bb250b21b47fc7d394b4019101e9983597)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260625/202606251454.M74ab4Rw-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202606251454.M74ab4Rw-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> kernel/fork.c:258:18: warning: unused variable 'page' [-Wunused-variable]
258 | struct page *page = vm_area->pages[i];
| ^~~~
1 warning generated.
vim +/page +258 kernel/fork.c
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 240
449e0b4ed5a16c7 Pasha Tatashin 2025-05-09 241 static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 242 {
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 243 unsigned int i;
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 244 int nid;
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 245
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 246 /*
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 247 * Don't cache stacks if any of the pages don't match the local domain, unless
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 248 * there is no local memory to begin with.
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 249 *
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 250 * Note that lack of local memory does not automatically mean it makes no difference
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 251 * performance-wise which other domain backs the stack. In this case we are merely
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 252 * trying to avoid constantly going to vmalloc.
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 253 */
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 254 scoped_guard(preempt) {
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 255 nid = numa_node_id();
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 256 if (node_state(nid, N_MEMORY)) {
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 257 for (i = 0; i < vm_area->nr_pages; i++) {
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 @258 struct page *page = vm_area->pages[i];
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 259 if (page_to_nid(page) != nid)
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 260 return false;
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 261 }
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 262 }
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 263
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 264 for (i = 0; i < NR_CACHED_STACKS; i++) {
47e39c793367600 Uros Bizjak 2024-05-23 265 struct vm_struct *tmp = NULL;
47e39c793367600 Uros Bizjak 2024-05-23 266
449e0b4ed5a16c7 Pasha Tatashin 2025-05-09 267 if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area))
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 268 return true;
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 269 }
262ef8e55b7ccd4 Mateusz Guzik 2025-11-20 270 }
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 271 return false;
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 272 }
e540bf3162e822d Sebastian Andrzej Siewior 2022-02-17 273
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply
* Re: [PATCH] btrfs: wait for ordered extents before buffered write fallback in direct IO
From: Qu Wenruo @ 2026-06-25 6:51 UTC (permalink / raw)
To: Zhou, Yun, clm, dsterba; +Cc: linux-btrfs, linux-kernel
In-Reply-To: <f192c12e-135f-4f98-a1a0-df2470eb8a86@windriver.com>
在 2026/6/25 16:16, Zhou, Yun 写道:
>
>
> On 6/25/26 13:17, Qu Wenruo wrote:
>>
>> 在 2026/6/25 14:43, Qu Wenruo 写道:
>>>
>>>
>>> 在 2026/6/25 11:44, Yun Zhou 写道:
>>>> When btrfs_direct_write() falls back to buffered IO after a failed DIO
>>>> attempt, it may race with the asynchronous completion of DIO ordered
>>>> extents. This leads to a BUG_ON in insert_ordered_extent() due to
>>>> overlapping ordered extents in the per-inode rb-tree.
>>>>
>>>> The race sequence is:
>>>> 1. DIO creates an ordered extent via btrfs_dio_iomap_begin()
>>>> 2. Page fault occurs (nofault=true), no bio is submitted
>>>> (submitted=0)
>>>> 3. btrfs_dio_iomap_end() truncates and finishes the OE asynchronously
>>>> via btrfs_finish_ordered_extent() which queues work
>>>> 4. iomap returns 0, retry logic faults in pages and retries DIO
>>>> 5. Second DIO attempt also fails, code reaches buffered: label
>>>> 6. btrfs_buffered_write() dirties pages for the same range
>>>
>>> btrfs_buffered_write()
>>> |- copy_one_range()
>>> |- lock_and_cleanup_extent_if_need()
>>> |- btrfs_start_ordered_extent()
>>>
>>> So your explanation doesn't make sense: if there is a direct IO OE
>>> remaining, we will wait for that OE to complete.
>>>
>>> There is still something missing.
>>>
>>>> 7. btrfs_fdatawrite_range() triggers writeback
>>>> 8. run_delalloc_nocow() -> fallback_to_cow() -> cow_file_range()
>>>> tries to insert a new ordered extent for the same file offset
>>>> 9. The DIO ordered extent hasn't been removed from the rb-tree yet
>>>> (btrfs_finish_ordered_io running async in workqueue) -> BUG_ON
>>>>
>>>> Fix this by waiting for any pending ordered extents in the target range
>>>> before starting the buffered write.
>>>>
>>>> Reported-by: syzbot+ba2afde329fc27e3f22e@syzkaller.appspotmail.com
>>>> Closes: https://syzkaller.appspot.com/bug?extid=ba2afde329fc27e3f22e
>>>> Fixes: acf9ed3a6c00 ("btrfs: retry faulting in the pages after a zero
>>>> sized short direct write")
>>
>> And the fixes tag is also incorrect.
>>
>> Without that commit, we would fall back directly to buffered write
>> without retrying to fault in the pages.
>>
>> So by your explanation it will trigger the same problem, with or without
>> that commit.
>
> Yes, my previous analysis does seem inaccurate. Commit acf9ed3a6c00 (which
> added retries) merely amplified the window for the issue to occur, but
> the problem has actually existed since ff66fe666233 ('btrfs: fix
> incorrect buffered IO fallback for append direct writes'), which
> introduced i_size revert on DIO short writes, causing
> lock_and_cleanup_extent_if_need() to skip the OE check (since start_pos
> >= reverted i_size). I will correct the commit message and the Fixes
> tag in v2.
That's right, and I have already sent out a fix based on that i_size revert:
https://lore.kernel.org/linux-btrfs/aaabe0f2bee07800fb0490d24fab5cd162e5241d.1782366056.git.wqu@suse.com/
>
> Thanks,
> Yun
^ permalink raw reply
* [PATCH v5] ext4: fix ABBA deadlock in ext4_xattr_inode_cache_find()
From: Aditya Srivastava @ 2026-06-25 6:50 UTC (permalink / raw)
To: tytso, jack
Cc: adilger.kernel, libaokun, ritesh.list, yi.zhang, linux-ext4,
linux-kernel, Aditya Prakash Srivastava, Colin Ian King
From: Aditya Prakash Srivastava <aditya.ansh182@gmail.com>
Syzbot/stress-ng reported an ABBA deadlock in ext4 when exercising
concurrent xattr workloads (using the ea_inode mount/format option).
The deadlock occurs between the running transaction and the eviction
thread:
- Task 1 (stress-ng): Holds a reference to a shared mbcache_entry (ce)
and calls ext4_xattr_inode_cache_find() -> ext4_iget() to retrieve
the corresponding EA inode. Since the EA inode is currently being
evicted, ext4_iget() blocks in __wait_on_freeing_inode() waiting for
eviction to complete.
- Task 2 (eviction thread): Currently evicting the same EA inode in
ext4_evict_ea_inode(). It calls mb_cache_entry_wait_unused(oe) which
blocks waiting for Task 1 to release the reference to the mbcache_entry.
To break this deadlock, implement a new ext4_iget() configuration flag
named EXT4_IGET_NOWAIT. When set, perform a non-blocking lookup of the
inode via VFS's find_inode_nowait() API.
If the inode is currently being evicted (marked with I_FREEING or
I_WILL_FREE) or created (I_CREATING), or if it is not present in the VFS
inode cache (cache miss), simply skip it (returning -ESTALE) rather than
waiting for eviction/creation to complete, breaking the ABBA cycle.
Since we return -ESTALE immediately on a cache miss, we never attempt to
allocate a new inode or call iget_locked(), completely eliminating any
TOCTOU race window.
If the returned inode is I_NEW, wait for its initialization to clear via
wait_on_new_inode(). If initialization fails and the inode is unhashed
during wait_on_new_inode() waking up (e.g., due to an I/O read error in
another thread), safely drop the reference and return -ESTALE. This
unhashed check is executed unconditionally on all cache-hit pathways to
properly handle concurrent initialization failures.
Finally, standard validation checks (including is_bad_inode,
EXT4_EA_INODE_FL, file_acl, and xattr flags) are executed as normal inside
check_igot_inode() to fully guarantee VFS-layer safety.
In ext4_xattr_inode_cache_find(), invoke ext4_iget() with the new
EXT4_IGET_NOWAIT flag to perform the non-blocking cache search.
Suggested-by: Jan Kara <jack@suse.cz>
Reported-by: Colin Ian King <colin.i.king@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219283
Fixes: 0a46ef234756 ("ext4: do not create EA inode under buffer lock")
Signed-off-by: Aditya Prakash Srivastava <aditya.ansh182@gmail.com>
---
Changes in v5:
- Address two critical issues flagged by the Sashiko AI bot in v4:
1. Resolve the Time-Of-Check to Time-Of-Use (TOCTOU) race window between
find_inode_nowait() and iget_locked() by returning -ESTALE immediately
on a VFS cache miss. This completely bypasses fallback to iget_locked()
and prevents potential ABBA deadlocks.
2. Fix the improperly nested inode_unhashed() safety check by moving it
outside the I_NEW condition block, ensuring it runs unconditionally
on all cache-hit pathways to prevent false-positive filesystem
corruption errors during concurrent initialization failures.
Changes in v4:
- Check whether the inode has been unhashed by the time
wait_on_new_inode() returns, to handle transient initialization failures
(like I/O read errors) gracefully. Dropping the reference and returning
-ESTALE prevents false filesystem corruption errors (__ext4_error), as
found by the Sashiko AI bot.
Changes in v3:
- Implement a new ext4_iget() configuration flag named EXT4_IGET_NOWAIT to
fully contain the non-blocking lookup and VFS-level validations within
inode.c, as requested by Jan Kara.
- Skip inodes currently being created (I_CREATING), following Jan Kara's
direct feedback.
- Remove all open-coded match helpers and VFS state-checks from xattr.c.
Changes in v2:
- Read inode state locklessly using inode_state_read_once() to resolve
a lockdep assertion on cache hit.
- Manually restore essential inode/ea_inode validations on the retrieved
inode (is_bad_inode, EXT4_EA_INODE_FL, file_acl, and xattr checks) to
match VFS safety guarantees and prevent using corrupted/failed inodes.
fs/ext4/ext4.h | 3 ++-
fs/ext4/inode.c | 41 ++++++++++++++++++++++++++++++++++++++---
fs/ext4/xattr.c | 2 +-
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b37c136ea3ab..c76dd0bdd3d8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3144,7 +3144,8 @@ typedef enum {
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */
- EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */
+ EXT4_IGET_EA_INODE = 0x0008, /* Inode should contain an EA value */
+ EXT4_IGET_NOWAIT = 0x0010 /* Non-blocking lookup (skip if freeing) */
} ext4_iget_flags;
extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce99807c5f5b..f6b681320358 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5270,6 +5270,24 @@ void ext4_set_inode_mapping_order(struct inode *inode)
mapping_set_folio_order_range(inode->i_mapping, min_order, max_order);
}
+static int ext4_iget_match(struct inode *inode, u64 ino, void *data)
+{
+ bool *is_freeing = data;
+
+ if (inode->i_ino != ino)
+ return 0;
+ spin_lock(&inode->i_lock);
+ if (inode_state_read(inode) & (I_FREEING | I_WILL_FREE | I_CREATING)) {
+ if (is_freeing)
+ *is_freeing = true;
+ spin_unlock(&inode->i_lock);
+ return -1;
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ return 1;
+}
+
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
@@ -5298,9 +5316,26 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
return ERR_PTR(-EFSCORRUPTED);
}
- inode = iget_locked(sb, ino);
- if (!inode)
- return ERR_PTR(-ENOMEM);
+ if (flags & EXT4_IGET_NOWAIT) {
+ bool is_freeing = false;
+
+ inode = find_inode_nowait(sb, ino, ext4_iget_match, &is_freeing);
+ if (is_freeing || !inode)
+ return ERR_PTR(-ESTALE);
+
+ if (inode_state_read_once(inode) & I_NEW)
+ wait_on_new_inode(inode);
+
+ if (unlikely(inode_unhashed(inode))) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ } else {
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ }
+
if (!(inode_state_read_once(inode) & I_NEW)) {
ret = check_igot_inode(inode, flags, function, line);
if (ret) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 982a1f831e22..21b5670d8503 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1550,7 +1550,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
while (ce) {
ea_inode = ext4_iget(inode->i_sb, ce->e_value,
- EXT4_IGET_EA_INODE);
+ EXT4_IGET_EA_INODE | EXT4_IGET_NOWAIT);
if (IS_ERR(ea_inode))
goto next_entry;
ext4_xattr_inode_set_class(ea_inode);
--
2.47.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.