All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev,
	Alejandro Jimenez <alejandro.j.jimenez@oracle.com>,
	Joao Martins <joao.m.martins@oracle.com>,
	Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>,
	Vasant Hegde <vasant.hegde@amd.com>,
	Joerg Roedel <joerg.roedel@amd.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.6 64/70] iommu/amd/pgtbl: Fix possible race while increase page table level
Date: Mon, 22 Sep 2025 21:30:04 +0200	[thread overview]
Message-ID: <20250922192406.321492658@linuxfoundation.org> (raw)
In-Reply-To: <20250922192404.455120315@linuxfoundation.org>

6.6-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Vasant Hegde <vasant.hegde@amd.com>

[ Upstream commit 1e56310b40fd2e7e0b9493da9ff488af145bdd0c ]

The AMD IOMMU host page table implementation supports dynamic page table levels
(up to 6 levels), starting with a 3-level configuration that expands based on
IOVA address. The kernel maintains a root pointer and current page table level
to enable proper page table walks in alloc_pte()/fetch_pte() operations.

The IOMMU IOVA allocator initially starts with 32-bit addresses and, once they
are exhausted, it switches to 64-bit addresses (the max address is determined
based on IOMMU and device DMA capability). To support larger IOVAs, the AMD
IOMMU driver increases the page table level.

But in the unmap path (iommu_v1_unmap_pages()), fetch_pte() reads
pgtable->[root/mode] without a lock. So it's possible that in an extreme corner
case, when increase_address_space() is updating pgtable->[root/mode], fetch_pte()
reads the wrong page table level (pgtable->mode). It does compare the value with
the level encoded in the page table and returns NULL. This will result in
iommu_unmap ops failing, and the upper layer may retry/log WARN_ON.

CPU 0                                         CPU 1
------                                       ------
map pages                                    unmap pages
alloc_pte() -> increase_address_space()      iommu_v1_unmap_pages() -> fetch_pte()
  pgtable->root = pte (new root value)
                                             READ pgtable->[mode/root]
					       Reads new root, old mode
  Updates mode (pgtable->mode += 1)

Since Page table level updates are infrequent and already synchronized with a
spinlock, implement seqcount to enable lock-free read operations on the read path.

Fixes: 754265bcab7 ("iommu/amd: Fix race in increase_address_space()")
Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Cc: stable@vger.kernel.org
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
[ Adapted pgtable->mode and pgtable->root to use domain->iop.mode and domain->iop.root ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/amd/amd_iommu_types.h |    1 +
 drivers/iommu/amd/io_pgtable.c      |   26 ++++++++++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -540,6 +540,7 @@ struct amd_irte_ops;
 	container_of((x), struct amd_io_pgtable, pgtbl_cfg)
 
 struct amd_io_pgtable {
+	seqcount_t		seqcount;	/* Protects root/mode update */
 	struct io_pgtable_cfg	pgtbl_cfg;
 	struct io_pgtable	iop;
 	int			mode;
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
+#include <linux/seqlock.h>
 
 #include <asm/barrier.h>
 
@@ -171,8 +172,11 @@ static bool increase_address_space(struc
 
 	*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
 
+	write_seqcount_begin(&domain->iop.seqcount);
 	domain->iop.root  = pte;
 	domain->iop.mode += 1;
+	write_seqcount_end(&domain->iop.seqcount);
+
 	amd_iommu_update_and_flush_device_table(domain);
 	amd_iommu_domain_flush_complete(domain);
 
@@ -199,6 +203,7 @@ static u64 *alloc_pte(struct protection_
 		      gfp_t gfp,
 		      bool *updated)
 {
+	unsigned int seqcount;
 	int level, end_lvl;
 	u64 *pte, *page;
 
@@ -214,8 +219,14 @@ static u64 *alloc_pte(struct protection_
 	}
 
 
-	level   = domain->iop.mode - 1;
-	pte     = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+	do {
+		seqcount = read_seqcount_begin(&domain->iop.seqcount);
+
+		level   = domain->iop.mode - 1;
+		pte     = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+	} while (read_seqcount_retry(&domain->iop.seqcount, seqcount));
+
+
 	address = PAGE_SIZE_ALIGN(address, page_size);
 	end_lvl = PAGE_SIZE_LEVEL(page_size);
 
@@ -292,6 +303,7 @@ static u64 *fetch_pte(struct amd_io_pgta
 		      unsigned long *page_size)
 {
 	int level;
+	unsigned int seqcount;
 	u64 *pte;
 
 	*page_size = 0;
@@ -299,8 +311,12 @@ static u64 *fetch_pte(struct amd_io_pgta
 	if (address > PM_LEVEL_SIZE(pgtable->mode))
 		return NULL;
 
-	level	   =  pgtable->mode - 1;
-	pte	   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+	do {
+		seqcount = read_seqcount_begin(&pgtable->seqcount);
+		level	   =  pgtable->mode - 1;
+		pte	   = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+	} while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
 	*page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
 	while (level > 0) {
@@ -524,6 +540,8 @@ static struct io_pgtable *v1_alloc_pgtab
 	cfg->oas            = IOMMU_OUT_ADDR_BIT_SIZE,
 	cfg->tlb            = &v1_flush_ops;
 
+	seqcount_init(&pgtable->seqcount);
+
 	pgtable->iop.ops.map_pages    = iommu_v1_map_pages;
 	pgtable->iop.ops.unmap_pages  = iommu_v1_unmap_pages;
 	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;



  parent reply	other threads:[~2025-09-22 19:36 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-22 19:29 [PATCH 6.6 00/70] 6.6.108-rc1 review Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 01/70] wifi: wilc1000: avoid buffer overflow in WID string configuration Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 02/70] ALSA: firewire-motu: drop EPOLLOUT from poll return values as write is not supported Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 03/70] wifi: mac80211: increase scan_ies_len for S1G Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 04/70] wifi: mac80211: fix incorrect type for ret Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 05/70] pcmcia: omap_cf: Mark driver struct with __refdata to prevent section mismatch Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 06/70] cgroup: split cgroup_destroy_wq into 3 workqueues Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 07/70] btrfs: fix invalid extref key setup when replaying dentry Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 08/70] um: virtio_uml: Fix use-after-free after put_device in probe Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 09/70] dpaa2-switch: fix buffer pool seeding for control traffic Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 10/70] qed: Dont collect too many protection override GRC elements Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 11/70] bonding: set random address only when slaves already exist Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 12/70] mptcp: set remote_deny_join_id0 on SYN recv Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 13/70] mptcp: tfo: record deny join id0 info Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 14/70] selftests: mptcp: sockopt: fix error messages Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 15/70] net: natsemi: fix `rx_dropped` double accounting on `netif_rx()` failure Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 16/70] i40e: remove redundant memory barrier when cleaning Tx descs Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 17/70] net/mlx5e: Consider aggregated port speed during rate configuration Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 18/70] net/mlx5e: Harden uplink netdev access against device unbind Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 19/70] bonding: dont set oif to bond dev when getting NS target destination Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 20/70] tcp: Clear tcp_sk(sk)->fastopen_rsk in tcp_disconnect() Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 21/70] tls: make sure to abort the stream if headers are bogus Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 22/70] Revert "net/mlx5e: Update and set Xon/Xoff upon port speed set" Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 23/70] net: liquidio: fix overflow in octeon_init_instr_queue() Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 24/70] cnic: Fix use-after-free bugs in cnic_delete_task Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 25/70] octeontx2-pf: Fix use-after-free bugs in otx2_sync_tstamp() Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 26/70] ksmbd: smbdirect: validate data_offset and data_length field of smb_direct_data_transfer Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 27/70] ksmbd: smbdirect: verify remaining_data_length respects max_fragmented_recv_size Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 28/70] nilfs2: fix CFI failure when accessing /sys/fs/nilfs2/features/* Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 29/70] crypto: af_alg - Disallow concurrent writes in af_alg_sendmsg Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 30/70] power: supply: bq27xxx: fix error return in case of no bq27000 hdq battery Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 31/70] power: supply: bq27xxx: restrict no-battery detection to bq27000 Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 32/70] LoongArch: Update help info of ARCH_STRICT_ALIGN Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 33/70] LoongArch: Align ACPI structures if ARCH_STRICT_ALIGN enabled Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 34/70] LoongArch: Check the return value when creating kobj Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 35/70] iommu/vt-d: Fix __domain_mapping()s usage of switch_to_super_page() Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 36/70] btrfs: tree-checker: fix the incorrect inode ref size check Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 37/70] ASoC: qcom: audioreach: Fix lpaif_type configuration for the I2S interface Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 38/70] ASoC: qcom: q6apm-lpass-dais: Fix NULL pointer dereference if source graph failed Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 39/70] ASoC: qcom: q6apm-lpass-dais: Fix missing set_fmt DAI op for I2S Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 40/70] mmc: mvsdio: Fix dma_unmap_sg() nents value Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 41/70] KVM: SVM: Sync TPR from LAPIC into VMCB::V_TPR even if AVIC is active Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 42/70] net: rfkill: gpio: Fix crash due to dereferencering uninitialized pointer Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 43/70] rds: ib: Increment i_fastreg_wrs before bailing out Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 44/70] selftests: mptcp: connect: catch IO errors on listen side Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 45/70] selftests: mptcp: avoid spurious errors on TCP disconnect Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 46/70] ALSA: hda/realtek: Fix mute led for HP Laptop 15-dw4xx Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 47/70] io_uring: backport io_should_terminate_tw() Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 48/70] io_uring: include dying ring in task_work "should cancel" state Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 49/70] ASoC: wm8940: Correct PLL rate rounding Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 50/70] ASoC: wm8940: Correct typo in control name Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 51/70] ASoC: wm8974: Correct PLL rate rounding Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 52/70] ASoC: SOF: Intel: hda-stream: Fix incorrect variable used in error message Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 53/70] drm: bridge: anx7625: Fix NULL pointer dereference with early IRQ Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 54/70] drm: bridge: cdns-mhdp8546: Fix missing mutex unlock on error path Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 55/70] crypto: af_alg - Set merge to zero early in af_alg_sendmsg Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 56/70] smb: client: fix smbdirect_recv_io leak in smbd_negotiate() error path Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 57/70] vmxnet3: unregister xdp rxq info in the reset path Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 58/70] mptcp: pm: nl: announce deny-join-id0 flag Greg Kroah-Hartman
2025-09-22 19:29 ` [PATCH 6.6 59/70] selftests: mptcp: userspace pm: validate " Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 60/70] phy: Use device_get_match_data() Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 61/70] phy: ti: omap-usb2: fix device leak at unbind Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 62/70] xhci: dbc: decouple endpoint allocation from initialization Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 63/70] xhci: dbc: Fix full DbC transfer ring after several reconnects Greg Kroah-Hartman
2025-09-22 19:30 ` Greg Kroah-Hartman [this message]
2025-09-22 19:30 ` [PATCH 6.6 65/70] rtc: pcf2127: fix SPI command byte for PCF2131 backport Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 66/70] mptcp: propagate shutdown to subflows when possible Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 67/70] minmax: avoid overly complicated constant expressions in VM code Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 68/70] minmax: simplify and clarify min_t()/max_t() implementation Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 69/70] minmax: add a few more MIN_T/MAX_T users Greg Kroah-Hartman
2025-09-22 19:30 ` [PATCH 6.6 70/70] Revert "loop: Avoid updating block size under exclusive owner" Greg Kroah-Hartman
2025-09-22 22:48 ` [PATCH 6.6 00/70] 6.6.108-rc1 review Florian Fainelli
2025-09-23  6:53 ` Naresh Kamboju
2025-09-23  7:26 ` Brett A C Sheffield
2025-09-23 10:16 ` [PATCH 6.6 00/70] " Peter Schneider
2025-09-23 13:10 ` Jon Hunter
2025-09-23 15:10 ` Ron Economos
2025-09-23 20:39 ` Miguel Ojeda
2025-09-24  0:29 ` Shuah Khan
2025-09-24  6:57 ` Hardik Garg
2025-09-24  8:39 ` Mark Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250922192406.321492658@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=alejandro.j.jimenez@oracle.com \
    --cc=joao.m.martins@oracle.com \
    --cc=joerg.roedel@amd.com \
    --cc=patches@lists.linux.dev \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=vasant.hegde@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.