Linux Kernel Selftest development
 help / color / mirror / Atom feed
From: Vipin Sharma <vipinsh@google.com>
To: bhelgaas@google.com, alex.williamson@redhat.com,
	pasha.tatashin@soleen.com,  dmatlack@google.com, jgg@ziepe.ca,
	graf@amazon.com
Cc: pratyush@kernel.org, gregkh@linuxfoundation.org,
	chrisl@kernel.org,  rppt@kernel.org, skhawaja@google.com,
	parav@nvidia.com, saeedm@nvidia.com,  kevin.tian@intel.com,
	jrhilke@google.com, david@redhat.com,  jgowans@amazon.com,
	dwmw2@infradead.org, epetron@amazon.de,  junaids@google.com,
	linux-kernel@vger.kernel.org, linux-pci@vger.kernel.org,
	 kvm@vger.kernel.org, linux-kselftest@vger.kernel.org,
	 Vipin Sharma <vipinsh@google.com>
Subject: [RFC PATCH 16/21] vfio/pci: Save and restore the PCI state of the VFIO device
Date: Fri, 17 Oct 2025 17:07:08 -0700	[thread overview]
Message-ID: <20251018000713.677779-17-vipinsh@google.com> (raw)
In-Reply-To: <20251018000713.677779-1-vipinsh@google.com>

Save and restore the PCI state of the VFIO device, which in the normal
flow is recorded by VFIO when the device FD is opened for the first time
and then reapplied to the PCI device when the last opened device FD is
closed.

Introduce "_ser" versions of struct pci_saved_state{} and struct
pci_cap_saved_data{} to serialize the saved PCI state for liveupdate.
Store the PCI state in a separate folio because its size is not known at
build time, so space for it cannot be reserved in struct
vfio_pci_core_device_ser{}.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
 drivers/vfio/pci/vfio_pci_core.c       |   9 +-
 drivers/vfio/pci/vfio_pci_liveupdate.c | 176 ++++++++++++++++++++++++-
 drivers/vfio/pci/vfio_pci_priv.h       |   8 +-
 3 files changed, 187 insertions(+), 6 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 186a669b68a4..44ea3ac8da16 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -487,7 +487,9 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 		goto out_power;
 
 	if (vdev->liveupdate_restore) {
-		vfio_pci_liveupdate_restore_device(vdev);
+		ret = vfio_pci_liveupdate_restore_device(vdev);
+		if (ret)
+			goto out_disable_device;
 	} else {
 		/* If reset fails because of the device lock, fail this path entirely */
 		ret = pci_try_reset_function(pdev);
@@ -495,10 +497,11 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
 			goto out_disable_device;
 
 		vdev->reset_works = !ret;
+
+		pci_save_state(pdev);
+		vdev->pci_saved_state = pci_store_saved_state(pdev);
 	}
 
-	pci_save_state(pdev);
-	vdev->pci_saved_state = pci_store_saved_state(pdev);
 	if (!vdev->pci_saved_state)
 		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
 
diff --git a/drivers/vfio/pci/vfio_pci_liveupdate.c b/drivers/vfio/pci/vfio_pci_liveupdate.c
index 82ff9f178fdc..caef023d007a 100644
--- a/drivers/vfio/pci/vfio_pci_liveupdate.c
+++ b/drivers/vfio/pci/vfio_pci_liveupdate.c
@@ -13,9 +13,22 @@
 #include <linux/anon_inodes.h>
 #include <linux/kexec_handover.h>
 #include <linux/file.h>
+#include <linux/pci.h>
 
 #include "vfio_pci_priv.h"
 
+struct pci_cap_saved_data_ser {
+	u16		cap_nr;
+	bool		cap_extended;
+	unsigned int	size;
+	u32		data[];
+} __packed;
+
+struct pci_saved_state_ser {
+	u32 config_space[16];
+	struct pci_cap_saved_data_ser cap[];
+} __packed;
+
 struct vfio_pci_core_device_ser {
 	u16 bdf;
 	u32 cfg_size;
@@ -23,6 +36,7 @@ struct vfio_pci_core_device_ser {
 	u8 vconfig[PCI_CFG_SPACE_EXP_SIZE];
 	u32 rbar[7];
 	u8 reset_works;
+	u64 pci_saved_state_phys;
 } __packed;
 
 static int vfio_pci_liveupdate_deserialize_config(struct vfio_pci_core_device *vdev,
@@ -51,12 +65,150 @@ static void vfio_pci_liveupdate_serialize_config(struct vfio_pci_core_device *vd
 	memcpy(ser->rbar, vdev->rbar, sizeof(vdev->rbar));
 }
 
+static size_t pci_saved_state_size(struct pci_saved_state *state)
+{
+	struct pci_cap_saved_data *cap;
+	size_t size;
+
+	/* One empty cap to denote end. */
+	size = sizeof(struct pci_saved_state) + sizeof(struct pci_cap_saved_data);
+
+	cap = state->cap;
+	while (cap->size) {
+		size_t len = sizeof(struct pci_cap_saved_data) + cap->size;
+
+		size += len;
+		cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
+	}
+
+	return size;
+}
+
+static size_t pci_saved_state_size_from_ser(struct pci_saved_state_ser *state)
+{
+	struct pci_cap_saved_data_ser *cap;
+	size_t size;
+
+	/* One empty cap to denote end. */
+	size = sizeof(struct pci_saved_state) + sizeof(struct pci_cap_saved_data);
+
+	cap = state->cap;
+	while (cap->size) {
+		size_t len = sizeof(struct pci_cap_saved_data) + cap->size;
+
+		size += len;
+		cap = (struct pci_cap_saved_data_ser *)((u8 *)cap + len);
+	}
+
+	return size;
+}
+
+static void serialize_pci_cap_saved_data(struct pci_saved_state *state,
+					 struct pci_saved_state_ser *state_ser)
+{
+	struct pci_cap_saved_data_ser *cap_ser = state_ser->cap;
+	struct pci_cap_saved_data *cap = state->cap;
+
+	while (cap->size) {
+		cap_ser->cap_nr = cap->cap_nr;
+		cap_ser->cap_extended = cap->cap_extended;
+		cap_ser->size = cap->size;
+		memcpy(cap_ser->data, cap->data, cap_ser->size);
+
+		cap = (void *)cap + sizeof(*cap) + cap->size;
+		cap_ser = (void *)cap_ser + sizeof(*cap_ser) + cap_ser->size;
+	}
+}
+
+static void deserialize_pci_cap_saved_data(struct pci_saved_state *state,
+					   struct pci_saved_state_ser *state_ser)
+{
+	struct pci_cap_saved_data_ser *cap_ser = state_ser->cap;
+	struct pci_cap_saved_data *cap = state->cap;
+
+	while (cap_ser->size) {
+		cap->cap_nr = cap_ser->cap_nr;
+		cap->cap_extended = cap_ser->cap_extended;
+		cap->size = cap_ser->size;
+		memcpy(cap->data, cap_ser->data, cap_ser->size);
+
+		cap = (void *)cap + sizeof(*cap) + cap->size;
+		cap_ser = (void *)cap_ser + sizeof(*cap_ser) + cap_ser->size;
+	}
+}
+
+static int serialize_pci_saved_state(struct vfio_pci_core_device *vdev,
+				     struct vfio_pci_core_device_ser *ser)
+{
+	struct pci_saved_state *state = vdev->pci_saved_state;
+	struct pci_saved_state_ser *state_ser;
+	struct folio *folio;
+	size_t size;
+	int ret;
+
+	if (!state)
+		return 0;
+
+	size = pci_saved_state_size(state);
+
+	folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, get_order(size));
+	if (!folio)
+		return -ENOMEM;
+
+	state_ser = folio_address(folio);
+
+	memcpy(state_ser->config_space, state->config_space,
+	       sizeof(state_ser->config_space));
+
+	serialize_pci_cap_saved_data(state, state_ser);
+
+	ret = kho_preserve_folio(folio);
+	if (ret) {
+		folio_put(folio);
+		return ret;
+	}
+
+	ser->pci_saved_state_phys = virt_to_phys(state_ser);
+
+	return 0;
+}
+
+static int deserialize_pci_saved_state(struct vfio_pci_core_device *vdev,
+				       struct vfio_pci_core_device_ser *ser)
+{
+	struct pci_saved_state_ser *state_ser;
+	struct pci_saved_state *state;
+	size_t size;
+
+	if (!ser->pci_saved_state_phys)
+		return 0;
+
+	state_ser = phys_to_virt(ser->pci_saved_state_phys);
+	size = pci_saved_state_size_from_ser(state_ser);
+	state = kzalloc(size, GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	memcpy(state->config_space, state_ser->config_space,
+	       sizeof(state_ser->config_space));
+
+	deserialize_pci_cap_saved_data(state, state_ser);
+	vdev->pci_saved_state = state;
+	return 0;
+}
+
 static int vfio_pci_lu_serialize(struct vfio_pci_core_device *vdev,
 				 struct vfio_pci_core_device_ser *ser)
 {
+	int err;
+
 	ser->bdf = pci_dev_id(vdev->pdev);
 	vfio_pci_liveupdate_serialize_config(vdev, ser);
 	ser->reset_works = vdev->reset_works;
+	err = serialize_pci_saved_state(vdev, ser);
+	if (err)
+		return err;
+
 	return 0;
 }
 
@@ -101,12 +253,18 @@ static void vfio_pci_liveupdate_cancel(struct liveupdate_file_handler *handler,
 {
 	struct vfio_pci_core_device_ser *ser = phys_to_virt(data);
 	struct folio *folio = virt_to_folio(ser);
+	struct folio *pci_saved_state_folio;
 	struct vfio_pci_core_device *vdev;
 	struct vfio_device *device;
 
 	device = vfio_device_from_file(file);
 	vdev = container_of(device, struct vfio_pci_core_device, vdev);
 	vdev->pdev->skip_kexec_clear_master = false;
+	if (ser->pci_saved_state_phys) {
+		pci_saved_state_folio = virt_to_folio(phys_to_virt(ser->pci_saved_state_phys));
+		WARN_ON_ONCE(kho_unpreserve_folio(pci_saved_state_folio));
+		folio_put(pci_saved_state_folio);
+	}
 	WARN_ON_ONCE(kho_unpreserve_folio(folio));
 	folio_put(folio);
 }
@@ -139,6 +297,9 @@ static void vfio_pci_liveupdate_finish(struct liveupdate_file_handler *handler,
 
 	ser = folio_address(folio);
 
+	if (!reclaimed && ser->pci_saved_state_phys)
+		kho_restore_folio(ser->pci_saved_state_phys);
+
 	device = vfio_find_device_in_cdev_class(&ser->bdf, match_bdf);
 	if (!device)
 		goto out_folio_put;
@@ -155,6 +316,8 @@ static void vfio_pci_liveupdate_finish(struct liveupdate_file_handler *handler,
 	put_device(&device->device);
 
 out_folio_put:
+	if (ser->pci_saved_state_phys)
+		folio_put(virt_to_folio(phys_to_virt(ser->pci_saved_state_phys)));
 	folio_put(folio);
 }
 
@@ -174,6 +337,11 @@ static int vfio_pci_liveupdate_retrieve(struct liveupdate_file_handler *handler,
 		return -ENOENT;
 
 	ser = folio_address(folio);
+	if (ser->pci_saved_state_phys) {
+		if (!kho_restore_folio(ser->pci_saved_state_phys))
+			return -ENOENT;
+	}
+
 	device = vfio_find_device_in_cdev_class(&ser->bdf, match_bdf);
 	if (!device)
 		return -ENODEV;
@@ -262,9 +430,15 @@ int vfio_pci_liveupdate_restore_config(struct vfio_pci_core_device *vdev)
 	return vfio_pci_liveupdate_deserialize_config(vdev, ser);
 }
 
-void vfio_pci_liveupdate_restore_device(struct vfio_pci_core_device *vdev)
+int vfio_pci_liveupdate_restore_device(struct vfio_pci_core_device *vdev)
 {
 	struct vfio_pci_core_device_ser *ser = vdev->liveupdate_restore;
+	int err;
+
+	err = deserialize_pci_saved_state(vdev, ser);
+	if (err)
+		return err;
 
 	vdev->reset_works = ser->reset_works;
+	return 0;
 }
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index ee1c7c229020..9d692e4d0cf7 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -110,14 +110,18 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
 #ifdef CONFIG_LIVEUPDATE
 void vfio_pci_liveupdate_init(void);
 int vfio_pci_liveupdate_restore_config(struct vfio_pci_core_device *vdev);
-void vfio_pci_liveupdate_restore_device(struct vfio_pci_core_device *vdev);
+int vfio_pci_liveupdate_restore_device(struct vfio_pci_core_device *vdev);
 #else
 static inline void vfio_pci_liveupdate_init(void) { }
 int vfio_pci_liveupdate_restore_config(struct vfio_pci_core_device *vdev)
 {
 	return -EINVAL;
 }
-void vfio_pci_liveupdate_restore_device(struct vfio_pci_core_device *vdev) { }
+int vfio_pci_liveupdate_restore_device(struct vfio_pci_core_device *vdev)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_LIVEUPDATE */
 
 #endif
-- 
2.51.0.858.gf9c4a03a3a-goog


  parent reply	other threads:[~2025-10-18  0:07 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-18  0:06 [RFC PATCH 00/21] VFIO live update support Vipin Sharma
2025-10-18  0:06 ` [RFC PATCH 01/21] selftests/liveupdate: Build tests from the selftests/liveupdate directory Vipin Sharma
2025-10-18  0:06 ` [RFC PATCH 02/21] selftests/liveupdate: Create library of core live update ioctls Vipin Sharma
2025-10-18  0:06 ` [RFC PATCH 03/21] selftests/liveupdate: Move do_kexec.sh script to liveupdate/lib Vipin Sharma
2025-10-18  0:06 ` [RFC PATCH 04/21] selftests/liveupdate: Move LUO ioctls calls to liveupdate library Vipin Sharma
2025-10-18  0:06 ` [RFC PATCH 05/21] vfio/pci: Register VFIO live update file handler to Live Update Orchestrator Vipin Sharma
2025-10-31 21:24   ` David Matlack
2025-10-31 22:28   ` David Matlack
2025-10-18  0:06 ` [RFC PATCH 06/21] vfio/pci: Accept live update preservation request for VFIO cdev Vipin Sharma
2025-10-27 20:44   ` Jacob Pan
2025-10-28 13:28     ` Jason Gunthorpe
2025-10-28 17:39       ` Jacob Pan
2025-10-29 16:21         ` Jason Gunthorpe
2025-10-30 23:10     ` David Matlack
2025-10-31  0:18       ` Pasha Tatashin
2025-10-31 21:41         ` David Matlack
2025-10-18  0:06 ` [RFC PATCH 07/21] vfio/pci: Store VFIO PCI device preservation data in KHO for live update Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 08/21] vfio/pci: Retrieve preserved VFIO device for Live Update Orechestrator Vipin Sharma
2025-10-31 23:12   ` David Matlack
2025-10-18  0:07 ` [RFC PATCH 09/21] vfio/pci: Add Live Update finish callback implementation Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 10/21] PCI: Add option to skip Bus Master Enable reset during kexec Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 11/21] vfio/pci: Skip clearing bus master on live update device " Vipin Sharma
2025-10-18  7:09   ` Lukas Wunner
2025-10-18 22:19     ` Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 12/21] vfio/pci: Skip clearing bus master on live update restored device Vipin Sharma
2025-10-20 21:29   ` David Matlack
2025-10-20 22:39     ` Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 13/21] vfio/pci: Preserve VFIO PCI config space through live update Vipin Sharma
2025-10-18 14:59   ` Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 14/21] vfio/pci: Skip device reset on live update restored device Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 15/21] PCI: Make PCI saved state and capability structs public Vipin Sharma
2025-10-18  7:17   ` Lukas Wunner
2025-10-18 22:36     ` Vipin Sharma
2025-10-18 23:11       ` Jason Gunthorpe
2025-10-20 23:49         ` Vipin Sharma
2025-10-22 17:45           ` David Matlack
2025-10-22 17:51             ` Jason Gunthorpe
2025-10-22 17:53           ` Jason Gunthorpe
2025-10-19  8:15       ` Lukas Wunner
2025-10-20 23:54         ` Vipin Sharma
2025-10-30 23:55         ` David Matlack
2025-10-31  0:06           ` David Matlack
2025-10-18  0:07 ` Vipin Sharma [this message]
2025-10-18  7:25   ` [RFC PATCH 16/21] vfio/pci: Save and restore the PCI state of the VFIO device Lukas Wunner
2025-10-18 22:44     ` Vipin Sharma
2025-10-18 15:02   ` Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 17/21] vfio/pci: Disable interrupts before going live update kexec Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 18/21] vfio: selftests: Build liveupdate library in VFIO selftests Vipin Sharma
2025-10-20 20:50   ` David Matlack
2025-10-20 23:55     ` Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 19/21] vfio: selftests: Initialize vfio_pci_device using a VFIO cdev FD Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 20/21] vfio: selftests: Add VFIO live update test Vipin Sharma
2025-10-18  0:07 ` [RFC PATCH 21/21] vfio: selftests: Validate vconfig preservation of VFIO PCI device during live update Vipin Sharma
2025-10-18 17:21 ` [RFC PATCH 00/21] VFIO live update support Jason Gunthorpe
2025-10-18 22:53   ` Vipin Sharma
2025-10-18 23:06     ` Jason Gunthorpe
2025-10-20 23:30       ` Vipin Sharma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251018000713.677779-17-vipinsh@google.com \
    --to=vipinsh@google.com \
    --cc=alex.williamson@redhat.com \
    --cc=bhelgaas@google.com \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=dmatlack@google.com \
    --cc=dwmw2@infradead.org \
    --cc=epetron@amazon.de \
    --cc=graf@amazon.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jgg@ziepe.ca \
    --cc=jgowans@amazon.com \
    --cc=jrhilke@google.com \
    --cc=junaids@google.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=parav@nvidia.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=pratyush@kernel.org \
    --cc=rppt@kernel.org \
    --cc=saeedm@nvidia.com \
    --cc=skhawaja@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox