From: Pratyush Yadav <pratyush@kernel.org>
To: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: pratyush@kernel.org, jasonmiu@google.com, graf@amazon.com,
changyuanl@google.com, rppt@kernel.org, dmatlack@google.com,
rientjes@google.com, corbet@lwn.net, rdunlap@infradead.org,
ilpo.jarvinen@linux.intel.com, kanie@linux.alibaba.com,
ojeda@kernel.org, aliceryhl@google.com, masahiroy@kernel.org,
akpm@linux-foundation.org, tj@kernel.org,
yoann.congal@smile.fr, mmaurer@google.com,
roman.gushchin@linux.dev, chenridong@huawei.com,
axboe@kernel.dk, mark.rutland@arm.com, jannh@google.com,
vincent.guittot@linaro.org, hannes@cmpxchg.org,
dan.j.williams@intel.com, david@redhat.com,
joel.granados@kernel.org, rostedt@goodmis.org,
anna.schumaker@oracle.com, song@kernel.org,
zhangguopeng@kylinos.cn, linux@weissschuh.net,
linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-mm@kvack.org, gregkh@linuxfoundation.org,
tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
rafael@kernel.org, dakr@kernel.org,
bartosz.golaszewski@linaro.org, cw00.choi@samsung.com,
myungjoo.ham@samsung.com, yesanishhere@gmail.com,
Jonathan.Cameron@huawei.com, quic_zijuhu@quicinc.com,
aleksander.lobakin@intel.com, ira.weiny@intel.com,
andriy.shevchenko@linux.intel.com, leon@kernel.org,
lukas@wunner.de, bhelgaas@google.com, wagi@kernel.org,
djeffery@redhat.com, stuart.w.hayes@gmail.com
Subject: Re: [RFC v2 03/16] kho: add kho_unpreserve_folio/phys
Date: Wed, 04 Jun 2025 17:00:22 +0200 [thread overview]
Message-ID: <mafs05xhbv9fd.fsf@kernel.org> (raw)
In-Reply-To: <20250515182322.117840-4-pasha.tatashin@soleen.com>
On Thu, May 15 2025, Pasha Tatashin wrote:
> From: Changyuan Lyu <changyuanl@google.com>
>
> Allow users of KHO to cancel the previous preservation by adding the
> necessary interfaces to unpreserve folio.
>
> Signed-off-by: Changyuan Lyu <changyuanl@google.com>
> Co-developed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
> include/linux/kexec_handover.h | 12 +++++
> kernel/kexec_handover.c | 84 ++++++++++++++++++++++++++++------
> 2 files changed, 83 insertions(+), 13 deletions(-)
>
[...]
> diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
> index 8ff561e36a87..eb305e7e6129 100644
> --- a/kernel/kexec_handover.c
> +++ b/kernel/kexec_handover.c
> @@ -101,26 +101,33 @@ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
> return elm;
> }
>
> -static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
> - unsigned long end_pfn)
> +static void __kho_unpreserve_order(struct kho_mem_track *track, unsigned long pfn,
> + unsigned int order)
> {
> struct kho_mem_phys_bits *bits;
> struct kho_mem_phys *physxa;
> + const unsigned long pfn_high = pfn >> order;
>
> - while (pfn < end_pfn) {
> - const unsigned int order =
> - min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
> - const unsigned long pfn_high = pfn >> order;
> + physxa = xa_load(&track->orders, order);
> + if (!physxa)
> + return;
>
> - physxa = xa_load(&track->orders, order);
> - if (!physxa)
> - continue;
> + bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
> + if (!bits)
> + return;
>
> - bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
> - if (!bits)
> - continue;
> + clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
> +}
>
> - clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
> +static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
> + unsigned long end_pfn)
> +{
> + unsigned int order;
> +
> + while (pfn < end_pfn) {
> + order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
This is fragile. If the preserve call spans, say, 4 PFNs, then it gets
preserved as an order-2 allocation, but if the PFNs are unpreserved
one by one, __kho_unpreserve_order() will unpreserve from the order-0
xarray, which will end up doing nothing, leaking those pages.
It should either look through all orders to find the PFN, or at least
the API should require that unpreserve be given the same phys and size
combination as the corresponding preserve call.
> +
> + __kho_unpreserve_order(track, pfn, order);
>
> pfn += 1 << order;
> }
> @@ -607,6 +614,29 @@ int kho_preserve_folio(struct folio *folio)
> }
> EXPORT_SYMBOL_GPL(kho_preserve_folio);
>
> +/**
> + * kho_unpreserve_folio - unpreserve a folio.
> + * @folio: folio to unpreserve.
> + *
> + * Instructs KHO to unpreserve a folio that was preserved by
> + * kho_preserve_folio() before.
> + *
> + * Return: 0 on success, error code on failure
> + */
> +int kho_unpreserve_folio(struct folio *folio)
> +{
> + const unsigned long pfn = folio_pfn(folio);
> + const unsigned int order = folio_order(folio);
> + struct kho_mem_track *track = &kho_out.ser.track;
> +
> + if (kho_out.finalized)
> + return -EBUSY;
> +
> + __kho_unpreserve_order(track, pfn, order);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kho_unpreserve_folio);
> +
> /**
> * kho_preserve_phys - preserve a physically contiguous range across kexec.
> * @phys: physical address of the range.
> @@ -652,6 +682,34 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
> }
> EXPORT_SYMBOL_GPL(kho_preserve_phys);
>
> +/**
> + * kho_unpreserve_phys - unpreserve a physically contiguous range across kexec.
> + * @phys: physical address of the range.
> + * @size: size of the range.
> + *
> + * Instructs KHO to unpreserve the memory range from @phys to @phys + @size
> + * across kexec.
> + *
> + * Return: 0 on success, error code on failure
> + */
> +int kho_unpreserve_phys(phys_addr_t phys, size_t size)
> +{
> + struct kho_mem_track *track = &kho_out.ser.track;
> + unsigned long pfn = PHYS_PFN(phys);
> + unsigned long end_pfn = PHYS_PFN(phys + size);
> +
> + if (kho_out.finalized)
> + return -EBUSY;
> +
> + if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
> + return -EINVAL;
> +
> + __kho_unpreserve(track, pfn, end_pfn);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kho_unpreserve_phys);
> +
> int __kho_abort(void)
> {
> int err;
--
Regards,
Pratyush Yadav
next prev parent reply other threads:[~2025-06-04 15:00 UTC|newest]
Thread overview: 102+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-15 18:23 [RFC v2 00/16] Live Update Orchestrator Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 01/16] kho: make debugfs interface optional Pasha Tatashin
2025-06-04 16:03 ` Pratyush Yadav
2025-06-06 16:12 ` Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 02/16] kho: allow to drive kho from within kernel Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 03/16] kho: add kho_unpreserve_folio/phys Pasha Tatashin
2025-06-04 15:00 ` Pratyush Yadav [this message]
2025-06-06 16:22 ` Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 04/16] luo: luo_core: Live Update Orchestrator Pasha Tatashin
2025-05-26 6:31 ` Mike Rapoport
2025-05-30 5:00 ` Pasha Tatashin
2025-06-04 15:17 ` Pratyush Yadav
2025-06-07 17:11 ` Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 05/16] luo: luo_core: integrate with KHO Pasha Tatashin
2025-05-26 7:18 ` Mike Rapoport
2025-06-07 17:50 ` Pasha Tatashin
2025-06-09 2:14 ` Pasha Tatashin
2025-06-04 16:00 ` Pratyush Yadav
2025-06-07 23:30 ` Pasha Tatashin
2025-06-13 14:58 ` Pratyush Yadav
2025-06-17 15:23 ` Jason Gunthorpe
2025-06-17 19:32 ` Pasha Tatashin
2025-06-18 13:11 ` Pratyush Yadav
2025-06-18 14:48 ` Pasha Tatashin
2025-06-18 16:40 ` Mike Rapoport
2025-06-18 17:00 ` Pasha Tatashin
2025-06-18 17:43 ` Pasha Tatashin
2025-06-19 12:00 ` Mike Rapoport
2025-06-19 14:22 ` Pasha Tatashin
2025-06-20 15:28 ` Pratyush Yadav
2025-06-20 16:03 ` Pasha Tatashin
2025-06-24 16:12 ` Pratyush Yadav
2025-06-24 16:55 ` Pasha Tatashin
2025-06-24 18:31 ` Jason Gunthorpe
2025-06-23 7:32 ` Mike Rapoport
2025-06-23 11:29 ` Pasha Tatashin
2025-06-25 13:46 ` Mike Rapoport
2025-05-15 18:23 ` [RFC v2 06/16] luo: luo_subsystems: add subsystem registration Pasha Tatashin
2025-05-26 7:31 ` Mike Rapoport
2025-06-07 23:42 ` Pasha Tatashin
2025-05-28 19:12 ` David Matlack
2025-06-07 23:58 ` Pasha Tatashin
2025-06-04 16:30 ` Pratyush Yadav
2025-06-08 0:04 ` Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 07/16] luo: luo_subsystems: implement subsystem callbacks Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 08/16] luo: luo_files: add infrastructure for FDs Pasha Tatashin
2025-05-15 23:15 ` James Houghton
2025-05-23 18:09 ` Pasha Tatashin
2025-05-26 7:55 ` Mike Rapoport
2025-06-05 11:56 ` Pratyush Yadav
2025-06-08 13:13 ` Pasha Tatashin
2025-06-05 15:56 ` Pratyush Yadav
2025-06-08 13:37 ` Pasha Tatashin
2025-06-13 15:27 ` Pratyush Yadav
2025-06-15 18:02 ` Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 09/16] luo: luo_files: implement file systems callbacks Pasha Tatashin
2025-06-05 16:03 ` Pratyush Yadav
2025-06-08 13:49 ` Pasha Tatashin
2025-06-13 15:18 ` Pratyush Yadav
2025-06-13 20:26 ` Pasha Tatashin
2025-06-16 10:43 ` Pratyush Yadav
2025-06-16 14:57 ` Pasha Tatashin
2025-06-18 13:16 ` Pratyush Yadav
2025-05-15 18:23 ` [RFC v2 10/16] luo: luo_ioctl: add ioctl interface Pasha Tatashin
2025-05-26 8:42 ` Mike Rapoport
2025-06-08 15:08 ` Pasha Tatashin
2025-05-28 20:29 ` David Matlack
2025-06-08 16:32 ` Pasha Tatashin
2025-06-05 16:15 ` Pratyush Yadav
2025-06-08 16:35 ` Pasha Tatashin
2025-06-24 9:50 ` Christian Brauner
2025-06-24 14:27 ` Pasha Tatashin
2025-06-25 9:36 ` Christian Brauner
2025-06-25 16:12 ` David Matlack
2025-06-26 15:42 ` Pratyush Yadav
2025-06-26 16:24 ` David Matlack
2025-07-14 14:56 ` Pratyush Yadav
2025-07-17 16:17 ` David Matlack
2025-07-23 14:51 ` Pratyush Yadav
2025-07-06 14:33 ` Mike Rapoport
2025-07-07 12:56 ` Jason Gunthorpe
2025-06-25 16:58 ` pasha.tatashin
2025-07-06 14:24 ` Mike Rapoport
2025-07-09 21:27 ` Pratyush Yadav
2025-07-10 7:26 ` Mike Rapoport
2025-07-14 14:34 ` Jason Gunthorpe
2025-07-16 9:43 ` Greg KH
2025-05-15 18:23 ` [RFC v2 11/16] luo: luo_sysfs: add sysfs state monitoring Pasha Tatashin
2025-06-05 16:20 ` Pratyush Yadav
2025-06-08 16:36 ` Pasha Tatashin
2025-06-13 15:13 ` Pratyush Yadav
2025-05-15 18:23 ` [RFC v2 12/16] reboot: call liveupdate_reboot() before kexec Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 13/16] luo: add selftests for subsystems un/registration Pasha Tatashin
2025-05-26 8:52 ` Mike Rapoport
2025-06-08 16:47 ` Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 14/16] selftests/liveupdate: add subsystem/state tests Pasha Tatashin
2025-05-15 18:23 ` [RFC v2 15/16] docs: add luo documentation Pasha Tatashin
2025-05-26 9:00 ` Mike Rapoport
2025-05-15 18:23 ` [RFC v2 16/16] MAINTAINERS: add liveupdate entry Pasha Tatashin
2025-05-20 7:25 ` [RFC v2 00/16] Live Update Orchestrator Mike Rapoport
2025-05-23 18:07 ` Pasha Tatashin
2025-05-26 6:32 ` Mike Rapoport
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mafs05xhbv9fd.fsf@kernel.org \
--to=pratyush@kernel.org \
--cc=Jonathan.Cameron@huawei.com \
--cc=akpm@linux-foundation.org \
--cc=aleksander.lobakin@intel.com \
--cc=aliceryhl@google.com \
--cc=andriy.shevchenko@linux.intel.com \
--cc=anna.schumaker@oracle.com \
--cc=axboe@kernel.dk \
--cc=bartosz.golaszewski@linaro.org \
--cc=bhelgaas@google.com \
--cc=bp@alien8.de \
--cc=changyuanl@google.com \
--cc=chenridong@huawei.com \
--cc=corbet@lwn.net \
--cc=cw00.choi@samsung.com \
--cc=dakr@kernel.org \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=djeffery@redhat.com \
--cc=dmatlack@google.com \
--cc=graf@amazon.com \
--cc=gregkh@linuxfoundation.org \
--cc=hannes@cmpxchg.org \
--cc=hpa@zytor.com \
--cc=ilpo.jarvinen@linux.intel.com \
--cc=ira.weiny@intel.com \
--cc=jannh@google.com \
--cc=jasonmiu@google.com \
--cc=joel.granados@kernel.org \
--cc=kanie@linux.alibaba.com \
--cc=leon@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux@weissschuh.net \
--cc=lukas@wunner.de \
--cc=mark.rutland@arm.com \
--cc=masahiroy@kernel.org \
--cc=mingo@redhat.com \
--cc=mmaurer@google.com \
--cc=myungjoo.ham@samsung.com \
--cc=ojeda@kernel.org \
--cc=pasha.tatashin@soleen.com \
--cc=quic_zijuhu@quicinc.com \
--cc=rafael@kernel.org \
--cc=rdunlap@infradead.org \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=rostedt@goodmis.org \
--cc=rppt@kernel.org \
--cc=song@kernel.org \
--cc=stuart.w.hayes@gmail.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=wagi@kernel.org \
--cc=x86@kernel.org \
--cc=yesanishhere@gmail.com \
--cc=yoann.congal@smile.fr \
--cc=zhangguopeng@kylinos.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.