linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Harry Yoo <harry.yoo@oracle.com>
To: Raghavendra K T <raghavendra.kt@amd.com>
Cc: AneeshKumar.KizhakeVeetil@arm.com, Hasan.Maruf@amd.com,
	Michael.Day@amd.com, akpm@linux-foundation.org, bharata@amd.com,
	dave.hansen@intel.com, david@redhat.com,
	dongjoo.linux.dev@gmail.com, feng.tang@intel.com,
	gourry@gourry.net, hannes@cmpxchg.org, honggyu.kim@sk.com,
	hughd@google.com, jhubbard@nvidia.com, jon.grimm@amd.com,
	k.shutemov@gmail.com, kbusch@meta.com, kmanaouil.dev@gmail.com,
	leesuyeon0506@gmail.com, leillc@google.com,
	liam.howlett@oracle.com, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, mgorman@techsingularity.net,
	mingo@redhat.com, nadav.amit@gmail.com, nphamcs@gmail.com,
	peterz@infradead.org, riel@surriel.com, rientjes@google.com,
	rppt@kernel.org, santosh.shukla@amd.com, shivankg@amd.com,
	shy828301@gmail.com, sj@kernel.org, vbabka@suse.cz,
	weixugc@google.com, willy@infradead.org,
	ying.huang@linux.alibaba.com, ziy@nvidia.com,
	Jonathan.Cameron@huawei.com, dave@stgolabs.net,
	yuanchu@google.com, kinseyho@google.com, hdanton@sina.com
Subject: Re: [RFC PATCH V2 03/13] mm: Scan the mm and create a migration list
Date: Thu, 26 Jun 2025 07:07:12 +0900	[thread overview]
Message-ID: <aFxzEFhTDOyL4y0e@hyeyoo> (raw)
In-Reply-To: <20250624055617.1291159-4-raghavendra.kt@amd.com>

On Tue, Jun 24, 2025 at 05:56:07AM +0000, Raghavendra K T wrote:
> Since we already have the list of mm_struct in the system, add a module to
> scan each mm that walks VMAs of each mm_struct and scan all the pages
> associated with that.
> 
>  In the scan path: Check for the recently acccessed pages (folios) belonging
> to slowtier nodes. Add all those folios to a list.
> 
> Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
> ---

Hi, just taking a quick look...

>  mm/kscand.c | 319 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 318 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/kscand.c b/mm/kscand.c
> index d5b0d3041b0f..0edec1b7730d 100644
> --- a/mm/kscand.c
> +++ b/mm/kscand.c
> @@ -42,6 +55,8 @@ static struct kmem_cache *kscand_slot_cache __read_mostly;
> @@ -84,11 +122,275 @@ static void kscand_wait_work(void)
>  			scan_sleep_jiffies);
>  }
>  
> +static inline bool is_valid_folio(struct folio *folio)
> +{
> +	if (!folio || folio_test_unevictable(folio) || !folio_mapped(folio) ||
> +		folio_is_zone_device(folio) || folio_maybe_mapped_shared(folio))
> +		return false;
> +
> +	return true;
> +}

What makes it undesirable to migrate shared folios?

> +static bool folio_idle_clear_pte_refs_one(struct folio *folio,
> +					 struct vm_area_struct *vma,
> +					 unsigned long addr,
> +					 pte_t *ptep)
> +{
> +	bool referenced = false;
> +	struct mm_struct *mm = vma->vm_mm;
> +	pmd_t *pmd = pmd_off(mm, addr);
> +
> +	if (ptep) {
> +		if (ptep_clear_young_notify(vma, addr, ptep))
> +			referenced = true;
> +	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
> +		if (!pmd_present(*pmd))
> +			WARN_ON_ONCE(1);
> +		if (pmdp_clear_young_notify(vma, addr, pmd))
> +			referenced = true;
> +	} else {
> +		WARN_ON_ONCE(1);
> +	}

This does not look good.

I think pmd entry handling should be handled in
mm_walk_ops.pmd_entry callback?

> +
> +	if (referenced) {
> +		folio_clear_idle(folio);
> +		folio_set_young(folio);
> +	}
> +
> +	return true;
> +}
> +
> +static void page_idle_clear_pte_refs(struct page *page, pte_t *pte, struct mm_walk *walk)
> +{
> +	bool need_lock;
> +	struct folio *folio =  page_folio(page);
> +	unsigned long address;
> +
> +	if (!folio_mapped(folio) || !folio_raw_mapping(folio))
> +		return;
> +
> +	need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
> +	if (need_lock && !folio_trylock(folio))
> +		return;

Why acquire folio lock here?

And I'm not even sure if it's safe to acquire it?
The locking order is folio_lock -> pte_lock

page walk should have already acquired pte_lock before calling
->pte_entry() callback.

> +	address = vma_address(walk->vma, page_pgoff(folio, page), compound_nr(page));
> +	VM_BUG_ON_VMA(address == -EFAULT, walk->vma);
> +	folio_idle_clear_pte_refs_one(folio, walk->vma, address, pte);
> +
> +	if (need_lock)
> +		folio_unlock(folio);
> +}
> +
> +static const struct mm_walk_ops hot_vma_set_idle_ops = {
> +	.pte_entry = hot_vma_idle_pte_entry,
> +	.walk_lock = PGWALK_RDLOCK,
> +};
> +
> +static void kscand_walk_page_vma(struct vm_area_struct *vma, struct kscand_scanctrl *scanctrl)
> +{
> +	if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
> +	    is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
> +		return;
> +	}
> +	if (!vma->vm_mm ||
> +	    (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
> +		return;

Why not walk writable file VMAs?

> +	if (!vma_is_accessible(vma))
> +		return;
> +
> +	walk_page_vma(vma, &hot_vma_set_idle_ops, scanctrl);
> +}

-- 
Cheers,
Harry / Hyeonggon


  reply	other threads:[~2025-06-25 22:08 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-24  5:56 [RFC PATCH V2 00/13] mm: slowtier page promotion based on PTE A bit Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 01/13] mm: Add kscand kthread for PTE A bit scan Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 02/13] mm: Maintain mm_struct list in the system Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 03/13] mm: Scan the mm and create a migration list Raghavendra K T
2025-06-25 22:07   ` Harry Yoo [this message]
2025-06-25 23:05     ` Harry Yoo
2025-06-26  6:27     ` Raghavendra K T
2025-07-08 11:17       ` Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 04/13] mm: Create a separate kthread for migration Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 05/13] mm/migration: Migrate accessed folios to toptier node Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 06/13] mm: Add throttling of mm scanning using scan_period Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 07/13] mm: Add throttling of mm scanning using scan_size Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 08/13] mm: Add initial scan delay Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 09/13] mm: Add a heuristic to calculate target node Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 10/13] sysfs: Add sysfs support to tune scanning Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 11/13] vmstat: Add vmstat counters Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 12/13] trace/kscand: Add tracing of scanning and migration Raghavendra K T
2025-06-24  7:09   ` Masami Hiramatsu
2025-06-24  7:50     ` Raghavendra K T
2025-06-24  5:56 ` [RFC PATCH V2 13/13] prctl: Introduce new prctl to control scanning Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aFxzEFhTDOyL4y0e@hyeyoo \
    --to=harry.yoo@oracle.com \
    --cc=AneeshKumar.KizhakeVeetil@arm.com \
    --cc=Hasan.Maruf@amd.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=Michael.Day@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=bharata@amd.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dongjoo.linux.dev@gmail.com \
    --cc=feng.tang@intel.com \
    --cc=gourry@gourry.net \
    --cc=hannes@cmpxchg.org \
    --cc=hdanton@sina.com \
    --cc=honggyu.kim@sk.com \
    --cc=hughd@google.com \
    --cc=jhubbard@nvidia.com \
    --cc=jon.grimm@amd.com \
    --cc=k.shutemov@gmail.com \
    --cc=kbusch@meta.com \
    --cc=kinseyho@google.com \
    --cc=kmanaouil.dev@gmail.com \
    --cc=leesuyeon0506@gmail.com \
    --cc=leillc@google.com \
    --cc=liam.howlett@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@redhat.com \
    --cc=nadav.amit@gmail.com \
    --cc=nphamcs@gmail.com \
    --cc=peterz@infradead.org \
    --cc=raghavendra.kt@amd.com \
    --cc=riel@surriel.com \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=santosh.shukla@amd.com \
    --cc=shivankg@amd.com \
    --cc=shy828301@gmail.com \
    --cc=sj@kernel.org \
    --cc=vbabka@suse.cz \
    --cc=weixugc@google.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yuanchu@google.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).