From: Jan Kara <jack@suse.cz>
To: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Christian Brauner <brauner@kernel.org>,
	Carlos Maiolino <cem@kernel.org>,
	Chuck Lever <chuck.lever@oracle.com>, Jan Kara <jack@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Axel Rasmussen <axelrasmussen@google.com>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH 6/8] shmem: move memcg charge out of shmem_add_to_page_cache()
Date: Tue, 3 Oct 2023 15:28:42 +0200
Message-ID: <20231003132842.lxniwpknqfxan5px@quack3>
In-Reply-To: <4b2143c5-bf32-64f0-841-81a81158dac@google.com>

On Fri 29-09-23 20:31:27, Hugh Dickins wrote:
> Extract shmem's memcg charging out of shmem_add_to_page_cache(): it's
> misleadingly done there, because many calls are dealing with a swapcache
> page, whose memcg is nowadays always remembered while swapped out, then
> the charge re-levied when it's brought back into swapcache.
>
> Temporarily move it back up to the shmem_get_folio_gfp() level, where
> the memcg was charged before v5.8; but the next commit goes on to move
> it back down to a new home.
>
> In making this change, it becomes clear that shmem_swapin_folio() does
> not need to know the vma, just the fault mm (if any): call it fault_mm
> rather than charge_mm - let mem_cgroup_charge() decide whom to charge.
>
> Signed-off-by: Hugh Dickins <hughd@google.com>
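
To spell out the resulting fault-path flow for anyone skimming the thread
(a rough sketch based on the hunks below, not the exact code):

	/* charge is levied once, before insertion into the page cache */
	error = mem_cgroup_charge(folio, fault_mm, gfp);
	if (error) {
		if (folio_test_pmd_mappable(folio)) {
			count_vm_event(THP_FILE_FALLBACK);
			count_vm_event(THP_FILE_FALLBACK_CHARGE);
		}
		goto unacct;
	}

	/* shmem_add_to_page_cache() now only inserts; swapin callers
	 * arrive with the folio already charged via the swapcache */
	error = shmem_add_to_page_cache(folio, mapping, hindex, NULL, gfp);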

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza
> ---
>  mm/shmem.c | 68 +++++++++++++++++++++++-------------------------------
>  1 file changed, 29 insertions(+), 39 deletions(-)
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 63ba6037b23a..0a7f7b567b80 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -146,9 +146,8 @@ static unsigned long shmem_default_max_inodes(void)
>  #endif
>  
>  static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
> -                              struct folio **foliop, enum sgp_type sgp,
> -                              gfp_t gfp, struct vm_area_struct *vma,
> -                              vm_fault_t *fault_type);
> +                              struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
> +                              struct mm_struct *fault_mm, vm_fault_t *fault_type);
>  
>  static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
>  {
> @@ -760,12 +759,10 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
>   */
>  static int shmem_add_to_page_cache(struct folio *folio,
>                                     struct address_space *mapping,
> -                                   pgoff_t index, void *expected, gfp_t gfp,
> -                                   struct mm_struct *charge_mm)
> +                                   pgoff_t index, void *expected, gfp_t gfp)
>  {
>          XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
>          long nr = folio_nr_pages(folio);
> -        int error;
>  
>          VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
>          VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
> @@ -776,16 +773,7 @@ static int shmem_add_to_page_cache(struct folio *folio,
>          folio->mapping = mapping;
>          folio->index = index;
>  
> -        if (!folio_test_swapcache(folio)) {
> -                error = mem_cgroup_charge(folio, charge_mm, gfp);
> -                if (error) {
> -                        if (folio_test_pmd_mappable(folio)) {
> -                                count_vm_event(THP_FILE_FALLBACK);
> -                                count_vm_event(THP_FILE_FALLBACK_CHARGE);
> -                        }
> -                        goto error;
> -                }
> -        }
> +        gfp &= GFP_RECLAIM_MASK;
>          folio_throttle_swaprate(folio, gfp);
>  
>          do {
> @@ -813,15 +801,12 @@ static int shmem_add_to_page_cache(struct folio *folio,
>          } while (xas_nomem(&xas, gfp));
>  
>          if (xas_error(&xas)) {
> -                error = xas_error(&xas);
> -                goto error;
> +                folio->mapping = NULL;
> +                folio_ref_sub(folio, nr);
> +                return xas_error(&xas);
>          }
>  
>          return 0;
> -error:
> -        folio->mapping = NULL;
> -        folio_ref_sub(folio, nr);
> -        return error;
>  }
>  
>  /*
> @@ -1324,10 +1309,8 @@ static int shmem_unuse_swap_entries(struct inode *inode,
>  
>                  if (!xa_is_value(folio))
>                          continue;
> -                error = shmem_swapin_folio(inode, indices[i],
> -                                           &folio, SGP_CACHE,
> -                                           mapping_gfp_mask(mapping),
> -                                           NULL, NULL);
> +                error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE,
> +                                           mapping_gfp_mask(mapping), NULL, NULL);
>                  if (error == 0) {
>                          folio_unlock(folio);
>                          folio_put(folio);
> @@ -1810,12 +1793,11 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
>   */
>  static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
>                                struct folio **foliop, enum sgp_type sgp,
> -                              gfp_t gfp, struct vm_area_struct *vma,
> +                              gfp_t gfp, struct mm_struct *fault_mm,
>                                vm_fault_t *fault_type)
>  {
>          struct address_space *mapping = inode->i_mapping;
>          struct shmem_inode_info *info = SHMEM_I(inode);
> -        struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
>          struct swap_info_struct *si;
>          struct folio *folio = NULL;
>          swp_entry_t swap;
> @@ -1843,7 +1825,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
>                  if (fault_type) {
>                          *fault_type |= VM_FAULT_MAJOR;
>                          count_vm_event(PGMAJFAULT);
> -                        count_memcg_event_mm(charge_mm, PGMAJFAULT);
> +                        count_memcg_event_mm(fault_mm, PGMAJFAULT);
>                  }
>                  /* Here we actually start the io */
>                  folio = shmem_swapin(swap, gfp, info, index);
> @@ -1880,8 +1862,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
>          }
>  
>          error = shmem_add_to_page_cache(folio, mapping, index,
> -                                        swp_to_radix_entry(swap), gfp,
> -                                        charge_mm);
> +                                        swp_to_radix_entry(swap), gfp);
>          if (error)
>                  goto failed;
>  
> @@ -1929,7 +1910,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>          struct address_space *mapping = inode->i_mapping;
>          struct shmem_inode_info *info = SHMEM_I(inode);
>          struct shmem_sb_info *sbinfo;
> -        struct mm_struct *charge_mm;
> +        struct mm_struct *fault_mm;
>          struct folio *folio;
>          pgoff_t hindex;
>          gfp_t huge_gfp;
> @@ -1946,7 +1927,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>          }
>  
>          sbinfo = SHMEM_SB(inode->i_sb);
> -        charge_mm = vma ? vma->vm_mm : NULL;
> +        fault_mm = vma ? vma->vm_mm : NULL;
>  
>          folio = filemap_get_entry(mapping, index);
>          if (folio && vma && userfaultfd_minor(vma)) {
> @@ -1958,7 +1939,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>  
>          if (xa_is_value(folio)) {
>                  error = shmem_swapin_folio(inode, index, &folio,
> -                                           sgp, gfp, vma, fault_type);
> +                                           sgp, gfp, fault_mm, fault_type);
>                  if (error == -EEXIST)
>                          goto repeat;
>  
> @@ -2044,9 +2025,16 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>          if (sgp == SGP_WRITE)
>                  __folio_set_referenced(folio);
>  
> -        error = shmem_add_to_page_cache(folio, mapping, hindex,
> -                                        NULL, gfp & GFP_RECLAIM_MASK,
> -                                        charge_mm);
> +        error = mem_cgroup_charge(folio, fault_mm, gfp);
> +        if (error) {
> +                if (folio_test_pmd_mappable(folio)) {
> +                        count_vm_event(THP_FILE_FALLBACK);
> +                        count_vm_event(THP_FILE_FALLBACK_CHARGE);
> +                }
> +                goto unacct;
> +        }
> +
> +        error = shmem_add_to_page_cache(folio, mapping, hindex, NULL, gfp);
>          if (error)
>                  goto unacct;
>  
> @@ -2644,8 +2632,10 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
>          if (unlikely(pgoff >= max_off))
>                  goto out_release;
>  
> -        ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL,
> -                                      gfp & GFP_RECLAIM_MASK, dst_vma->vm_mm);
> +        ret = mem_cgroup_charge(folio, dst_vma->vm_mm, gfp);
> +        if (ret)
> +                goto out_release;
> +        ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp);
>          if (ret)
>                  goto out_release;
>  
> -- 
> 2.35.3
> 
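PS: The fault_mm rename also reads well at its remaining use site: the only
thing the swapin path still needs an mm for is major-fault accounting,
e.g. (excerpt from the hunk above):

	if (fault_type) {
		*fault_type |= VM_FAULT_MAJOR;
		count_vm_event(PGMAJFAULT);
		count_memcg_event_mm(fault_mm, PGMAJFAULT);
	}
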
--
Jan Kara <jack@suse.com>
SUSE Labs, CR