linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Barry Song <21cnbao@gmail.com>
To: ying.huang@intel.com
Cc: akpm@linux-foundation.org, baolin.wang@linux.alibaba.com,
	chrisl@kernel.org, david@redhat.com, hannes@cmpxchg.org,
	hughd@google.com, kaleshsingh@google.com, kasong@tencent.com,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	mhocko@suse.com, minchan@kernel.org, nphamcs@gmail.com,
	ryan.roberts@arm.com, senozhatsky@chromium.org,
	shakeel.butt@linux.dev, shy828301@gmail.com, surenb@google.com,
	v-songbaohua@oppo.com, willy@infradead.org, xiang@kernel.org,
	yosryahmed@google.com
Subject: Re: [PATCH 1/1] mm: swap: add nr argument in swapcache_prepare and swapcache_clear to support large folios
Date: Fri,  2 Aug 2024 19:18:17 +1200	[thread overview]
Message-ID: <20240802071817.47081-1-21cnbao@gmail.com> (raw)
In-Reply-To: <87sevpj6xp.fsf@yhuang6-desk2.ccr.corp.intel.com>

On Fri, Aug 2, 2024 at 1:50 PM Huang, Ying <ying.huang@intel.com> wrote:
> >
> > Right. I believe the change below can help improve readability and also
> > clarify the swap_count_continued() case.
> >
> > diff --git a/mm/swapfile.c b/mm/swapfile.c
> > index 2fa29bdec171..75a89ce18edc 100644
> > --- a/mm/swapfile.c
> > +++ b/mm/swapfile.c
> > @@ -3538,6 +3538,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
> >
> >       offset = swp_offset(entry);
> >       VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
> > +     VM_WARN_ON(usage == 1 && nr > 1);
> >       ci = lock_cluster_or_swap_info(p, offset);
> >
> >       err = 0;
> > @@ -3564,17 +3565,9 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
> >                               err = -EEXIST;
> >                       else                            /* no users remaining */
> >                               err = -ENOENT;
> > -
> >               } else if (count || has_cache) {
> > -
> > -                     if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
> > -                             continue;
> > -                     else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
> > +                     if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
> >                               err = -EINVAL;
> > -                     else if (swap_count_continued(p, offset + i, count))
> > -                             continue;
> > -                     else
> > -                             err = -ENOMEM;
> >               } else
> >                       err = -ENOENT;                  /* unused swap entry */
> >
> > @@ -3591,8 +3584,12 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
> >                       has_cache = SWAP_HAS_CACHE;
> >               else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
> >                       count += usage;
> > -             else
> > +             else if (swap_count_continued(p, offset + i, count))
> >                       count = COUNT_CONTINUED;
> > +             else {
> > +                     err = -ENOMEM;
> > +                     goto unlock_out;
> > +             }
> >
> >               WRITE_ONCE(p->swap_map[offset + i], count | has_cache);
> >       }
> >
> > This makes the two iterations become:
> >
> >       for (i = 0; i < nr; i++) {
> >               count = p->swap_map[offset + i];
> >
> >               /*
> >                * swapin_readahead() doesn't check if a swap entry is valid, so the
> >                * swap entry could be SWAP_MAP_BAD. Check here with lock held.
> >                */
> >               if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
> >                       err = -ENOENT;
> >                       goto unlock_out;
> >               }
> >
> >               has_cache = count & SWAP_HAS_CACHE;
> >               count &= ~SWAP_HAS_CACHE;
> >
> >               if (usage == SWAP_HAS_CACHE) {
> >                       /* set SWAP_HAS_CACHE if there is no cache and entry is used */
>
> The comments doen't apply now, we don't "set" here.
>
> >                       if (!has_cache && count)
> >                               continue;
> >                       else if (has_cache)             /* someone else added cache */
> >                               err = -EEXIST;
> >                       else                            /* no users remaining */
> >                               err = -ENOENT;
> >               } else if (count || has_cache) {
> >                       if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
> >                               err = -EINVAL;
> >               } else
> >                       err = -ENOENT;                  /* unused swap entry */
>
> It seems that this can be simplified to:
>
>                 if (!count && !has_cache) {
>                         err = -ENOENT;
>                 } else if (usage == SWAP_HAS_CACHE) {
>                         if (has_cache)
>                                 err = -EEXIST;
>                 } else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) {
>                         err = -EINVAL;
>                 }
>
> >               if (err)
> >                       goto unlock_out;
> >       }
> >
> >       for (i = 0; i < nr; i++) {
> >               count = p->swap_map[offset + i];
> >               has_cache = count & SWAP_HAS_CACHE;
> >               count &= ~SWAP_HAS_CACHE;
> >
> >               if (usage == SWAP_HAS_CACHE)
> >                       has_cache = SWAP_HAS_CACHE;
> >               else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
> >                       count += usage;
> >               else if (swap_count_continued(p, offset + i, count))
> >                       count = COUNT_CONTINUED;
> >               else {
>
> Better to add some comments here,
>
>                         /*
>                          * Don't need to rollback changes, because if
>                          * usage == 1, there must be nr == 1.
>                          */
>
> >                       err = -ENOMEM;
> >                       goto unlock_out;
> >               }
> >
> >               WRITE_ONCE(p->swap_map[offset + i], count | has_cache);
> >       }
> >
> > Ying, do you feel more satisfied with the version above
> > compared to the code in mm-unstable?
>
> This looks good to me except some minor comments above.  Thanks!

Thanks very much, Ying. 

Hi Andrew,
Could you please help squash the below change?

From 17cbd696ecd37bc199b6e87c0c837d1a1ae9ac8d Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Fri, 2 Aug 2024 17:52:43 +1200
Subject: [PATCH] mm: clarify swap_count_continued and improve readability for
 __swap_duplicate

when usage=1 and swapcount is very large, the situation can become quite
complex. The first entry might succeed with swap_count_continued(),
but the second entry may require extending to an additional continued
page. Rolling back these changes can be extremely challenging. Therefore,
anyone using usage==1 for batched __swap_duplicate() operations should
proceed with caution.

Additionally, we have moved swap_count_continued() to the second iteration
to enhance readability, as this function may modify data.

Suggested-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 mm/swapfile.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index d9cf31b04db3..ea023fc25d08 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3540,6 +3540,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
 
 	offset = swp_offset(entry);
 	VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
+	VM_WARN_ON(usage == 1 && nr > 1);
 	ci = lock_cluster_or_swap_info(p, offset);
 
 	err = 0;
@@ -3558,27 +3559,14 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
 		has_cache = count & SWAP_HAS_CACHE;
 		count &= ~SWAP_HAS_CACHE;
 
-		if (usage == SWAP_HAS_CACHE) {
-			/* set SWAP_HAS_CACHE if there is no cache and entry is used */
-			if (!has_cache && count)
-				continue;
-			else if (has_cache)		/* someone else added cache */
+		if (!count && !has_cache) {
+			err = -ENOENT;
+		} else if (usage == SWAP_HAS_CACHE) {
+			if (has_cache)
 				err = -EEXIST;
-			else				/* no users remaining */
-				err = -ENOENT;
-
-		} else if (count || has_cache) {
-
-			if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
-				continue;
-			else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
-				err = -EINVAL;
-			else if (swap_count_continued(p, offset + i, count))
-				continue;
-			else
-				err = -ENOMEM;
-		} else
-			err = -ENOENT;			/* unused swap entry */
+		} else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) {
+			err = -EINVAL;
+		}
 
 		if (err)
 			goto unlock_out;
@@ -3593,8 +3581,16 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
 			has_cache = SWAP_HAS_CACHE;
 		else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
 			count += usage;
-		else
+		else if (swap_count_continued(p, offset + i, count))
 			count = COUNT_CONTINUED;
+		else {
+			/*
+			 * Don't need to rollback changes, because if
+			 * usage == 1, there must be nr == 1.
+			 */
+			err = -ENOMEM;
+			goto unlock_out;
+		}
 
 		WRITE_ONCE(p->swap_map[offset + i], count | has_cache);
 	}
-- 
2.34.1


> >> >> >> >> >> Best Regards,
> >> >> >> >> >> Huang, Ying
> >> >> >> >> >
> >> >> >
> >

Thanks
Barry


      parent reply	other threads:[~2024-08-02  7:18 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-30  7:13 [PATCH 0/1] mm: swap: add nr argument in swapcache_prepare() and swapcache_clear() Barry Song
2024-07-30  7:13 ` [PATCH 1/1] mm: swap: add nr argument in swapcache_prepare and swapcache_clear to support large folios Barry Song
2024-07-30  7:42   ` David Hildenbrand
2024-07-30  8:15   ` Baolin Wang
2024-07-31  8:10   ` Huang, Ying
2024-07-31  8:20     ` Barry Song
2024-07-31  8:25       ` Huang, Ying
2024-07-31 10:23         ` Barry Song
2024-08-01  1:09           ` Huang, Ying
2024-08-01  2:07             ` Barry Song
2024-08-01  2:32               ` Huang, Ying
2024-08-01  2:42                 ` Barry Song
2024-08-01  2:46                   ` Huang, Ying
2024-08-01  9:20                     ` Barry Song
2024-08-02  1:46                       ` Huang, Ying
2024-08-02  2:09                       ` Baolin Wang
2024-08-02  7:18                     ` Barry Song [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240802071817.47081-1-21cnbao@gmail.com \
    --to=21cnbao@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kaleshsingh@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=nphamcs@gmail.com \
    --cc=ryan.roberts@arm.com \
    --cc=senozhatsky@chromium.org \
    --cc=shakeel.butt@linux.dev \
    --cc=shy828301@gmail.com \
    --cc=surenb@google.com \
    --cc=v-songbaohua@oppo.com \
    --cc=willy@infradead.org \
    --cc=xiang@kernel.org \
    --cc=ying.huang@intel.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).