From: Feng Tang <feng.tang@intel.com>
To: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
Michal Hocko <mhocko@kernel.org>,
David Rientjes <rientjes@google.com>,
Dave Hansen <dave.hansen@intel.com>,
Ben Widawsky <ben.widawsky@intel.com>
Cc: linux-kernel@vger.kernel.org,
Andrea Arcangeli <aarcange@redhat.com>,
Mel Gorman <mgorman@techsingularity.net>,
Mike Kravetz <mike.kravetz@oracle.com>,
Randy Dunlap <rdunlap@infradead.org>,
Vlastimil Babka <vbabka@suse.cz>, Andi Kleen <ak@linux.intel.com>,
Dan Williams <dan.j.williams@intel.com>,
ying.huang@intel.com, Feng Tang <feng.tang@intel.com>
Subject: [PATCH v1 3/4] mm/mempolicy: don't handle MPOL_LOCAL like a fake MPOL_PREFERRED policy
Date: Wed, 26 May 2021 13:01:41 +0800 [thread overview]
Message-ID: <1622005302-23027-4-git-send-email-feng.tang@intel.com> (raw)
In-Reply-To: <1622005302-23027-1-git-send-email-feng.tang@intel.com>
MPOL_LOCAL policy has been setup as a real policy, but it is still
handled like a faked POL_PREFERRED policy with one internal
MPOL_F_LOCAL flag bit set, and there are many places having to
judge the real 'prefer' or the 'local' policy, which are quite
confusing.
In current code, there are four cases that MPOL_LOCAL are used:
* user specifies 'local' policy
* user specifies 'prefer' policy, but with empty nodemask
* system 'default' policy is used
* 'prefer' policy + valid 'preferred' node with MPOL_F_STATIC_NODES
flag set, and when it is 'rebind' to a nodemask which doesn't
contains the 'preferred' node, it will add the MPOL_F_LOCAL bit
and performs as 'local' policy. In future if it is 'rebind' again
with valid nodemask, the policy will be restored back to 'prefer'
So for the first three cases, we make 'local' a real policy
instead of a fake 'prefer' one, this will reduce confusion for
reading code.
And next optional patch will kill the 'MPOL_F_LOCAL' bit.
Signed-off-by: Feng Tang <feng.tang@intel.com>
---
mm/mempolicy.c | 66 ++++++++++++++++++++++++++++++++++------------------------
1 file changed, 39 insertions(+), 27 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2830bb8..d97839d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -121,8 +121,7 @@ enum zone_type policy_zone = 0;
*/
static struct mempolicy default_policy = {
.refcnt = ATOMIC_INIT(1), /* never free it */
- .mode = MPOL_PREFERRED,
- .flags = MPOL_F_LOCAL,
+ .mode = MPOL_LOCAL,
};
static struct mempolicy preferred_node_policy[MAX_NUMNODES];
@@ -200,12 +199,9 @@ static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
{
- if (!nodes)
- pol->flags |= MPOL_F_LOCAL; /* local allocation */
- else if (nodes_empty(*nodes))
- return -EINVAL; /* no allowed nodes */
- else
- pol->v.preferred_node = first_node(*nodes);
+ if (nodes_empty(*nodes))
+ return -EINVAL;
+ pol->v.preferred_node = first_node(*nodes);
return 0;
}
@@ -217,6 +213,11 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
return 0;
}
+static int mpol_new_local(struct mempolicy *pol, const nodemask_t *nodes)
+{
+ return 0;
+}
+
/*
* mpol_set_nodemask is called after mpol_new() to set up the nodemask, if
* any, for the new policy. mpol_new() has already validated the nodes
@@ -239,25 +240,19 @@ static int mpol_set_nodemask(struct mempolicy *pol,
cpuset_current_mems_allowed, node_states[N_MEMORY]);
VM_BUG_ON(!nodes);
- if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
- nodes = NULL; /* explicit local allocation */
- else {
- if (pol->flags & MPOL_F_RELATIVE_NODES)
- mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
- else
- nodes_and(nsc->mask2, *nodes, nsc->mask1);
- if (mpol_store_user_nodemask(pol))
- pol->w.user_nodemask = *nodes;
- else
- pol->w.cpuset_mems_allowed =
- cpuset_current_mems_allowed;
- }
+ if (pol->flags & MPOL_F_RELATIVE_NODES)
+ mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
+ else
+ nodes_and(nsc->mask2, *nodes, nsc->mask1);
- if (nodes)
- ret = mpol_ops[pol->mode].create(pol, &nsc->mask2);
+ if (mpol_store_user_nodemask(pol))
+ pol->w.user_nodemask = *nodes;
else
- ret = mpol_ops[pol->mode].create(pol, NULL);
+ pol->w.cpuset_mems_allowed =
+ cpuset_current_mems_allowed;
+
+ ret = mpol_ops[pol->mode].create(pol, &nsc->mask2);
return ret;
}
@@ -290,13 +285,14 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
if (((flags & MPOL_F_STATIC_NODES) ||
(flags & MPOL_F_RELATIVE_NODES)))
return ERR_PTR(-EINVAL);
+
+ mode = MPOL_LOCAL;
}
} else if (mode == MPOL_LOCAL) {
if (!nodes_empty(*nodes) ||
(flags & MPOL_F_STATIC_NODES) ||
(flags & MPOL_F_RELATIVE_NODES))
return ERR_PTR(-EINVAL);
- mode = MPOL_PREFERRED;
} else if (nodes_empty(*nodes))
return ERR_PTR(-EINVAL);
policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
@@ -427,6 +423,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
.create = mpol_new_bind,
.rebind = mpol_rebind_nodemask,
},
+ [MPOL_LOCAL] = {
+ .create = mpol_new_local,
+ .rebind = mpol_rebind_default,
+ },
};
static int migrate_page_add(struct page *page, struct list_head *pagelist,
@@ -1965,6 +1965,8 @@ unsigned int mempolicy_slab_node(void)
&policy->v.nodes);
return z->zone ? zone_to_nid(z->zone) : node;
}
+ case MPOL_LOCAL:
+ return node;
default:
BUG();
@@ -2089,6 +2091,11 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
*mask = mempolicy->v.nodes;
break;
+ case MPOL_LOCAL:
+ nid = numa_node_id();
+ init_nodemask_of_node(mask, nid);
+ break;
+
default:
BUG();
}
@@ -2333,6 +2340,8 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
if (a->flags & MPOL_F_LOCAL)
return true;
return a->v.preferred_node == b->v.preferred_node;
+ case MPOL_LOCAL:
+ return true;
default:
BUG();
return false;
@@ -2476,6 +2485,10 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
polnid = pol->v.preferred_node;
break;
+ case MPOL_LOCAL:
+ polnid = numa_node_id();
+ break;
+
case MPOL_BIND:
/* Optimize placement among multiple nodes via NUMA balancing */
if (pol->flags & MPOL_F_MORON) {
@@ -2920,7 +2933,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
*/
if (nodelist)
goto out;
- mode = MPOL_PREFERRED;
break;
case MPOL_DEFAULT:
/*
@@ -2964,7 +2976,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
else if (nodelist)
new->v.preferred_node = first_node(nodes);
else
- new->flags |= MPOL_F_LOCAL;
+ new->mode = MPOL_LOCAL;
/*
* Save nodes for contextualization: this will be used to "clone"
--
2.7.4
next prev parent reply other threads:[~2021-05-26 5:02 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-26 5:01 [PATCH v1 0/4] mm/mempolicy: some fix and semantics cleanup Feng Tang
2021-05-26 5:01 ` [PATCH v1 1/4] mm/mempolicy: skip nodemask intersect check for 'interleave' when oom Feng Tang
2021-05-27 7:30 ` Michal Hocko
2021-05-27 13:05 ` Feng Tang
2021-05-27 13:15 ` Michal Hocko
2021-05-27 13:22 ` Feng Tang
2021-05-26 5:01 ` [PATCH v1 2/4] mm/mempolicy: unify the preprocessing for mbind and set_mempolicy Feng Tang
2021-05-27 7:39 ` Michal Hocko
2021-05-27 12:31 ` Feng Tang
2021-05-26 5:01 ` Feng Tang [this message]
2021-05-27 8:12 ` [PATCH v1 3/4] mm/mempolicy: don't handle MPOL_LOCAL like a fake MPOL_PREFERRED policy Michal Hocko
2021-05-27 12:06 ` Feng Tang
2021-05-27 12:16 ` Michal Hocko
2021-05-26 5:01 ` [PATCH v1 4/4] mm/mempolicy: kill MPOL_F_LOCAL bit Feng Tang
2021-05-27 8:20 ` Michal Hocko
2021-05-27 12:10 ` Feng Tang
2021-05-27 12:26 ` Michal Hocko
2021-05-27 13:34 ` Feng Tang
2021-05-27 15:34 ` Michal Hocko
2021-05-28 4:39 ` Feng Tang
2021-05-31 7:00 ` Michal Hocko
2021-05-31 7:32 ` Feng Tang
2021-05-31 8:22 ` Michal Hocko
2021-05-31 8:29 ` Feng Tang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1622005302-23027-4-git-send-email-feng.tang@intel.com \
--to=feng.tang@intel.com \
--cc=aarcange@redhat.com \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=ben.widawsky@intel.com \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=mhocko@kernel.org \
--cc=mike.kravetz@oracle.com \
--cc=rdunlap@infradead.org \
--cc=rientjes@google.com \
--cc=vbabka@suse.cz \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).