From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: akpm@linux-foundation.org, Mike Kravetz <mike.kravetz@oracle.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: [RFC PATCH] mm/hugetlb: make hugetlb_lock irq safe
Date: Wed, 5 Sep 2018 16:53:41 +0530 [thread overview]
Message-ID: <20180905112341.21355-1-aneesh.kumar@linux.ibm.com> (raw)
================================
WARNING: inconsistent lock state
4.18.0-12148-g905aab86cd98 #28 Not tainted
--------------------------------
inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
swapper/68/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
0000000052a030a7 (hugetlb_lock){+.?.}, at: free_huge_page+0x9c/0x340
{SOFTIRQ-ON-W} state was registered at:
lock_acquire+0xd4/0x230
_raw_spin_lock+0x44/0x70
set_max_huge_pages+0x4c/0x360
hugetlb_sysctl_handler_common+0x108/0x160
proc_sys_call_handler+0x134/0x190
__vfs_write+0x3c/0x1f0
vfs_write+0xd8/0x220
ksys_write+0x64/0x110
system_call+0x5c/0x70
irq event stamp: 20918624
hardirqs last enabled at (20918624): [<c0000000002138a4>] rcu_process_callbacks+0x814/0xa30
hardirqs last disabled at (20918623): [<c0000000002132c8>] rcu_process_callbacks+0x238/0xa30
softirqs last enabled at (20918612): [<c00000000016005c>] irq_enter+0x9c/0xd0
softirqs last disabled at (20918613): [<c000000000160198>] irq_exit+0x108/0x1c0
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(hugetlb_lock);
<Interrupt>
lock(hugetlb_lock);
*** DEADLOCK ***
1 lock held by swapper/68/0:
#0: 0000000097408d5f (rcu_callback){....}, at: rcu_process_callbacks+0x328/0xa30
stack backtrace:
CPU: 68 PID: 0 Comm: swapper/68 Not tainted 4.18.0-12148-g905aab86cd98 #28
Call Trace:
[c00020398d3cf2d0] [c0000000011d57d4] dump_stack+0xe8/0x164 (unreliable)
[c00020398d3cf320] [c0000000001e6d94] print_usage_bug+0x2c4/0x390
[c00020398d3cf3d0] [c0000000001e6fc0] mark_lock+0x160/0x960
[c00020398d3cf480] [c0000000001e7dd0] __lock_acquire+0x530/0x1dd0
[c00020398d3cf600] [c0000000001ea144] lock_acquire+0xd4/0x230
[c00020398d3cf6c0] [c0000000011f8e24] _raw_spin_lock+0x44/0x70
[c00020398d3cf6f0] [c00000000042394c] free_huge_page+0x9c/0x340
[c00020398d3cf740] [c0000000003a5334] __put_compound_page+0x64/0x80
[c00020398d3cf770] [c000000000092098] mm_iommu_free+0x158/0x170
[c00020398d3cf7c0] [c000000000213358] rcu_process_callbacks+0x2c8/0xa30
[c00020398d3cf8e0] [c0000000011f9fb8] __do_softirq+0x168/0x590
[c00020398d3cf9e0] [c000000000160198] irq_exit+0x108/0x1c0
[c00020398d3cfa40] [c000000000029f30] timer_interrupt+0x160/0x440
[c00020398d3cfaa0] [c0000000000091b4] decrementer_common+0x124/0x130
--- interrupt: 901 at replay_interrupt_return+0x0/0x4
LR = arch_local_irq_restore.part.4+0x78/0x90
[c00020398d3cfd90] [c00000000001b334] arch_local_irq_restore.part.4+0x34/0x90 (unreliable)
[c00020398d3cfdc0] [c000000000eec768] cpuidle_enter_state+0xf8/0x4b0
[c00020398d3cfe30] [c0000000001aa33c] call_cpuidle+0x4c/0x90
[c00020398d3cfe50] [c0000000001aa7fc] do_idle+0x34c/0x420
[c00020398d3cfec0] [c0000000001aac7c] cpu_startup_entry+0x3c/0x40
[c00020398d3cfef0] [c0000000000528f8] start_secondary+0x4f8/0x520
[c00020398d3cff90] [c00000000000ad70] start_secondary_prolog+0x10/0x14
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
mm/hugetlb.c | 87 ++++++++++++++++++++++++++-------------------
mm/hugetlb_cgroup.c | 10 +++---
2 files changed, 57 insertions(+), 40 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 88881b3f8628..c8d3a34c48e0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1242,6 +1242,7 @@ void free_huge_page(struct page *page)
* Can't pass hstate in here because it is called from the
* compound page destructor.
*/
+ unsigned long flags;
struct hstate *h = page_hstate(page);
int nid = page_to_nid(page);
struct hugepage_subpool *spool =
@@ -1263,7 +1264,7 @@ void free_huge_page(struct page *page)
if (hugepage_subpool_put_pages(spool, 1) == 0)
restore_reserve = true;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
clear_page_huge_active(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
@@ -1284,18 +1285,20 @@ void free_huge_page(struct page *page)
arch_clear_hugepage_flags(page);
enqueue_huge_page(h, page);
}
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
}
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
+ unsigned long flags;
+
INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
set_hugetlb_cgroup(page, NULL);
h->nr_huge_pages++;
h->nr_huge_pages_node[nid]++;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
}
static void prep_compound_gigantic_page(struct page *page, unsigned int order)
@@ -1485,8 +1488,9 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
int dissolve_free_huge_page(struct page *page)
{
int rc = 0;
+ unsigned long flags;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (PageHuge(page) && !page_count(page)) {
struct page *head = compound_head(page);
struct hstate *h = page_hstate(head);
@@ -1510,7 +1514,7 @@ int dissolve_free_huge_page(struct page *page)
update_and_free_page(h, head);
}
out:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return rc;
}
@@ -1549,21 +1553,22 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
{
+ unsigned long flags;
struct page *page = NULL;
if (hstate_is_gigantic(h))
return NULL;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
goto out_unlock;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask);
if (!page)
return NULL;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
/*
* We could have raced with the pool size change.
* Double check that and simply deallocate the new page
@@ -1581,7 +1586,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
}
out_unlock:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return page;
}
@@ -1630,16 +1635,17 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
/* page migration callback function */
struct page *alloc_huge_page_node(struct hstate *h, int nid)
{
+ unsigned long flags;
gfp_t gfp_mask = htlb_alloc_mask(h);
struct page *page = NULL;
if (nid != NUMA_NO_NODE)
gfp_mask |= __GFP_THISNODE;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (h->free_huge_pages - h->resv_huge_pages > 0)
page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
if (!page)
page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
@@ -1651,19 +1657,20 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask)
{
+ unsigned long flags;
gfp_t gfp_mask = htlb_alloc_mask(h);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (h->free_huge_pages - h->resv_huge_pages > 0) {
struct page *page;
page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
if (page) {
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return page;
}
}
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
}
@@ -1997,6 +2004,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
long map_chg, map_commit;
long gbl_chg;
int ret, idx;
+ unsigned long flags;
struct hugetlb_cgroup *h_cg;
idx = hstate_index(h);
@@ -2039,7 +2047,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
if (ret)
goto out_subpool_put;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
/*
* glb_chg is passed to indicate whether or not a page must be taken
* from the global free pool (global change). gbl_chg == 0 indicates
@@ -2047,7 +2055,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
*/
page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
if (!page) {
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
@@ -2055,12 +2063,12 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
SetPagePrivate(page);
h->resv_huge_pages--;
}
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
list_move(&page->lru, &h->hugepage_activelist);
/* Fall through */
}
hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
set_page_private(page, (unsigned long)spool);
@@ -2279,6 +2287,7 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
nodemask_t *nodes_allowed)
{
+ unsigned long flags;
unsigned long min_count, ret;
if (hstate_is_gigantic(h) && !gigantic_page_supported())
@@ -2295,7 +2304,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
* pool might be one hugepage larger than it needs to be, but
* within all the constraints specified by the sysctls.
*/
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
if (!adjust_pool_surplus(h, nodes_allowed, -1))
break;
@@ -2307,13 +2316,13 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
* page, free_huge_page will handle it by freeing the page
* and reducing the surplus.
*/
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
/* yield cpu to avoid soft lockup */
cond_resched();
ret = alloc_pool_huge_page(h, nodes_allowed);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (!ret)
goto out;
@@ -2351,7 +2360,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
}
out:
ret = persistent_huge_pages(h);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return ret;
}
@@ -2502,6 +2511,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
{
int err;
unsigned long input;
+ unsigned long flags;
struct hstate *h = kobj_to_hstate(kobj, NULL);
if (hstate_is_gigantic(h))
@@ -2511,9 +2521,9 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
if (err)
return err;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
h->nr_overcommit_huge_pages = input;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return count;
}
@@ -2943,7 +2953,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
- unsigned long tmp;
+ unsigned long tmp, flags;
int ret;
if (!hugepages_supported())
@@ -2961,9 +2971,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
goto out;
if (write) {
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
h->nr_overcommit_huge_pages = tmp;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
}
out:
return ret;
@@ -3053,8 +3063,9 @@ unsigned long hugetlb_total_pages(void)
static int hugetlb_acct_memory(struct hstate *h, long delta)
{
int ret = -ENOMEM;
+ unsigned long flags;
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
/*
* When cpuset is configured, it breaks the strict hugetlb page
* reservation as the accounting is done on a global variable. Such
@@ -3087,7 +3098,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
return_unused_surplus_pages(h, (unsigned long) -delta);
out:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return ret;
}
@@ -4806,9 +4817,10 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
bool isolate_huge_page(struct page *page, struct list_head *list)
{
bool ret = true;
+ unsigned long flags;
VM_BUG_ON_PAGE(!PageHead(page), page);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (!page_huge_active(page) || !get_page_unless_zero(page)) {
ret = false;
goto unlock;
@@ -4816,22 +4828,25 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
clear_page_huge_active(page);
list_move_tail(&page->lru, list);
unlock:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return ret;
}
void putback_active_hugepage(struct page *page)
{
+ unsigned long flags;
+
VM_BUG_ON_PAGE(!PageHead(page), page);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
set_page_huge_active(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
put_page(page);
}
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
{
+ unsigned long flags;
struct hstate *h = page_hstate(oldpage);
hugetlb_cgroup_migrate(oldpage, newpage);
@@ -4854,11 +4869,11 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
SetPageHugeTemporary(oldpage);
ClearPageHugeTemporary(newpage);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
if (h->surplus_huge_pages_node[old_nid]) {
h->surplus_huge_pages_node[old_nid]--;
h->surplus_huge_pages_node[new_nid]++;
}
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
}
}
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 68c2f2f3c05b..e4e969d566f3 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -160,6 +160,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
*/
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
+ unsigned long flags;
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
struct hstate *h;
struct page *page;
@@ -167,11 +168,11 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
do {
for_each_hstate(h) {
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
list_for_each_entry(page, &h->hugepage_activelist, lru)
hugetlb_cgroup_move_parent(idx, h_cg, page);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
idx++;
}
cond_resched();
@@ -415,6 +416,7 @@ void __init hugetlb_cgroup_file_init(void)
*/
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
+ unsigned long flags;
struct hugetlb_cgroup *h_cg;
struct hstate *h = page_hstate(oldhpage);
@@ -422,14 +424,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
return;
VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
h_cg = hugetlb_cgroup_from_page(oldhpage);
set_hugetlb_cgroup(oldhpage, NULL);
/* move the h_cg details to new cgroup */
set_hugetlb_cgroup(newhpage, h_cg);
list_move(&newhpage->lru, &h->hugepage_activelist);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
return;
}
--
2.17.1
next reply other threads:[~2018-09-05 11:23 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-05 11:23 Aneesh Kumar K.V [this message]
2018-09-05 13:04 ` [RFC PATCH] mm/hugetlb: make hugetlb_lock irq safe Matthew Wilcox
2018-09-05 13:26 ` Aneesh Kumar K.V
2018-09-05 13:48 ` Matthew Wilcox
2018-09-05 19:58 ` Andrew Morton
2018-09-05 21:35 ` Mike Kravetz
2018-09-05 22:00 ` Andrew Morton
2018-09-05 23:07 ` Matthew Wilcox
2018-09-05 23:51 ` Mike Kravetz
2018-09-06 4:03 ` Aneesh Kumar K.V
2018-09-06 11:19 ` Michal Hocko
2018-09-06 3:58 ` Aneesh Kumar K.V
2018-09-06 3:54 ` Aneesh Kumar K.V
2018-09-06 4:00 ` Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180905112341.21355-1-aneesh.kumar@linux.ibm.com \
--to=aneesh.kumar@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.