From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pl0-f69.google.com (mail-pl0-f69.google.com [209.85.160.69]) by kanga.kvack.org (Postfix) with ESMTP id 335846B0284 for ; Sat, 16 Jun 2018 22:01:16 -0400 (EDT) Received: by mail-pl0-f69.google.com with SMTP id t19-v6so7721494plo.9 for ; Sat, 16 Jun 2018 19:01:16 -0700 (PDT) Received: from bombadil.infradead.org (bombadil.infradead.org. [2607:7c80:54:e::133]) by mx.google.com with ESMTPS id j20-v6si11853000pll.211.2018.06.16.19.01.14 for (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Sat, 16 Jun 2018 19:01:14 -0700 (PDT) From: Matthew Wilcox Subject: [PATCH v14 54/74] memfd: Convert memfd_wait_for_pins to XArray Date: Sat, 16 Jun 2018 19:00:32 -0700 Message-Id: <20180617020052.4759-55-willy@infradead.org> In-Reply-To: <20180617020052.4759-1-willy@infradead.org> References: <20180617020052.4759-1-willy@infradead.org> Sender: owner-linux-mm@kvack.org List-ID: To: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Cc: Matthew Wilcox , Jan Kara , Jeff Layton , Lukas Czerner , Ross Zwisler , Christoph Hellwig , Goldwyn Rodrigues , Nicholas Piggin , Ryusuke Konishi , linux-nilfs@vger.kernel.org, Jaegeuk Kim , Chao Yu , linux-f2fs-devel@lists.sourceforge.net Simplify the locking by taking the spinlock while we walk the tree on the assumption that many acquires and releases of the lock will be worse than holding the lock while we process an entire batch of pages. Signed-off-by: Matthew Wilcox Reviewed-by: Mike Kravetz --- mm/memfd.c | 61 ++++++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/mm/memfd.c b/mm/memfd.c index 27069518e3c5..e7d6be725b7a 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -21,7 +21,7 @@ #include /* - * We need a tag: a new tag would expand every radix_tree_node by 8 bytes, + * We need a tag: a new tag would expand every xa_node by 8 bytes, * so reuse a tag which we firmly believe is never set or cleared on tmpfs * or hugetlbfs because they are memory only filesystems. */ @@ -72,9 +72,7 @@ static void memfd_tag_pins(struct address_space *mapping) */ static int memfd_wait_for_pins(struct address_space *mapping) { - struct radix_tree_iter iter; - void __rcu **slot; - pgoff_t start; + XA_STATE(xas, &mapping->i_pages, 0); struct page *page; int error, scan; @@ -82,7 +80,9 @@ static int memfd_wait_for_pins(struct address_space *mapping) error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { - if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED)) + unsigned int tagged = 0; + + if (!xas_tagged(&xas, MEMFD_TAG_PINNED)) break; if (!scan) @@ -90,45 +90,34 @@ static int memfd_wait_for_pins(struct address_space *mapping) else if (schedule_timeout_killable((HZ << scan) / 200)) scan = LAST_SCAN; - start = 0; - rcu_read_lock(); - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, - start, MEMFD_TAG_PINNED) { - - page = radix_tree_deref_slot(slot); - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - - page = NULL; - } - - if (page && - page_count(page) - page_mapcount(page) != 1) { - if (scan < LAST_SCAN) - goto continue_resched; - + xas_set(&xas, 0); + xas_lock_irq(&xas); + xas_for_each_tagged(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { + bool clear = true; + if (xa_is_value(page)) + continue; + if (page_count(page) - page_mapcount(page) != 1) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still * found pages pinned. */ - error = -EBUSY; + if (scan == LAST_SCAN) + error = -EBUSY; + else + clear = false; } + if (clear) + xas_clear_tag(&xas, MEMFD_TAG_PINNED); + if (++tagged % XA_CHECK_SCHED) + continue; - xa_lock_irq(&mapping->i_pages); - radix_tree_tag_clear(&mapping->i_pages, - iter.index, MEMFD_TAG_PINNED); - xa_unlock_irq(&mapping->i_pages); -continue_resched: - if (need_resched()) { - slot = radix_tree_iter_resume(slot, &iter); - cond_resched_rcu(); - } + xas_pause(&xas); + xas_unlock_irq(&xas); + cond_resched(); + xas_lock_irq(&xas); } - rcu_read_unlock(); + xas_unlock_irq(&xas); } return error; -- 2.17.1