public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: stable@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Vitaly Wool <vitalywool@gmail.com>,
	Vitaly Wool <vitaly.vul@sony.com>,
	Jongseok Kim <ks77sj@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Sasha Levin <sashal@kernel.org>,
	linux-mm@kvack.org
Subject: [PATCH AUTOSEL 4.19 33/36] z3fold: fix possible reclaim races
Date: Thu, 22 Nov 2018 14:52:37 -0500	[thread overview]
Message-ID: <20181122195240.13123-33-sashal@kernel.org> (raw)
In-Reply-To: <20181122195240.13123-1-sashal@kernel.org>

From: Vitaly Wool <vitalywool@gmail.com>

[ Upstream commit ca0246bb97c23da9d267c2107c07fb77e38205c9 ]

Reclaim and free can race on an object which is basically fine but in
order for reclaim to be able to map "freed" object we need to encode
object length in the handle.  handle_to_chunks() is then introduced to
extract object length from a handle and use it during mapping.

Moreover, to avoid racing on a z3fold "headless" page release, we should
not try to free that page in z3fold_free() if the reclaim bit is set.
Also, in the unlikely case of trying to reclaim a page being freed, we
should not proceed with that page.

While at it, fix the page accounting in reclaim function.

This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups".

Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com
Signed-off-by: Vitaly Wool <vitaly.vul@sony.com>
Signed-off-by: Jongseok Kim <ks77sj@gmail.com>
Reported-by-by: Jongseok Kim <ks77sj@gmail.com>
Reviewed-by: Snild Dolkow <snild@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 mm/z3fold.c | 101 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 39 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 4b366d181f35..aee9b0b8d907 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -99,6 +99,7 @@ struct z3fold_header {
 #define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
 
 #define BUDDY_MASK	(0x3)
+#define BUDDY_SHIFT	2
 
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
@@ -145,7 +146,7 @@ enum z3fold_page_flags {
 	MIDDLE_CHUNK_MAPPED,
 	NEEDS_COMPACTING,
 	PAGE_STALE,
-	UNDER_RECLAIM
+	PAGE_CLAIMED, /* by either reclaim or free */
 };
 
 /*****************
@@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 	clear_bit(NEEDS_COMPACTING, &page->private);
 	clear_bit(PAGE_STALE, &page->private);
-	clear_bit(UNDER_RECLAIM, &page->private);
+	clear_bit(PAGE_CLAIMED, &page->private);
 
 	spin_lock_init(&zhdr->page_lock);
 	kref_init(&zhdr->refcount);
@@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
 	unsigned long handle;
 
 	handle = (unsigned long)zhdr;
-	if (bud != HEADLESS)
-		handle += (bud + zhdr->first_num) & BUDDY_MASK;
+	if (bud != HEADLESS) {
+		handle |= (bud + zhdr->first_num) & BUDDY_MASK;
+		if (bud == LAST)
+			handle |= (zhdr->last_chunks << BUDDY_SHIFT);
+	}
 	return handle;
 }
 
@@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
 	return (struct z3fold_header *)(handle & PAGE_MASK);
 }
 
+/* only for LAST bud, returns zero otherwise */
+static unsigned short handle_to_chunks(unsigned long handle)
+{
+	return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
+}
+
 /*
  * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
  *  but that doesn't matter. because the masking will result in the
@@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 	page = virt_to_page(zhdr);
 
 	if (test_bit(PAGE_HEADLESS, &page->private)) {
-		/* HEADLESS page stored */
-		bud = HEADLESS;
-	} else {
-		z3fold_page_lock(zhdr);
-		bud = handle_to_buddy(handle);
-
-		switch (bud) {
-		case FIRST:
-			zhdr->first_chunks = 0;
-			break;
-		case MIDDLE:
-			zhdr->middle_chunks = 0;
-			zhdr->start_middle = 0;
-			break;
-		case LAST:
-			zhdr->last_chunks = 0;
-			break;
-		default:
-			pr_err("%s: unknown bud %d\n", __func__, bud);
-			WARN_ON(1);
-			z3fold_page_unlock(zhdr);
-			return;
+		/* if a headless page is under reclaim, just leave.
+		 * NB: we use test_and_set_bit for a reason: if the bit
+		 * has not been set before, we release this page
+		 * immediately so we don't care about its value any more.
+		 */
+		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+			spin_lock(&pool->lock);
+			list_del(&page->lru);
+			spin_unlock(&pool->lock);
+			free_z3fold_page(page);
+			atomic64_dec(&pool->pages_nr);
 		}
+		return;
 	}
 
-	if (bud == HEADLESS) {
-		spin_lock(&pool->lock);
-		list_del(&page->lru);
-		spin_unlock(&pool->lock);
-		free_z3fold_page(page);
-		atomic64_dec(&pool->pages_nr);
+	/* Non-headless case */
+	z3fold_page_lock(zhdr);
+	bud = handle_to_buddy(handle);
+
+	switch (bud) {
+	case FIRST:
+		zhdr->first_chunks = 0;
+		break;
+	case MIDDLE:
+		zhdr->middle_chunks = 0;
+		break;
+	case LAST:
+		zhdr->last_chunks = 0;
+		break;
+	default:
+		pr_err("%s: unknown bud %d\n", __func__, bud);
+		WARN_ON(1);
+		z3fold_page_unlock(zhdr);
 		return;
 	}
 
@@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		atomic64_dec(&pool->pages_nr);
 		return;
 	}
-	if (test_bit(UNDER_RECLAIM, &page->private)) {
+	if (test_bit(PAGE_CLAIMED, &page->private)) {
 		z3fold_page_unlock(zhdr);
 		return;
 	}
@@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		}
 		list_for_each_prev(pos, &pool->lru) {
 			page = list_entry(pos, struct page, lru);
+
+			/* this bit could have been set by free, in which case
+			 * we pass over to the next page in the pool.
+			 */
+			if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+				continue;
+
+			zhdr = page_address(page);
 			if (test_bit(PAGE_HEADLESS, &page->private))
-				/* candidate found */
 				break;
 
-			zhdr = page_address(page);
-			if (!z3fold_page_trylock(zhdr))
+			if (!z3fold_page_trylock(zhdr)) {
+				zhdr = NULL;
 				continue; /* can't evict at this point */
+			}
 			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
 			zhdr->cpu = -1;
-			set_bit(UNDER_RECLAIM, &page->private);
 			break;
 		}
 
+		if (!zhdr)
+			break;
+
 		list_del_init(&page->lru);
 		spin_unlock(&pool->lock);
 
@@ -898,6 +920,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		if (test_bit(PAGE_HEADLESS, &page->private)) {
 			if (ret == 0) {
 				free_z3fold_page(page);
+				atomic64_dec(&pool->pages_nr);
 				return 0;
 			}
 			spin_lock(&pool->lock);
@@ -905,7 +928,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			spin_unlock(&pool->lock);
 		} else {
 			z3fold_page_lock(zhdr);
-			clear_bit(UNDER_RECLAIM, &page->private);
+			clear_bit(PAGE_CLAIMED, &page->private);
 			if (kref_put(&zhdr->refcount,
 					release_z3fold_page_locked)) {
 				atomic64_dec(&pool->pages_nr);
@@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 		break;
 	case LAST:
-		addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
+		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
 		break;
 	default:
 		pr_err("unknown buddy id %d\n", buddy);
-- 
2.17.1

  parent reply	other threads:[~2018-11-22 19:52 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-22 19:52 [PATCH AUTOSEL 4.19 01/36] pinctrl: meson: fix pinconf bias disable Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 02/36] pinctrl: meson: fix gxbb ao pull register bits Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 03/36] pinctrl: meson: fix gxl " Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 04/36] pinctrl: meson: fix meson8 " Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 05/36] pinctrl: meson: fix meson8b " Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 06/36] tools/testing/nvdimm: Fix the array size for dimm devices Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 07/36] scsi: lpfc: fix remoteport access Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 08/36] scsi: hisi_sas: Remove set but not used variable 'dq_list' Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 09/36] scsi: NCR5380: Return false instead of NULL Sasha Levin
2018-11-22 21:49   ` Finn Thain
2018-11-23 11:27     ` Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 10/36] KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 11/36] cpufreq: imx6q: add return value check for voltage scale Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 12/36] rtc: cmos: Do not export alarm rtc_ops when we do not support alarms Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 13/36] rtc: pcf2127: fix a kmemleak caused in pcf2127_i2c_gather_write Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 14/36] crypto: simd - correctly take reqsize of wrapped skcipher into account Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 15/36] floppy: fix race condition in __floppy_read_block_0() Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 16/36] powerpc/io: Fix the IO workarounds code to work with Radix Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 17/36] sched/fair: Fix cpu_util_wake() for 'execl' type workloads Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 18/36] perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 19/36] ARM: make lookup_processor_type() non-__init Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 20/36] ARM: split out processor lookup Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 21/36] ARM: clean up per-processor check_bugs method call Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 22/36] ARM: add PROC_VTABLE and PROC_TABLE macros Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 23/36] ARM: spectre-v2: per-CPU vtables to work around big.Little systems Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 24/36] block: copy ioprio in __bio_clone_fast() and bounce Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 25/36] SUNRPC: Fix a bogus get/put in generic_key_to_expire() Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 26/36] riscv: add missing vdso_install target Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 27/36] RISC-V: Silence some module warnings on 32-bit Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 28/36] drm/amdgpu: fix bug with IH ring setup Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 29/36] kdb: Use strscpy with destination buffer size Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 30/36] NFSv4: Fix an Oops during delegation callbacks Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 31/36] powerpc/numa: Suppress "VPHN is not supported" messages Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 32/36] efi/arm: Revert deferred unmap of early memmap mapping Sasha Levin
2018-11-22 19:52 ` Sasha Levin [this message]
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 34/36] mm, memory_hotplug: check zone_movable in has_unmovable_pages Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 35/36] tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO with a negative offset Sasha Levin
2018-11-22 19:52 ` [PATCH AUTOSEL 4.19 36/36] mm, page_alloc: check for max order in hot path Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181122195240.13123-33-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=ks77sj@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vitaly.vul@sony.com \
    --cc=vitalywool@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox