All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Stancek <jstancek@redhat.com>
To: ltp@lists.linux.it
Subject: [LTP] [PATCH] madvise06: wait a bit after madvise() call
Date: Thu, 21 Jul 2016 16:23:27 +0200	[thread overview]
Message-ID: <5790DADF.8000506@redhat.com> (raw)
In-Reply-To: <20160721110159.GA13274@gmail.com>

On 07/21/2016 01:02 PM, Li Wang wrote:
> On Thu, Jul 21, 2016 at 06:31:58AM -0400, Chunyu Hu wrote:
>>>
>>> If you still have the setup, can you try how reliable is this approach?
>>
>> I also had a try on my desktop. I copied the file as a.c and compiled it in ltp.
>> Result is that if the sys is fresh with low Cache, it can pass rightly. But if 
>> the Cache is before exhausted, it can hit failure, as the thresh_hold is too
>> large to get there. Just FYI. 

I'm not sure I follow here, your /proc/meminfo shows:
Cached:           260124 kB
SwapCached:        38096 kB

That doesn't seem very high to me.

> 
> Yes, Chunyu ran failed the case with his destop(uptime more than 30days) at first,
> after rebooting it could be PASS.

I'm starting to run out of ideas how we can test this somewhat reliably.

Attached is approach v3, which sets up memory cgroup:
- memory.limit_in_bytes is 128M
- we allocate 512M
- as a consequence, ~384M should be swapped out while the system should still
  have plenty of free memory, which should be available for cache

Regards,
Jan

-------------- next part --------------
/*
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * DESCRIPTION
 *
 *   A page fault occurs even though the madvise(WILLNEED) system call was
 *   called to prefetch the page. The issue is reproduced by running a program
 *   which sequentially accesses shared memory and calls madvise(WILLNEED)
 *   on the next page on a page fault.
 *
 *   This bug is present in all RHEL7 versions. It looks like this was fixed in
 *   mainline kernel > v3.15 by the following patch:
 *
 *   commit 55231e5c898c5c03c14194001e349f40f59bd300
 *   Author: Johannes Weiner <hannes@cmpxchg.org>
 *   Date:   Thu May 22 11:54:17 2014 -0700
 *
 *       mm: madvise: fix MADV_WILLNEED on shmem swapouts
 */

#include <errno.h>
#include <stdio.h>
#include <sys/mount.h>
#include <sys/sysinfo.h>
#include "tst_test.h"

#define CHUNK_SZ (512*1024*1024L)
#define CHUNK_PAGES (CHUNK_SZ / pg_sz)
#define PASS_THRESHOLD (CHUNK_SZ / 4)

static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches";
static int pg_sz;

static void drop_caches(void)
{
	int ret;
	FILE *f;

	f = fopen(drop_caches_fname, "w");
	if (f) {
		ret = fprintf(f, "1");
		fclose(f);
		if (ret < 1)
			tst_brk(TBROK, "Failed to drop caches");
	} else {
		tst_brk(TBROK, "Failed to open drop_caches");
	}
}

/*
 * Test setup: record the page size, drop caches, check that enough free
 * RAM and swap are reported, then mount a memory cgroup hierarchy under
 * "memory", create the "madvise06" group with its limit set to
 * PASS_THRESHOLD bytes and move the current process into it.
 *
 * Ends the test with TCONF when a prerequisite is missing.
 */
static void setup(void)
{
	struct sysinfo si;
	const long required = 2 * CHUNK_SZ;

	pg_sz = getpagesize();

	if (access(drop_caches_fname, R_OK | W_OK) != 0)
		tst_brk(TCONF, "needed: %s\n", drop_caches_fname);

	tst_res(TINFO, "dropping caches");
	drop_caches();

	/* free RAM and free swap must each be at least twice the chunk size */
	sysinfo(&si);
	if (si.freeram < required)
		tst_brk(TCONF, "System RAM is too small, skip test");
	if (si.freeswap < required)
		tst_brk(TCONF, "System swap is too small");

	/* mount the memory controller on a local directory */
	SAFE_MKDIR("memory", 0700);
	SAFE_MOUNT("memory", "memory", "cgroup", 0, "memory");
	if (access("memory/memory.limit_in_bytes", R_OK | W_OK) != 0)
		tst_brk(TCONF, "cgroup memory.limit_in_bytes needed");

	/* child group: apply the limit first, then attach ourselves to it */
	SAFE_MKDIR("memory/madvise06", 0700);
	SAFE_FILE_PRINTF("memory/madvise06/memory.limit_in_bytes", "%ld\n",
			 PASS_THRESHOLD);
	SAFE_FILE_PRINTF("memory/madvise06/tasks", "%d\n", getpid());
}

/*
 * Best-effort teardown of what setup() created: write our pid to
 * "memory/tasks", remove the "memory/madvise06" directory and unmount
 * "memory". Failures are deliberately ignored, since any of these steps
 * may not have been reached by setup().
 */
static void cleanup(void)
{
	FILE *root_tasks;

	root_tasks = fopen("memory/tasks", "w");
	if (root_tasks != NULL) {
		fprintf(root_tasks, "%d\n", getpid());
		fclose(root_tasks);
	}

	rmdir("memory/madvise06");
	umount("memory");
}

/*
 * Count how many of the pg_count pages starting at ptr have the "swapped"
 * bit (bit 62) set in their /proc/self/pagemap entry.
 *
 * ptr:      start address of the range; the entry index is derived from
 *           ptr / pg_sz, so pg_sz must already be initialized
 * pg_count: number of consecutive pages to inspect
 *
 * Returns the number of entries with bit 62 set.
 *
 * Ends the test via tst_brk() if pagemap cannot be opened, positioned or
 * read in full.
 */
static long count_swapped_pages(void *ptr, long pg_count)
{
	int pm;
	long index, swapped = 0;
	ssize_t len;
	uint64_t pagemap;
	off_t offset;

	/* pagemap holds one 64-bit entry per virtual page */
	index = ((uintptr_t)ptr / pg_sz) * sizeof(uint64_t);

	pm = open("/proc/self/pagemap", O_RDONLY);
	if (pm == -1) {
		/* In 4.0 and 4.1 opens by unprivileged fail with -EPERM */
		if ((errno == EPERM) && (geteuid() != 0)) {
			tst_brk(TCONF | TERRNO,
				"don't have permission to open dev pagemap");
		} else {
			tst_brk(TFAIL | TERRNO,
				"Open dev pagemap failed");
		}
	}

	offset = lseek(pm, index, SEEK_SET);
	if (offset != index)
		tst_brk(TFAIL | TERRNO, "Reposition offset failed");

	while (pg_count > 0) {
		/*
		 * Keep the read() result separate from the running count so
		 * the count is not overwritten on every iteration.
		 */
		len = read(pm, &pagemap, sizeof(pagemap));
		if (len < 0)
			tst_brk(TFAIL | TERRNO, "Read pagemap failed");
		/* a short read or EOF would leave pagemap stale */
		if (len != (ssize_t)sizeof(pagemap))
			tst_brk(TFAIL, "Short read from pagemap");
		if ((pagemap & (1ULL<<62)))
			swapped++;
		pg_count--;
	}

	close(pm);

	return swapped;
}

/*
 * Write one byte ('x') at the start of every whole page in the first
 * size bytes of ptr, stepping by the global pg_sz.
 *
 * ptr:  start of the buffer to touch
 * size: length of the buffer in bytes; a trailing partial page is skipped
 */
static void dirty_pages(char *ptr, long size)
{
	long off;
	/* end of the last whole page, as a byte offset */
	long limit = (size / pg_sz) * pg_sz;

	for (off = 0; off < limit; off += pg_sz)
		ptr[off] = 'x';
}

/*
 * Read the 12th whitespace-separated field of /proc/self/stat as an int
 * and return it. Per proc(5) that field is majflt, the number of major
 * page faults of the process.
 */
static int get_page_fault_num(void)
{
	int faults;

	/* the first eleven fields are skipped with %*s */
	SAFE_FILE_SCANF("/proc/self/stat",
			"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d",
			&faults);

	return faults;
}

/*
 * Test body: map and dirty a CHUNK_SZ shared anonymous region, call
 * madvise(MADV_WILLNEED) on it and poll the SwapCached value from
 * /proc/meminfo (up to 50 times, 100ms apart) until it has grown by at
 * least PASS_THRESHOLD / 1024 over the value sampled before madvise().
 *
 * If the growth is reached the test passes. Otherwise the first page is
 * written and the page fault counter is compared before and after: a
 * changed counter is reported as TFAIL, an unchanged one as TPASS.
 */
static void test_advice_willneed(void)
{
	int loops = 50;
	char *target;
	long swapcached_start, swapcached, swapcached_goal;

	target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_ANONYMOUS,
			-1, 0);
	dirty_pages(target, CHUNK_SZ);

	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
		&swapcached_start);
	tst_res(TINFO, "SwapCached (before madvise): %ld", swapcached_start);

	/*
	 * Compute the goal once so the polling loop and the verdict below
	 * use exactly complementary conditions. PASS_THRESHOLD is in bytes;
	 * dividing by 1024 presumably matches the kB unit of meminfo.
	 */
	swapcached_goal = swapcached_start + PASS_THRESHOLD / 1024;

	TEST(madvise(target, CHUNK_SZ, MADV_WILLNEED));
	if (TEST_RETURN == -1)
		tst_brk(TBROK | TERRNO, "madvise failed");

	do {
		loops--;
		usleep(100000);
		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
			&swapcached);
	} while (loops > 0 && swapcached < swapcached_goal);

	tst_res(TINFO, "SwapCached (after madvise): %ld", swapcached);
	if (swapcached >= swapcached_goal) {
		tst_res(TPASS, "Regression test pass");
	} else {
		/* looks like we may have hit a bug, try accessing page */
		int page_fault_num_1;
		int page_fault_num_2;

		page_fault_num_1 = get_page_fault_num();
		tst_res(TINFO, "PageFault(madvice / no mem access): %d",
				page_fault_num_1);
		target[0] = 'a';
		page_fault_num_2 = get_page_fault_num();
		tst_res(TINFO, "PageFault(madvice / mem access): %d",
				page_fault_num_2);

		if (page_fault_num_1 != page_fault_num_2)
			tst_res(TFAIL, "Bug has been reproduced");
		else
			tst_res(TPASS, "Regression test pass");
	}

	SAFE_MUNMAP(target, CHUNK_SZ);
}

/*
 * Test descriptor handed to the LTP test library: names the test and
 * wires up the setup, test body and cleanup callbacks defined above.
 */
static struct tst_test test = {
	.tid = "madvise06",
	.test_all = test_advice_willneed,
	.setup = setup,
	.cleanup = cleanup,
	/* setup() creates the relative "memory" directory; presumably inside the tmpdir */
	.needs_tmpdir = 1,
	/* setup() mounts a cgroup hierarchy and writes to /proc/sys/vm/drop_caches */
	.needs_root = 1,
};

  reply	other threads:[~2016-07-21 14:23 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-18 13:37 [LTP] [PATCH] madvise06: wait a bit after madvise() call Jan Stancek
2016-07-18 14:03 ` Cyril Hrubis
2016-07-18 14:22   ` Jan Stancek
2016-07-18 14:49     ` Cyril Hrubis
2016-07-19  5:58 ` Li Wang
2016-07-19  6:56   ` Jan Stancek
2016-07-19  8:57     ` Li Wang
2016-07-20 14:37       ` Jan Stancek
2016-07-21  5:33         ` Li Wang
2016-07-21 10:31         ` Chunyu Hu
2016-07-21 11:02           ` Li Wang
2016-07-21 14:23             ` Jan Stancek [this message]
2016-07-22  3:46               ` Li Wang
2016-07-22  6:59                 ` Jan Stancek
2016-07-22 10:49               ` Chunyu Hu
2016-07-22 10:54                 ` Chunyu Hu
2016-07-22 11:02                   ` Jan Stancek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5790DADF.8000506@redhat.com \
    --to=jstancek@redhat.com \
    --cc=ltp@lists.linux.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.