From: Brian Foster <bfoster@redhat.com>
To: Joanne Koong <joannelkoong@gmail.com>
Cc: fstests@vger.kernel.org, linux-fsdevel@vger.kernel.org,
djwong@kernel.org, nirjhar@linux.ibm.com, kernel-team@meta.com
Subject: Re: [PATCH v2 1/2] fsx: support reads/writes from buffers backed by hugepages
Date: Mon, 6 Jan 2025 10:33:04 -0500 [thread overview]
Message-ID: <Z3v3sIj1_5s0tyCk@bfoster> (raw)
In-Reply-To: <20241227193311.1799626-2-joannelkoong@gmail.com>
On Fri, Dec 27, 2024 at 11:33:10AM -0800, Joanne Koong wrote:
> Add support for reads/writes from buffers backed by hugepages.
> This can be enabled through the '-h' flag. This flag should only be used
> on systems where THP capabilities are enabled.
>
> This is motivated by a recent bug that was due to faulty handling of
> userspace buffers backed by hugepages. This patch is a mitigation
> against problems like this in the future.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
> ltp/fsx.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 97 insertions(+), 11 deletions(-)
Thanks for the buffer init code cleanup. This looks much nicer to me
now. Modulo comments from others:
Reviewed-by: Brian Foster <bfoster@redhat.com>
>
> diff --git a/ltp/fsx.c b/ltp/fsx.c
> index 41933354..fb6a9b31 100644
> --- a/ltp/fsx.c
> +++ b/ltp/fsx.c
> @@ -190,6 +190,7 @@ int o_direct; /* -Z */
> int aio = 0;
> int uring = 0;
> int mark_nr = 0;
> +int hugepages = 0; /* -h flag */
>
> int page_size;
> int page_mask;
> @@ -2471,7 +2472,7 @@ void
> usage(void)
> {
> fprintf(stdout, "usage: %s",
> - "fsx [-dfknqxyzBEFHIJKLORWXZ0]\n\
> + "fsx [-dfhknqxyzBEFHIJKLORWXZ0]\n\
> [-b opnum] [-c Prob] [-g filldata] [-i logdev] [-j logid]\n\
> [-l flen] [-m start:end] [-o oplen] [-p progressinterval]\n\
> [-r readbdy] [-s style] [-t truncbdy] [-w writebdy]\n\
> @@ -2484,6 +2485,7 @@ usage(void)
> -e: pollute post-eof on size changes (default 0)\n\
> -f: flush and invalidate cache after I/O\n\
> -g X: write character X instead of random generated data\n\
> + -h hugepages: use buffers backed by hugepages for reads/writes\n\
> -i logdev: do integrity testing, logdev is the dm log writes device\n\
> -j logid: prefix debug log messsages with this id\n\
> -k: do not truncate existing file and use its size as upper bound on file size\n\
> @@ -2856,6 +2858,95 @@ keep_running(void)
> return numops-- != 0;
> }
>
> +static long
> +get_hugepage_size(void)
> +{
> + const char str[] = "Hugepagesize:";
> + size_t str_len = sizeof(str) - 1;
> + unsigned int hugepage_size = 0;
> + char buffer[64];
> + FILE *file;
> +
> + file = fopen("/proc/meminfo", "r");
> + if (!file) {
> + prterr("get_hugepage_size: fopen /proc/meminfo");
> + return -1;
> + }
> + while (fgets(buffer, sizeof(buffer), file)) {
> + if (strncmp(buffer, str, str_len) == 0) {
> + sscanf(buffer + str_len, "%u", &hugepage_size);
> + break;
> + }
> + }
> + fclose(file);
> + if (!hugepage_size) {
> + prterr("get_hugepage_size: failed to find "
> + "hugepage size in /proc/meminfo\n");
> + return -1;
> + }
> +
> + /* convert from KiB to bytes */
> + return hugepage_size << 10;
> +}
> +
> +static void *
> +init_hugepages_buf(unsigned len, int hugepage_size, int alignment)
> +{
> + void *buf;
> + long buf_size = roundup(len, hugepage_size) + alignment;
> +
> + if (posix_memalign(&buf, hugepage_size, buf_size)) {
> + prterr("posix_memalign for buf");
> + return NULL;
> + }
> + memset(buf, '\0', buf_size);
> + if (madvise(buf, buf_size, MADV_COLLAPSE)) {
> + prterr("madvise collapse for buf");
> + free(buf);
> + return NULL;
> + }
> +
> + return buf;
> +}
> +
> +static void
> +init_buffers(void)
> +{
> + int i;
> +
> + original_buf = (char *) malloc(maxfilelen);
> + for (i = 0; i < maxfilelen; i++)
> + original_buf[i] = random() % 256;
> + if (hugepages) {
> + long hugepage_size = get_hugepage_size();
> + if (hugepage_size == -1) {
> + prterr("get_hugepage_size()");
> + exit(100);
> + }
> + good_buf = init_hugepages_buf(maxfilelen, hugepage_size, writebdy);
> + if (!good_buf) {
> + prterr("init_hugepages_buf failed for good_buf");
> + exit(101);
> + }
> +
> + temp_buf = init_hugepages_buf(maxoplen, hugepage_size, readbdy);
> + if (!temp_buf) {
> + prterr("init_hugepages_buf failed for temp_buf");
> + exit(101);
> + }
> + } else {
> + unsigned long good_buf_len = maxfilelen + writebdy;
> + unsigned long temp_buf_len = maxoplen + readbdy;
> +
> + good_buf = (char *) malloc(good_buf_len);
> + memset(good_buf, '\0', good_buf_len);
> + temp_buf = (char *) malloc(temp_buf_len);
> + memset(temp_buf, '\0', temp_buf_len);
> + }
> + good_buf = round_ptr_up(good_buf, writebdy, 0);
> + temp_buf = round_ptr_up(temp_buf, readbdy, 0);
> +}
> +
> static struct option longopts[] = {
> {"replay-ops", required_argument, 0, 256},
> {"record-ops", optional_argument, 0, 255},
> @@ -2883,7 +2974,7 @@ main(int argc, char **argv)
> setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
>
> while ((ch = getopt_long(argc, argv,
> - "0b:c:de:fg:i:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> + "0b:c:de:fg:hi:j:kl:m:no:p:qr:s:t:uw:xyABD:EFJKHzCILN:OP:RS:UWXZ",
> longopts, NULL)) != EOF)
> switch (ch) {
> case 'b':
> @@ -2916,6 +3007,9 @@ main(int argc, char **argv)
> case 'g':
> filldata = *optarg;
> break;
> + case 'h':
> + hugepages = 1;
> + break;
> case 'i':
> integrity = 1;
> logdev = strdup(optarg);
> @@ -3229,15 +3323,7 @@ main(int argc, char **argv)
> exit(95);
> }
> }
> - original_buf = (char *) malloc(maxfilelen);
> - for (i = 0; i < maxfilelen; i++)
> - original_buf[i] = random() % 256;
> - good_buf = (char *) malloc(maxfilelen + writebdy);
> - good_buf = round_ptr_up(good_buf, writebdy, 0);
> - memset(good_buf, '\0', maxfilelen);
> - temp_buf = (char *) malloc(maxoplen + readbdy);
> - temp_buf = round_ptr_up(temp_buf, readbdy, 0);
> - memset(temp_buf, '\0', maxoplen);
> + init_buffers();
> if (lite) { /* zero entire existing file */
> ssize_t written;
>
> --
> 2.47.1
>
next prev parent reply other threads:[~2025-01-06 15:31 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-27 19:33 [PATCH v2 0/2] fstests: test reads/writes from hugepages-backed buffers Joanne Koong
2024-12-27 19:33 ` [PATCH v2 1/2] fsx: support reads/writes from buffers backed by hugepages Joanne Koong
2024-12-30 5:00 ` Nirjhar Roy
2025-01-06 15:33 ` Brian Foster [this message]
2025-01-12 4:16 ` Zorro Lang
2025-01-15 17:53 ` Joanne Koong
2024-12-27 19:33 ` [PATCH v2 2/2] generic: add tests for read/writes from hugepages-backed buffers Joanne Koong
2025-01-06 15:33 ` Brian Foster
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z3v3sIj1_5s0tyCk@bfoster \
--to=bfoster@redhat.com \
--cc=djwong@kernel.org \
--cc=fstests@vger.kernel.org \
--cc=joannelkoong@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=nirjhar@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.