From: Keith Busch <kbusch@kernel.org>
To: linux-nvme@lists.infradead.org, Jens Axboe <axboe@kernel.dk>
Cc: Keith Busch <kbusch@kernel.org>
Subject: [RFC] nvme-cli: Support for hugetlbfs
Date: Wed, 6 Nov 2019 03:34:12 +0900 [thread overview]
Message-ID: <20191105183412.333-1-kbusch@kernel.org> (raw)
Some commands require exceptionally large data transfers, and the
kernel driver supports only a limited number of physical segments per
command. To help support large transfers, try to allocate physically
contiguous space via hugetlbfs.
Rather than deal with the nuances of allocating huge pages directly, this
patch uses libhugetlbfs when it is found on the build host, and falls back
to the existing posix_memalign() allocation otherwise.
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
Makefile | 7 ++++++
nvme.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 73 insertions(+), 7 deletions(-)
diff --git a/Makefile b/Makefile
index 1dd8c0e..8bcf5a5 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ CFLAGS ?= -O2 -g -Wall -Werror
override CFLAGS += -std=gnu99 -I.
override CPPFLAGS += -D_GNU_SOURCE -D__CHECK_ENDIAN__
LIBUUID = $(shell $(LD) -o /dev/null -luuid >/dev/null 2>&1; echo $$?)
+LIBHUGETLBFS = $(shell $(LD) -o /dev/null -lhugetlbfs >/dev/null 2>&1; echo $$?)
HAVE_SYSTEMD = $(shell pkg-config --exists systemd --atleast-version=232; echo $$?)
NVME = nvme
INSTALL ?= install
@@ -21,6 +22,12 @@ ifeq ($(LIBUUID),0)
override LIB_DEPENDS += uuid
endif
+ifeq ($(LIBHUGETLBFS),0)
+ override LDFLAGS += -lhugetlbfs
+ override CFLAGS += -DLIBHUGETLBFS
+ override LIB_DEPENDS += hugetlbfs
+endif
+
INC=-Iutil
ifeq ($(HAVE_SYSTEMD),0)
diff --git a/nvme.c b/nvme.c
index 0823267..254bc7e 100644
--- a/nvme.c
+++ b/nvme.c
@@ -37,6 +37,10 @@
#include <dirent.h>
#include <libgen.h>
+#ifdef LIBHUGETLBFS
+#include <hugetlbfs.h>
+#endif
+
#include <linux/fs.h>
#include <sys/ioctl.h>
@@ -93,6 +97,55 @@ const char *conarg_host_traddr = "host_traddr";
const char *dev = "/dev/";
const char *subsys_dir = "/sys/class/nvme-subsystem/";
+static void *__nvme_alloc(size_t len, bool *huge)
+{
+ void *p;
+
+ if (!posix_memalign(&p, getpagesize(), len)) {
+ *huge = false;
+ memset(p, 0, len);
+ return p;
+ }
+ return NULL;
+}
+
+#ifdef LIBHUGETLBFS
+#define HUGE_MIN 0x80000
+
+static void nvme_free(void *p, bool huge)
+{
+ if (huge)
+ free_hugepage_region(p);
+ else
+ free(p);
+}
+
+static void *nvme_alloc(size_t len, bool *huge)
+{
+ void *p;
+
+ if (len < HUGE_MIN)
+ return __nvme_alloc(len, huge);
+
+ p = get_hugepage_region(len, GHP_DEFAULT);
+ if (!p)
+ return __nvme_alloc(len, huge);
+
+ *huge = true;
+ return p;
+}
+#else
+static void nvme_free(void *p, bool huge)
+{
+ free(p);
+}
+
+static void *nvme_alloc(size_t len, bool *huge)
+{
+ return __nvme_alloc(len, huge);
+}
+#endif
+
static int open_dev(char *dev)
{
int err, fd;
@@ -2362,6 +2415,7 @@ static int fw_download(int argc, char **argv, struct command *cmd, struct plugin
unsigned int fw_size;
struct stat sb;
void *fw_buf, *buf;
+ bool huge;
struct config {
char *fw;
@@ -2409,7 +2463,9 @@ static int fw_download(int argc, char **argv, struct command *cmd, struct plugin
err = -EINVAL;
goto close_fw_fd;
}
- if (posix_memalign(&fw_buf, getpagesize(), fw_size)) {
+
+ fw_buf = nvme_alloc(fw_size, &huge);
+ if (!fw_buf) {
fprintf(stderr, "No memory for f/w size:%d\n", fw_size);
err = -ENOMEM;
goto close_fw_fd;
@@ -2443,7 +2499,7 @@ static int fw_download(int argc, char **argv, struct command *cmd, struct plugin
printf("Firmware download success\n");
free:
- free(buf);
+ nvme_free(buf, huge);
close_fw_fd:
close(fw_fd);
close_fd:
@@ -4144,6 +4200,7 @@ static int submit_io(int opcode, char *command, const char *desc,
__u32 dsmgmt = 0;
int phys_sector_size = 0;
long long buffer_size = 0;
+ bool huge;
const char *start_block = "64-bit addr of first block to access";
const char *block_count = "number of blocks (zeroes based) on device to access";
@@ -4284,12 +4341,12 @@ static int submit_io(int opcode, char *command, const char *desc,
buffer_size = cfg.data_size;
}
- if (posix_memalign(&buffer, getpagesize(), buffer_size)) {
+ buffer = nvme_alloc(buffer_size, &huge);
+ if (!buffer) {
fprintf(stderr, "can not allocate io payload\n");
err = -ENOMEM;
goto close_mfd;
}
- memset(buffer, 0, buffer_size);
if (cfg.metadata_size) {
mbuffer = malloc(cfg.metadata_size);
@@ -4368,7 +4425,7 @@ free_mbuffer:
if (cfg.metadata_size)
free(mbuffer);
free_buffer:
- free(buffer);
+ nvme_free(buffer, huge);
close_mfd:
if (strlen(cfg.metadata))
close(mfd);
@@ -4798,6 +4855,7 @@ static int passthru(int argc, char **argv, int ioctl_cmd, const char *desc, stru
void *data = NULL, *metadata = NULL;
int err = 0, wfd = STDIN_FILENO, fd;
__u32 result;
+ bool huge;
struct config {
__u8 opcode;
@@ -4920,7 +4978,8 @@ static int passthru(int argc, char **argv, int ioctl_cmd, const char *desc, stru
memset(metadata, cfg.prefill, cfg.metadata_len);
}
if (cfg.data_len) {
- if (posix_memalign(&data, getpagesize(), cfg.data_len)) {
+ data = nvme_alloc(cfg.data_len, &huge);
+ if (!data) {
fprintf(stderr, "can not allocate data payload\n");
err = -ENOMEM;
goto free_metadata;
@@ -4983,7 +5042,7 @@ static int passthru(int argc, char **argv, int ioctl_cmd, const char *desc, stru
free_data:
if (cfg.data_len)
- free(data);
+ nvme_free(data, huge);
free_metadata:
if (cfg.metadata_len)
free(metadata);
--
2.21.0
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
next reply other threads:[~2019-11-05 18:34 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-05 18:34 Keith Busch [this message]
2019-11-05 20:35 ` [RFC] nvme-cli: Support for hugetlbfs Chaitanya Kulkarni
2019-11-06 0:49 ` Keith Busch
2019-11-06 0:20 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191105183412.333-1-kbusch@kernel.org \
--to=kbusch@kernel.org \
--cc=axboe@kernel.dk \
--cc=linux-nvme@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox