From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: Zorro Lang <zlang@redhat.com>
Cc: fstests@vger.kernel.org
Subject: Re: [PATCH v4 1/2] fsstress: add splice support
Date: Thu, 31 Jan 2019 18:11:30 -0800 [thread overview]
Message-ID: <20190201021130.GA29630@magnolia> (raw)
In-Reply-To: <20190123073455.24539-1-zlang@redhat.com>
On Wed, Jan 23, 2019 at 03:34:54PM +0800, Zorro Lang wrote:
> Support the splice syscall in fsstress.
>
> Signed-off-by: Zorro Lang <zlang@redhat.com>
> ---
> ltp/fsstress.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 173 insertions(+)
>
> diff --git a/ltp/fsstress.c b/ltp/fsstress.c
> index 99a1d733..c04feb78 100644
> --- a/ltp/fsstress.c
> +++ b/ltp/fsstress.c
> @@ -85,6 +85,7 @@ typedef enum {
> OP_RMDIR,
> OP_SETATTR,
> OP_SETXATTR,
> + OP_SPLICE,
> OP_STAT,
> OP_SYMLINK,
> OP_SYNC,
> @@ -194,6 +195,7 @@ void resvsp_f(int, long);
> void rmdir_f(int, long);
> void setattr_f(int, long);
> void setxattr_f(int, long);
> +void splice_f(int, long);
> void stat_f(int, long);
> void symlink_f(int, long);
> void sync_f(int, long);
> @@ -244,6 +246,7 @@ opdesc_t ops[] = {
> { OP_RMDIR, "rmdir", rmdir_f, 1, 1 },
> { OP_SETATTR, "setattr", setattr_f, 0, 1 },
> { OP_SETXATTR, "setxattr", setxattr_f, 1, 1 },
> + { OP_SPLICE, "splice", splice_f, 1, 1 },
> { OP_STAT, "stat", stat_f, 1, 0 },
> { OP_SYMLINK, "symlink", symlink_f, 2, 1 },
> { OP_SYNC, "sync", sync_f, 1, 1 },
> @@ -2764,6 +2767,176 @@ setxattr_f(int opno, long r)
> #endif
> }
>
> +void
> +splice_f(int opno, long r)
> +{
> + struct pathname fpath1;
> + struct pathname fpath2;
> + struct stat64 stat1;
> + struct stat64 stat2;
> + char inoinfo1[1024];
> + char inoinfo2[1024];
> + loff_t lr;
> + loff_t off1, off2;
> + size_t len;
> + loff_t offset1, offset2;
> + size_t length;
> + size_t total;
> + int v1;
> + int v2;
> + int fd1;
> + int fd2;
> + ssize_t ret1 = 0, ret2 = 0;
> + size_t bytes;
> + int e;
> + int filedes[2];
> +
> + /* Load paths */
> + init_pathname(&fpath1);
> + if (!get_fname(FT_REGm, r, &fpath1, NULL, NULL, &v1)) {
> + if (v1)
> + printf("%d/%d: splice read - no filename\n",
> + procid, opno);
> + goto out_fpath1;
> + }
> +
> + init_pathname(&fpath2);
> + if (!get_fname(FT_REGm, random(), &fpath2, NULL, NULL, &v2)) {
> + if (v2)
> + printf("%d/%d: splice write - no filename\n",
> + procid, opno);
> + goto out_fpath2;
> + }
> +
> + /* Open files */
> + fd1 = open_path(&fpath1, O_RDONLY);
> + e = fd1 < 0 ? errno : 0;
> + check_cwd();
> + if (fd1 < 0) {
> + if (v1)
> + printf("%d/%d: splice read - open %s failed %d\n",
> + procid, opno, fpath1.path, e);
> + goto out_fpath2;
> + }
> +
> + fd2 = open_path(&fpath2, O_WRONLY);
> + e = fd2 < 0 ? errno : 0;
> + check_cwd();
> + if (fd2 < 0) {
> + if (v2)
> + printf("%d/%d: splice write - open %s failed %d\n",
> + procid, opno, fpath2.path, e);
> + goto out_fd1;
> + }
> +
> + /* Get file stats */
> + if (fstat64(fd1, &stat1) < 0) {
> + if (v1)
> + printf("%d/%d: splice read - fstat64 %s failed %d\n",
> + procid, opno, fpath1.path, errno);
> + goto out_fd2;
> + }
> + inode_info(inoinfo1, sizeof(inoinfo1), &stat1, v1);
> +
> + if (fstat64(fd2, &stat2) < 0) {
> + if (v2)
> + printf("%d/%d: splice write - fstat64 %s failed %d\n",
> + procid, opno, fpath2.path, errno);
> + goto out_fd2;
> + }
> + inode_info(inoinfo2, sizeof(inoinfo2), &stat2, v2);
> +
> + /* Calculate offsets */
> + len = (random() % FILELEN_MAX) + 1;
> + if (len == 0)
> + len = stat1.st_blksize;
> + if (len > stat1.st_size)
> + len = stat1.st_size;
> +
> + lr = ((int64_t)random() << 32) + random();
> + if (stat1.st_size == len)
> + off1 = 0;
> + else
> + off1 = (off64_t)(lr % MIN(stat1.st_size - len, MAXFSIZE));
> + off1 %= maxfsize;
> +
> + /*
> + * splice can overlap write, so the offset of the target file can be
> + * any number (< maxfsize)
> + */
Er... sorry I've been offline for a couple of weeks due to illness and
so was not able to comment on this patch until now, but I've had a
problem with my overnight fstests runs:
> + lr = ((int64_t)random() << 32) + random();
This generates a pseudorandom 64-bit candidate offset for the
destination file where we'll land the splice data...
> + off2 = (off64_t)(lr % maxfsize);
...and this caps the offset at maxfsize (which is 2^63 - 1 on x64). In
practice this means that the data will usually land at a very high file
offset, which creates large (sparse) files very quickly.
Contrast this to other functions like clonerange_f, which add an
additional clamp of 1024 blocks past the current dest file EOF:
max_off2 = MIN(stat2.st_size + (1024ULL * stat2.st_blksize), MAXFSIZE);
do {
lr = ((int64_t)random() << 32) + random();
off2 = (off64_t)(lr % max_off2);
off2 %= maxfsize;
...
}
or truncate_f, which clamps to both 1MB past EOF and MAXFSIZE:
lr = ((int64_t)random() << 32) + random();
off = (off64_t)(lr % MIN(stb.st_size + (1024 * 1024), MAXFSIZE));
off %= maxfsize;
In other words, I think we try to grow the fsstress file sizes fairly
slowly so that gigantic files don't suddenly jump out of the bushes:
0/487: splice d3/d9/f2c[6319385 1 0 0 176 1395200] [860317,88612] ->
d3/d9/dd/d1c/d21/f4b[1111 1 0 0 0 1408811] [8492675175361853476,88612] 0
Yikes, it wrote 88,612 bytes of data at offset 8,492,675,175,361,853,476!
This causes shared/009 to take forever to run, because it runs fsstress
to generate some files, and then uses md5sum to ensure that duperemove
doesn't corrupt files. Unfortunately, it takes a very long time to
read an entire 8500-petabyte file.
--D
> +
> + /*
> + * Due to len, off1 and off2 will be changed later, so record the
> + * original number at here
> + */
> + length = len;
> + offset1 = off1;
> + offset2 = off2;
> +
> + /* Pipe initialize */
> + if (pipe(filedes) < 0) {
> + if (v1 || v2) {
> + printf("%d/%d: splice - pipe failed %d\n",
> + procid, opno, errno);
> + goto out_fd2;
> + }
> + }
> +
> + bytes = 0;
> + total = 0;
> + while (len > 0) {
> + /* move to pipe buffer */
> + ret1 = splice(fd1, &off1, filedes[1], NULL, len, 0);
> + if (ret1 < 0) {
> + break;
> + }
> + bytes = ret1;
> +
> + /* move from pipe buffer to dst file */
> + while (bytes > 0) {
> + ret2 = splice(filedes[0], NULL, fd2, &off2, bytes, 0);
> + if (ret2 < 0) {
> + break;
> + }
> + bytes -= ret2;
> + }
> + if (ret2 < 0)
> + break;
> +
> + len -= ret1;
> + total += ret1;
> + }
> +
> + if (ret1 < 0 || ret2 < 0)
> + e = errno;
> + else
> + e = 0;
> + if (v1 || v2) {
> + printf("%d/%d: splice %s%s [%lld,%lld] -> %s%s [%lld,%lld] %d",
> + procid, opno,
> + fpath1.path, inoinfo1, (long long)offset1, (long long)length,
> + fpath2.path, inoinfo2, (long long)offset2, (long long)length, e);
> +
> + if (length && length > total)
> + printf(" asked for %lld, spliced %lld??\n",
> + (long long)length, (long long)total);
> + printf("\n");
> + }
> +
> + close(filedes[0]);
> + close(filedes[1]);
> +out_fd2:
> + close(fd2);
> +out_fd1:
> + close(fd1);
> +out_fpath2:
> + free_pathname(&fpath2);
> +out_fpath1:
> + free_pathname(&fpath1);
> +}
> +
> void
> creat_f(int opno, long r)
> {
> --
> 2.17.2
>
next prev parent reply other threads:[~2019-02-01 2:11 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-01-23 7:34 [PATCH v4 1/2] fsstress: add splice support Zorro Lang
2019-01-23 7:34 ` [PATCH v4 2/2] common/dump: disable splice from FSSTRESS_AVOID Zorro Lang
2019-02-01 2:11 ` Darrick J. Wong [this message]
2019-02-01 5:07 ` [PATCH v4 1/2] fsstress: add splice support Zorro Lang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190201021130.GA29630@magnolia \
--to=darrick.wong@oracle.com \
--cc=fstests@vger.kernel.org \
--cc=zlang@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox