* [LTP] [PATCH] Add test for possible writev() issues with NULL buffer in iovec
@ 2021-02-19 12:57 Martin Doucha
2021-02-19 14:35 ` Richard Palethorpe
0 siblings, 1 reply; 3+ messages in thread
From: Martin Doucha @ 2021-02-19 12:57 UTC (permalink / raw)
To: ltp
Fixes #790
Signed-off-by: Martin Doucha <mdoucha@suse.cz>
---
This test triggers temporary write of invalid data into test file on some
file systems on kernel 4.4.21 and older.
runtest/syscalls | 1 +
testcases/kernel/syscalls/writev/.gitignore | 1 +
testcases/kernel/syscalls/writev/Makefile | 3 +
testcases/kernel/syscalls/writev/writev03.c | 142 ++++++++++++++++++++
4 files changed, 147 insertions(+)
create mode 100644 testcases/kernel/syscalls/writev/writev03.c
diff --git a/runtest/syscalls b/runtest/syscalls
index ae47a6d5e..f01d94540 100644
--- a/runtest/syscalls
+++ b/runtest/syscalls
@@ -1675,6 +1675,7 @@ write05 write05
writev01 writev01
writev02 writev02
+writev03 writev03
writev05 writev05
writev06 writev06
writev07 writev07
diff --git a/testcases/kernel/syscalls/writev/.gitignore b/testcases/kernel/syscalls/writev/.gitignore
index d60da0f43..167779736 100644
--- a/testcases/kernel/syscalls/writev/.gitignore
+++ b/testcases/kernel/syscalls/writev/.gitignore
@@ -1,5 +1,6 @@
/writev01
/writev02
+/writev03
/writev05
/writev06
/writev07
diff --git a/testcases/kernel/syscalls/writev/Makefile b/testcases/kernel/syscalls/writev/Makefile
index 4844a6910..6627abaed 100644
--- a/testcases/kernel/syscalls/writev/Makefile
+++ b/testcases/kernel/syscalls/writev/Makefile
@@ -9,4 +9,7 @@ endif
include $(top_srcdir)/include/mk/testcases.mk
+writev03: CFLAGS += -pthread
+writev03: LDLIBS += -lrt
+
include $(top_srcdir)/include/mk/generic_leaf_target.mk
diff --git a/testcases/kernel/syscalls/writev/writev03.c b/testcases/kernel/syscalls/writev/writev03.c
new file mode 100644
index 000000000..f48efccf2
--- /dev/null
+++ b/testcases/kernel/syscalls/writev/writev03.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 SUSE LLC <mdoucha@suse.cz>
+ *
+ * Check for potential issues in writev() if the first iovec entry is NULL
+ * and the next one is not present in RAM. This can result in a brief window
+ * where writev() first writes uninitialized data into the file (possibly
+ * exposing internal kernel structures) and then overwrites it with the real
+ * iovec contents later. Bugs fixed in:
+ *
+ * commit d4690f1e1cdabb4d61207b6787b1605a0dc0aeab
+ * Author: Al Viro <viro@ZenIV.linux.org.uk>
+ * Date: Fri Sep 16 00:11:45 2016 +0100
+ *
+ * fix iov_iter_fault_in_readable()
+ */
+
+#include <sys/uio.h>
+#include "tst_test.h"
+#include "tst_fuzzy_sync.h"
+
+#define CHUNK_SIZE 256
+#define BUF_SIZE (2 * CHUNK_SIZE)
+#define MNTPOINT "mntpoint"
+#define TEMPFILE MNTPOINT "/test_file"
+#define MAPFILE MNTPOINT "/map_file"
+
+static unsigned char buf[BUF_SIZE], *map_ptr;
+static int mapfd = -1, writefd = -1;
+static volatile ssize_t written;
+static struct tst_fzsync_pair fzsync_pair;
+struct iovec iov[5];
+
+static void setup(void)
+{
+ int i;
+
+ for (i = 0; i < BUF_SIZE; i++)
+ buf[i] = i & 0xff;
+
+ mapfd = SAFE_OPEN(MAPFILE, O_CREAT|O_RDWR|O_TRUNC, 0644);
+ SAFE_WRITE(1, mapfd, buf, BUF_SIZE);
+
+ tst_fzsync_pair_init(&fzsync_pair);
+}
+
+static void *thread_run(void *arg)
+{
+ while (tst_fzsync_run_b(&fzsync_pair)) {
+ writefd = SAFE_OPEN(TEMPFILE, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ written = BUF_SIZE;
+ tst_fzsync_wait_b(&fzsync_pair);
+
+ /*
+ * Do *NOT* preload the data using MAP_POPULATE or touching
+ * the mapped range. We're testing whether writev() handles
+ * fault-in correctly.
+ */
+ map_ptr = SAFE_MMAP(NULL, BUF_SIZE, PROT_READ, MAP_SHARED,
+ mapfd, 0);
+ iov[1].iov_base = map_ptr;
+ iov[1].iov_len = CHUNK_SIZE;
+ iov[3].iov_base = map_ptr + CHUNK_SIZE;
+ iov[3].iov_len = CHUNK_SIZE;
+
+ tst_fzsync_start_race_b(&fzsync_pair);
+ written = writev(writefd, iov, ARRAY_SIZE(iov));
+ tst_fzsync_end_race_b(&fzsync_pair);
+
+ SAFE_MUNMAP(map_ptr, BUF_SIZE);
+ map_ptr = NULL;
+ SAFE_CLOSE(writefd);
+ }
+
+ return arg;
+}
+
+static void run(void)
+{
+ int fd, failed = 0;
+ ssize_t total_read;
+ unsigned char readbuf[BUF_SIZE];
+
+ tst_fzsync_pair_reset(&fzsync_pair, thread_run);
+
+ while (!failed && tst_fzsync_run_a(&fzsync_pair)) {
+ tst_fzsync_wait_a(&fzsync_pair);
+ fd = SAFE_OPEN(TEMPFILE, O_RDONLY);
+ tst_fzsync_start_race_a(&fzsync_pair);
+
+ for (total_read = 0; total_read < written;) {
+ total_read += SAFE_READ(0, fd, readbuf + total_read,
+ BUF_SIZE - total_read);
+ }
+
+ tst_fzsync_end_race_a(&fzsync_pair);
+ SAFE_CLOSE(fd);
+
+ if (total_read > BUF_SIZE)
+ tst_brk(TBROK, "read() returned too much data");
+
+ if (total_read <= 0)
+ continue;
+
+ if (memcmp(readbuf, buf, (size_t)total_read)) {
+ tst_res(TFAIL, "writev() wrote invalid data");
+ failed = 1;
+ break;
+ }
+ }
+
+ if (!failed)
+ tst_res(TPASS, "writev() handles page fault-in correctly");
+}
+
+static void cleanup(void)
+{
+ if (map_ptr && map_ptr != MAP_FAILED)
+ SAFE_MUNMAP(map_ptr, BUF_SIZE);
+
+ if (mapfd >= 0)
+ SAFE_CLOSE(mapfd);
+
+ if (writefd >= 0)
+ SAFE_CLOSE(writefd);
+
+ tst_fzsync_pair_cleanup(&fzsync_pair);
+}
+
+static struct tst_test test = {
+ .test_all = run,
+ .needs_root = 1,
+ .mount_device = 1,
+ .mntpoint = MNTPOINT,
+ .all_filesystems = 1,
+ .setup = setup,
+ .cleanup = cleanup,
+ .tags = (const struct tst_tag[]) {
+ {"linux-git", "d4690f1e1cda"},
+ {}
+ }
+};
--
2.30.0
^ permalink raw reply related [flat|nested] 3+ messages in thread* [LTP] [PATCH] Add test for possible writev() issues with NULL buffer in iovec
2021-02-19 12:57 [LTP] [PATCH] Add test for possible writev() issues with NULL buffer in iovec Martin Doucha
@ 2021-02-19 14:35 ` Richard Palethorpe
2021-02-19 17:37 ` Martin Doucha
0 siblings, 1 reply; 3+ messages in thread
From: Richard Palethorpe @ 2021-02-19 14:35 UTC (permalink / raw)
To: ltp
Hello,
Martin Doucha <mdoucha@suse.cz> writes:
> Fixes #790
>
> Signed-off-by: Martin Doucha <mdoucha@suse.cz>
> ---
>
> This test triggers temporary write of invalid data into test file on some
> file systems on kernel 4.4.21 and older.
>
> runtest/syscalls | 1 +
> testcases/kernel/syscalls/writev/.gitignore | 1 +
> testcases/kernel/syscalls/writev/Makefile | 3 +
> testcases/kernel/syscalls/writev/writev03.c | 142 ++++++++++++++++++++
> 4 files changed, 147 insertions(+)
> create mode 100644 testcases/kernel/syscalls/writev/writev03.c
>
> diff --git a/runtest/syscalls b/runtest/syscalls
> index ae47a6d5e..f01d94540 100644
> --- a/runtest/syscalls
> +++ b/runtest/syscalls
> @@ -1675,6 +1675,7 @@ write05 write05
>
> writev01 writev01
> writev02 writev02
> +writev03 writev03
> writev05 writev05
> writev06 writev06
> writev07 writev07
> diff --git a/testcases/kernel/syscalls/writev/.gitignore b/testcases/kernel/syscalls/writev/.gitignore
> index d60da0f43..167779736 100644
> --- a/testcases/kernel/syscalls/writev/.gitignore
> +++ b/testcases/kernel/syscalls/writev/.gitignore
> @@ -1,5 +1,6 @@
> /writev01
> /writev02
> +/writev03
> /writev05
> /writev06
> /writev07
> diff --git a/testcases/kernel/syscalls/writev/Makefile b/testcases/kernel/syscalls/writev/Makefile
> index 4844a6910..6627abaed 100644
> --- a/testcases/kernel/syscalls/writev/Makefile
> +++ b/testcases/kernel/syscalls/writev/Makefile
> @@ -9,4 +9,7 @@ endif
>
> include $(top_srcdir)/include/mk/testcases.mk
>
> +writev03: CFLAGS += -pthread
> +writev03: LDLIBS += -lrt
> +
> include $(top_srcdir)/include/mk/generic_leaf_target.mk
> diff --git a/testcases/kernel/syscalls/writev/writev03.c b/testcases/kernel/syscalls/writev/writev03.c
> new file mode 100644
> index 000000000..f48efccf2
> --- /dev/null
> +++ b/testcases/kernel/syscalls/writev/writev03.c
> @@ -0,0 +1,142 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2021 SUSE LLC <mdoucha@suse.cz>
> + *
> + * Check for potential issues in writev() if the first iovec entry is NULL
> + * and the next one is not present in RAM. This can result in a brief window
> + * where writev() first writes uninitialized data into the file (possibly
> + * exposing internal kernel structures) and then overwrites it with the real
> + * iovec contents later. Bugs fixed in:
> + *
> + * commit d4690f1e1cdabb4d61207b6787b1605a0dc0aeab
> + * Author: Al Viro <viro@ZenIV.linux.org.uk>
> + * Date: Fri Sep 16 00:11:45 2016 +0100
> + *
> + * fix iov_iter_fault_in_readable()
> + */
> +
> +#include <sys/uio.h>
> +#include "tst_test.h"
> +#include "tst_fuzzy_sync.h"
> +
> +#define CHUNK_SIZE 256
> +#define BUF_SIZE (2 * CHUNK_SIZE)
> +#define MNTPOINT "mntpoint"
> +#define TEMPFILE MNTPOINT "/test_file"
> +#define MAPFILE MNTPOINT "/map_file"
> +
> +static unsigned char buf[BUF_SIZE], *map_ptr;
> +static int mapfd = -1, writefd = -1;
> +static volatile ssize_t written;
written should be atomic type (see below)
> +static struct tst_fzsync_pair fzsync_pair;
> +struct iovec iov[5];
> +
> +static void setup(void)
> +{
> + int i;
> +
> + for (i = 0; i < BUF_SIZE; i++)
> + buf[i] = i & 0xff;
> +
> + mapfd = SAFE_OPEN(MAPFILE, O_CREAT|O_RDWR|O_TRUNC, 0644);
> + SAFE_WRITE(1, mapfd, buf, BUF_SIZE);
> +
> + tst_fzsync_pair_init(&fzsync_pair);
> +}
> +
> +static void *thread_run(void *arg)
> +{
> + while (tst_fzsync_run_b(&fzsync_pair)) {
> + writefd = SAFE_OPEN(TEMPFILE, O_CREAT|O_WRONLY|O_TRUNC, 0644);
> + written = BUF_SIZE;
> + tst_fzsync_wait_b(&fzsync_pair);
> +
> + /*
> + * Do *NOT* preload the data using MAP_POPULATE or touching
> + * the mapped range. We're testing whether writev() handles
> + * fault-in correctly.
> + */
> + map_ptr = SAFE_MMAP(NULL, BUF_SIZE, PROT_READ, MAP_SHARED,
> + mapfd, 0);
Possibly, instead of recreating the mapping each loop you could call
madvise MADV_DONTNEED on the mapping. In which case it may also be best
to use MAP_PRIVATE as well. Of courese if it is already fast then this
does not matter.
> + iov[1].iov_base = map_ptr;
> + iov[1].iov_len = CHUNK_SIZE;
> + iov[3].iov_base = map_ptr + CHUNK_SIZE;
> + iov[3].iov_len = CHUNK_SIZE;
> +
> + tst_fzsync_start_race_b(&fzsync_pair);
> + written = writev(writefd, iov, ARRAY_SIZE(iov));
To be on the safe side we should write to written with the atomic
functions (see below).
> + tst_fzsync_end_race_b(&fzsync_pair);
> +
> + SAFE_MUNMAP(map_ptr, BUF_SIZE);
> + map_ptr = NULL;
> + SAFE_CLOSE(writefd);
> + }
> +
> + return arg;
> +}
> +
> +static void run(void)
> +{
> + int fd, failed = 0;
> + ssize_t total_read;
> + unsigned char readbuf[BUF_SIZE];
> +
> + tst_fzsync_pair_reset(&fzsync_pair, thread_run);
> +
> + while (!failed && tst_fzsync_run_a(&fzsync_pair)) {
> + tst_fzsync_wait_a(&fzsync_pair);
> + fd = SAFE_OPEN(TEMPFILE, O_RDONLY);
> + tst_fzsync_start_race_a(&fzsync_pair);
> +
> + for (total_read = 0; total_read < written;) {
Also read from written with the tst_atomic functions. This is especially
important for weak memory models because written may not be synchronised
straight away. Then it could block on read().
There is also a small chance some architecture will update ssize_t
non-atomically so written is smaller than expected. This would lead to a
false positive.
I suppose an alternative would be to complete writing the data just using
an ordinary write() call or however you want.
> + total_read += SAFE_READ(0, fd, readbuf + total_read,
> + BUF_SIZE - total_read);
> + }
> +
> + tst_fzsync_end_race_a(&fzsync_pair);
> + SAFE_CLOSE(fd);
> +
> + if (total_read > BUF_SIZE)
> + tst_brk(TBROK, "read() returned too much data");
> +
> + if (total_read <= 0)
> + continue;
> +
> + if (memcmp(readbuf, buf, (size_t)total_read)) {
> + tst_res(TFAIL, "writev() wrote invalid data");
> + failed = 1;
> + break;
> + }
> + }
> +
> + if (!failed)
> + tst_res(TPASS, "writev() handles page fault-in correctly");
> +}
> +
> +static void cleanup(void)
> +{
> + if (map_ptr && map_ptr != MAP_FAILED)
> + SAFE_MUNMAP(map_ptr, BUF_SIZE);
> +
> + if (mapfd >= 0)
> + SAFE_CLOSE(mapfd);
> +
> + if (writefd >= 0)
> + SAFE_CLOSE(writefd);
> +
> + tst_fzsync_pair_cleanup(&fzsync_pair);
> +}
> +
> +static struct tst_test test = {
> + .test_all = run,
> + .needs_root = 1,
> + .mount_device = 1,
> + .mntpoint = MNTPOINT,
> + .all_filesystems = 1,
> + .setup = setup,
> + .cleanup = cleanup,
> + .tags = (const struct tst_tag[]) {
> + {"linux-git", "d4690f1e1cda"},
I guess CVE is on the way?
> + {}
> + }
> +};
> --
> 2.30.0
Apart from the volatile variable looks good!
--
Thank you,
Richard.
^ permalink raw reply [flat|nested] 3+ messages in thread* [LTP] [PATCH] Add test for possible writev() issues with NULL buffer in iovec
2021-02-19 14:35 ` Richard Palethorpe
@ 2021-02-19 17:37 ` Martin Doucha
0 siblings, 0 replies; 3+ messages in thread
From: Martin Doucha @ 2021-02-19 17:37 UTC (permalink / raw)
To: ltp
On 19. 02. 21 15:35, Richard Palethorpe wrote:
>> +static void *thread_run(void *arg)
>> +{
>> + while (tst_fzsync_run_b(&fzsync_pair)) {
>> + writefd = SAFE_OPEN(TEMPFILE, O_CREAT|O_WRONLY|O_TRUNC, 0644);
>> + written = BUF_SIZE;
>> + tst_fzsync_wait_b(&fzsync_pair);
>> +
>> + /*
>> + * Do *NOT* preload the data using MAP_POPULATE or touching
>> + * the mapped range. We're testing whether writev() handles
>> + * fault-in correctly.
>> + */
>> + map_ptr = SAFE_MMAP(NULL, BUF_SIZE, PROT_READ, MAP_SHARED,
>> + mapfd, 0);
>
> Possibly, instead of recreating the mapping each loop you could call
> madvise MADV_DONTNEED on the mapping. In which case it may also be best
> to use MAP_PRIVATE as well. Of courese if it is already fast then this
> does not matter.
I considered using madvise(MADV_DONTNEED) but decided against it because
it might only mark the memory page as stale but otherwise leave the
contents untouched. That would result in writev() always writing the
correct data after the first iteration even if the page-in is completely
broken and the first iteration only passed due to lucky timing.
Recreating the mapping is fast enough and more reliable for reproducing
the expected bugs.
>> +static void run(void)
>> +{
>> + int fd, failed = 0;
>> + ssize_t total_read;
>> + unsigned char readbuf[BUF_SIZE];
>> +
>> + tst_fzsync_pair_reset(&fzsync_pair, thread_run);
>> +
>> + while (!failed && tst_fzsync_run_a(&fzsync_pair)) {
>> + tst_fzsync_wait_a(&fzsync_pair);
>> + fd = SAFE_OPEN(TEMPFILE, O_RDONLY);
>> + tst_fzsync_start_race_a(&fzsync_pair);
>> +
>> + for (total_read = 0; total_read < written;) {
>
> Also read from written with the tst_atomic functions. This is especially
> important for weak memory models because written may not be synchronised
> straight away. Then it could block on read().
>
> There is also a small chance some architecture will update ssize_t
> non-atomically so written is smaller than expected. This would lead to a
> false positive.
>
> I suppose an alternative would be to complete writing the data just using
> an ordinary write() call or however you want.
This test does not care whether writev() actually writes anything to the
test file. Returning -1 is perfectly valid result and the test will
simply skip to the next iteration. The only failure states are:
- read() fails (returning 0 is OK)
- read() finds too much data in the file (I should adjust readbuf size
to actually detect that)
- read() loads something that doesn't match what was supposed to be
written (and in case of short write, we care only about the portion that
was actually written)
> I guess CVE is on the way?
I'm not aware of any existing CVE and the bugfix is almost 4 years old
so I don't expect any.
I'll resubmit on Monday after some improvements, including atomic
load/store.
--
Martin Doucha mdoucha@suse.cz
QA Engineer for Software Maintenance
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-02-19 17:37 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-02-19 12:57 [LTP] [PATCH] Add test for possible writev() issues with NULL buffer in iovec Martin Doucha
2021-02-19 14:35 ` Richard Palethorpe
2021-02-19 17:37 ` Martin Doucha
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.