qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, renzhen@linux.alibaba.com,
	eguan@linux.alibaba.com, ganesh.mahalingam@intel.com,
	m.mizuma@jp.fujitsu.com, mszeredi@redhat.com,
	misono.tomohiro@jp.fujitsu.com, tao.peng@linux.alibaba.com,
	piaojun@huawei.com, stefanha@redhat.com, vgoyal@redhat.com,
	mst@redhat.com, berrange@redhat.com
Subject: [PATCH 06/25] virtiofsd: passthrough_ll: add fallback for racy ops
Date: Thu, 24 Oct 2019 12:26:59 +0100	[thread overview]
Message-ID: <20191024112718.34657-7-dgilbert@redhat.com> (raw)
In-Reply-To: <20191024112718.34657-1-dgilbert@redhat.com>

From: Miklos Szeredi <mszeredi@redhat.com>

We have two operations that cannot be done race-free on a symlink in
certain cases: utimes and link.

Add a racy fallback for these, used if the race-free method doesn't work.
We do our best to avoid races even in this case:

  - get absolute path by reading /proc/self/fd/NN symlink

  - lookup parent directory: after this we are safe against renames in
    ancestors

  - lookup name in parent directory, and verify that we got to the original
    inode; if not, retry the whole thing

Both utimes(2) and link(2) hold i_lock on the inode across the operation,
so a racing rename/delete by this fuse instance is not possible; such a
race can only come from other entities changing the filesystem.

If the "norace" option is given, then disable the racy fallbacks.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 contrib/virtiofsd/passthrough_ll.c | 149 +++++++++++++++++++++++++----
 1 file changed, 131 insertions(+), 18 deletions(-)

diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c
index a71fbff143..9f84419816 100644
--- a/contrib/virtiofsd/passthrough_ll.c
+++ b/contrib/virtiofsd/passthrough_ll.c
@@ -98,6 +98,7 @@ enum {
 struct lo_data {
 	pthread_mutex_t mutex;
 	int debug;
+	int norace;
 	int writeback;
 	int flock;
 	int xattr;
@@ -136,10 +137,16 @@ static const struct fuse_opt lo_opts[] = {
 	  offsetof(struct lo_data, cache), CACHE_NORMAL },
 	{ "cache=always",
 	  offsetof(struct lo_data, cache), CACHE_ALWAYS },
-
+	{ "norace",
+	  offsetof(struct lo_data, norace), 1 },
 	FUSE_OPT_END
 };
 
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
+
+
 static struct lo_data *lo_data(fuse_req_t req)
 {
 	return (struct lo_data *) fuse_req_userdata(req);
@@ -345,24 +352,116 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
 	fuse_reply_attr(req, &buf, lo->timeout);
 }
 
-static int utimensat_empty_nofollow(struct lo_inode *inode,
-				    const struct timespec *tv)
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
+			      char path[PATH_MAX], struct lo_inode **parent)
 {
-	int res;
 	char procname[64];
+	char *last;
+	struct stat stat;
+	struct lo_inode *p;
+	int retries = 2;
+	int res;
+
+retry:
+	sprintf(procname, "/proc/self/fd/%i", inode->fd);
+
+	res = readlink(procname, path, PATH_MAX);
+	if (res < 0) {
+		fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink failed: %m\n");
+		goto fail_noretry;
+	}
+
+	if (res >= PATH_MAX) {
+		fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: readlink overflowed\n");
+		goto fail_noretry;
+	}
+	path[res] = '\0';
+
+	last = strrchr(path, '/');
+	if (last == NULL) {
+		/* Shouldn't happen */
+		fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: INTERNAL ERROR: bad path read from proc\n");
+		goto fail_noretry;
+	}
+	if (last == path) {
+		p = &lo->root;
+		pthread_mutex_lock(&lo->mutex);
+		p->refcount++;
+		pthread_mutex_unlock(&lo->mutex);
+	} else {
+		*last = '\0';
+		res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
+		if (res == -1) {
+			if (!retries)
+				fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to stat parent: %m\n");
+			goto fail;
+		}
+		p = lo_find(lo, &stat);
+		if (p == NULL) {
+			if (!retries)
+				fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to find parent\n");
+			goto fail;
+		}
+	}
+	last++;
+	res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
+	if (res == -1) {
+		if (!retries)
+			fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to stat last\n");
+		goto fail_unref;
+	}
+	if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
+		if (!retries)
+			fuse_log(FUSE_LOG_WARNING, "lo_parent_and_name: failed to match last\n");
+		goto fail_unref;
+	}
+	*parent = p;
+	memmove(path, last, strlen(last) + 1);
+
+	return 0;
+
+fail_unref:
+	unref_inode(lo, p, 1);
+fail:
+	if (retries) {
+		retries--;
+		goto retry;
+	}
+fail_noretry:
+	errno = EIO;
+	return -1;
+}
+
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
+			   const struct timespec *tv)
+{
+	int res;
+	struct lo_inode *parent;
+	char path[PATH_MAX];
 
 	if (inode->is_symlink) {
-		res = utimensat(inode->fd, "", tv,
-				AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+		res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
 		if (res == -1 && errno == EINVAL) {
 			/* Sorry, no race free way to set times on symlink. */
-			errno = EPERM;
+			if (lo->norace)
+				errno = EPERM;
+			else
+				goto fallback;
 		}
 		return res;
 	}
-	sprintf(procname, "/proc/self/fd/%i", inode->fd);
+	sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+	return utimensat(AT_FDCWD, path, tv, 0);
+
+fallback:
+	res = lo_parent_and_name(lo, inode, path, &parent);
+	if (res != -1) {
+		res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
+		unref_inode(lo, parent, 1);
+	}
 
-	return utimensat(AT_FDCWD, procname, tv, 0);
+	return res;
 }
 
 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@@ -385,6 +484,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
 {
 	int saverr;
 	char procname[64];
+	struct lo_data *lo = lo_data(req);
 	struct lo_inode *inode;
 	int ifd;
 	int res;
@@ -454,7 +554,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
 		if (fi)
 			res = futimens(fd, tv);
 		else
-			res = utimensat_empty_nofollow(inode, tv);
+			res = utimensat_empty(lo, inode, tv);
 		if (res == -1)
 			goto out_err;
 	}
@@ -673,24 +773,37 @@ static void lo_symlink(fuse_req_t req, const char *link,
 	lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
 }
 
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
-				 const char *name)
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
+				 int dfd, const char *name)
 {
 	int res;
-	char procname[64];
+	struct lo_inode *parent;
+	char path[PATH_MAX];
 
 	if (inode->is_symlink) {
 		res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
 		if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
 			/* Sorry, no race free way to hard-link a symlink. */
-			errno = EPERM;
+			if (lo->norace)
+				errno = EPERM;
+			else
+				goto fallback;
 		}
 		return res;
 	}
 
-	sprintf(procname, "/proc/self/fd/%i", inode->fd);
+	sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+	return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
 
-	return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
+fallback:
+	res = lo_parent_and_name(lo, inode, path, &parent);
+	if (res != -1) {
+		res = linkat(parent->fd, path, dfd, name, 0);
+		unref_inode(lo, parent, 1);
+	}
+
+	return res;
 }
 
 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
@@ -712,7 +825,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
 	e.attr_timeout = lo->timeout;
 	e.entry_timeout = lo->timeout;
 
-	res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
+	res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
 	if (res == -1)
 		goto out_err;
 
@@ -1466,7 +1579,7 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
 	}
 
 	if (inode->is_symlink) {
-		/* Sorry, no race free way to setxattr on symlink. */
+		/* Sorry, no race free way to removexattr on symlink. */
 		saverr = EPERM;
 		goto out;
 	}
-- 
2.23.0



  parent reply	other threads:[~2019-10-24 13:12 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-24 11:26 [PATCH 00/25] virtiofs daemon (security) Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 01/25] virtiofsd: passthrough_ll: create new files in caller's context Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 02/25] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 03/25] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 04/25] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` [PATCH 05/25] virtiofsd: passthrough_ll: add fd_map to hide file descriptors Dr. David Alan Gilbert (git)
2019-10-24 11:26 ` Dr. David Alan Gilbert (git) [this message]
2019-10-24 11:27 ` [PATCH 07/25] virtiofsd: validate path components Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 08/25] virtiofsd: Plumb fuse_bufvec through to do_write_buf Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 09/25] virtiofsd: Pass write iov's all the way through Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 10/25] virtiofsd: add fuse_mbuf_iter API Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 11/25] virtiofsd: validate input buffer sizes in do_write_buf() Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 12/25] virtiofsd: check input buffer size in fuse_lowlevel.c ops Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 13/25] virtiofsd: prevent ".." escape in lo_do_lookup() Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 14/25] virtiofsd: prevent ".." escape in lo_do_readdir() Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 15/25] virtiofsd: use /proc/self/fd/ O_PATH file descriptor Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 16/25] virtiofsd: sandbox mount namespace Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 17/25] virtiofsd: move to an empty network namespace Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 18/25] virtiofsd: move to a new pid namespace Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 19/25] virtiofsd: add seccomp whitelist Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 20/25] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 21/25] virtiofsd: Drop CAP_FSETID if client asked for it Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 22/25] virtiofsd: set maximum RLIMIT_NOFILE limit Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 23/25] virtiofsd: add security guide document Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 24/25] virtiofsd: add --syslog command-line option Dr. David Alan Gilbert (git)
2019-10-24 11:27 ` [PATCH 25/25] virtiofsd: print log only when priority is high enough Dr. David Alan Gilbert (git)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191024112718.34657-7-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=berrange@redhat.com \
    --cc=eguan@linux.alibaba.com \
    --cc=ganesh.mahalingam@intel.com \
    --cc=m.mizuma@jp.fujitsu.com \
    --cc=misono.tomohiro@jp.fujitsu.com \
    --cc=mst@redhat.com \
    --cc=mszeredi@redhat.com \
    --cc=piaojun@huawei.com \
    --cc=qemu-devel@nongnu.org \
    --cc=renzhen@linux.alibaba.com \
    --cc=stefanha@redhat.com \
    --cc=tao.peng@linux.alibaba.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).