From: Jens Axboe <axboe@kernel.dk>
To: linux-fsdevel@vger.kernel.org
Cc: torvalds@linux-foundation.org, viro@zeniv.linux.org.uk,
Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 1/2] fs: add support for LOOKUP_NONBLOCK
Date: Thu, 10 Dec 2020 13:01:13 -0700 [thread overview]
Message-ID: <20201210200114.525026-2-axboe@kernel.dk> (raw)
In-Reply-To: <20201210200114.525026-1-axboe@kernel.dk>
io_uring always punts opens to async context, since there's no control
over whether the lookup blocks or not. Add LOOKUP_NONBLOCK to support
just doing the fast RCU based lookups, which we know will not block. If
we can do a cached path resolution of the filename, then we don't have
to always punt lookups to a worker.
During path resolution, we always do LOOKUP_RCU first. If that fails and
we have to drop out of RCU mode, then fail a LOOKUP_NONBLOCK attempt as
well, rather than retrying in the potentially blocking ref-walk mode.
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
fs/namei.c | 60 +++++++++++++++++++++++++++++++------------
include/linux/namei.h | 1 +
2 files changed, 44 insertions(+), 17 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 03d0e11e4f36..3d86915568fa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -679,7 +679,7 @@ static bool legitimize_root(struct nameidata *nd)
* Nothing should touch nameidata between unlazy_walk() failure and
* terminate_walk().
*/
-static int unlazy_walk(struct nameidata *nd)
+static int complete_walk_rcu(struct nameidata *nd)
{
struct dentry *parent = nd->path.dentry;
@@ -704,6 +704,18 @@ static int unlazy_walk(struct nameidata *nd)
return -ECHILD;
}
+static int unlazy_walk(struct nameidata *nd)
+{
+ int ret;
+
+ ret = complete_walk_rcu(nd);
+ /* If caller is asking for NONBLOCK lookup, assume we can't satisfy it */
+ if (!ret && (nd->flags & LOOKUP_NONBLOCK))
+ ret = -EAGAIN;
+
+ return ret;
+}
+
/**
* unlazy_child - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
@@ -764,10 +776,13 @@ static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned se
static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
{
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ if ((flags & (LOOKUP_RCU | LOOKUP_NONBLOCK)) == LOOKUP_NONBLOCK)
+ return -EAGAIN;
return dentry->d_op->d_revalidate(dentry, flags);
- else
- return 1;
+ }
+
+ return 1;
}
/**
@@ -792,7 +807,7 @@ static int complete_walk(struct nameidata *nd)
*/
if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED)))
nd->root.mnt = NULL;
- if (unlikely(unlazy_walk(nd)))
+ if (unlikely(complete_walk_rcu(nd)))
return -ECHILD;
}
@@ -1466,8 +1481,9 @@ static struct dentry *lookup_fast(struct nameidata *nd,
unsigned seq;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (unlikely(!dentry)) {
- if (unlazy_walk(nd))
- return ERR_PTR(-ECHILD);
+ int ret = unlazy_walk(nd);
+ if (ret)
+ return ERR_PTR(ret);
return NULL;
}
@@ -1569,8 +1585,9 @@ static inline int may_lookup(struct nameidata *nd)
int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
if (err != -ECHILD)
return err;
- if (unlazy_walk(nd))
- return -ECHILD;
+ err = unlazy_walk(nd);
+ if (err)
+ return err;
}
return inode_permission(nd->inode, MAY_EXEC);
}
@@ -1591,9 +1608,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
// we need to grab link before we do unlazy. And we can't skip
// unlazy even if we fail to grab the link - cleanup needs it
bool grabbed_link = legitimize_path(nd, link, seq);
+ int ret;
- if (unlazy_walk(nd) != 0 || !grabbed_link)
- return -ECHILD;
+ ret = unlazy_walk(nd);
+ if (ret && !grabbed_link)
+ return ret;
if (nd_alloc_stack(nd))
return 0;
@@ -1634,8 +1653,9 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
touch_atime(&last->link);
cond_resched();
} else if (atime_needs_update(&last->link, inode)) {
- if (unlikely(unlazy_walk(nd)))
- return ERR_PTR(-ECHILD);
+ error = unlazy_walk(nd);
+ if (unlikely(error))
+ return ERR_PTR(error);
touch_atime(&last->link);
}
@@ -1652,8 +1672,9 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
if (nd->flags & LOOKUP_RCU) {
res = get(NULL, inode, &last->done);
if (res == ERR_PTR(-ECHILD)) {
- if (unlikely(unlazy_walk(nd)))
- return ERR_PTR(-ECHILD);
+ error = unlazy_walk(nd);
+ if (unlikely(error))
+ return ERR_PTR(error);
res = get(link->dentry, inode, &last->done);
}
} else {
@@ -2193,8 +2214,9 @@ static int link_path_walk(const char *name, struct nameidata *nd)
}
if (unlikely(!d_can_lookup(nd->path.dentry))) {
if (nd->flags & LOOKUP_RCU) {
- if (unlazy_walk(nd))
- return -ECHILD;
+ err = unlazy_walk(nd);
+ if (err)
+ return err;
}
return -ENOTDIR;
}
@@ -3394,10 +3416,14 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
set_nameidata(&nd, dfd, pathname);
filp = path_openat(&nd, op, flags | LOOKUP_RCU);
+ /* If we fail RCU lookup, assume NONBLOCK cannot be honored */
+ if (flags & LOOKUP_NONBLOCK)
+ goto out;
if (unlikely(filp == ERR_PTR(-ECHILD)))
filp = path_openat(&nd, op, flags);
if (unlikely(filp == ERR_PTR(-ESTALE)))
filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
+out:
restore_nameidata();
return filp;
}
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a4bb992623c4..c36c4e0805fc 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -46,6 +46,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
#define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */
#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */
#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */
+#define LOOKUP_NONBLOCK 0x200000 /* don't block for lookup */
/* LOOKUP_* flags which do scope-related checks based on the dirfd. */
#define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)
--
2.29.2
next prev parent reply other threads:[~2020-12-10 20:02 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-10 20:01 [PATCHSET 0/2] fs: Support for LOOKUP_NONBLOCK / RESOLVE_NONBLOCK Jens Axboe
2020-12-10 20:01 ` Jens Axboe [this message]
2020-12-10 20:53 ` [PATCH 1/2] fs: add support for LOOKUP_NONBLOCK Linus Torvalds
2020-12-10 21:06 ` Jens Axboe
2020-12-11 2:45 ` Al Viro
2020-12-11 16:05 ` Jens Axboe
2020-12-11 17:20 ` Al Viro
2020-12-11 17:35 ` Linus Torvalds
2020-12-11 18:50 ` Jens Axboe
2020-12-11 21:51 ` Al Viro
2020-12-11 23:47 ` Jens Axboe
2020-12-11 17:33 ` Matthew Wilcox
2020-12-11 18:55 ` Jens Axboe
2020-12-11 2:35 ` Al Viro
2020-12-11 15:57 ` Jens Axboe
2020-12-11 17:21 ` Linus Torvalds
2020-12-11 17:29 ` Al Viro
2020-12-11 17:38 ` Al Viro
2020-12-11 17:44 ` Linus Torvalds
2020-12-11 21:46 ` Jens Axboe
2020-12-10 20:01 ` [PATCH 2/2] fs: expose LOOKUP_NONBLOCK through openat2() RESOLVE_NONBLOCK Jens Axboe
2020-12-10 22:29 ` Dave Chinner
2020-12-10 23:12 ` Jens Axboe
2020-12-10 23:29 ` Linus Torvalds
2020-12-11 0:58 ` Dave Chinner
2020-12-11 1:01 ` Linus Torvalds
2020-12-11 3:45 ` Dave Chinner
2020-12-11 18:07 ` Linus Torvalds
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201210200114.525026-2-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).