* [RFC v2 PATCH 1/3] init: add sys-wrapper.h
2010-08-29 17:28 [RFC v2 PATCH 0/3] initramfs: cleanups Namhyung Kim
@ 2010-08-29 17:28 ` Namhyung Kim
2010-08-30 12:11 ` Arnd Bergmann
2010-08-29 17:28 ` [RFC v2 PATCH 2/3] initramfs: use kern_sys_* wrappers instead of syscall Namhyung Kim
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Namhyung Kim @ 2010-08-29 17:28 UTC (permalink / raw)
To: Andrew Morton; +Cc: Phillip Lougher, Arnd Bergmann, Al Viro, linux-kernel
sys-wrapper.h contains wrapper functions for various syscalls used in init
code. These wrappers handle the proper address space conversion, so they
remove a lot of sparse warnings.
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
---
init/sys-wrapper.h | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 246 insertions(+), 0 deletions(-)
create mode 100644 init/sys-wrapper.h
diff --git a/init/sys-wrapper.h b/init/sys-wrapper.h
new file mode 100644
index 0000000..e4227f9
--- /dev/null
+++ b/init/sys-wrapper.h
@@ -0,0 +1,246 @@
+/*
+ * init/sys-wrapper.h
+ *
+ * Copyright (C) 2010 Namhyung Kim <namhyung@gmail.com>
+ *
+ * wrappers for various syscalls for use in the init code
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/dirent.h>
+#include <linux/syscalls.h>
+
+
+/* These macros are invoked just before/after the actual syscalls. */
+#define KSYS_PREPARE \
+ mm_segment_t old_fs = get_fs(); \
+ set_fs(KERNEL_DS);
+
+#define KSYS_RESTORE \
+ set_fs(old_fs);
+
+
+/*
+ * kern_sys_link - call sys_link() on kernel-space path strings
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_link() returns.
+ */
+static inline int kern_sys_link(const char *oldname, const char *newname)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_link((const char __user __force *) oldname,
+		      (const char __user __force *) newname);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_unlink - call sys_unlink() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_unlink() returns.
+ */
+static inline int kern_sys_unlink(const char *pathname)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_unlink((const char __user __force *) pathname);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_newlstat - call sys_newlstat() with kernel-space arguments
+ *
+ * Both the path string and the stat buffer live in kernel memory, so the
+ * address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_newlstat() returns.
+ */
+static inline int kern_sys_newlstat(const char *filename,
+				    struct stat *statbuf)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_newlstat((const char __user __force *) filename,
+			  (struct stat __user __force *) statbuf);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_mkdir - call sys_mkdir() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_mkdir() returns.
+ */
+static inline int kern_sys_mkdir(const char *pathname, int mode)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_mkdir((const char __user __force *) pathname, mode);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_rmdir - call sys_rmdir() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_rmdir() returns.
+ */
+static inline int kern_sys_rmdir(const char *pathname)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_rmdir((const char __user __force *) pathname);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_mknod - call sys_mknod() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_mknod() returns.
+ */
+static inline int kern_sys_mknod(const char *filename, int mode, unsigned dev)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_mknod((const char __user __force *) filename, mode, dev);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_chown - call sys_chown() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_chown() returns.
+ */
+static inline int kern_sys_chown(const char *filename, uid_t user, gid_t group)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_chown((const char __user __force *) filename, user, group);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_chmod - call sys_chmod() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_chmod() returns.
+ */
+static inline int kern_sys_chmod(const char *filename, mode_t mode)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_chmod((const char __user __force *) filename, mode);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_open - call sys_open() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_open() returns.
+ */
+static inline int kern_sys_open(const char *filename, int flags, int mode)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_open((const char __user __force *) filename, flags, mode);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_fchown - call sys_fchown() from init code
+ *
+ * None of the arguments is a pointer, so there is no user/kernel address
+ * space to convert and the address limit is left untouched.
+ * Returns whatever sys_fchown() returns.
+ */
+static inline int kern_sys_fchown(unsigned int fd, uid_t user, gid_t group)
+{
+	return sys_fchown(fd, user, group);
+}
+
+/*
+ * kern_sys_fchmod - call sys_fchmod() from init code
+ *
+ * None of the arguments is a pointer, so there is no user/kernel address
+ * space to convert and the address limit is left untouched.
+ * Returns whatever sys_fchmod() returns.
+ */
+static inline int kern_sys_fchmod(unsigned int fd, mode_t mode)
+{
+	return sys_fchmod(fd, mode);
+}
+
+/*
+ * kern_sys_ftruncate - call sys_ftruncate() from init code
+ *
+ * None of the arguments is a pointer, so there is no user/kernel address
+ * space to convert and the address limit is left untouched.
+ * Returns whatever sys_ftruncate() returns.
+ */
+static inline int kern_sys_ftruncate(unsigned int fd, unsigned long length)
+{
+	return sys_ftruncate(fd, length);
+}
+
+/*
+ * kern_sys_read - call sys_read() with a kernel-space destination buffer
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  The result of sys_read() is returned as an int.
+ */
+static inline int kern_sys_read(unsigned int fd, char *buf, size_t count)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_read(fd, (char __user __force *) buf, count);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_write - call sys_write() with a kernel-space source buffer
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  The result of sys_write() is returned as an int.
+ */
+static inline int kern_sys_write(unsigned int fd, const char *buf,
+				 size_t count)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_write(fd, (const char __user __force *) buf, count);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_close - call sys_close() from init code
+ *
+ * The only argument is a file descriptor number, so there is no
+ * user/kernel address space to convert and the address limit is left
+ * untouched.  Returns whatever sys_close() returns.
+ */
+static inline int kern_sys_close(unsigned int fd)
+{
+	return sys_close(fd);
+}
+
+/*
+ * kern_sys_symlink - call sys_symlink() on kernel-space path strings
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_symlink() returns.
+ */
+static inline int kern_sys_symlink(const char *oldname, const char *newname)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_symlink((const char __user __force *) oldname,
+			 (const char __user __force *) newname);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_lchown - call sys_lchown() on a kernel-space path string
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_lchown() returns.
+ */
+static inline int kern_sys_lchown(const char *filename, uid_t user,
+				  gid_t group)
+{
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_lchown((const char __user __force *) filename, user, group);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+/*
+ * kern_sys_getdents64 - call sys_getdents64() with a kernel-space buffer
+ *
+ * The address limit is switched to KERNEL_DS around the call and restored
+ * afterwards.  Returns whatever sys_getdents64() returns.
+ */
+static inline int kern_sys_getdents64(unsigned int fd,
+				      struct linux_dirent64 *dirent,
+				      unsigned int count)
+{
+	struct linux_dirent64 __user *ubuf =
+		(struct linux_dirent64 __user __force *) dirent;
+	mm_segment_t saved_fs = get_fs();
+	int rc;
+
+	set_fs(KERNEL_DS);
+	rc = sys_getdents64(fd, ubuf, count);
+	set_fs(saved_fs);
+
+	return rc;
+}
+
+
+#undef KSYS_PREPARE
+#undef KSYS_RESTORE
--
1.7.2.2
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [RFC v2 PATCH 1/3] init: add sys-wrapper.h
2010-08-29 17:28 ` [RFC v2 PATCH 1/3] init: add sys-wrapper.h Namhyung Kim
@ 2010-08-30 12:11 ` Arnd Bergmann
2010-08-30 14:17 ` Namhyung Kim
0 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2010-08-30 12:11 UTC (permalink / raw)
To: Namhyung Kim; +Cc: Andrew Morton, Phillip Lougher, Al Viro, linux-kernel
On Sunday 29 August 2010, Namhyung Kim wrote:
> +
> +/* These macro are called just before/after actual syscalls. */
> +#define KSYS_PREPARE \
> + mm_segment_t old_fs = get_fs(); \
> + set_fs(KERNEL_DS);
> +
> +#define KSYS_RESTORE \
> + set_fs(old_fs);
These macros are not that nice, because they depend on context.
I would probably open-code them in each function, or possibly
use a single macro to combine it to something like
#define kern_sys_call(call, ...) \
({ \
mm_segment_t old_fs = get_fs(); \
long result; \
set_fs(KERNEL_DS); \
result = call(__VA_ARGS__); \
set_fs(old_fs); \
result; \
})
static inline int kern_sys_link(const char *oldname, const char *newname)
{
return kern_sys_call(sys_link, (const char __user __force *)oldname,
(const char __user __force *)newname);
}
> +static inline int kern_sys_fchown(unsigned int fd, uid_t user, gid_t group)
> +{
> + int ret;
> + KSYS_PREPARE;
> +
> + ret = sys_fchown(fd, user, group);
> +
> + KSYS_RESTORE;
> + return ret;
> +}
When there are no pointer arguments, there is no need to do set_fs
tricks.
Arnd
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC v2 PATCH 1/3] init: add sys-wrapper.h
2010-08-30 12:11 ` Arnd Bergmann
@ 2010-08-30 14:17 ` Namhyung Kim
0 siblings, 0 replies; 8+ messages in thread
From: Namhyung Kim @ 2010-08-30 14:17 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: Andrew Morton, Phillip Lougher, Al Viro, linux-kernel
On Mon, Aug 30, 2010 at 21:11, Arnd Bergmann <arnd@arndb.de> wrote:
> On Sunday 29 August 2010, Namhyung Kim wrote:
>> +
>> +/* These macro are called just before/after actual syscalls. */
>> +#define KSYS_PREPARE \
>> + mm_segment_t old_fs = get_fs(); \
>> + set_fs(KERNEL_DS);
>> +
>> +#define KSYS_RESTORE \
>> + set_fs(old_fs);
>
> These macros are not that nice, because they depend on context.
> I would probably open-code them in each function, or possibly
> use a single macro to combine it to something like
>
> #define kern_sys_call(call, ...) \
> ({ \
> mm_segment_t old_fs = get_fs(); \
> long result; \
> set_fs(KERNEL_DS); \
> result = call(__VA_ARGS__); \
> set_fs(old_fs); \
> result; \
> })
>
> static inline int kern_sys_link(const char *oldname, const char *newname)
> {
> return kern_sys_call(sys_link, (const char __user __force *)oldname,
> (const char __user __force *)newname);
> }
>
Cool. Will use it. :-)
>> +static inline int kern_sys_fchown(unsigned int fd, uid_t user, gid_t group)
>> +{
>> + int ret;
>> + KSYS_PREPARE;
>> +
>> + ret = sys_fchown(fd, user, group);
>> +
>> + KSYS_RESTORE;
>> + return ret;
>> +}
>
> When there are no pointer arguments, there is no need to do set_fs
> tricks.
>
My intention was that it might be good, IMHO, to have common setup/tear-down
code around the actual syscall that could be extended in the future. But now
I think that is a kind of over-engineering, so I'll discard it and follow
your advice above.
Thanks.
--
Regards,
Namhyung Kim
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC v2 PATCH 2/3] initramfs: use kern_sys_* wrappers instead of syscall
2010-08-29 17:28 [RFC v2 PATCH 0/3] initramfs: cleanups Namhyung Kim
2010-08-29 17:28 ` [RFC v2 PATCH 1/3] init: add sys-wrapper.h Namhyung Kim
@ 2010-08-29 17:28 ` Namhyung Kim
2010-08-29 17:28 ` [RFC v2 PATCH 3/3] init: introduce CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE Namhyung Kim
2010-08-30 12:02 ` [RFC v2 PATCH 0/3] initramfs: cleanups Arnd Bergmann
3 siblings, 0 replies; 8+ messages in thread
From: Namhyung Kim @ 2010-08-29 17:28 UTC (permalink / raw)
To: Andrew Morton; +Cc: Phillip Lougher, Arnd Bergmann, Al Viro, linux-kernel
Replace direct calls to syscall routines with their wrapper functions
defined in init/sys-wrapper.h.
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
---
init/sys-wrapper.c | 589 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 589 insertions(+), 0 deletions(-)
create mode 100644 init/sys-wrapper.c
diff --git a/init/sys-wrapper.c b/init/sys-wrapper.c
new file mode 100644
index 0000000..fa5949f
--- /dev/null
+++ b/init/sys-wrapper.c
@@ -0,0 +1,589 @@
+/*
+ * init/sys-wrapper.c
+ *
+ * Copyright (C) 2010 Namhyung Kim <namhyung@gmail.com>
+ *
+ * Wrappers for various syscalls for use in the init code.
+ * Most of these functions are copied from their syscall implementation
+ * verbatim except that path lookup codes are changed to use kernel
+ * functions and security checks are removed.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/fcntl.h>
+#include <linux/dirent.h>
+#include <linux/syscalls.h>
+#include <linux/highuid.h>
+#include "sys-wrapper.h"
+
+/*
+ * kern_sys_link - create a hard link using kernel path strings
+ * @oldname: existing path, looked up with kern_path()
+ * @newname: path of the link to create
+ *
+ * Returns 0 on success or a negative errno.  -EXDEV is returned when the
+ * two paths resolve to different mounts.
+ */
+int __init kern_sys_link(const char *oldname, const char *newname)
+{
+ struct path old_path;
+ struct dentry *new_dentry;
+ struct nameidata nd;
+ int error;
+
+ /* Resolve the existing name; lookup flags are 0. */
+ error = kern_path(oldname, 0, &old_path);
+ if (error)
+ goto out;
+
+ /* Resolve only the parent directory of the new name. */
+ error = path_lookup(newname, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out_path;
+
+ /* Source and destination must be on the same mount. */
+ error = -EXDEV;
+ if (old_path.mnt != nd.path.mnt)
+ goto out_nd;
+
+ /*
+ * NOTE(review): the failure path below still unlocks the parent's
+ * i_mutex, so lookup_create() presumably returns with it held even
+ * on error - confirm against fs/namei.c.
+ */
+ new_dentry = lookup_create(&nd, 0);
+ if (IS_ERR(new_dentry)) {
+ error = PTR_ERR(new_dentry);
+ goto out_unlock;
+ }
+
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
+
+ error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+
+ /* Unwind in reverse order of acquisition. */
+ mnt_drop_write(nd.path.mnt);
+out_dput:
+ dput(new_dentry);
+out_unlock:
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+out_nd:
+ path_put(&nd.path);
+out_path:
+ path_put(&old_path);
+out:
+ return error;
+}
+
+/*
+ * lookup_hash - find or instantiate the dentry for the last path component
+ * @nd: nameidata whose path.dentry is the parent directory and whose
+ *      last field is the component name to look up
+ *
+ * Returns the dentry, or an ERR_PTR() on failure.  The callers in this
+ * file take the parent's i_mutex before calling.
+ */
+static struct dentry *lookup_hash(struct nameidata *nd)
+{
+ int err;
+ struct dentry *base;
+ struct qstr *name;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ base = nd->path.dentry;
+ name = &nd->last;
+ inode = base->d_inode;
+
+ /* Only the filesystem's own ->permission hook is consulted here. */
+ if (inode->i_op->permission) {
+ err = inode->i_op->permission(inode, MAY_EXEC);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_op && base->d_op->d_hash) {
+ err = base->d_op->d_hash(base, name);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ /*
+ * Don't bother with __d_lookup: callers are for creat as
+ * well as unlink, so a lot of the time it would cost
+ * a double lookup.
+ */
+ dentry = d_lookup(nd->path.dentry, &nd->last);
+
+ /* A cached dentry may need revalidating by the filesystem. */
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ int status = dentry->d_op->d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ /*
+ * The dentry failed validation.
+ * If d_revalidate returned 0 attempt to invalidate
+ * the dentry otherwise d_revalidate is asking us
+ * to return a fail status.
+ */
+ if (!status) {
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ }
+ } else {
+ dput(dentry);
+ return ERR_PTR(status);
+ }
+ }
+ }
+
+ /* Nothing usable in the dcache: allocate and ask the filesystem. */
+ if (!dentry) {
+ struct dentry *old;
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(inode)))
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_alloc(base, name);
+ if (unlikely(!dentry))
+ return ERR_PTR(-ENOMEM);
+
+ /* A non-NULL ->lookup() result replaces the new dentry. */
+ old = inode->i_op->lookup(inode, dentry, nd);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ }
+ return dentry;
+}
+
+/*
+ * kern_sys_unlink - remove a name using a kernel path string
+ * @pathname: path of the entry to unlink
+ *
+ * Returns 0 on success or a negative errno.  -EISDIR is returned when the
+ * last component is not a normal name.  A name with trailing characters
+ * after the last component yields -ENOENT, -EISDIR or -ENOTDIR depending
+ * on what the dentry refers to.
+ */
+int __init kern_sys_unlink(const char *pathname)
+{
+ int error;
+ struct dentry *dentry;
+ struct nameidata nd;
+ struct inode *inode = NULL;
+
+ /* Resolve only the parent directory. */
+ error = path_lookup(pathname, LOOKUP_PARENT, &nd);
+ if (error)
+ return error;
+
+ error = -EISDIR;
+ if (nd.last_type != LAST_NORM)
+ goto out_path;
+
+ nd.flags &= ~LOOKUP_PARENT;
+
+ mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ dentry = lookup_hash(&nd);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ /* Why not before? Because we want correct error value */
+ if (nd.last.name[nd.last.len])
+ goto slashes;
+ inode = dentry->d_inode;
+ /* Pin the inode so the final iput() happens after the unlock. */
+ if (inode)
+ atomic_inc(&inode->i_count);
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
+ error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+
+ mnt_drop_write(nd.path.mnt);
+ out_dput:
+ dput(dentry);
+ }
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ if (inode)
+ iput(inode); /* truncate the inode here */
+out_path:
+ path_put(&nd.path);
+ return error;
+
+ /* Reached only via goto; picks the errno and rejoins at out_dput. */
+slashes:
+ error = !dentry->d_inode ? -ENOENT :
+ S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+ goto out_dput;
+}
+
+/*
+ * cp_new_stat - convert a struct kstat into a struct stat
+ * @stat:    attributes to convert
+ * @statbuf: destination buffer, written with memcpy()
+ *
+ * This function is taken from fs/stat.c.
+ *
+ * Returns 0 on success, or -EOVERFLOW when a device number, the inode
+ * number, the link count or (on 32-bit) the size does not fit into the
+ * struct stat representation.  @statbuf is only written on success.
+ */
+static int __init cp_new_stat(struct kstat *stat, struct stat *statbuf)
+{
+ struct stat tmp;
+
+ /* Reject device numbers that cannot be encoded for this word size. */
+#if BITS_PER_LONG == 32
+ if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+ return -EOVERFLOW;
+#else
+ if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev))
+ return -EOVERFLOW;
+#endif
+
+ memset(&tmp, 0, sizeof(tmp));
+#if BITS_PER_LONG == 32
+ tmp.st_dev = old_encode_dev(stat->dev);
+#else
+ tmp.st_dev = new_encode_dev(stat->dev);
+#endif
+ /* Detect truncation when st_ino is narrower than stat->ino. */
+ tmp.st_ino = stat->ino;
+ if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
+ return -EOVERFLOW;
+ tmp.st_mode = stat->mode;
+ /* Same truncation check for the link count. */
+ tmp.st_nlink = stat->nlink;
+ if (tmp.st_nlink != stat->nlink)
+ return -EOVERFLOW;
+ SET_UID(tmp.st_uid, stat->uid);
+ SET_GID(tmp.st_gid, stat->gid);
+#if BITS_PER_LONG == 32
+ tmp.st_rdev = old_encode_dev(stat->rdev);
+#else
+ tmp.st_rdev = new_encode_dev(stat->rdev);
+#endif
+#if BITS_PER_LONG == 32
+ if (stat->size > MAX_NON_LFS)
+ return -EOVERFLOW;
+#endif
+ tmp.st_size = stat->size;
+ tmp.st_atime = stat->atime.tv_sec;
+ tmp.st_mtime = stat->mtime.tv_sec;
+ tmp.st_ctime = stat->ctime.tv_sec;
+#ifdef STAT_HAVE_NSEC
+ tmp.st_atime_nsec = stat->atime.tv_nsec;
+ tmp.st_mtime_nsec = stat->mtime.tv_nsec;
+ tmp.st_ctime_nsec = stat->ctime.tv_nsec;
+#endif
+ tmp.st_blocks = stat->blocks;
+ tmp.st_blksize = stat->blksize;
+
+ /* The destination is a kernel buffer, so a plain memcpy() is used. */
+ memcpy(statbuf, &tmp, sizeof(tmp));
+ return 0;
+}
+
+/*
+ * kern_sys_newlstat - lstat-style attribute query on a kernel path string
+ * @filename: path to look up with kern_path() (lookup flags 0)
+ * @statbuf:  kernel buffer that receives the converted struct stat
+ *
+ * Returns 0 on success, or the negative error from kern_path(),
+ * vfs_getattr() or cp_new_stat().
+ */
+int __init kern_sys_newlstat(const char *filename, struct stat *statbuf)
+{
+	int error;
+	struct path path;
+	struct kstat kstat;
+
+	error = kern_path(filename, 0, &path);
+	if (error)
+		return error;
+
+	error = vfs_getattr(path.mnt, path.dentry, &kstat);
+	if (!error) {
+		/*
+		 * cp_new_stat() can fail with -EOVERFLOW and then leaves
+		 * @statbuf unwritten; report that instead of success.
+		 */
+		error = cp_new_stat(&kstat, statbuf);
+	}
+
+	path_put(&path);
+	return error;
+}
+
+/*
+ * kern_sys_mkdir - create a directory using a kernel path string
+ * @pathname: path of the directory to create
+ * @mode:     requested mode; masked with the current umask unless the
+ *            parent inode is flagged IS_POSIXACL
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int __init kern_sys_mkdir(const char *pathname, int mode)
+{
+ int error;
+ struct dentry *dentry;
+ struct nameidata nd;
+
+ /* Resolve only the parent directory. */
+ error = path_lookup(pathname, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out_err;
+
+ /*
+ * NOTE(review): the failure path below still unlocks the parent's
+ * i_mutex, so lookup_create() presumably returns with it held even
+ * on error - confirm against fs/namei.c.
+ */
+ dentry = lookup_create(&nd, 1);
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+
+ if (!IS_POSIXACL(nd.path.dentry->d_inode))
+ mode &= ~current_umask();
+
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
+
+ error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+
+ /* Unwind in reverse order of acquisition. */
+ mnt_drop_write(nd.path.mnt);
+out_dput:
+ dput(dentry);
+out_unlock:
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ path_put(&nd.path);
+out_err:
+ return error;
+}
+
+/*
+ * kern_sys_rmdir - remove a directory using a kernel path string
+ * @pathname: path of the directory to remove
+ *
+ * Returns 0 on success or a negative errno.  A last component of "..",
+ * "." or the root yields -ENOTEMPTY, -EINVAL or -EBUSY respectively.
+ */
+int __init kern_sys_rmdir(const char *pathname)
+{
+ int error;
+ struct dentry *dentry;
+ struct nameidata nd;
+
+ /* Resolve only the parent directory. */
+ error = path_lookup(pathname, LOOKUP_PARENT, &nd);
+ if (error)
+ return error;
+
+ /* Refuse special last components; other types fall through. */
+ switch(nd.last_type) {
+ case LAST_DOTDOT:
+ error = -ENOTEMPTY;
+ goto exit1;
+ case LAST_DOT:
+ error = -EINVAL;
+ goto exit1;
+ case LAST_ROOT:
+ error = -EBUSY;
+ goto exit1;
+ }
+
+ nd.flags &= ~LOOKUP_PARENT;
+
+ /* lookup_hash() is called with the parent's i_mutex held. */
+ mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+
+ dentry = lookup_hash(&nd);
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto exit2;
+ }
+
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit3;
+
+ error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+
+ /* Unwind in reverse order of acquisition. */
+ mnt_drop_write(nd.path.mnt);
+exit3:
+ dput(dentry);
+exit2:
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+exit1:
+ path_put(&nd.path);
+ return error;
+}
+
+/*
+ * kern_sys_mknod - create a filesystem node using a kernel path string
+ * @filename: path of the node to create
+ * @mode:     file type and permission bits; masked with the current umask
+ *            unless the parent inode is flagged IS_POSIXACL
+ * @dev:      device number, decoded with new_decode_dev() for character
+ *            and block devices and ignored for other types
+ *
+ * Returns 0 on success or a negative errno: -EPERM for a directory type,
+ * -EINVAL for an unrecognised file type.
+ */
+int __init kern_sys_mknod(const char *filename, int mode, unsigned dev)
+{
+ int error;
+ struct dentry *dentry;
+ struct nameidata nd;
+
+ /* Directories are rejected before any lookup is done. */
+ if (S_ISDIR(mode))
+ return -EPERM;
+
+ error = path_lookup(filename, LOOKUP_PARENT, &nd);
+ if (error)
+ return error;
+
+ /*
+ * NOTE(review): the failure path below still unlocks the parent's
+ * i_mutex, so lookup_create() presumably returns with it held even
+ * on error - confirm against fs/namei.c.
+ */
+ dentry = lookup_create(&nd, 0);
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto out_unlock;
+ }
+
+ if (!IS_POSIXACL(nd.path.dentry->d_inode))
+ mode &= ~current_umask();
+
+ /* First pass: validate the file type before taking write access. */
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ case 0: /* zero mode translates to S_IFREG */
+ break;
+ case S_IFDIR:
+ error = -EPERM;
+ goto out_dput;
+ default:
+ error = -EINVAL;
+ goto out_dput;
+ }
+
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
+
+ /* Second pass: create the node; only validated types reach here. */
+ switch (mode & S_IFMT) {
+ case 0:
+ case S_IFREG:
+ error = vfs_create(nd.path.dentry->d_inode,dentry, mode, &nd);
+ break;
+
+ case S_IFCHR:
+ case S_IFBLK:
+ error = vfs_mknod(nd.path.dentry->d_inode,dentry, mode,
+ new_decode_dev(dev));
+ break;
+
+ case S_IFIFO:
+ case S_IFSOCK:
+ error = vfs_mknod(nd.path.dentry->d_inode,dentry, mode, 0);
+ break;
+ }
+
+ /* Unwind in reverse order of acquisition. */
+ mnt_drop_write(nd.path.mnt);
+out_dput:
+ dput(dentry);
+out_unlock:
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ path_put(&nd.path);
+
+ return error;
+}
+
+/*
+ * chown_common - apply an owner/group change to an already resolved path
+ * @path:  path whose dentry is passed to notify_change()
+ * @user:  new owner, or (uid_t) -1 to leave the owner alone
+ * @group: new group, or (gid_t) -1 to leave the group alone
+ *
+ * ATTR_CTIME is always requested.  For anything that is not a directory
+ * the ATTR_KILL_SUID, ATTR_KILL_SGID and ATTR_KILL_PRIV flags are added.
+ * Returns the result of notify_change(), called under the inode's i_mutex.
+ */
+static int __init chown_common(struct path *path, uid_t user, gid_t group)
+{
+	struct inode *target = path->dentry->d_inode;
+	struct iattr attrs;
+	int rc;
+
+	attrs.ia_valid = ATTR_CTIME;
+
+	if (user != (uid_t) -1) {
+		attrs.ia_uid = user;
+		attrs.ia_valid |= ATTR_UID;
+	}
+
+	if (group != (gid_t) -1) {
+		attrs.ia_gid = group;
+		attrs.ia_valid |= ATTR_GID;
+	}
+
+	if (!S_ISDIR(target->i_mode)) {
+		attrs.ia_valid |= ATTR_KILL_SUID;
+		attrs.ia_valid |= ATTR_KILL_SGID;
+		attrs.ia_valid |= ATTR_KILL_PRIV;
+	}
+
+	mutex_lock(&target->i_mutex);
+	rc = notify_change(path->dentry, &attrs);
+	mutex_unlock(&target->i_mutex);
+
+	return rc;
+}
+
+/*
+ * kern_sys_chown - change owner/group of a file named by a kernel string
+ * @filename: path, looked up with LOOKUP_FOLLOW
+ * @user:     new owner, passed through to chown_common()
+ * @group:    new group, passed through to chown_common()
+ *
+ * Returns 0 on success or the negative error from kern_path(),
+ * mnt_want_write() or chown_common().
+ */
+int __init kern_sys_chown(const char *filename, uid_t user, gid_t group)
+{
+	struct path target;
+	int rc;
+
+	rc = kern_path(filename, LOOKUP_FOLLOW, &target);
+	if (rc)
+		return rc;
+
+	rc = mnt_want_write(target.mnt);
+	if (!rc) {
+		rc = chown_common(&target, user, group);
+		mnt_drop_write(target.mnt);
+	}
+
+	path_put(&target);
+	return rc;
+}
+
+/*
+ * kern_sys_chmod - change the mode of a file named by a kernel string
+ * @filename: path, looked up with LOOKUP_FOLLOW
+ * @mode:     new permission bits; (mode_t) -1 means "keep the current mode"
+ *
+ * Only the S_IALLUGO bits are taken from @mode; all other bits come from
+ * the inode's current i_mode.  Returns 0 on success or a negative errno.
+ */
+int __init kern_sys_chmod(const char *filename, mode_t mode)
+{
+	struct iattr attrs;
+	struct inode *target;
+	struct path where;
+	int rc;
+
+	rc = kern_path(filename, LOOKUP_FOLLOW, &where);
+	if (rc)
+		return rc;
+
+	target = where.dentry->d_inode;
+
+	rc = mnt_want_write(where.mnt);
+	if (!rc) {
+		mutex_lock(&target->i_mutex);
+
+		if (mode == (mode_t) -1)
+			mode = target->i_mode;
+
+		attrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+		attrs.ia_mode = (mode & S_IALLUGO) |
+				(target->i_mode & ~S_IALLUGO);
+
+		rc = notify_change(where.dentry, &attrs);
+
+		mutex_unlock(&target->i_mutex);
+		mnt_drop_write(where.mnt);
+	}
+
+	path_put(&where);
+	return rc;
+}
+
+/*
+ * kern_sys_open - open a file named by a kernel string
+ * @filename: path to open, relative to AT_FDCWD
+ * @flags:    open flags; O_LARGEFILE is added when force_o_largefile()
+ * @mode:     creation mode handed to do_filp_open()
+ *
+ * Returns the new file descriptor, or a negative errno from
+ * get_unused_fd_flags() or do_filp_open().
+ */
+int __init kern_sys_open(const char *filename, int flags, int mode)
+{
+	struct file *filp;
+	int fd;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	fd = get_unused_fd_flags(flags);
+	if (fd < 0)
+		return fd;
+
+	filp = do_filp_open(AT_FDCWD, filename, flags, mode, 0);
+	if (IS_ERR(filp)) {
+		/* Give the reserved descriptor back before failing. */
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	fd_install(fd, filp);
+	return fd;
+}
+
+/*
+ * kern_sys_symlink - create a symbolic link using kernel strings
+ * @oldname: link target, stored as given
+ * @newname: path of the symlink to create
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int __init kern_sys_symlink(const char *oldname, const char *newname)
+{
+	struct nameidata nd;
+	struct dentry *link;
+	int rc;
+
+	/* Resolve only the parent directory of the new name. */
+	rc = path_lookup(newname, LOOKUP_PARENT, &nd);
+	if (rc)
+		return rc;
+
+	link = lookup_create(&nd, 0);
+	if (IS_ERR(link)) {
+		rc = PTR_ERR(link);
+		goto unlock_parent;
+	}
+
+	rc = mnt_want_write(nd.path.mnt);
+	if (!rc) {
+		rc = vfs_symlink(nd.path.dentry->d_inode, link, oldname);
+		mnt_drop_write(nd.path.mnt);
+	}
+
+	dput(link);
+unlock_parent:
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+	path_put(&nd.path);
+	return rc;
+}
+
+/*
+ * kern_sys_lchown - change owner/group of a path without following links
+ * @filename: path to operate on
+ * @user:     new owner, passed through to chown_common()
+ * @group:    new group, passed through to chown_common()
+ *
+ * Returns 0 on success or the negative error from kern_path(),
+ * mnt_want_write() or chown_common().
+ */
+int __init kern_sys_lchown(const char *filename, uid_t user, gid_t group)
+{
+	int error;
+	struct path path;
+
+	/*
+	 * lchown acts on the link itself: the last component must not be
+	 * followed and need not be a directory, so no lookup flags are set.
+	 */
+	error = kern_path(filename, 0, &path);
+	if (error)
+		goto out;
+
+	error = mnt_want_write(path.mnt);
+	if (error)
+		goto out_release;
+
+	error = chown_common(&path, user, group);
+
+	mnt_drop_write(path.mnt);
+out_release:
+	path_put(&path);
+out:
+	return error;
+}
--
1.7.2.2
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC v2 PATCH 3/3] init: introduce CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE
2010-08-29 17:28 [RFC v2 PATCH 0/3] initramfs: cleanups Namhyung Kim
2010-08-29 17:28 ` [RFC v2 PATCH 1/3] init: add sys-wrapper.h Namhyung Kim
2010-08-29 17:28 ` [RFC v2 PATCH 2/3] initramfs: use kern_sys_* wrappers instead of syscall Namhyung Kim
@ 2010-08-29 17:28 ` Namhyung Kim
2010-08-30 12:02 ` [RFC v2 PATCH 0/3] initramfs: cleanups Arnd Bergmann
3 siblings, 0 replies; 8+ messages in thread
From: Namhyung Kim @ 2010-08-29 17:28 UTC (permalink / raw)
To: Andrew Morton; +Cc: Phillip Lougher, Arnd Bergmann, Al Viro, linux-kernel
Add a new config option, USE_INIT_SYSCALL_AS_KERNEL_ROUTINE. It makes
some of the kern_sys_*() functions call internal kernel routines directly
instead of calling the syscall routines, which gets rid of the
user/kernel address space handling.
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
---
init/Makefile | 2 +
init/sys-wrapper.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
usr/Kconfig | 7 ++++++
3 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/init/Makefile b/init/Makefile
index 0bf677a..296e5ab 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -15,6 +15,8 @@ mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o
mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o
mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o
+obj-$(CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE) += sys-wrapper.o
+
# dependencies on generated files need to be listed explicitly
$(obj)/version.o: include/generated/compile.h
diff --git a/init/sys-wrapper.h b/init/sys-wrapper.h
index e4227f9..38f9ec6 100644
--- a/init/sys-wrapper.h
+++ b/init/sys-wrapper.h
@@ -28,6 +28,8 @@
#include <linux/syscalls.h>
+#ifndef CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE
+
/* These macro are called just before/after actual syscalls. */
#define KSYS_PREPARE \
mm_segment_t old_fs = get_fs(); \
@@ -244,3 +246,60 @@ static inline int kern_sys_getdents64(unsigned int fd,
#undef KSYS_PREPARE
#undef KSYS_RESTORE
+
+#else /* !CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE */
+
+int kern_sys_link(const char *oldname, const char *newname);
+int kern_sys_unlink(const char *pathname);
+int kern_sys_newlstat(const char *filename, struct stat *statbuf);
+int kern_sys_mkdir(const char *pathname, int mode);
+int kern_sys_rmdir(const char *pathname);
+int kern_sys_mknod(const char *filename, int mode, unsigned dev);
+int kern_sys_chown(const char *filename, uid_t user, gid_t group);
+int kern_sys_chmod(const char *filename, mode_t mode);
+int kern_sys_open(const char *filename, int flags, int mode);
+
+/* Forward to sys_fchown(); there are no pointer arguments to convert. */
+static inline int kern_sys_fchown(unsigned int fd, uid_t user, gid_t group)
+{
+	int rc = sys_fchown(fd, user, group);
+
+	return rc;
+}
+
+/* Forward to sys_fchmod(); there are no pointer arguments to convert. */
+static inline int kern_sys_fchmod(unsigned int fd, mode_t mode)
+{
+	int rc = sys_fchmod(fd, mode);
+
+	return rc;
+}
+
+/* Forward to sys_ftruncate(); there are no pointer arguments to convert. */
+static inline int kern_sys_ftruncate(unsigned int fd, unsigned long length)
+{
+	int rc = sys_ftruncate(fd, length);
+
+	return rc;
+}
+
+/*
+ * Forward to sys_read(), casting the kernel buffer to a __user pointer.
+ * This variant does not touch the address limit.
+ */
+static inline int kern_sys_read(unsigned int fd, char *buf, size_t count)
+{
+	char __user *ubuf = (char __user __force *) buf;
+
+	return sys_read(fd, ubuf, count);
+}
+
+/*
+ * Forward to sys_write(), casting the kernel buffer to a __user pointer.
+ * This variant does not touch the address limit.
+ */
+static inline int kern_sys_write(unsigned int fd, const char *buf,
+				 size_t count)
+{
+	const char __user *ubuf = (const char __user __force *) buf;
+
+	return sys_write(fd, ubuf, count);
+}
+
+/* Forward to sys_close(); the only argument is a descriptor number. */
+static inline int kern_sys_close(unsigned int fd)
+{
+	int rc = sys_close(fd);
+
+	return rc;
+}
+
+int kern_sys_symlink(const char *oldname, const char *newname);
+int kern_sys_lchown(const char *filename, uid_t user, gid_t group);
+
+/*
+ * Forward to sys_getdents64(), casting the kernel buffer to a __user
+ * pointer.  This variant does not touch the address limit.
+ */
+static inline int kern_sys_getdents64(unsigned int fd,
+				      struct linux_dirent64 *dirent,
+				      unsigned int count)
+{
+	struct linux_dirent64 __user *ubuf =
+		(struct linux_dirent64 __user __force *) dirent;
+
+	return sys_getdents64(fd, ubuf, count);
+}
+
+#endif /* !CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE */
diff --git a/usr/Kconfig b/usr/Kconfig
index e2721f5..2a914eb 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -148,3 +148,10 @@ config INITRAMFS_COMPRESSION_LZO
(both compression and decompression) is the fastest.
endchoice
+
+config USE_INIT_SYSCALL_AS_KERNEL_ROUTINE
+ bool "Don't call syscalls on init code"
+ depends on BLK_DEV_INITRD
+ default n
+ help
+ Replace syscalls with direct calls to kernel functions in init code.
--
1.7.2.2
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [RFC v2 PATCH 0/3] initramfs: cleanups
2010-08-29 17:28 [RFC v2 PATCH 0/3] initramfs: cleanups Namhyung Kim
` (2 preceding siblings ...)
2010-08-29 17:28 ` [RFC v2 PATCH 3/3] init: introduce CONFIG_USE_INIT_SYSCALL_AS_KERNEL_ROUTINE Namhyung Kim
@ 2010-08-30 12:02 ` Arnd Bergmann
2010-08-30 14:05 ` Namhyung Kim
3 siblings, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2010-08-30 12:02 UTC (permalink / raw)
To: Namhyung Kim; +Cc: Andrew Morton, Phillip Lougher, Al Viro, linux-kernel
On Sunday 29 August 2010, Namhyung Kim wrote:
> First two of this patchset wrap all of syscall invocations with kern_sys_*()
> helper functions which does nasty address space conversions for you. This
> idea was suggested by Arnd Bergmann. Last one tries to implement above idea
> - calling internel functions directly - in favour of kernel config option
> even though I'm not sure this is right thing. :-(
I think we can safely say that we do not want the config option, we should
do one option or the other. Since Al already opposed implementing the calls
using low-level VFS operations, that's probably not going to happen.
Arnd
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [RFC v2 PATCH 0/3] initramfs: cleanups
2010-08-30 12:02 ` [RFC v2 PATCH 0/3] initramfs: cleanups Arnd Bergmann
@ 2010-08-30 14:05 ` Namhyung Kim
0 siblings, 0 replies; 8+ messages in thread
From: Namhyung Kim @ 2010-08-30 14:05 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: Andrew Morton, Phillip Lougher, Al Viro, linux-kernel
On Mon, Aug 30, 2010 at 21:02, Arnd Bergmann <arnd@arndb.de> wrote:
> I think we can safely say that we do not want the config option, we should
> do one option or the other. Since Al already opposed implementing the calls
> using low-level VFS operations, that's probably not going to happen.
>
OK. Let's forget about it. :-)
--
Regards,
Namhyung Kim
^ permalink raw reply [flat|nested] 8+ messages in thread