* [PATCH v4 5/7] fs/nfsd: support compiling out splice
From: Pieter Smith @ 2014-11-24 23:01 UTC (permalink / raw)
To: pieter
Cc: Josh Triplett, Alexander Duyck, Alexander Viro,
Alexei Starovoitov, Andrew Morton, Bertrand Jacquin,
Catalina Mocanu, Daniel Borkmann, David S. Miller, Eric Dumazet,
Eric W. Biederman, Fabian Frederick,
open list:FUSE: FILESYSTEM..., Geert Uytterhoeven, Hugh Dickins,
Iulia Manda, Jan Beulich, J. Bruce Fields, Jeff Layton,
open list:ABI/API, linux-fsdevel, linux-kernel
In-Reply-To: <1416870079-15254-1-git-send-email-pieter@boesman.nl>
The goal of the larger patch set is to completely compile out fs/splice, and
as a result, splice support for all file-systems. This patch ensures that
fs/nfsd falls back to non-splice fs support when CONFIG_SYSCALL_SPLICE is
undefined.
Signed-off-by: Pieter Smith <pieter@boesman.nl>
---
net/sunrpc/svc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index ca8a795..6cacc37 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1084,7 +1084,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off only in gss privacy case: */
- rqstp->rq_splice_ok = true;
+ rqstp->rq_splice_ok = IS_ENABLED(CONFIG_SYSCALL_SPLICE);
/* Will be turned off only when NFSv4 Sessions are used */
rqstp->rq_usedeferral = true;
rqstp->rq_dropme = false;
--
2.1.0
^ permalink raw reply related
* [PATCH v4 6/7] net/core: support compiling out splice
From: Pieter Smith @ 2014-11-24 23:01 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Michael S. Tsirkin, Trond Myklebust, Bertrand Jacquin,
Oleg Nesterov, J. Bruce Fields, Eric Dumazet, Willem de Bruijn,
蔡正龙, Jeff Layton, Tom Herbert,
Alexei Starovoitov, Miklos Szeredi, Peter Foley, Hugh Dickins,
Xiao Guangrong, Geert Uytterhoeven, Mel Gorman, Matt Turner,
Paul E. McKenney, Alexander Duyck, open list:FUSE: FILESYSTEM...
In-Reply-To: <1416870079-15254-1-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
To implement splice support, net/core makes use of nosteal_pipe_buf_ops. This
struct is exported by fs/splice. The goal of the larger patch set is to
completely compile out fs/splice, so uses of the exported struct need to be
compiled out along with fs/splice.
This patch therefore compiles out splice support in net/core when
CONFIG_SYSCALL_SPLICE is undefined. The compiled out function skb_splice_bits
is transparently mocked out with a static inline. The greater patch set removes
userspace splice support so it cannot be called anyway.
Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
---
include/linux/skbuff.h | 10 ++++++++++
net/core/skbuff.c | 11 +++++++----
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a59d934..5cd636b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2640,9 +2640,19 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
int len, __wsum csum);
+#ifdef CONFIG_SYSCALL_SPLICE
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags);
+#else
+static inline int
+skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+ struct pipe_inode_info *pipe, unsigned int len,
+ unsigned int flags)
+{
+ return -EPERM;
+}
+#endif
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 61059a0..bb426d9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1678,7 +1678,8 @@ EXPORT_SYMBOL(skb_copy_bits);
* Callback from splice_to_pipe(), if we need to release some pages
* at the end of the spd in case we error'ed out in filling the pipe.
*/
-static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+static void __maybe_unused sock_spd_release(struct splice_pipe_desc *spd,
+ unsigned int i)
{
put_page(spd->pages[i]);
}
@@ -1781,9 +1782,9 @@ static bool __splice_segment(struct page *page, unsigned int poff,
* Map linear and fragment data from the skb to spd. It reports true if the
* pipe is full or if we already spliced the requested length.
*/
-static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
- unsigned int *offset, unsigned int *len,
- struct splice_pipe_desc *spd, struct sock *sk)
+static bool __maybe_unused __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+ unsigned int *offset, unsigned int *len,
+ struct splice_pipe_desc *spd, struct sock *sk)
{
int seg;
@@ -1821,6 +1822,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
* the frag list, if such a thing exists. We'd probably need to recurse to
* handle that cleanly.
*/
+#ifdef CONFIG_SYSCALL_SPLICE
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
unsigned int flags)
@@ -1876,6 +1878,7 @@ done:
return ret;
}
+#endif /* CONFIG_SYSCALL_SPLICE */
/**
* skb_store_bits - store bits from kernel buffer to skb
--
2.1.0
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
^ permalink raw reply related
* [PATCH v4 7/7] fs/splice: full support for compiling out splice
From: Pieter Smith @ 2014-11-24 23:01 UTC (permalink / raw)
To: pieter
Cc: Josh Triplett, Alexander Duyck, Alexander Viro,
Alexei Starovoitov, Andrew Morton, Bertrand Jacquin,
Catalina Mocanu, Daniel Borkmann, David S. Miller, Eric Dumazet,
Eric W. Biederman, Fabian Frederick,
open list:FUSE: FILESYSTEM..., Geert Uytterhoeven, Hugh Dickins,
Iulia Manda, Jan Beulich, J. Bruce Fields, Jeff Layton,
open list:ABI/API, linux-fsdevel, linux-kernel
In-Reply-To: <1416870079-15254-1-git-send-email-pieter@boesman.nl>
Entirely compile out the splice translation unit when the system is configured
without the splice family of syscalls (i.e. CONFIG_SYSCALL_SPLICE is undefined).
Exported fs/splice functions are transparently mocked out with static inlines.
Because userspace support for splice has already been removed by this
patch-set, the exported functions cannot be called anyway. Mocking them out
prevents a maintenance burden on file system drivers.
The bloat score resulting from this patch given a tinyconfig is:
add/remove: 0/25 grow/shrink: 0/5 up/down: 0/-4845 (-4845)
function old new delta
pipe_to_null 4 - -4
generic_pipe_buf_nosteal 6 - -6
spd_release_page 10 - -10
PageUptodate 22 11 -11
lock_page 36 24 -12
page_cache_pipe_buf_release 16 - -16
splice_write_null 24 4 -20
page_cache_pipe_buf_ops 20 - -20
nosteal_pipe_buf_ops 20 - -20
default_pipe_buf_ops 20 - -20
generic_splice_sendpage 24 - -24
splice_shrink_spd 27 - -27
direct_splice_actor 47 - -47
default_file_splice_write 49 - -49
wakeup_pipe_writers 54 - -54
write_pipe_buf 71 - -71
page_cache_pipe_buf_confirm 80 - -80
splice_grow_spd 87 - -87
splice_from_pipe 93 - -93
splice_from_pipe_next 106 - -106
pipe_to_sendpage 109 - -109
page_cache_pipe_buf_steal 114 - -114
generic_file_splice_read 131 8 -123
do_splice_direct 148 - -148
__splice_from_pipe 246 - -246
splice_direct_to_actor 416 - -416
splice_to_pipe 417 - -417
default_file_splice_read 688 - -688
iter_file_splice_write 702 4 -698
__generic_file_splice_read 1109 - -1109
The bloat score for the entire CONFIG_SYSCALL_SPLICE patch-set is:
add/remove: 0/41 grow/shrink: 5/7 up/down: 23/-8422 (-8399)
function old new delta
sys_pwritev 115 122 +7
sys_preadv 115 122 +7
fdput_pos 29 36 +7
sys_pwrite64 115 116 +1
sys_pread64 115 116 +1
pipe_to_null 4 - -4
generic_pipe_buf_nosteal 6 - -6
spd_release_page 10 - -10
fdput 11 - -11
PageUptodate 22 11 -11
lock_page 36 24 -12
signal_pending 39 26 -13
fdget 56 42 -14
page_cache_pipe_buf_release 16 - -16
user_page_pipe_buf_ops 20 - -20
splice_write_null 24 4 -20
page_cache_pipe_buf_ops 20 - -20
nosteal_pipe_buf_ops 20 - -20
default_pipe_buf_ops 20 - -20
generic_splice_sendpage 24 - -24
user_page_pipe_buf_steal 25 - -25
splice_shrink_spd 27 - -27
pipe_to_user 43 - -43
direct_splice_actor 47 - -47
default_file_splice_write 49 - -49
wakeup_pipe_writers 54 - -54
wakeup_pipe_readers 54 - -54
write_pipe_buf 71 - -71
page_cache_pipe_buf_confirm 80 - -80
splice_grow_spd 87 - -87
do_splice_to 87 - -87
ipipe_prep.part 92 - -92
splice_from_pipe 93 - -93
splice_from_pipe_next 107 - -107
pipe_to_sendpage 109 - -109
page_cache_pipe_buf_steal 114 - -114
opipe_prep.part 119 - -119
sys_sendfile 122 - -122
generic_file_splice_read 131 8 -123
sys_sendfile64 126 - -126
sys_vmsplice 137 - -137
do_splice_direct 148 - -148
vmsplice_to_user 205 - -205
__splice_from_pipe 246 - -246
splice_direct_to_actor 348 - -348
splice_to_pipe 371 - -371
do_sendfile 492 - -492
sys_tee 497 - -497
vmsplice_to_pipe 558 - -558
default_file_splice_read 688 - -688
iter_file_splice_write 702 4 -698
sys_splice 1075 - -1075
__generic_file_splice_read 1109 - -1109
Signed-off-by: Pieter Smith <pieter@boesman.nl>
---
fs/Makefile | 3 ++-
fs/splice.c | 2 --
include/linux/fs.h | 26 ++++++++++++++++++++++++++
include/linux/splice.h | 42 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/fs/Makefile b/fs/Makefile
index fb7646e..9395622 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o super.o \
ioctl.o readdir.o select.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o splice.o sync.o utimes.o \
+ pnode.o sync.o utimes.o \
stack.o fs_struct.o statfs.o fs_pin.o
ifeq ($(CONFIG_BLOCK),y)
@@ -22,6 +22,7 @@ endif
obj-$(CONFIG_PROC_FS) += proc_namespace.o
obj-$(CONFIG_FSNOTIFY) += notify/
+obj-$(CONFIG_SYSCALL_SPLICE) += splice.o
obj-$(CONFIG_EPOLL) += eventpoll.o
obj-$(CONFIG_ANON_INODES) += anon_inodes.o
obj-$(CONFIG_SIGNALFD) += signalfd.o
diff --git a/fs/splice.c b/fs/splice.c
index 7c4c695..44b201b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1316,7 +1316,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
return ret;
}
-#ifdef CONFIG_SYSCALL_SPLICE
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
@@ -2201,5 +2200,4 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif
-#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a957d43..138107e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2444,6 +2444,7 @@ extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
extern void block_sync_page(struct page *page);
/* fs/splice.c */
+#ifdef CONFIG_SYSCALL_SPLICE
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t default_file_splice_read(struct file *, loff_t *,
@@ -2452,6 +2453,31 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);
+#else
+static inline ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+
+static inline ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+
+static inline ssize_t iter_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+
+static inline ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+#endif
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index da2751d..34570d8 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -65,6 +65,7 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
typedef int (splice_direct_actor)(struct pipe_inode_info *,
struct splice_desc *);
+#ifdef CONFIG_SYSCALL_SPLICE
extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int,
splice_actor *);
@@ -74,13 +75,54 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
+#else
+static inline ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags,
+ splice_actor *actor)
+{
+ return -EPERM;
+}
+
+static inline ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
+{
+ return -EPERM;
+}
+
+static inline ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
+{
+ return -EPERM;
+}
+
+static inline ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+ splice_direct_actor *actor)
+{
+ return -EPERM;
+}
+#endif
/*
* for dynamic pipe sizing
*/
+#ifdef CONFIG_SYSCALL_SPLICE
extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *);
extern void splice_shrink_spd(struct splice_pipe_desc *);
extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
+#else
+static inline int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+{
+ return -EPERM;
+}
+
+static inline void splice_shrink_spd(struct splice_pipe_desc *spd)
+{
+}
+
+static inline void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+{
+}
+#endif
extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
#endif
--
2.1.0
^ permalink raw reply related
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Khalid Aziz @ 2014-11-24 23:20 UTC (permalink / raw)
To: Andi Kleen
Cc: tglx-hfZtesqFncYOwBW4kG4KsQ, corbet-T1hC0tSOHrs,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
peterz-wEGCiKHe2LqWVfeAwA7xHQ, riel-H+wXaHxf7aLQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
rientjes-hpIqsD4AKlfQT0dZR+AlfA, mgorman-l3A5Bk7waGM,
liwanp-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8, raistlin-k2GhghHVRtY,
kirill.shutemov-VuQAYsv1563Yd54FQh9/CA,
atomlin-H+wXaHxf7aLQT0dZR+AlfA, avagin-GEFAQzZX7r8dnm+yROfE0A,
gorcunov-GEFAQzZX7r8dnm+yROfE0A,
serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw, athorlton-sJ/iWh9BUns,
oleg-H+wXaHxf7aLQT0dZR+AlfA, vdavydov-bzQdu9zFT3WakBO8gow8eQ,
daeseok.youn-Re5JQEeQqe8AvxtiuMwx3w,
keescook-F7+t8E8rja9g9hUCZPvPmw,
yangds.fnst-BthXqXjhjHXQFUHtdCDX3A, sbauer-F61uvSdQLzf2fBVCVOL8/A,
vishnu.ps-Sze3O3UU22JBDgjK7y7TUQ, axboe-b10kYP2dOMg,
paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-doc-u79uwXL29TY76Z2rM5mHXA,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20141124224302.GL10824-KWJ+5VKanrL29G5dvP0v1laTQe2KTcn/@public.gmane.org>
On Mon, 2014-11-24 at 14:43 -0800, Andi Kleen wrote:
> > +1. Location of shared flag can be set using prctl() only once. To
> > + write a new memory address, the previous memory address must be
> > + cleared first by writing NULL. Each new memory address requires
> > + validation in the kernel and update of pointers. Changing this
> > + address too many times creates too much overhead.
>
> Can you explain this more? Doesn't make any sense to me.
> The validation is just access_ok() which is only a few instructions?
From a userspace app's point of view, each call to prctl() incurs the
overhead of a system call.
>
> Also I would drop the config symbol. Linux normally doesn't
> do CONFIG for things like that.
CONFIG_SCHED_PREEMPT_DELAY allows one to keep this code out of compiled
kernel for custom kernels if this feature is definitely not needed.
Concerns were raised last time about this feature impacting other tasks.
Nevertheless, I am ok with removing the config option if that is the
consensus.
>
> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index 9b7d746..7f0d843 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -1671,6 +1671,11 @@ long do_fork(unsigned long clone_flags,
> > init_completion(&vfork);
> > get_task_struct(p);
> > }
> > +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> > + p->sched_preempt_delay.delay_req = NULL;
> > + p->sched_preempt_delay.delay_granted = 0;
> > + p->sched_preempt_delay.yield_penalty = 0;
> > +#endif
>
> FWIW this would lead to every new thread having to reexecute
> this. No good way around it, but it may eventually make
> thread spawns more expensive if it was widely used.
Yes, that is true. Newly allocated task_struct is not zero'd out, so
this becomes necessary.
>
> >
> > +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> > + /*
> > + * Clear the penalty flag for current task to reward it for
> > + * palying by the rules
> > + */
> > + current->sched_preempt_delay.yield_penalty = 0;
> > +#endif
>
> Doesn't that need to be quantified? After all they may yield
> only near the end of their time slice.
and that should be ok because the task was allowed to borrow a full
timeslice and it can use it up almost completely. Is there a reason to
differentiate between tasks yielding well before their borrowed
timeslice is up and tasks not yielding until almost the end of borrowed
timeslice?
>
> > + }
> > +
> > + /*
> > + * Get the value of preemption delay request flag from userspace.
> > + * Task had already passed us the address where the flag is stored
> > + * in userspace earlier. This flag is just like the PROCESS_PRIVATE
> > + * futex, leverage the futex code here to read the flag. If there
>
> I don't think any of the calls below are futex code.
Following code was borrowed from get_futex_value_locked() but this
comment is not really necessary here and can be removed if it causes
confusion.
>
> > + case PR_GET_PREEMPT_DELAY:
> > + error = put_user(
> > + (unsigned long)current->sched_preempt_delay.delay_req,
> > + (unsigned long __user *)arg2);
> > + break;
> > +#endif
>
> Unnecessary cast.
I get a bunch of warnings and errors if I remove either of the casts.
>
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -1104,6 +1104,15 @@ static struct ctl_table kern_table[] = {
> > .proc_handler = proc_dointvec,
> > },
> > #endif
> > +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> > + {
> > + .procname = "preempt_delay_available",
> > + .data = &sysctl_preempt_delay_available,
> > + .maxlen = sizeof(int),
> > + .mode = 0600,
>
> Better 0644, so users can know if they can use it.
OK, I can change that.
>
> Rest looks reasonable to me.
>
> -Andi
Thanks, Andi! I appreciate your feedback.
--
Khalid
^ permalink raw reply
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Thomas Gleixner @ 2014-11-24 23:35 UTC (permalink / raw)
To: Khalid Aziz
Cc: corbet, mingo, hpa, peterz, riel, akpm, rientjes, ak, mgorman,
liwanp, raistlin, kirill.shutemov, atomlin, avagin, gorcunov,
serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn, keescook,
yangds.fnst, sbauer, vishnu.ps, axboe, paulmck, linux-kernel,
linux-doc, linux-api
In-Reply-To: <1416862595-24513-1-git-send-email-khalid.aziz@oracle.com>
On Mon, 24 Nov 2014, Khalid Aziz wrote:
> sched/fair: Add advisory flag for borrowing a timeslice
>
> This patch adds a way for a task to request to borrow one timeslice
> from future if it is about to be preempted, so it could delay
> preemption and complete any critical task it is in the middle of.
>
> This feature helps with performance on databases and has been
> used for many years on other OSs by the databases. This feature
> helps in situation where a task acquires a lock before performing a
> critical operation on the database and happens to get preempted before
> it completes its task. This lock being held causes all other tasks
> that also acquire the same lock to perform their critical operation
> on the database, to start queueing up and causing large number of
> context switches. This queueing problem can be avoided if the task
> that acquires lock first could request scheduler to let it borrow one
> timeslice once it enters its critical section and hence allow it to
> complete its critical section without causing queueing problem. If
While you are niftily avoiding to talk about the nature of the lock, I
can take it for granted that you are talking about user space
spinlocks, right?
Simply if you would talk about futexes and pthread_mutexes then it
would have occurred to you while implementing that feature, that the
kernel already has a mechanism to record a reference to a user space
data structure (robust_list_head) which is updated when a futex is
acquired in user space, i.e. when a critical section is entered. It's
not the same as you need, but it would be relatively simple to convey
that information there.
So what are the actual lock types and use cases and why can't you
combine that with the existing robust list mechanism?
> critical section completes before the task is due for preemption,
> the task can simply deassert its request. A task sends the
And that deassertion has which consequences before the next preempt
check happens?
> +config SCHED_PREEMPT_DELAY
> + def_bool n
> + prompt "Scheduler preemption delay support"
> + depends on PROC_FS
Why so?
> @@ -1324,6 +1325,13 @@ struct task_struct {
> /* Revert to default priority/policy when forking */
> unsigned sched_reset_on_fork:1;
> unsigned sched_contributes_to_load:1;
> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> + struct preempt_delay {
> + u32 __user *delay_req; /* delay request flag pointer */
> + unsigned char delay_granted; /* currently in delay */
> + unsigned char yield_penalty; /* failure to yield penalty */
> + } sched_preempt_delay;
No. First of all this wants to be a proper struct declaration outside
of task_struct.
Aside from that, your user space side is actually a structure and not an
opaque u32 pointer, so this should be an explicit data type and not
something randomly defined in the guts of task_struct.
> +#if defined(CONFIG_SCHED_PREEMPT_DELAY) && defined(CONFIG_PROC_FS)
> +extern void sched_preempt_delay_show(struct seq_file *m,
> + struct task_struct *task);
> +extern void sched_preempt_delay_set(struct task_struct *task,
> + unsigned char *val);
> +#endif
Can you please get rid of the leftovers of your previous patches
yourself and before posting? It's annoying as hell to review patches
which contain stale code.
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 9b7d746..7f0d843 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1671,6 +1671,11 @@ long do_fork(unsigned long clone_flags,
> init_completion(&vfork);
> get_task_struct(p);
> }
> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> + p->sched_preempt_delay.delay_req = NULL;
> + p->sched_preempt_delay.delay_granted = 0;
> + p->sched_preempt_delay.yield_penalty = 0;
> +#endif
Sigh. We do not sprinkle that kind of #ifdef crap all over the
place. That's what inline functions in header files are for.
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 240157c..38cb515 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4230,6 +4230,14 @@ SYSCALL_DEFINE0(sched_yield)
> {
> struct rq *rq = this_rq_lock();
>
> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> + /*
> + * Clear the penalty flag for current task to reward it for
> + * palying by the rules
Looking at that mess makes me palying^Wpale.
> + */
> + current->sched_preempt_delay.yield_penalty = 0;
> +#endif
> +
> schedstat_inc(rq, yld_count);
> current->sched_class->yield_task(rq);
> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
> +/*
> + * delay_resched_rq(): Check if the task about to be preempted has
> + * requested an additional time slice. If it has, grant it additional
> + * timeslice once.
> + */
> +static void
> +delay_resched_rq(struct rq *rq)
> +{
> + struct task_struct *curr = rq->curr;
> + struct sched_entity *se;
> + int cpu = task_cpu(curr);
> + u32 __user *delay_req;
> + unsigned int delay_req_flag;
> + unsigned char *delay_flag;
> +
> + /*
> + * Check if task is using pre-emption delay feature. If address
> + * for preemption delay request flag is not set, this task is
> + * not using preemption delay feature, we can reschedule without
> + * any delay
So what happens if:
kernel.preempt_delay_available = 1;
prctl(PR_SET_PREEMPT_DELAY, ...);
kernel.preempt_delay_available = 0;
Nothing happens at all because you fail to give the sysop control over
the feature once you unleashed it.
The proper solution for this is to use a static key to control the
feature itself. That also reduces the overhead for those who are not
interested in that.
> + */
> + delay_req = curr->sched_preempt_delay.delay_req;
> +
> + if ((delay_req == NULL) || (cpu != smp_processor_id()))
check_preempt_tick() clearly does not care about that, but you inflict
a smp_processor_id() on every caller. I can see that you really care
about performance.
> + goto resched_now;
> +
> + /*
> + * Pre-emption delay will be granted only once. If this task
> + * has already been granted delay, rechedule now
> + */
> + if (curr->sched_preempt_delay.delay_granted) {
> + curr->sched_preempt_delay.delay_granted = 0;
> + goto resched_now;
> + }
> + /*
> + * Get the value of preemption delay request flag from userspace.
> + * Task had already passed us the address where the flag is stored
> + * in userspace earlier. This flag is just like the PROCESS_PRIVATE
> + * futex, leverage the futex code here to read the flag. If there
> + * is a page fault accessing this flag in userspace, that means
> + * userspace has not touched this flag recently and we can
> + * assume no preemption delay is needed.
> + *
> + * If task is not requesting additional timeslice, resched now
> + */
> + if (delay_req) {
Surely we need to recheck delay_req here.
> + int ret;
> +
> + pagefault_disable();
> + ret = __copy_from_user_inatomic(&delay_req_flag, delay_req,
> + sizeof(u32));
> + pagefault_enable();
> + delay_flag = &delay_req_flag;
> + if (ret || !delay_flag[0])
This is really a well designed kernel/user space interface. NOT.
> + goto resched_now;
> + } else {
> + goto resched_now;
> + }
> +
> + /*
> + * Current thread has requested preemption delay and has not
> + * been granted an extension yet. If this thread failed to yield
> + * processor after being granted amnesty last time, penalize it
> + * by not granting this delay request, otherwise give it an extra
> + * timeslice.
> + */
> + if (curr->sched_preempt_delay.yield_penalty) {
> + curr->sched_preempt_delay.yield_penalty = 0;
> + goto resched_now;
> + }
> +
> + se = &curr->se;
> + curr->sched_preempt_delay.delay_granted = 1;
> + /*
> + * Set the penalty flag for failing to yield the processor after
> + * being granted immunity. This flag will be cleared in
> + * sched_yield() if the thread indeed calls sched_yield
> + */
> + curr->sched_preempt_delay.yield_penalty = 1;
Why on earth do we need two flags here? Just because we can create
more code in the guts of the scheduler hot paths that way?
And surely we want to put them into two adjacent u8 to make life
easier for all architectures.
> + /*
> + * Let the thread know it got amnesty and it should call
> + * sched_yield() when it is done to avoid penalty next time
> + * it wants amnesty. We need to write to userspace location.
> + * Since we just read from this location, chances are extremley
> + * low we might page fault. If we do page fault, we will ignore
> + * it and accept the cost of failed write in form of unnecessary
> + * penalty for userspace task for not yielding processor.
This is the completely wrong argument. We know that the task was
asking for an extra time slice because the copy from user above
succeeded. So we are better off letting the task actually handle its
pagefault than scheduling it out.
> + * This is a highly unlikely scenario.
> + */
> + delay_flag[0] = 0;
> + delay_flag[1] = 1;
Sigh.
> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
And all of this needs to be in kernel/sys.c just because...
> +int sysctl_preempt_delay_available;
> +
> +static int
> +preempt_delay_write(struct task_struct *task, unsigned long preempt_delay_addr)
> +{
> + /*
> + * Do not allow write if pointer is currently set
> + */
> + if (task->sched_preempt_delay.delay_req &&
> + ((void *)preempt_delay_addr != NULL))
> + return -EINVAL;
> + /*
> + * Validate the pointer. It should be aligned to 4-byte boundary.
So 4 bytes is a perfect boundary for everyone, right? Pulled that
number out of thin air or what?
> + */
> + if (unlikely(!IS_ALIGNED(preempt_delay_addr, 4)))
> + return -EFAULT;
> + if (unlikely(!access_ok(rw, preempt_delay_addr, sizeof(u32))))
> + return -EFAULT;
> +
> + task->sched_preempt_delay.delay_req = (u32 __user *) preempt_delay_addr;
> +
> + /* zero out flags */
Brilliant comment. I can see what the code is doing. What's way more
interesting and of course undocumented is why you are ignoring the
return value of put_user() ..
> + put_user(0, (uint32_t *)preempt_delay_addr);
Aside from the general issues I have with this (see the inline replies
to your changelog), the overall impression of this patch is that it is
a half-baked and carelessly cobbled together extract of some
database-specific kernel hackery, which I prefer not to see at all.
Thanks,
tglx
^ permalink raw reply
* Re: [PATCH v4 3/7] fs/splice: support compiling out splice-family syscalls
From: Josh Triplett @ 2014-11-25 0:49 UTC (permalink / raw)
To: Pieter Smith
Cc: Alexander Duyck, Alexander Viro, Alexei Starovoitov,
Andrew Morton, Bertrand Jacquin, Catalina Mocanu, Daniel Borkmann,
David S. Miller, Eric Dumazet, Eric W. Biederman,
Fabian Frederick, open list:FUSE: FILESYSTEM...,
Geert Uytterhoeven, Hugh Dickins, Iulia Manda, Jan Beulich,
J. Bruce Fields, Jeff Layton, open list:ABI/API, linux-fsdevel,
open list
In-Reply-To: <1416870079-15254-4-git-send-email-pieter@boesman.nl>
On Tue, Nov 25, 2014 at 12:01:02AM +0100, Pieter Smith wrote:
> Many embedded systems will not need the splice-family syscalls (splice,
> vmsplice, tee and sendfile). Omitting them saves space. This adds a new EXPERT
> config option CONFIG_SYSCALL_SPLICE (default y) to support compiling them out.
>
> The goal is to completely compile out fs/splice along with the syscalls. To
> achieve this, the remaining patch-set will deal with fs/splice exports. As far
> as possible, the impact on other device drivers will be minimized so as to
> reduce the overall maintenance burden of CONFIG_SYSCALL_SPLICE.
>
> The use of exported functions will be solved by transparently mocking them out
> with static inlines. Uses of the exported pipe_buf_operations struct however
> require direct modification in fs/fuse and net/core. The next two patches will
> deal with this. A macro is defined that will assist with NULL'ing out callbacks
> when CONFIG_SYSCALL_SPLICE is undefined: __splice_p().
This message needs updating, since the patch series doesn't introduce or
use __splice_p anymore.
> Once all exports are solved, fs/splice can be compiled out.
>
> The bloat benefit of this patch given a tinyconfig is:
>
> add/remove: 0/16 grow/shrink: 2/5 up/down: 114/-3693 (-3579)
> function old new delta
> splice_direct_to_actor 348 416 +68
> splice_to_pipe 371 417 +46
> splice_from_pipe_next 107 106 -1
> fdput 11 - -11
> signal_pending 39 26 -13
> fdget 56 42 -14
> user_page_pipe_buf_ops 20 - -20
> user_page_pipe_buf_steal 25 - -25
> file_end_write 58 29 -29
> file_start_write 68 34 -34
> pipe_to_user 43 - -43
> wakeup_pipe_readers 54 - -54
> do_splice_to 87 - -87
> ipipe_prep.part 92 - -92
> opipe_prep.part 119 - -119
> sys_sendfile 122 - -122
> sys_sendfile64 126 - -126
> sys_vmsplice 137 - -137
> vmsplice_to_user 205 - -205
> sys_tee 491 - -491
> do_sendfile 492 - -492
> vmsplice_to_pipe 558 - -558
> sys_splice 1020 - -1020
>
> Signed-off-by: Pieter Smith <pieter@boesman.nl>
> ---
> fs/splice.c | 2 ++
> init/Kconfig | 10 ++++++++++
> kernel/sys_ni.c | 8 ++++++++
> 3 files changed, 20 insertions(+)
>
> diff --git a/fs/splice.c b/fs/splice.c
> index 44b201b..7c4c695 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -1316,6 +1316,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
> return ret;
> }
>
> +#ifdef CONFIG_SYSCALL_SPLICE
> static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
> struct pipe_inode_info *opipe,
> size_t len, unsigned int flags);
> @@ -2200,4 +2201,5 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
> return do_sendfile(out_fd, in_fd, NULL, count, 0);
> }
> #endif
> +#endif
>
> diff --git a/init/Kconfig b/init/Kconfig
> index d811d5f..dec9819 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1571,6 +1571,16 @@ config NTP
> system clock to an NTP server, you can disable this option to save
> space.
>
> +config SYSCALL_SPLICE
> + bool "Enable splice/vmsplice/tee/sendfile syscalls" if EXPERT
> + default y
> + help
> + This option enables the splice, vmsplice, tee and sendfile syscalls. These
> + are used by applications to: move data between buffers and arbitrary file
> + descriptors; "copy" data between buffers; or copy data from userspace into
> + buffers. If building an embedded system where no applications use these
> + syscalls, you can disable this option to save space.
> +
> config PCI_QUIRKS
> default y
> bool "Enable PCI quirk workarounds" if EXPERT
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index d2f5b00..25d5551 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -170,6 +170,14 @@ cond_syscall(sys_fstat);
> cond_syscall(sys_stat);
> cond_syscall(sys_uname);
> cond_syscall(sys_olduname);
> +cond_syscall(sys_vmsplice);
> +cond_syscall(sys_splice);
> +cond_syscall(sys_tee);
> +cond_syscall(sys_sendfile);
> +cond_syscall(sys_sendfile64);
> +cond_syscall(compat_sys_vmsplice);
> +cond_syscall(compat_sys_sendfile);
> +cond_syscall(compat_sys_sendfile64);
>
> /* arch-specific weak syscall entries */
> cond_syscall(sys_pciconfig_read);
> --
> 2.1.0
>
^ permalink raw reply
* Re: [PATCH v4 0/7] kernel tinification: optionally compile out splice family of syscalls (splice, vmsplice, tee and sendfile)
From: Josh Triplett @ 2014-11-25 0:52 UTC (permalink / raw)
To: Pieter Smith
Cc: Alexander Duyck, Alexander Viro, Alexei Starovoitov,
Andrew Morton, Bertrand Jacquin, Catalina Mocanu, Daniel Borkmann,
David S. Miller, Eric Dumazet, Eric W. Biederman,
Fabian Frederick, open list:FUSE: FILESYSTEM...,
Geert Uytterhoeven, Hugh Dickins, Iulia Manda, Jan Beulich,
J. Bruce Fields, Jeff Layton, open list:ABI/API, linux-fsdevel,
open list
In-Reply-To: <1416870079-15254-1-git-send-email-pieter@boesman.nl>
On Tue, Nov 25, 2014 at 12:00:59AM +0100, Pieter Smith wrote:
> REPO: https://github.com/smipi1/linux-tinification.git
>
> BRANCH: tiny/config-syscall-splice
>
> BACKGROUND: This patch-set forms part of the Linux Kernel Tinification effort (
> https://tiny.wiki.kernel.org/).
>
> GOAL: Support compiling out the splice family of syscalls (splice, vmsplice,
> tee and sendfile) along with all supporting infrastructure if not needed.
> Many embedded systems will not need the splice-family syscalls. Omitting them
> saves space.
>
> HISTORY:
> PATCH v4:
> - Drops __splice_p()
> - Let nfsd fall back to non-splice support when splice is compiled out
> - Style fixes
[...]
> RESULTS: A tinyconfig bloat-o-meter score for the entire patch-set:
>
> add/remove: 0/41 grow/shrink: 5/7 up/down: 23/-8422 (-8399)
I replied to one patch with a minor nit in the commit message. Other
than that, I don't see any obvious issues with this.
- Josh Triplett
^ permalink raw reply
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Rik van Riel @ 2014-11-25 2:03 UTC (permalink / raw)
To: Khalid Aziz, tglx, corbet, mingo, hpa, peterz, akpm, rientjes, ak,
mgorman, liwanp, raistlin, kirill.shutemov, atomlin, avagin,
gorcunov, serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn,
keescook, yangds.fnst, sbauer, vishnu.ps, axboe, paulmck
Cc: linux-kernel, linux-doc, linux-api
In-Reply-To: <1416862595-24513-1-git-send-email-khalid.aziz@oracle.com>
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On 11/24/2014 03:56 PM, Khalid Aziz wrote:
> sched/fair: Add advisory flag for borrowing a timeslice
>
> This patch adds a way for a task to request to borrow one
> timeslice from future if it is about to be preempted, so it could
> delay preemption and complete any critical task it is in the middle
> of.
>
> This feature helps with performance on databases and has been used
> for many years on other OSs by the databases. This feature helps in
> situation where a task acquires a lock before performing a critical
> operation on the database and happens to get preempted
Why don't the other tasks that want the lock sleep on the
lock?
I can see this "solution" help mostly with userspace spinlocks,
which are relics of a past era that need to die. There is no
way userspace spinlocks will not fail miserably on virtual
machines, and it is time to get rid of them.
- --
All rights reversed
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
iQEcBAEBAgAGBQJUc+OIAAoJEM553pKExN6DF/oIAJ+ldPQZBMIxJLK4dmAwCuwu
OLK8sEyOMbg7/0u7EcfJaeWMhxIN+etnK9535TsIpm7ojBeBcuMvXv/K7u5gD6V4
+yU4mV/rUCccolXruaHJqaFZqOg06qmJ0FdzJNyBrsMclgGnfTL8m8p5dlCOMMLZ
11N3imtrrJekigAmn/r9DCr75cGgfpIjPqE1yHc5NhiZ2uPmAS2qvefIZsg+88PH
8M0dOjgIWQKi9SkB6K2OSy7A/fKwyf9DJ3/OKRovA6AHfszvqCU1WVOoYRoO0CPG
v/zOYxIi8FIwi9LT50pM62zcpXVMYddN5etGa9qh4nI7oxXYKniH4JzwZdWDYwo=
=EymR
-----END PGP SIGNATURE-----
^ permalink raw reply
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Davidlohr Bueso @ 2014-11-25 2:12 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Khalid Aziz, corbet-T1hC0tSOHrs, mingo-H+wXaHxf7aLQT0dZR+AlfA,
hpa-YMNOUZJC4hwAvxtiuMwx3w, peterz-wEGCiKHe2LqWVfeAwA7xHQ,
riel-H+wXaHxf7aLQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
rientjes-hpIqsD4AKlfQT0dZR+AlfA, ak-VuQAYsv1563Yd54FQh9/CA,
mgorman-l3A5Bk7waGM, liwanp-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
raistlin-k2GhghHVRtY, kirill.shutemov-VuQAYsv1563Yd54FQh9/CA,
atomlin-H+wXaHxf7aLQT0dZR+AlfA, avagin-GEFAQzZX7r8dnm+yROfE0A,
gorcunov-GEFAQzZX7r8dnm+yROfE0A,
serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw, athorlton-sJ/iWh9BUns,
oleg-H+wXaHxf7aLQT0dZR+AlfA, vdavydov-bzQdu9zFT3WakBO8gow8eQ,
daeseok.youn-Re5JQEeQqe8AvxtiuMwx3w,
keescook-F7+t8E8rja9g9hUCZPvPmw,
yangds.fnst-BthXqXjhjHXQFUHtdCDX3A, sbauer-F61uvSdQLzf2fBVCVOL8/A,
vishnu.ps-Sze3O3UU22JBDgjK7y7TUQ, axboe-b10kYP2dOMg,
paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-doc-u79uwXL29TY76Z2rM5mHXA,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <alpine.DEB.2.11.1411242317380.6439@nanos>
On Tue, 2014-11-25 at 00:35 +0100, Thomas Gleixner wrote:
> On Mon, 24 Nov 2014, Khalid Aziz wrote:
> > sched/fair: Add advisory flag for borrowing a timeslice
> >
> > This patch adds a way for a task to request to borrow one timeslice
> > from future if it is about to be preempted, so it could delay
> > preemption and complete any critical task it is in the middle of.
> >
> > This feature helps with performance on databases and has been
> > used for many years on other OSs by the databases. This feature
> > helps in situation where a task acquires a lock before performing a
> > critical operation on the database and happens to get preempted before
> > it completes its task. This lock being held causes all other tasks
> > that also acquire the same lock to perform their critical operation
> > on the database, to start queueing up and causing large number of
> > context switches. This queueing problem can be avoided if the task
> > that acquires lock first could request scheduler to let it borrow one
> > timeslice once it enters its critical section and hence allow it to
> > complete its critical section without causing queueing problem. If
>
> While you are niftily avoiding to talk about the nature of the lock, I
> can take it for granted that you are talking about user space
> spinlocks, right?
Probably, or perhaps userspace fair locks. If the task that is next in
line to acquire the lock is preempted and the lock is released, fairness
prevents anyone else from taking it instead.
Thanks,
Davidlohr
^ permalink raw reply
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Mike Galbraith @ 2014-11-25 4:20 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Khalid Aziz, corbet, mingo, hpa, peterz, riel, akpm, rientjes, ak,
mgorman, liwanp, raistlin, kirill.shutemov, atomlin, avagin,
gorcunov, serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn,
keescook, yangds.fnst, sbauer, vishnu.ps, axboe, paulmck,
linux-kernel, linux-doc, linux-api
In-Reply-To: <alpine.DEB.2.11.1411242317380.6439@nanos>
On Tue, 2014-11-25 at 00:35 +0100, Thomas Gleixner wrote:
> Aside of the general issues I have with this (see the inline replies
> to your changelog) the overall impression of this patch is that it is
> a half-baked and carelessly cobbled together extract of some data base
> specific kernel hackery, which I prefer not to see at all.
It culminates in a lumbering pseudo RT class of task disguised as a fair
class task. I'd expect more gain by twiddling knobs to let last buddy
do its job than the 3% mentioned.
You could perhaps create a SUPER_BATCH class that is not wakeup
preempted by any fair class task of <= priority, not only BATCH and
IDLE, but that's as nasty as this patch, though loads prettier. The
tick time thing doesn't feel right at all... if you're hurt badly by the
tick, you're likely holding the lock too long methinks.
-Mike
^ permalink raw reply
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Davidlohr Bueso @ 2014-11-25 6:30 UTC (permalink / raw)
To: Rik van Riel
Cc: Khalid Aziz, tglx, corbet, mingo, hpa, peterz, akpm, rientjes, ak,
mgorman, liwanp, raistlin, kirill.shutemov, atomlin, avagin,
gorcunov, serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn,
keescook, yangds.fnst, sbauer, vishnu.ps, axboe, paulmck,
linux-kernel, linux-doc, linux-api
In-Reply-To: <5473E388.6000605@redhat.com>
On Mon, 2014-11-24 at 21:03 -0500, Rik van Riel wrote:
> I can see this "solution" help mostly with userspace spinlocks,
> which are relics of a past era that need to die. There is no
> way userspace spinlocks will not fail miserably on virtual
> machines, and it is time to get rid of them.
No, not really. Spinlocks are still very useful on bare metal.
Virtualization is not the only thing out there.
Thanks,
Davidlohr
^ permalink raw reply
* [PATCH v5 0/7] kernel tinification: optionally compile out splice family of syscalls (splice, vmsplice, tee and sendfile)
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Josh Triplett, Alexander Duyck, Alexander Viro,
Alexei Starovoitov, Andrew Morton, Bertrand Jacquin,
Catalina Mocanu, Daniel Borkmann, David S. Miller, Eric Dumazet,
Eric W. Biederman, Fabian Frederick,
open list:FUSE: FILESYSTEM..., Geert Uytterhoeven, Hugh Dickins,
Iulia Manda, Jan Beulich, J. Bruce Fields, Jeff Layton,
open list:ABI/API, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-kernel
REPO: https://github.com/smipi1/linux-tinification.git
BRANCH: tiny/config-syscall-splice
BACKGROUND: This patch-set forms part of the Linux Kernel Tinification effort (
https://tiny.wiki.kernel.org/).
GOAL: Support compiling out the splice family of syscalls (splice, vmsplice,
tee and sendfile) along with all supporting infrastructure if not needed.
Many embedded systems will not need the splice-family syscalls. Omitting them
saves space.
HISTORY:
PATCH v5:
- Fix up commit log still referring to dropped __splice_p()
PATCH v4:
- Drops __splice_p()
- Let nfsd fall back to non-splice support when splice is compiled out
- Style fixes
PATCH v3:
- Fixup commit logs so that they are consistent with patch strategy
- Style fixes
PATCH v2:
- Avoid the ifdef mess introduced in PATCH v1 by mocking out exported splice
functions.
STRATEGY:
a. With the goal of eventually compiling out fs/splice.c, several functions
that are only used in support of the splice family of syscalls are moved
into fs/splice.c from fs/read_write.c. The kernel_write function that is not
used to support the splice syscalls is moved to fs/read_write.c.
b. Introduce an EXPERT kernel configuration option; CONFIG_SYSCALL_SPLICE; to
compile out the splice family of syscalls. This removes all userspace uses
of the splice infrastructure.
c. Splice exports an operations struct, nosteal_pipe_buf_ops. Eliminate the
uses of this struct when CONFIG_SYSCALL_SPLICE is undefined, so that splice
can later be compiled out.
d. Let nfsd fall back to non-splice support when splice is compiled out.
e. Compile out fs/splice.c. Functions exported by fs/splice are mocked out with
failing static inlines. This is done so as to all but eliminate the
maintenance burden on file-system drivers.
RESULTS: A tinyconfig bloat-o-meter score for the entire patch-set:
add/remove: 0/41 grow/shrink: 5/7 up/down: 23/-8422 (-8399)
function old new delta
sys_pwritev 115 122 +7
sys_preadv 115 122 +7
fdput_pos 29 36 +7
sys_pwrite64 115 116 +1
sys_pread64 115 116 +1
pipe_to_null 4 - -4
generic_pipe_buf_nosteal 6 - -6
spd_release_page 10 - -10
fdput 11 - -11
PageUptodate 22 11 -11
lock_page 36 24 -12
signal_pending 39 26 -13
fdget 56 42 -14
page_cache_pipe_buf_release 16 - -16
user_page_pipe_buf_ops 20 - -20
splice_write_null 24 4 -20
page_cache_pipe_buf_ops 20 - -20
nosteal_pipe_buf_ops 20 - -20
default_pipe_buf_ops 20 - -20
generic_splice_sendpage 24 - -24
user_page_pipe_buf_steal 25 - -25
splice_shrink_spd 27 - -27
pipe_to_user 43 - -43
direct_splice_actor 47 - -47
default_file_splice_write 49 - -49
wakeup_pipe_writers 54 - -54
wakeup_pipe_readers 54 - -54
write_pipe_buf 71 - -71
page_cache_pipe_buf_confirm 80 - -80
splice_grow_spd 87 - -87
do_splice_to 87 - -87
ipipe_prep.part 92 - -92
splice_from_pipe 93 - -93
splice_from_pipe_next 107 - -107
pipe_to_sendpage 109 - -109
page_cache_pipe_buf_steal 114 - -114
opipe_prep.part 119 - -119
sys_sendfile 122 - -122
generic_file_splice_read 131 8 -123
sys_sendfile64 126 - -126
sys_vmsplice 137 - -137
do_splice_direct 148 - -148
vmsplice_to_user 205 - -205
__splice_from_pipe 246 - -246
splice_direct_to_actor 348 - -348
splice_to_pipe 371 - -371
do_sendfile 492 - -492
sys_tee 497 - -497
vmsplice_to_pipe 558 - -558
default_file_splice_read 688 - -688
iter_file_splice_write 702 4 -698
sys_splice 1075 - -1075
__generic_file_splice_read 1109 - -1109
Pieter Smith (7):
fs: move sendfile syscall into fs/splice
fs: moved kernel_write to fs/read_write
fs/splice: support compiling out splice-family syscalls
fs/fuse: support compiling out splice
net/core: support compiling out splice
fs/nfsd: support compiling out splice
fs/splice: full support for compiling out splice
fs/Makefile | 3 +-
fs/fuse/dev.c | 9 ++-
fs/read_write.c | 181 +++------------------------------------------
fs/splice.c | 194 +++++++++++++++++++++++++++++++++++++++++++++----
include/linux/fs.h | 26 +++++++
include/linux/skbuff.h | 10 +++
include/linux/splice.h | 42 +++++++++++
init/Kconfig | 10 +++
kernel/sys_ni.c | 8 ++
net/core/skbuff.c | 11 ++-
net/sunrpc/svc.c | 2 +-
11 files changed, 302 insertions(+), 194 deletions(-)
--
2.1.0
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH v5 1/7] fs: move sendfile syscall into fs/splice
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Josh Triplett, Alexander Duyck, Alexander Viro,
Alexei Starovoitov, Andrew Morton, Bertrand Jacquin,
Catalina Mocanu, Daniel Borkmann, David S. Miller, Eric Dumazet,
Eric W. Biederman, Fabian Frederick,
open list:FUSE: FILESYSTEM..., Geert Uytterhoeven, Hugh Dickins,
Iulia Manda, Jan Beulich, J. Bruce Fields, Jeff Layton,
open list:ABI/API, linux-fsdevel-u79uwXL29TY76Z2rM5mHXA,
linux-kernel
In-Reply-To: <1416899996-21315-1-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
sendfile functionally forms part of the splice group of syscalls (splice,
vmsplice and tee). Grouping sendfile with splice paves the way to compiling out
the splice group of syscalls for embedded systems that do not need these.
add/remove: 0/0 grow/shrink: 7/2 up/down: 86/-61 (25)
function old new delta
file_start_write 34 68 +34
file_end_write 29 58 +29
sys_pwritev 115 122 +7
sys_preadv 115 122 +7
fdput_pos 29 36 +7
sys_pwrite64 115 116 +1
sys_pread64 115 116 +1
sys_tee 497 491 -6
sys_splice 1075 1020 -55
Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
---
fs/read_write.c | 175 -------------------------------------------------------
fs/splice.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 178 insertions(+), 175 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 7d9318c..d9451ba 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1191,178 +1191,3 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
}
#endif
-static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
- size_t count, loff_t max)
-{
- struct fd in, out;
- struct inode *in_inode, *out_inode;
- loff_t pos;
- loff_t out_pos;
- ssize_t retval;
- int fl;
-
- /*
- * Get input file, and verify that it is ok..
- */
- retval = -EBADF;
- in = fdget(in_fd);
- if (!in.file)
- goto out;
- if (!(in.file->f_mode & FMODE_READ))
- goto fput_in;
- retval = -ESPIPE;
- if (!ppos) {
- pos = in.file->f_pos;
- } else {
- pos = *ppos;
- if (!(in.file->f_mode & FMODE_PREAD))
- goto fput_in;
- }
- retval = rw_verify_area(READ, in.file, &pos, count);
- if (retval < 0)
- goto fput_in;
- count = retval;
-
- /*
- * Get output file, and verify that it is ok..
- */
- retval = -EBADF;
- out = fdget(out_fd);
- if (!out.file)
- goto fput_in;
- if (!(out.file->f_mode & FMODE_WRITE))
- goto fput_out;
- retval = -EINVAL;
- in_inode = file_inode(in.file);
- out_inode = file_inode(out.file);
- out_pos = out.file->f_pos;
- retval = rw_verify_area(WRITE, out.file, &out_pos, count);
- if (retval < 0)
- goto fput_out;
- count = retval;
-
- if (!max)
- max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
-
- if (unlikely(pos + count > max)) {
- retval = -EOVERFLOW;
- if (pos >= max)
- goto fput_out;
- count = max - pos;
- }
-
- fl = 0;
-#if 0
- /*
- * We need to debate whether we can enable this or not. The
- * man page documents EAGAIN return for the output at least,
- * and the application is arguably buggy if it doesn't expect
- * EAGAIN on a non-blocking file descriptor.
- */
- if (in.file->f_flags & O_NONBLOCK)
- fl = SPLICE_F_NONBLOCK;
-#endif
- file_start_write(out.file);
- retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
- file_end_write(out.file);
-
- if (retval > 0) {
- add_rchar(current, retval);
- add_wchar(current, retval);
- fsnotify_access(in.file);
- fsnotify_modify(out.file);
- out.file->f_pos = out_pos;
- if (ppos)
- *ppos = pos;
- else
- in.file->f_pos = pos;
- }
-
- inc_syscr(current);
- inc_syscw(current);
- if (pos > max)
- retval = -EOVERFLOW;
-
-fput_out:
- fdput(out);
-fput_in:
- fdput(in);
-out:
- return retval;
-}
-
-SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
-{
- loff_t pos;
- off_t off;
- ssize_t ret;
-
- if (offset) {
- if (unlikely(get_user(off, offset)))
- return -EFAULT;
- pos = off;
- ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
- if (unlikely(put_user(pos, offset)))
- return -EFAULT;
- return ret;
- }
-
- return do_sendfile(out_fd, in_fd, NULL, count, 0);
-}
-
-SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
-{
- loff_t pos;
- ssize_t ret;
-
- if (offset) {
- if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
- return -EFAULT;
- ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
- if (unlikely(put_user(pos, offset)))
- return -EFAULT;
- return ret;
- }
-
- return do_sendfile(out_fd, in_fd, NULL, count, 0);
-}
-
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
- compat_off_t __user *, offset, compat_size_t, count)
-{
- loff_t pos;
- off_t off;
- ssize_t ret;
-
- if (offset) {
- if (unlikely(get_user(off, offset)))
- return -EFAULT;
- pos = off;
- ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
- if (unlikely(put_user(pos, offset)))
- return -EFAULT;
- return ret;
- }
-
- return do_sendfile(out_fd, in_fd, NULL, count, 0);
-}
-
-COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
- compat_loff_t __user *, offset, compat_size_t, count)
-{
- loff_t pos;
- ssize_t ret;
-
- if (offset) {
- if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
- return -EFAULT;
- ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
- if (unlikely(put_user(pos, offset)))
- return -EFAULT;
- return ret;
- }
-
- return do_sendfile(out_fd, in_fd, NULL, count, 0);
-}
-#endif
diff --git a/fs/splice.c b/fs/splice.c
index f5cb9ba..c1a2861 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -28,6 +28,7 @@
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/uio.h>
+#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/gfp.h>
#include <linux/socket.h>
@@ -2039,3 +2040,180 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
return error;
}
+
+static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
+ size_t count, loff_t max)
+{
+ struct fd in, out;
+ struct inode *in_inode, *out_inode;
+ loff_t pos;
+ loff_t out_pos;
+ ssize_t retval;
+ int fl;
+
+ /*
+ * Get input file, and verify that it is ok..
+ */
+ retval = -EBADF;
+ in = fdget(in_fd);
+ if (!in.file)
+ goto out;
+ if (!(in.file->f_mode & FMODE_READ))
+ goto fput_in;
+ retval = -ESPIPE;
+ if (!ppos) {
+ pos = in.file->f_pos;
+ } else {
+ pos = *ppos;
+ if (!(in.file->f_mode & FMODE_PREAD))
+ goto fput_in;
+ }
+ retval = rw_verify_area(READ, in.file, &pos, count);
+ if (retval < 0)
+ goto fput_in;
+ count = retval;
+
+ /*
+ * Get output file, and verify that it is ok..
+ */
+ retval = -EBADF;
+ out = fdget(out_fd);
+ if (!out.file)
+ goto fput_in;
+ if (!(out.file->f_mode & FMODE_WRITE))
+ goto fput_out;
+ retval = -EINVAL;
+ in_inode = file_inode(in.file);
+ out_inode = file_inode(out.file);
+ out_pos = out.file->f_pos;
+ retval = rw_verify_area(WRITE, out.file, &out_pos, count);
+ if (retval < 0)
+ goto fput_out;
+ count = retval;
+
+ if (!max)
+ max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
+
+ if (unlikely(pos + count > max)) {
+ retval = -EOVERFLOW;
+ if (pos >= max)
+ goto fput_out;
+ count = max - pos;
+ }
+
+ fl = 0;
+#if 0
+ /*
+ * We need to debate whether we can enable this or not. The
+ * man page documents EAGAIN return for the output at least,
+ * and the application is arguably buggy if it doesn't expect
+ * EAGAIN on a non-blocking file descriptor.
+ */
+ if (in.file->f_flags & O_NONBLOCK)
+ fl = SPLICE_F_NONBLOCK;
+#endif
+ file_start_write(out.file);
+ retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
+ file_end_write(out.file);
+
+ if (retval > 0) {
+ add_rchar(current, retval);
+ add_wchar(current, retval);
+ fsnotify_access(in.file);
+ fsnotify_modify(out.file);
+ out.file->f_pos = out_pos;
+ if (ppos)
+ *ppos = pos;
+ else
+ in.file->f_pos = pos;
+ }
+
+ inc_syscr(current);
+ inc_syscw(current);
+ if (pos > max)
+ retval = -EOVERFLOW;
+
+fput_out:
+ fdput(out);
+fput_in:
+ fdput(in);
+out:
+ return retval;
+}
+
+SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
+{
+ loff_t pos;
+ off_t off;
+ ssize_t ret;
+
+ if (offset) {
+ if (unlikely(get_user(off, offset)))
+ return -EFAULT;
+ pos = off;
+ ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
+ if (unlikely(put_user(pos, offset)))
+ return -EFAULT;
+ return ret;
+ }
+
+ return do_sendfile(out_fd, in_fd, NULL, count, 0);
+}
+
+SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
+{
+ loff_t pos;
+ ssize_t ret;
+
+ if (offset) {
+ if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
+ return -EFAULT;
+ ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
+ if (unlikely(put_user(pos, offset)))
+ return -EFAULT;
+ return ret;
+ }
+
+ return do_sendfile(out_fd, in_fd, NULL, count, 0);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
+ compat_off_t __user *, offset, compat_size_t, count)
+{
+ loff_t pos;
+ off_t off;
+ ssize_t ret;
+
+ if (offset) {
+ if (unlikely(get_user(off, offset)))
+ return -EFAULT;
+ pos = off;
+ ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
+ if (unlikely(put_user(pos, offset)))
+ return -EFAULT;
+ return ret;
+ }
+
+ return do_sendfile(out_fd, in_fd, NULL, count, 0);
+}
+
+COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
+ compat_loff_t __user *, offset, compat_size_t, count)
+{
+ loff_t pos;
+ ssize_t ret;
+
+ if (offset) {
+ if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
+ return -EFAULT;
+ ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
+ if (unlikely(put_user(pos, offset)))
+ return -EFAULT;
+ return ret;
+ }
+
+ return do_sendfile(out_fd, in_fd, NULL, count, 0);
+}
+#endif
+
--
2.1.0
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v5 2/7] fs: moved kernel_write to fs/read_write
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Michael S. Tsirkin, Trond Myklebust, Bertrand Jacquin,
Oleg Nesterov, J. Bruce Fields, Eric Dumazet, Willem de Bruijn,
蔡正龙, Jeff Layton, Tom Herbert,
Alexei Starovoitov, Miklos Szeredi, Peter Foley, Hugh Dickins,
Xiao Guangrong, Geert Uytterhoeven, Mel Gorman, Matt Turner,
Paul E. McKenney, Alexander Duyck, open list:FUSE: FILESYSTEM...
In-Reply-To: <1416899996-21315-1-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
kernel_write shares infrastructure with the read_write translation unit but not
with the splice translation unit. Grouping kernel_write with the read_write
translation unit is more logical. It also paves the way to compiling out the
splice group of syscalls for embedded systems that do not need them.
Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
---
fs/read_write.c | 16 ++++++++++++++++
fs/splice.c | 16 ----------------
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index d9451ba..f4c8d8b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1191,3 +1191,19 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
}
#endif
+ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+ loff_t pos)
+{
+ mm_segment_t old_fs;
+ ssize_t res;
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ res = vfs_write(file, (__force const char __user *)buf, count, &pos);
+ set_fs(old_fs);
+
+ return res;
+}
+EXPORT_SYMBOL(kernel_write);
+
diff --git a/fs/splice.c b/fs/splice.c
index c1a2861..44b201b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -583,22 +583,6 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
return res;
}
-ssize_t kernel_write(struct file *file, const char *buf, size_t count,
- loff_t pos)
-{
- mm_segment_t old_fs;
- ssize_t res;
-
- old_fs = get_fs();
- set_fs(get_ds());
- /* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_write(file, (__force const char __user *)buf, count, &pos);
- set_fs(old_fs);
-
- return res;
-}
-EXPORT_SYMBOL(kernel_write);
-
ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
--
2.1.0
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
^ permalink raw reply related
* [PATCH v5 3/7] fs/splice: support compiling out splice-family syscalls
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Michael S. Tsirkin, Trond Myklebust, Bertrand Jacquin,
Oleg Nesterov, J. Bruce Fields, Eric Dumazet, Willem de Bruijn,
蔡正龙, Jeff Layton, Tom Herbert,
Alexei Starovoitov, Miklos Szeredi, Peter Foley, Hugh Dickins,
Xiao Guangrong, Geert Uytterhoeven, Mel Gorman, Matt Turner,
Paul E. McKenney, Alexander Duyck, open list:FUSE: FILESYSTEM...
In-Reply-To: <1416899996-21315-1-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
Many embedded systems will not need the splice-family syscalls (splice,
vmsplice, tee and sendfile). Omitting them saves space. This adds a new EXPERT
config option CONFIG_SYSCALL_SPLICE (default y) to support compiling them out.
The goal is to completely compile out fs/splice along with the syscalls. To
achieve this, the remaining patch-set will deal with fs/splice exports. As far
as possible, the impact on other device drivers will be minimized so as to
reduce the overall maintenance burden of CONFIG_SYSCALL_SPLICE.
The use of exported functions will be solved by transparently mocking them out
with static inlines. Uses of the exported pipe_buf_operations struct however
require direct modification in fs/fuse and net/core. The next two patches will
deal with this.
The last change required before fs/splice can be compiled out is making fs/nfsd
aware of the lacking splice support in file-systems when CONFIG_SYSCALL_SPLICE
is undefined.
The bloat benefit of this patch given a tinyconfig is:
add/remove: 0/16 grow/shrink: 2/5 up/down: 114/-3693 (-3579)
function old new delta
splice_direct_to_actor 348 416 +68
splice_to_pipe 371 417 +46
splice_from_pipe_next 107 106 -1
fdput 11 - -11
signal_pending 39 26 -13
fdget 56 42 -14
user_page_pipe_buf_ops 20 - -20
user_page_pipe_buf_steal 25 - -25
file_end_write 58 29 -29
file_start_write 68 34 -34
pipe_to_user 43 - -43
wakeup_pipe_readers 54 - -54
do_splice_to 87 - -87
ipipe_prep.part 92 - -92
opipe_prep.part 119 - -119
sys_sendfile 122 - -122
sys_sendfile64 126 - -126
sys_vmsplice 137 - -137
vmsplice_to_user 205 - -205
sys_tee 491 - -491
do_sendfile 492 - -492
vmsplice_to_pipe 558 - -558
sys_splice 1020 - -1020
Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
---
fs/splice.c | 2 ++
init/Kconfig | 10 ++++++++++
kernel/sys_ni.c | 8 ++++++++
3 files changed, 20 insertions(+)
diff --git a/fs/splice.c b/fs/splice.c
index 44b201b..7c4c695 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1316,6 +1316,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
return ret;
}
+#ifdef CONFIG_SYSCALL_SPLICE
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
@@ -2200,4 +2201,5 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index d811d5f..dec9819 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1571,6 +1571,16 @@ config NTP
system clock to an NTP server, you can disable this option to save
space.
+config SYSCALL_SPLICE
+ bool "Enable splice/vmsplice/tee/sendfile syscalls" if EXPERT
+ default y
+ help
+ This option enables the splice, vmsplice, tee and sendfile syscalls. These
+ are used by applications to: move data between buffers and arbitrary file
+ descriptors; "copy" data between buffers; or copy data from userspace into
+ buffers. If building an embedded system where no applications use these
+ syscalls, you can disable this option to save space.
+
config PCI_QUIRKS
default y
bool "Enable PCI quirk workarounds" if EXPERT
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d2f5b00..25d5551 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -170,6 +170,14 @@ cond_syscall(sys_fstat);
cond_syscall(sys_stat);
cond_syscall(sys_uname);
cond_syscall(sys_olduname);
+cond_syscall(sys_vmsplice);
+cond_syscall(sys_splice);
+cond_syscall(sys_tee);
+cond_syscall(sys_sendfile);
+cond_syscall(sys_sendfile64);
+cond_syscall(compat_sys_vmsplice);
+cond_syscall(compat_sys_sendfile);
+cond_syscall(compat_sys_sendfile64);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
2.1.0
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
^ permalink raw reply related
* [PATCH v5 4/7] fs/fuse: support compiling out splice
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter
Cc: Josh Triplett, Alexander Duyck, Alexander Viro,
Alexei Starovoitov, Andrew Morton, Bertrand Jacquin,
Catalina Mocanu, Daniel Borkmann, David S. Miller, Eric Dumazet,
Eric W. Biederman, Fabian Frederick,
open list:FUSE: FILESYSTEM..., Geert Uytterhoeven, Hugh Dickins,
Iulia Manda, Jan Beulich, J. Bruce Fields, Jeff Layton,
open list:ABI/API, linux-fsdevel, linux-kernel
In-Reply-To: <1416899996-21315-1-git-send-email-pieter@boesman.nl>
To implement splice support, fs/fuse makes use of nosteal_pipe_buf_ops. This
struct is exported by fs/splice. The goal of the larger patch set is to
completely compile out fs/splice, so uses of the exported struct need to be
compiled out along with fs/splice.
This patch therefore compiles out splice support in fs/fuse when
CONFIG_SYSCALL_SPLICE is undefined.
Signed-off-by: Pieter Smith <pieter@boesman.nl>
---
fs/fuse/dev.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca88731..e984302 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1191,8 +1191,9 @@ __releases(fc->lock)
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
- struct fuse_copy_state *cs, size_t nbytes)
+static ssize_t __maybe_unused
+fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
+ struct fuse_copy_state *cs, size_t nbytes)
{
int err;
struct fuse_req *req;
@@ -1291,6 +1292,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}
+#ifdef CONFIG_SYSCALL_SPLICE
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
@@ -1368,6 +1370,9 @@ out:
kfree(bufs);
return ret;
}
+#else /* CONFIG_SYSCALL_SPLICE */
+#define fuse_dev_splice_read NULL
+#endif
static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
--
2.1.0
^ permalink raw reply related
* [PATCH v5 5/7] net/core: support compiling out splice
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Michael S. Tsirkin, Trond Myklebust, Bertrand Jacquin,
Oleg Nesterov, J. Bruce Fields, Eric Dumazet, Willem de Bruijn,
蔡正龙, Jeff Layton, Tom Herbert,
Alexei Starovoitov, Miklos Szeredi, Peter Foley, Hugh Dickins,
Xiao Guangrong, Geert Uytterhoeven, Mel Gorman, Matt Turner,
Paul E. McKenney, Alexander Duyck, open list:FUSE: FILESYSTEM...
In-Reply-To: <1416899996-21315-1-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
To implement splice support, net/core makes use of nosteal_pipe_buf_ops. This
struct is exported by fs/splice. The goal of the larger patch set is to
completely compile out fs/splice, so uses of the exported struct need to be
compiled out along with fs/splice.
This patch therefore compiles out splice support in net/core when
CONFIG_SYSCALL_SPLICE is undefined. The compiled out function skb_splice_bits
is transparently mocked out with a static inline. The greater patch set removes
userspace splice support so it cannot be called anyway.
Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
---
include/linux/skbuff.h | 10 ++++++++++
net/core/skbuff.c | 11 +++++++----
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a59d934..5cd636b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2640,9 +2640,19 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
int len, __wsum csum);
+#ifdef CONFIG_SYSCALL_SPLICE
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags);
+#else
+static inline int
+skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+ struct pipe_inode_info *pipe, unsigned int len,
+ unsigned int flags)
+{
+ return -EPERM;
+}
+#endif
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 61059a0..bb426d9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1678,7 +1678,8 @@ EXPORT_SYMBOL(skb_copy_bits);
* Callback from splice_to_pipe(), if we need to release some pages
* at the end of the spd in case we error'ed out in filling the pipe.
*/
-static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+static void __maybe_unused sock_spd_release(struct splice_pipe_desc *spd,
+ unsigned int i)
{
put_page(spd->pages[i]);
}
@@ -1781,9 +1782,9 @@ static bool __splice_segment(struct page *page, unsigned int poff,
* Map linear and fragment data from the skb to spd. It reports true if the
* pipe is full or if we already spliced the requested length.
*/
-static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
- unsigned int *offset, unsigned int *len,
- struct splice_pipe_desc *spd, struct sock *sk)
+static bool __maybe_unused __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+ unsigned int *offset, unsigned int *len,
+ struct splice_pipe_desc *spd, struct sock *sk)
{
int seg;
@@ -1821,6 +1822,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
* the frag list, if such a thing exists. We'd probably need to recurse to
* handle that cleanly.
*/
+#ifdef CONFIG_SYSCALL_SPLICE
int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
unsigned int flags)
@@ -1876,6 +1878,7 @@ done:
return ret;
}
+#endif /* CONFIG_SYSCALL_SPLICE */
/**
* skb_store_bits - store bits from kernel buffer to skb
--
2.1.0
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
^ permalink raw reply related
* [PATCH v5 6/7] fs/nfsd: support compiling out splice
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter
Cc: Josh Triplett, Alexander Duyck, Alexander Viro,
Alexei Starovoitov, Andrew Morton, Bertrand Jacquin,
Catalina Mocanu, Daniel Borkmann, David S. Miller, Eric Dumazet,
Eric W. Biederman, Fabian Frederick,
open list:FUSE: FILESYSTEM..., Geert Uytterhoeven, Hugh Dickins,
Iulia Manda, Jan Beulich, J. Bruce Fields, Jeff Layton,
open list:ABI/API, linux-fsdevel, linux-kernel
In-Reply-To: <1416899996-21315-1-git-send-email-pieter@boesman.nl>
The goal of the larger patch set is to completely compile out fs/splice, and
as a result, splice support for all file-systems. This patch ensures that
fs/nfsd falls back to non-splice fs support when CONFIG_SYSCALL_SPLICE is
undefined.
Signed-off-by: Pieter Smith <pieter@boesman.nl>
---
net/sunrpc/svc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index ca8a795..6cacc37 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1084,7 +1084,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off only in gss privacy case: */
- rqstp->rq_splice_ok = true;
+ rqstp->rq_splice_ok = IS_ENABLED(CONFIG_SYSCALL_SPLICE);
/* Will be turned off only when NFSv4 Sessions are used */
rqstp->rq_usedeferral = true;
rqstp->rq_dropme = false;
--
2.1.0
^ permalink raw reply related
* [PATCH v5 7/7] fs/splice: full support for compiling out splice
From: Pieter Smith @ 2014-11-25 7:19 UTC (permalink / raw)
To: pieter-qeJ+1H9vRZbz+pZb47iToQ
Cc: Michael S. Tsirkin, Trond Myklebust, Bertrand Jacquin,
Oleg Nesterov, J. Bruce Fields, Eric Dumazet, Willem de Bruijn,
蔡正龙, Jeff Layton, Tom Herbert,
Alexei Starovoitov, Miklos Szeredi, Peter Foley, Hugh Dickins,
Xiao Guangrong, Geert Uytterhoeven, Mel Gorman, Matt Turner,
Paul E. McKenney, Alexander Duyck, open list:FUSE: FILESYSTEM...
In-Reply-To: <1416899996-21315-1-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
Entirely compile out splice translation unit when the system is configured
without splice family of syscalls (i.e. CONFIG_SYSCALL_SPLICE is undefined).
Exported fs/splice functions are transparently mocked out with static inlines.
Because userspace support for splice has already been removed by this
patch-set, the exported functions cannot be called anyway. Mocking them out
prevents a maintenance burden on file system drivers.
The bloat score resulting from this patch given a tinyconfig is:
add/remove: 0/25 grow/shrink: 0/5 up/down: 0/-4845 (-4845)
function old new delta
pipe_to_null 4 - -4
generic_pipe_buf_nosteal 6 - -6
spd_release_page 10 - -10
PageUptodate 22 11 -11
lock_page 36 24 -12
page_cache_pipe_buf_release 16 - -16
splice_write_null 24 4 -20
page_cache_pipe_buf_ops 20 - -20
nosteal_pipe_buf_ops 20 - -20
default_pipe_buf_ops 20 - -20
generic_splice_sendpage 24 - -24
splice_shrink_spd 27 - -27
direct_splice_actor 47 - -47
default_file_splice_write 49 - -49
wakeup_pipe_writers 54 - -54
write_pipe_buf 71 - -71
page_cache_pipe_buf_confirm 80 - -80
splice_grow_spd 87 - -87
splice_from_pipe 93 - -93
splice_from_pipe_next 106 - -106
pipe_to_sendpage 109 - -109
page_cache_pipe_buf_steal 114 - -114
generic_file_splice_read 131 8 -123
do_splice_direct 148 - -148
__splice_from_pipe 246 - -246
splice_direct_to_actor 416 - -416
splice_to_pipe 417 - -417
default_file_splice_read 688 - -688
iter_file_splice_write 702 4 -698
__generic_file_splice_read 1109 - -1109
The bloat score for the entire CONFIG_SYSCALL_SPLICE patch-set is:
add/remove: 0/41 grow/shrink: 5/7 up/down: 23/-8422 (-8399)
function old new delta
sys_pwritev 115 122 +7
sys_preadv 115 122 +7
fdput_pos 29 36 +7
sys_pwrite64 115 116 +1
sys_pread64 115 116 +1
pipe_to_null 4 - -4
generic_pipe_buf_nosteal 6 - -6
spd_release_page 10 - -10
fdput 11 - -11
PageUptodate 22 11 -11
lock_page 36 24 -12
signal_pending 39 26 -13
fdget 56 42 -14
page_cache_pipe_buf_release 16 - -16
user_page_pipe_buf_ops 20 - -20
splice_write_null 24 4 -20
page_cache_pipe_buf_ops 20 - -20
nosteal_pipe_buf_ops 20 - -20
default_pipe_buf_ops 20 - -20
generic_splice_sendpage 24 - -24
user_page_pipe_buf_steal 25 - -25
splice_shrink_spd 27 - -27
pipe_to_user 43 - -43
direct_splice_actor 47 - -47
default_file_splice_write 49 - -49
wakeup_pipe_writers 54 - -54
wakeup_pipe_readers 54 - -54
write_pipe_buf 71 - -71
page_cache_pipe_buf_confirm 80 - -80
splice_grow_spd 87 - -87
do_splice_to 87 - -87
ipipe_prep.part 92 - -92
splice_from_pipe 93 - -93
splice_from_pipe_next 107 - -107
pipe_to_sendpage 109 - -109
page_cache_pipe_buf_steal 114 - -114
opipe_prep.part 119 - -119
sys_sendfile 122 - -122
generic_file_splice_read 131 8 -123
sys_sendfile64 126 - -126
sys_vmsplice 137 - -137
do_splice_direct 148 - -148
vmsplice_to_user 205 - -205
__splice_from_pipe 246 - -246
splice_direct_to_actor 348 - -348
splice_to_pipe 371 - -371
do_sendfile 492 - -492
sys_tee 497 - -497
vmsplice_to_pipe 558 - -558
default_file_splice_read 688 - -688
iter_file_splice_write 702 4 -698
sys_splice 1075 - -1075
__generic_file_splice_read 1109 - -1109
Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>
---
fs/Makefile | 3 ++-
fs/splice.c | 2 --
include/linux/fs.h | 26 ++++++++++++++++++++++++++
include/linux/splice.h | 42 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/fs/Makefile b/fs/Makefile
index fb7646e..9395622 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o super.o \
ioctl.o readdir.o select.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o splice.o sync.o utimes.o \
+ pnode.o sync.o utimes.o \
stack.o fs_struct.o statfs.o fs_pin.o
ifeq ($(CONFIG_BLOCK),y)
@@ -22,6 +22,7 @@ endif
obj-$(CONFIG_PROC_FS) += proc_namespace.o
obj-$(CONFIG_FSNOTIFY) += notify/
+obj-$(CONFIG_SYSCALL_SPLICE) += splice.o
obj-$(CONFIG_EPOLL) += eventpoll.o
obj-$(CONFIG_ANON_INODES) += anon_inodes.o
obj-$(CONFIG_SIGNALFD) += signalfd.o
diff --git a/fs/splice.c b/fs/splice.c
index 7c4c695..44b201b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1316,7 +1316,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
return ret;
}
-#ifdef CONFIG_SYSCALL_SPLICE
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
@@ -2201,5 +2200,4 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif
-#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a957d43..138107e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2444,6 +2444,7 @@ extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
extern void block_sync_page(struct page *page);
/* fs/splice.c */
+#ifdef CONFIG_SYSCALL_SPLICE
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t default_file_splice_read(struct file *, loff_t *,
@@ -2452,6 +2453,31 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);
+#else
+static inline ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+
+static inline ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+
+static inline ssize_t iter_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+
+static inline ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len, unsigned int flags)
+{
+ return -EPERM;
+}
+#endif
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index da2751d..34570d8 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -65,6 +65,7 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
typedef int (splice_direct_actor)(struct pipe_inode_info *,
struct splice_desc *);
+#ifdef CONFIG_SYSCALL_SPLICE
extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int,
splice_actor *);
@@ -74,13 +75,54 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
+#else
+static inline ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags,
+ splice_actor *actor)
+{
+ return -EPERM;
+}
+
+static inline ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
+{
+ return -EPERM;
+}
+
+static inline ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
+{
+ return -EPERM;
+}
+
+static inline ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+ splice_direct_actor *actor)
+{
+ return -EPERM;
+}
+#endif
/*
* for dynamic pipe sizing
*/
+#ifdef CONFIG_SYSCALL_SPLICE
extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *);
extern void splice_shrink_spd(struct splice_pipe_desc *);
extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
+#else
+static inline int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+{
+ return -EPERM;
+}
+
+static inline void splice_shrink_spd(struct splice_pipe_desc *spd)
+{
+}
+
+static inline void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+{
+}
+#endif
extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
#endif
--
2.1.0
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
^ permalink raw reply related
* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Srikar Dronamraju @ 2014-11-25 10:12 UTC (permalink / raw)
To: Khalid Aziz
Cc: tglx-hfZtesqFncYOwBW4kG4KsQ, corbet-T1hC0tSOHrs,
mingo-H+wXaHxf7aLQT0dZR+AlfA, hpa-YMNOUZJC4hwAvxtiuMwx3w,
peterz-wEGCiKHe2LqWVfeAwA7xHQ, riel-H+wXaHxf7aLQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
rientjes-hpIqsD4AKlfQT0dZR+AlfA, ak-VuQAYsv1563Yd54FQh9/CA,
mgorman-l3A5Bk7waGM, liwanp-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
raistlin-k2GhghHVRtY, kirill.shutemov-VuQAYsv1563Yd54FQh9/CA,
atomlin-H+wXaHxf7aLQT0dZR+AlfA, avagin-GEFAQzZX7r8dnm+yROfE0A,
gorcunov-GEFAQzZX7r8dnm+yROfE0A,
serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw, athorlton-sJ/iWh9BUns,
oleg-H+wXaHxf7aLQT0dZR+AlfA, vdavydov-bzQdu9zFT3WakBO8gow8eQ,
daeseok.youn-Re5JQEeQqe8AvxtiuMwx3w,
keescook-F7+t8E8rja9g9hUCZPvPmw,
yangds.fnst-BthXqXjhjHXQFUHtdCDX3A, sbauer-F61uvSdQLzf2fBVCVOL8/A,
vishnu.ps-Sze3O3UU22JBDgjK7y7TUQ, axboe-b10kYP2dOMg,
paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-doc-u79uwXL29TY76Z2rM5mHXA,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1416862595-24513-1-git-send-email-khalid.aziz-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
>
> - Request to borrow timeslice is not guaranteed to be honored.
> - If the task is allowed to borrow, kernel will inform the task
> of this. When this happens, task must yield the processor as soon
> as it completes its critical section.
> - If the task fails to yield processor after being allowed to
> borrow, it is penalized by forcing it to skip its next time slot
> by the scheduler.
> - Task is charged additional time for the borrowed timeslice as
> accumulated run time. This pushes it further down in consideration
> for the next task to run.
>
Is there a way for us to identify if the lock is contended?
Because it may not be prudent to allow a task to borrow timeslice for a
lock which isn't contended.
--
Thanks and Regards
Srikar Dronamraju
^ permalink raw reply
* Re: [PATCHv10 2/5] x86: Hook up execveat system call.
From: Dan Carpenter @ 2014-11-25 12:16 UTC (permalink / raw)
To: David Drysdale
Cc: Eric W. Biederman, Andy Lutomirski, Alexander Viro, Meredydd Luff,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Andrew Morton, David Miller, Thomas Gleixner, Stephen Rothwell,
Oleg Nesterov, Michael Kerrisk, Ingo Molnar, H. Peter Anvin,
Kees Cook, Arnd Bergmann, Rich Felker, Christoph Hellwig, X86 ML,
linux-arch, Linux API, sparclinux-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <CAHse=S-DS=NGC619Uhzkbd-EKa0D+HgBq3rE1czmLdoxAFswPg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
On Mon, Nov 24, 2014 at 06:26:24PM +0000, David Drysdale wrote:
> On Mon, Nov 24, 2014 at 5:06 PM, Dan Carpenter <dan.carpenter-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> wrote:
> > On Mon, Nov 24, 2014 at 11:53:56AM +0000, David Drysdale wrote:
> >> Hook up x86-64, i386 and x32 ABIs.
> >>
> >> Signed-off-by: David Drysdale <drysdale-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> >
> > This one has been breaking my linux-next build for the past week. I'm
> > not sure what's going on.
>
> Hi Dan,
>
> Sorry if this has been causing you problems -- I've not had any
> errors from the kbuild robots or my local builds.
>
For some reason I had a stale copy of
arch/x86/include/generated/asm/unistd_32.h and it was using that in
preference to the arch/x86/include/generated/uapi/asm/unistd_32.h file.
Once I ran:
rm arch/x86/include/generated/ -rf
Then it builds now.
I'm not sure what that's all about but it's fixed now.
regards,
dan carpenter
^ permalink raw reply
* [PATCH v3 0/5] Add Spreadtrum Sharkl64 Platform support
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
wei.qiao
Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
sprdlinux, linux-arm-kernel
In-Reply-To: <Sharkl64-v3>
Spreadtrum is a rapidly growing chip vendor providing smart phone total solutions.
Sharkl64 Platform is nominated as a SoC infrastructure that supports 4G/3G/2G
standards based on ARMv8 multiple core architecture. Now we have only one
SoC (SC9836) based on this Platform in development.
This patchset adds Sharkl64 support in arm64 device tree and the serial driver
of SC9836-UART.
This patchset also has patches which address "sprd" prefix and DT compatible
strings for nodes which appear un-documented.
This version of the code was tested on Fast Mode.
We use boot-wrapper-aarch64 as the bootloader.
Changes from v2:
* Addressed review comments:
- Added a specific compatible string 'sc9836-uart' for the serial
- Added a full serial driver
- Added the property 'clock-frequency' for timer node in dtsi.
- Replaced the old macro prefix 'UART_' with 'SPRD_' in the
Spreadtrum serial driver code.
* Revised the name of SoC and board from 'sharkl3' to 'sc9836'
* Used dual-license for DTS files
* Added a menuconfig 'ARCH_SPRD' in arch/arm64/Kconfig
Changes from v1:
* Addressed review comments:
- Added "sprd" prefix to vendor-prefixes.txt
- Created serial/sprd-serial.txt and remove the properties for serial-sprd
from of-serial.txt to it.
- Renamed of-serial.txt to 8250.txt according to Arnd's review comments
- Split and revised .dts for Sharkl64 Platform
- Changed to PSCI method for cpu power management
- Revised Kconfig Makefile to match the alphabetical ordering
- Renamed serial-sprd-earlycon.c to serial-sprd.c
Chunyan Zhang (3):
Documentation: DT: Renamed of-serial.txt to 8250.txt
Documentation: DT: Add bindings for Spreadtrum SoC Platform
tty/serial: Add Spreadtrum sc9836-uart driver support
Zhizhou Zhang (2):
arm64: dts: Add support for Spreadtrum SC9836 SoC in dts and Makefile
arm64: Add support for Spreadtrum's Sharkl64 Platform in Kconfig and
defconfig
Documentation/devices.txt | 3 +
Documentation/devicetree/bindings/arm/sprd.txt | 11 +
.../bindings/serial/{of-serial.txt => 8250.txt} | 0
.../devicetree/bindings/serial/sprd-uart.txt | 6 +
.../devicetree/bindings/vendor-prefixes.txt | 1 +
arch/arm64/Kconfig | 17 +
arch/arm64/boot/dts/Makefile | 1 +
arch/arm64/boot/dts/sprd-sc9836-openphone.dts | 85 +++
arch/arm64/boot/dts/sprd-sc9836.dtsi | 103 +++
arch/arm64/boot/dts/sprd-sharkl64.dtsi | 105 +++
arch/arm64/configs/defconfig | 2 +
drivers/tty/serial/Kconfig | 23 +
drivers/tty/serial/Makefile | 1 +
drivers/tty/serial/sprd_serial.c | 752 ++++++++++++++++++++
include/uapi/linux/serial_core.h | 3 +
15 files changed, 1113 insertions(+)
create mode 100644 Documentation/devicetree/bindings/arm/sprd.txt
rename Documentation/devicetree/bindings/serial/{of-serial.txt => 8250.txt} (100%)
create mode 100644 Documentation/devicetree/bindings/serial/sprd-uart.txt
create mode 100644 arch/arm64/boot/dts/sprd-sc9836-openphone.dts
create mode 100644 arch/arm64/boot/dts/sprd-sc9836.dtsi
create mode 100644 arch/arm64/boot/dts/sprd-sharkl64.dtsi
create mode 100644 drivers/tty/serial/sprd_serial.c
--
1.7.9.5
^ permalink raw reply
* [PATCH v3 1/5] Documentation: DT: Renamed of-serial.txt to 8250.txt
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
wei.qiao
Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
sprdlinux, linux-arm-kernel
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>
The file of-serial.txt was only for 8250-compatible UART implementations,
so rename it to 8250.txt to avoid confusion.
This is recommended by Arnd, see:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-September/291455.html
Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
---
.../bindings/serial/{of-serial.txt => 8250.txt} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename Documentation/devicetree/bindings/serial/{of-serial.txt => 8250.txt} (100%)
diff --git a/Documentation/devicetree/bindings/serial/of-serial.txt b/Documentation/devicetree/bindings/serial/8250.txt
similarity index 100%
rename from Documentation/devicetree/bindings/serial/of-serial.txt
rename to Documentation/devicetree/bindings/serial/8250.txt
--
1.7.9.5
^ permalink raw reply
* [PATCH v3 2/5] Documentation: DT: Add bindings for Spreadtrum SoC Platform
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
wei.qiao
Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
sprdlinux, linux-arm-kernel
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>
Adds Spreadtrum's prefix "sprd" to vendor-prefixes file.
Adds the devicetree binding documentation for Spreadtrum's sc9836-uart
and SC9836 SoC based on the Sharkl64 Platform which is a 64-bit SoC
Platform of Spreadtrum.
Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Signed-off-by: Orson Zhai <orson.zhai@spreadtrum.com>
---
Documentation/devicetree/bindings/arm/sprd.txt | 11 +++++++++++
.../devicetree/bindings/serial/sprd-uart.txt | 6 ++++++
.../devicetree/bindings/vendor-prefixes.txt | 1 +
3 files changed, 18 insertions(+)
create mode 100644 Documentation/devicetree/bindings/arm/sprd.txt
create mode 100644 Documentation/devicetree/bindings/serial/sprd-uart.txt
diff --git a/Documentation/devicetree/bindings/arm/sprd.txt b/Documentation/devicetree/bindings/arm/sprd.txt
new file mode 100644
index 0000000..31a629d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sprd.txt
@@ -0,0 +1,11 @@
+Spreadtrum SoC Platforms Device Tree Bindings
+----------------------------------------------------
+
+Sharkl64 is a Spreadtrum's SoC Platform which is based
+on ARM 64-bit processor.
+
+SC9836 openphone board with SC9836 SoC based on the
+Sharkl64 Platform shall have the following properties.
+
+Required root node properties:
+ - compatible = "sprd,sc9836-openphone", "sprd,sc9836";
diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.txt b/Documentation/devicetree/bindings/serial/sprd-uart.txt
new file mode 100644
index 0000000..54e532f
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/sprd-uart.txt
@@ -0,0 +1,6 @@
+* Spreadtrum serial UART
+
+Required properties:
+- compatible: must be "sprd,sc9836-uart"
+- reg: offset and length of the register set for the device
+- interrupts: exactly one interrupt specifier
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 723999d..ce99ecdfb 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -142,6 +142,7 @@ snps Synopsys, Inc.
solidrun SolidRun
sony Sony Corporation
spansion Spansion Inc.
+sprd Spreadtrum Communications Inc.
st STMicroelectronics
ste ST-Ericsson
stericsson ST-Ericsson
--
1.7.9.5
^ permalink raw reply related
* [PATCH v3 3/5] arm64: dts: Add support for Spreadtrum SC9836 SoC in dts and Makefile
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
wei.qiao
Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
sprdlinux, linux-arm-kernel
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>
From: Zhizhou Zhang <zhizhou.zhang@spreadtrum.com>
Adds the device tree support for Spreadtrum SC9836 SoC which is based on
Sharkl64 platform.
Sharkl64 platform contains the common nodes of Spreadtrum's arm64-based SoCs.
Signed-off-by: Zhizhou Zhang <zhizhou.zhang@spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Signed-off-by: Orson Zhai <orson.zhai@spreadtrum.com>
---
arch/arm64/boot/dts/Makefile | 1 +
arch/arm64/boot/dts/sprd-sc9836-openphone.dts | 85 ++++++++++++++++++++
arch/arm64/boot/dts/sprd-sc9836.dtsi | 103 ++++++++++++++++++++++++
arch/arm64/boot/dts/sprd-sharkl64.dtsi | 105 +++++++++++++++++++++++++
4 files changed, 294 insertions(+)
create mode 100644 arch/arm64/boot/dts/sprd-sc9836-openphone.dts
create mode 100644 arch/arm64/boot/dts/sprd-sc9836.dtsi
create mode 100644 arch/arm64/boot/dts/sprd-sharkl64.dtsi
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index f8001a6..d0aff8a 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,4 +1,5 @@
dtb-$(CONFIG_ARCH_THUNDER) += thunder-88xx.dtb
+dtb-$(CONFIG_ARCH_SHARKL64) += sprd-sc9836-openphone.dtb
dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
diff --git a/arch/arm64/boot/dts/sprd-sc9836-openphone.dts b/arch/arm64/boot/dts/sprd-sc9836-openphone.dts
new file mode 100644
index 0000000..484d714
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd-sc9836-openphone.dts
@@ -0,0 +1,85 @@
+/*
+ * Spreadtrum SC9836 openphone board DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ * a) This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ * b) Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+#include "sprd-sc9836.dtsi"
+
+/ {
+ model = "Spreadtrum,SC9836 Openphone Board";
+
+ compatible = "sprd,sc9836-openphone", "sprd,sc9836";
+
+ aliases {
+ serial0 = &uart0;
+ serial1 = &uart1;
+ serial2 = &uart2;
+ serial3 = &uart3;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x20000000>;
+ };
+
+ chosen {
+ stdout-path = &uart0;
+ };
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&uart1 {
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&uart3 {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/sprd-sc9836.dtsi b/arch/arm64/boot/dts/sprd-sc9836.dtsi
new file mode 100644
index 0000000..d5fe552
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd-sc9836.dtsi
@@ -0,0 +1,103 @@
+/*
+ * Spreadtrum SC9836 SoC DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ * a) This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ * b) Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sprd-sharkl64.dtsi"
+
+/ {
+ compatible = "sprd,sc9836";
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ };
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ };
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ };
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ };
+ };
+
+ gic: interrupt-controller@12001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0 0x12001000 0 0x1000>,
+ <0 0x12002000 0 0x1000>,
+ <0 0x12004000 0 0x2000>,
+ <0 0x12006000 0 0x2000>;
+ };
+
+ psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <1 13 0xff01>,
+ <1 14 0xff01>,
+ <1 11 0xff01>,
+ <1 10 0xff01>;
+ clock-frequency = <26000000>;
+ };
+};
diff --git a/arch/arm64/boot/dts/sprd-sharkl64.dtsi b/arch/arm64/boot/dts/sprd-sharkl64.dtsi
new file mode 100644
index 0000000..d9ecfe9
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd-sharkl64.dtsi
@@ -0,0 +1,105 @@
+/*
+ * Spreadtrum Sharkl64 platform DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ * a) This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ * b) Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/ {
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ soc {
+ compatible = "simple-bus";
+ reg = <0x0 0x0 0x0 0x80000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ ap_apb: apb@70000000 {
+ compatible = "simple-bus";
+ reg = <0x0 0x70000000 0x0 0x10000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ uart0: serial@70000000 {
+ compatible = "sprd,sc9836-uart";
+ reg = <0 0x70000000 0 0x100>;
+ interrupts = <0 2 0xf04>;
+ clocks = <&clk26mhz>;
+ status = "disabled";
+ };
+
+ uart1: serial@70100000 {
+ compatible = "sprd,sc9836-uart";
+ reg = <0 0x70100000 0 0x100>;
+ interrupts = <0 3 0xf04>;
+ clocks = <&clk26mhz>;
+ status = "disabled";
+ };
+
+ uart2: serial@70200000 {
+ compatible = "sprd,sc9836-uart";
+ reg = <0 0x70200000 0 0x100>;
+ interrupts = <0 4 0xf04>; /* GIC SPI 4, distinct from uart0's SPI 2 */
+ clocks = <&clk26mhz>;
+ status = "disabled";
+ };
+
+ uart3: serial@70300000 {
+ compatible = "sprd,sc9836-uart";
+ reg = <0 0x70300000 0 0x100>;
+ interrupts = <0 5 0xf04>; /* GIC SPI 5, distinct from uart1's SPI 3 */
+ clocks = <&clk26mhz>;
+ status = "disabled";
+ };
+ };
+ };
+
+ clocks {
+ clk26mhz: clk26mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ };
+ };
+};
--
1.7.9.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox