* Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
@ 2007-06-25 8:49 David Woodhouse
2007-06-25 9:11 ` Andrew Morton
2007-06-27 13:22 ` Ralf Baechle
0 siblings, 2 replies; 18+ messages in thread
From: David Woodhouse @ 2007-06-25 8:49 UTC (permalink / raw)
To: torvalds, akpm; +Cc: paulus, linux-arch, drepper, rmk
Not all the world is an i386. Many architectures need 64-bit arguments
to be aligned in suitable pairs of registers, and the original
sys_sync_file_range(int, loff_t, loff_t, int) was therefore wasting an
argument register for padding after the first integer. Since we don't
normally have more than 6 arguments for system calls, that left no room
for the final argument on some architectures.
Fix this by introducing sys_sync_file_range2(int, int, loff_t, loff_t)
which all fits nicely. In fact, ARM already had that, but called it
sys_arm_sync_file_range. Move it to fs/sync.c and rename it, then
implement the needed compatibility routine. And stop the missing syscall
check from bitching about the absence of sys_sync_file_range() if we've
implemented sys_sync_file_range2() instead.
Tested on PPC32 and with 32-bit and 64-bit userspace on PPC64.
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-By: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 19326d7..a98d0c9 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -350,7 +350,7 @@
CALL(sys_set_robust_list)
CALL(sys_get_robust_list)
/* 340 */ CALL(sys_splice)
- CALL(sys_arm_sync_file_range)
+ CALL(sys_sync_file_range2)
CALL(sys_tee)
CALL(sys_vmsplice)
CALL(sys_move_pages)
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 1ca2d51..4d25e49 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -328,16 +328,3 @@ asmlinkage long sys_arm_fadvise64_64(int fd, int advice,
{
return sys_fadvise64_64(fd, offset, len, advice);
}
-
-/*
- * Yet more syscall fsckage - we can't fit sys_sync_file_range's
- * arguments into the available registers with EABI. So, let's
- * create an ARM specific syscall for this which has _sane_
- * arguments. (This incidentally also has an ABI-independent
- * argument layout.)
- */
-asmlinkage long sys_arm_sync_file_range(int fd, unsigned int flags,
- loff_t offset, loff_t nbytes)
-{
- return sys_sync_file_range(fd, offset, nbytes, flags);
-}
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 047246a..b42cbf1 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -810,3 +810,12 @@ asmlinkage long compat_sys_request_key(const char __user *_type,
return sys_request_key(_type, _description, _callout_info, destringid);
}
+asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
+ unsigned offset_hi, unsigned offset_lo,
+ unsigned nbytes_hi, unsigned nbytes_lo)
+{
+ loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
+ loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo;
+
+ return sys_sync_file_range(fd, offset, nbytes, flags);
+}
diff --git a/fs/sync.c b/fs/sync.c
index 2f97576..7cd005e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -236,6 +236,14 @@ out:
return ret;
}
+/* It would be nice if people remember that not all the world's an i386
+ when they introduce new system calls */
+asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
+ loff_t offset, loff_t nbytes)
+{
+ return sys_sync_file_range(fd, offset, nbytes, flags);
+}
+
/*
* `endbyte' is inclusive
*/
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index 250d7f1..bfdbebe 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -367,6 +367,7 @@
#define __NR_get_robust_list (__NR_SYSCALL_BASE+339)
#define __NR_splice (__NR_SYSCALL_BASE+340)
#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341)
+#define __NR_sync_file_range2 __NR_arm_sync_file_range
#define __NR_tee (__NR_SYSCALL_BASE+342)
#define __NR_vmsplice (__NR_SYSCALL_BASE+343)
#define __NR_move_pages (__NR_SYSCALL_BASE+344)
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index 700ca59..1cc3f9c 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -311,3 +311,4 @@ COMPAT_SYS_SPU(utimensat)
COMPAT_SYS_SPU(signalfd)
COMPAT_SYS_SPU(timerfd)
SYSCALL_SPU(eventfd)
+COMPAT_SYS_SPU(sync_file_range2)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index e3c28dc..f71c606 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -330,10 +330,11 @@
#define __NR_signalfd 305
#define __NR_timerfd 306
#define __NR_eventfd 307
+#define __NR_sync_file_range2 308
#ifdef __KERNEL__
-#define __NR_syscalls 308
+#define __NR_syscalls 309
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b02070e..83d0ec1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -598,6 +598,8 @@ asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags);
asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
unsigned int flags);
+asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
+ loff_t offset, loff_t nbytes);
asmlinkage long sys_get_robust_list(int pid,
struct robust_list_head __user * __user *head_ptr,
size_t __user *len_ptr);
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index f98171f..0dcc01c 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -99,6 +99,11 @@ cat << EOF
#define __IGNORE_setfsuid32
#define __IGNORE_setfsgid32
+/* sync_file_range had a stupid ABI. Allow sync_file_range2 instead */
+#ifdef __NR_sync_file_range2
+#define __IGNORE_sync_file_range
+#endif
+
/* Unmerged syscalls for AFS, STREAMS, etc. */
#define __IGNORE_afs_syscall
#define __IGNORE_getpmsg
--
dwmw2
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 8:49 Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM David Woodhouse
@ 2007-06-25 9:11 ` Andrew Morton
2007-06-25 9:48 ` David Woodhouse
2007-06-25 10:35 ` Matthew Wilcox
2007-06-27 13:22 ` Ralf Baechle
1 sibling, 2 replies; 18+ messages in thread
From: Andrew Morton @ 2007-06-25 9:11 UTC (permalink / raw)
To: David Woodhouse; +Cc: torvalds, paulus, linux-arch, drepper, rmk
On Mon, 25 Jun 2007 09:49:17 +0100 David Woodhouse <dwmw2@infradead.org> wrote:
> +/* It would be nice if people remember that not all the world's an i386
> + when they introduce new system calls */
I think we could do without the smartarse comments, frankly. It took about
two weeks and 1000000000 emails for you guys to sort out the fallocate()
ABI. How would you like "it would be nice if maintainers of oddball
architectures would pay attention"?
> +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
> + unsigned offset_hi, unsigned offset_lo,
> + unsigned nbytes_hi, unsigned nbytes_lo)
> +{
> + loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
> + loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo;
> +
> + return sys_sync_file_range(fd, offset, nbytes, flags);
> +}
>
versus
+asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
+ u32 lenhi, u32 lenlo)
+{
+ return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
+ ((loff_t)lenhi << 32) | lenlo);
+}
the naming, the implementation and the types are all inconsistent. Can we
pick one style and stick to it?
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 9:11 ` Andrew Morton
@ 2007-06-25 9:48 ` David Woodhouse
2007-06-25 10:24 ` Matthew Wilcox
2007-06-25 10:35 ` Matthew Wilcox
1 sibling, 1 reply; 18+ messages in thread
From: David Woodhouse @ 2007-06-25 9:48 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, paulus, linux-arch, drepper, rmk
On Mon, 2007-06-25 at 02:11 -0700, Andrew Morton wrote:
> On Mon, 25 Jun 2007 09:49:17 +0100 David Woodhouse <dwmw2@infradead.org> wrote:
>
> > +/* It would be nice if people remember that not all the world's an i386
> > + when they introduce new system calls */
>
> I think we could do without the smartarse comments, frankly. It took about
> two weeks and 1000000000 emails for you guys to sort out the fallocate()
> ABI.
It's exactly the same case -- two ints and two loff_ts. If we'd already
sorted it out properly for sys_sync_file_range() then it would have been
a no-brainer for fallocate().
> How would you like "it would be nice if maintainers of oddball
> architectures would pay attention"?
Seems like a reasonable observation, although 'oddball' isn't really the
case here. There are a bunch of architectures which align 64-bit
arguments into even pairs of registers. And a lot of people who forget
that 64-bit quantities are often aligned to 8 bytes, on non-x86.
cf. f4d2781731e846c2f01dd85e71883d120860c6dd
But aside from nit-picking the wording, I do agree with the sentiment --
and that's precisely why you'll see warnings about sys_sync_file_range()
being unimplemented, when you build current kernels on certain
platforms. So that all arch maintainers pay attention to new syscalls.
> the naming, the implementation and the types are all inconsistent. Can we
> pick one style and stick to it?
It might actually be useful to merge all these into fs/compat.c. I think
the only reason most of them are arch-specific at the moment is because
we have to deal with endianness when we put the two 32-bit integers
together into a 64-bit integer. And MIPS copes well enough with that,
with its merge_64() macro.
--
dwmw2
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 9:48 ` David Woodhouse
@ 2007-06-25 10:24 ` Matthew Wilcox
0 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox @ 2007-06-25 10:24 UTC (permalink / raw)
To: David Woodhouse; +Cc: Andrew Morton, torvalds, paulus, linux-arch, drepper, rmk
On Mon, Jun 25, 2007 at 10:48:10AM +0100, David Woodhouse wrote:
> Seems like a reasonable observation, although 'oddball' isn't really the
> case here. There are a bunch of architectures which align 64-bit
> arguments into even pairs of registers. And a lot of people who forget
> that 64-bit quantities are often aligned to 8 bytes, on non-x86.
> cf. f4d2781731e846c2f01dd85e71883d120860c6dd
[...]
> It might actually be useful to merge all these into fs/compat.c. I think
> the only reason most of them are arch-specific at the moment is because
> we have to deal with endianness when we put the two 32-bit integers
> together into a 64-bit integer. And MIPS copes well enough with that,
> with its merge_64() macro.
PowerPC is new to me -- I had thought that MIPS and PA-RISC were the
only two. Seems like you took the opposite path from parisc -- you've
got glibc to call the functions correctly, rather than what we did which
was fix them up in the kernel.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 9:11 ` Andrew Morton
2007-06-25 9:48 ` David Woodhouse
@ 2007-06-25 10:35 ` Matthew Wilcox
2007-06-25 11:09 ` Russell King
2007-06-25 11:33 ` David Howells
1 sibling, 2 replies; 18+ messages in thread
From: Matthew Wilcox @ 2007-06-25 10:35 UTC (permalink / raw)
To: Andrew Morton; +Cc: David Woodhouse, torvalds, paulus, linux-arch, drepper, rmk
On Mon, Jun 25, 2007 at 02:11:45AM -0700, Andrew Morton wrote:
> ABI. How would you like "it would be nice if maintainers of oddball
> architectures would pay attention"?
If new syscalls got posted to linux-arch for discussion, I assure you,
we'd pay attention.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 10:35 ` Matthew Wilcox
@ 2007-06-25 11:09 ` Russell King
2007-06-25 11:37 ` David Woodhouse
2007-06-25 11:33 ` David Howells
1 sibling, 1 reply; 18+ messages in thread
From: Russell King @ 2007-06-25 11:09 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Andrew Morton, David Woodhouse, torvalds, paulus, linux-arch,
drepper
On Mon, Jun 25, 2007 at 04:35:35AM -0600, Matthew Wilcox wrote:
> On Mon, Jun 25, 2007 at 02:11:45AM -0700, Andrew Morton wrote:
> > ABI. How would you like "it would be nice if maintainers of oddball
> > architectures would pay attention"?
>
> If new syscalls got posted to linux-arch for discussion, I assure you,
> we'd pay attention.
Ditto. sys_sync_file_range() wasn't, so I think David's sentiment is
bang on.
And as David says, we've _finally_ been round the discussion loop with
fallocate, so in theory we now know what the issues are, and we _all_
have a good idea how to deal with argument ordering to satisfy the
majority. That should mean that subsequent discussion be shorter.
Also, I find that the accusation that "maintainers of oddball architectures
would pay attention" to be rather insulting. I've tried to keep abreast
of everything that might possibly affect ARM, but it's utterly impossible
given the amount of what is quite frankly noise and utter drivel.
Thankfully, linux-arch is low noise. We just need to convince those who
want to add additional syscalls to copy their stuff here, such as that
sys_sync_file_range() patch (http://lwn.net/Articles/177830/)
At least now, if they don't, provided we build every -rc kernel as it's
released, we can detect when new syscalls are added quickly and give
those submitters a suitable roasting at gas mark 95.
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of:
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 10:35 ` Matthew Wilcox
2007-06-25 11:09 ` Russell King
@ 2007-06-25 11:33 ` David Howells
2007-06-25 14:35 ` Kyle McMartin
1 sibling, 1 reply; 18+ messages in thread
From: David Howells @ 2007-06-25 11:33 UTC (permalink / raw)
To: Russell King
Cc: Matthew Wilcox, Andrew Morton, David Woodhouse, torvalds, paulus,
linux-arch, drepper
Russell King <rmk@arm.linux.org.uk> wrote:
> Thankfully, linux-arch is low noise. We just need to convince those who
> want to add additional syscalls to copy their stuff here, such as that
> sys_sync_file_range() patch (http://lwn.net/Articles/177830/)
Agreed. And a prototype manual page should perhaps be included in the patch
description...
David
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:09 ` Russell King
@ 2007-06-25 11:37 ` David Woodhouse
2007-06-25 11:47 ` Matthew Wilcox
2007-06-27 12:23 ` Geert Uytterhoeven
0 siblings, 2 replies; 18+ messages in thread
From: David Woodhouse @ 2007-06-25 11:37 UTC (permalink / raw)
To: Russell King
Cc: Matthew Wilcox, Andrew Morton, torvalds, paulus, linux-arch,
drepper
On Mon, 2007-06-25 at 12:09 +0100, Russell King wrote:
> On Mon, Jun 25, 2007 at 04:35:35AM -0600, Matthew Wilcox wrote:
> > On Mon, Jun 25, 2007 at 02:11:45AM -0700, Andrew Morton wrote:
> > > ABI. How would you like "it would be nice if maintainers of oddball
> > > architectures would pay attention"?
> >
> > If new syscalls got posted to linux-arch for discussion, I assure you,
> > we'd pay attention.
>
> Ditto. sys_sync_file_range() wasn't, so I think David's sentiment is
> bang on.
>
> And as David says, we've _finally_ been round the discussion loop with
> fallocate, so in theory we now know what the issues are, and we _all_
> have a good idea how to deal with argument ordering to satisfy the
> majority. That should mean that subsequent discussion be shorter.
Well, those who were paying attention might. We should probably add
Documentation/new-syscalls.txt with such information as...
---
Never add any system call without considering how its prototype works
for all architectures and for 32-bit userspace on 64-bit kernels.
- Some architectures have a limit of 6 (32-bit) argument slots.
- Some architectures must align 64-bit integers into an aligned
pair of registers. A slot may be wasted for padding.
- S390 may not have a 64-bit integer in slots 5/6.
Where you invent a data structure for communication with userspace, be
aware of the following:
- Try to ensure your data structure will be identical for 32-bit and
64-bit builds, if possible. That way, you avoid the need to implement
compatibility routines for 32-bit userspace on 64-bit kernels. In
particular, avoid the 'long' data type. Try to use explicitly sized
types such as 'uint64_t' instead.
- Most architectures align 64-bit integers to 8 bytes, but i386 doesn't.
If you have to implement 32-bit compatibility, make sure you get this
right. Preferably, avoid the problem by ensuring that your 64-bit
integers are naturally placed with 8-byte alignment even without
padding.
---
> At least now, if they don't, provided we build every -rc kernel as it's
> released, we can detect when new syscalls are added quickly and give
> those submitters a suitable roasting at gas mark 95.
Linus should be refusing any new system call which doesn't at _least_
handle the 32/64 compatibility issues. Explictly stating that it doesn't
need compatibility wrappers would be OK, as long as it's true -- but
just saying _nothing_ about it is bad.
--
dwmw2
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:37 ` David Woodhouse
@ 2007-06-25 11:47 ` Matthew Wilcox
2007-06-25 12:01 ` David Woodhouse
` (2 more replies)
2007-06-27 12:23 ` Geert Uytterhoeven
1 sibling, 3 replies; 18+ messages in thread
From: Matthew Wilcox @ 2007-06-25 11:47 UTC (permalink / raw)
To: David Woodhouse
Cc: Russell King, Andrew Morton, torvalds, paulus, linux-arch,
drepper
On Mon, Jun 25, 2007 at 12:37:34PM +0100, David Woodhouse wrote:
> - Some architectures must align 64-bit integers into an aligned
> pair of registers. A slot may be wasted for padding.
> - S390 may not have a 64-bit integer in slots 5/6.
Uhm, doesn't sys_sync_file_range2 break that?
+asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
+ unsigned offset_hi, unsigned offset_lo,
+ unsigned nbytes_hi, unsigned nbytes_lo)
How about:
asmlinkage long compat_sys_sync_file_range2(unsigned offset_hi,
unsigned offset_lo, unsigned nbytes_hi, unsigned nbytes_lo,
int fd, unsigned int flags)
Also, you might want to put something in the syscall file about signed
vs unsigned arguments and how they behave with 32-on-64 systems.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:47 ` Matthew Wilcox
@ 2007-06-25 12:01 ` David Woodhouse
2007-06-25 12:04 ` Segher Boessenkool
2007-06-27 13:34 ` Ralf Baechle
2 siblings, 0 replies; 18+ messages in thread
From: David Woodhouse @ 2007-06-25 12:01 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Russell King, Andrew Morton, torvalds, paulus, linux-arch,
drepper
On Mon, 2007-06-25 at 05:47 -0600, Matthew Wilcox wrote:
> On Mon, Jun 25, 2007 at 12:37:34PM +0100, David Woodhouse wrote:
> > - Some architectures must align 64-bit integers into an aligned
> > pair of registers. A slot may be wasted for padding.
> > - S390 may not have a 64-bit integer in slots 5/6.
>
> Uhm, doesn't sys_sync_file_range2 break that?
Yes, but the consensus when it was discussed for fallocate() seemed to
be that it was more important to have the fd first, for reasons not
entirely clear to me. S390 needs special handling for fallocate(), and
in fact has already implemented sys_sync_file_range() so doesn't need to
do sys_sync_file_range2().
> Also, you might want to put something in the syscall file about signed
> vs unsigned arguments and how they behave with 32-on-64 systems.
True. If I properly understood why arch/powerpc/sys_ppc32.c has _some_
of that stuff but apparently no longer needs it, then I would have tried
to say something coherent about it.
--
dwmw2
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:47 ` Matthew Wilcox
2007-06-25 12:01 ` David Woodhouse
@ 2007-06-25 12:04 ` Segher Boessenkool
2007-06-25 12:34 ` David Woodhouse
2007-06-27 13:34 ` Ralf Baechle
2 siblings, 1 reply; 18+ messages in thread
From: Segher Boessenkool @ 2007-06-25 12:04 UTC (permalink / raw)
To: Matthew Wilcox
Cc: David Woodhouse, Russell King, torvalds, Andrew Morton, drepper,
paulus, linux-arch
>> - Some architectures must align 64-bit integers into an aligned
>> pair of registers. A slot may be wasted for padding.
>> - S390 may not have a 64-bit integer in slots 5/6.
>
> Uhm, doesn't sys_sync_file_range2 break that?
>
> +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int
> flags,
> + unsigned offset_hi, unsigned
> offset_lo,
> + unsigned nbytes_hi, unsigned
> nbytes_lo)
I don't see any 64-bit integers here.
> Also, you might want to put something in the syscall file about signed
> vs unsigned arguments and how they behave with 32-on-64 systems.
"Both work / unsigned is better / use unsigned whenever
possible" -- but that's true for all C coding. Wouldn't
hurt to repeat it though :-)
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 12:04 ` Segher Boessenkool
@ 2007-06-25 12:34 ` David Woodhouse
2007-06-25 13:10 ` Segher Boessenkool
0 siblings, 1 reply; 18+ messages in thread
From: David Woodhouse @ 2007-06-25 12:34 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Matthew Wilcox, Russell King, torvalds, Andrew Morton, drepper,
paulus, linux-arch
On Mon, 2007-06-25 at 14:04 +0200, Segher Boessenkool wrote:
> >> - Some architectures must align 64-bit integers into an aligned
> >> pair of registers. A slot may be wasted for padding.
> >> - S390 may not have a 64-bit integer in slots 5/6.
> >
> > Uhm, doesn't sys_sync_file_range2 break that?
> >
> > +asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int
> > flags,
> > + unsigned offset_hi, unsigned
> > offset_lo,
> > + unsigned nbytes_hi, unsigned
> > nbytes_lo)
>
> I don't see any 64-bit integers here.
Of course not. This is the routine which is called from 32-bit code.
The prototype in the 32-bit code is (int, unsigned, loff_t, loff_t).
> > Also, you might want to put something in the syscall file about signed
> > vs unsigned arguments and how they behave with 32-on-64 systems.
>
> "Both work / unsigned is better / use unsigned whenever
> possible" -- but that's true for all C coding. Wouldn't
> hurt to repeat it though :-)
Don't we need (int)(-1) to be represented as 0xFFFFFFFFFFFFFFFF in a
register in 64-bit code, while we only get 0xFFFFFFFF passed in from
userspace? Not that this explains why we're doing it even for
filedescriptors.
--
dwmw2
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 12:34 ` David Woodhouse
@ 2007-06-25 13:10 ` Segher Boessenkool
2007-06-25 13:33 ` David Woodhouse
0 siblings, 1 reply; 18+ messages in thread
From: Segher Boessenkool @ 2007-06-25 13:10 UTC (permalink / raw)
To: David Woodhouse
Cc: torvalds, Russell King, Matthew Wilcox, Andrew Morton, drepper,
paulus, linux-arch
>> I don't see any 64-bit integers here.
>
> Of course not. This is the routine which is called from 32-bit code.
> The prototype in the 32-bit code is (int, unsigned, loff_t, loff_t).
Ah I see, sorry for the confusion.
>>> Also, you might want to put something in the syscall file about
>>> signed
>>> vs unsigned arguments and how they behave with 32-on-64 systems.
>>
>> "Both work / unsigned is better / use unsigned whenever
>> possible" -- but that's true for all C coding. Wouldn't
>> hurt to repeat it though :-)
>
> Don't we need (int)(-1) to be represented as 0xFFFFFFFFFFFFFFFF in a
> register in 64-bit code, while we only get 0xFFFFFFFF passed in from
> userspace?
Yes exactly, signed integers need sign extensions, which
makes them less efficient. Some ABIs need zero extensions
too, but on a whole unsigned works better. Most of the
time you don't need to do much on the (C code) kernel side
of things.
Is this enough handwaving? I'm sure someone else can explain
this a lot better than me :-)
Segher
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 13:10 ` Segher Boessenkool
@ 2007-06-25 13:33 ` David Woodhouse
0 siblings, 0 replies; 18+ messages in thread
From: David Woodhouse @ 2007-06-25 13:33 UTC (permalink / raw)
To: Segher Boessenkool
Cc: torvalds, Russell King, Matthew Wilcox, Andrew Morton, drepper,
paulus, linux-arch
On Mon, 2007-06-25 at 15:10 +0200, Segher Boessenkool wrote:
> >> I don't see any 64-bit integers here.
> >
> > Of course not. This is the routine which is called from 32-bit code.
> > The prototype in the 32-bit code is (int, unsigned, loff_t, loff_t).
>
> Ah I see, sorry for the confusion.
> Yes exactly, signed integers need sign extensions, which
> makes them less efficient. Some ABIs need zero extensions
> too, but on a whole unsigned works better. Most of the
> time you don't need to do much on the (C code) kernel side
> of things.
Most of the time, true. But not in the case we're actually talking
about.
> Is this enough handwaving? I'm sure someone else can explain
> this a lot better than me :-)
Perhaps so :)
--
dwmw2
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:33 ` David Howells
@ 2007-06-25 14:35 ` Kyle McMartin
0 siblings, 0 replies; 18+ messages in thread
From: Kyle McMartin @ 2007-06-25 14:35 UTC (permalink / raw)
To: David Howells
Cc: Russell King, Matthew Wilcox, Andrew Morton, David Woodhouse,
torvalds, paulus, linux-arch, drepper
On Mon, Jun 25, 2007 at 12:33:43PM +0100, David Howells wrote:
> Russell King <rmk@arm.linux.org.uk> wrote:
>
> > Thankfully, linux-arch is low noise. We just need to convince those who
> > want to add additional syscalls to copy their stuff here, such as that
> > sys_sync_file_range() patch (http://lwn.net/Articles/177830/)
>
> Agreed. And a prototype manual page should perhaps be included in the patch
> description...
>
hear, hear.
A testcase too!
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:37 ` David Woodhouse
2007-06-25 11:47 ` Matthew Wilcox
@ 2007-06-27 12:23 ` Geert Uytterhoeven
1 sibling, 0 replies; 18+ messages in thread
From: Geert Uytterhoeven @ 2007-06-27 12:23 UTC (permalink / raw)
To: David Woodhouse
Cc: Russell King, Matthew Wilcox, Andrew Morton, torvalds, paulus,
linux-arch, drepper
On Mon, 25 Jun 2007, David Woodhouse wrote:
> On Mon, 2007-06-25 at 12:09 +0100, Russell King wrote:
> > On Mon, Jun 25, 2007 at 04:35:35AM -0600, Matthew Wilcox wrote:
> > > On Mon, Jun 25, 2007 at 02:11:45AM -0700, Andrew Morton wrote:
> > > > ABI. How would you like "it would be nice if maintainers of oddball
> > > > architectures would pay attention"?
> > >
> > > If new syscalls got posted to linux-arch for discussion, I assure you,
> > > we'd pay attention.
> >
> > Ditto. sys_sync_file_range() wasn't, so I think David's sentiment is
> > bang on.
> >
> > And as David says, we've _finally_ been round the discussion loop with
> > fallocate, so in theory we now know what the issues are, and we _all_
> > have a good idea how to deal with argument ordering to satisfy the
> > majority. That should mean that subsequent discussion be shorter.
>
> Well, those who were paying attention might. We should probably add
> Documentation/new-syscalls.txt with such information as...
>
> ---
>
> Never add any system call without considering how its prototype works
> for all architectures and for 32-bit userspace on 64-bit kernels.
>
> - Some architectures have a limit of 6 (32-bit) argument slots.
> - Some architectures must align 64-bit integers into an aligned
> pair of registers. A slot may be wasted for padding.
> - S390 may not have a 64-bit integer in slots 5/6.
>
> Where you invent a data structure for communication with userspace, be
> aware of the following:
>
> - Try to ensure your data structure will be identical for 32-bit and
> 64-bit builds, if possible. That way, you avoid the need to implement
> compatibility routines for 32-bit userspace on 64-bit kernels. In
> particular, avoid the 'long' data type. Try to use explicitly sized
> types such as 'uint64_t' instead.
> - Most architectures align 64-bit integers to 8 bytes, but i386 doesn't.
> If you have to implement 32-bit compatibility, make sure you get this
> right. Preferably, avoid the problem by ensuring that your 64-bit
> integers are naturally placed with 8-byte alignment even without
> padding.
Add: patches adding syscalls should be CCed to linux-arch (or does this
expose our precious email address to spammers?)
> > At least now, if they don't, provided we build every -rc kernel as it's
> > released, we can detect when new syscalls are added quickly and give
> > those submitters a suitable roasting at gas mark 95.
>
> Linus should be refusing any new system call which doesn't at _least_
> handle the 32/64 compatibility issues. Explictly stating that it doesn't
> need compatibility wrappers would be OK, as long as it's true -- but
> just saying _nothing_ about it is bad.
And can we let checkpatch emit a warning for newly added syscalls?
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 8:49 Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM David Woodhouse
2007-06-25 9:11 ` Andrew Morton
@ 2007-06-27 13:22 ` Ralf Baechle
1 sibling, 0 replies; 18+ messages in thread
From: Ralf Baechle @ 2007-06-27 13:22 UTC (permalink / raw)
To: David Woodhouse; +Cc: torvalds, akpm, paulus, linux-arch, drepper, rmk
On Mon, Jun 25, 2007 at 09:49:17AM +0100, David Woodhouse wrote:
> Not all the world is an i386. Many architectures need 64-bit arguments
> to be aligned in suitable pairs of registers, and the original
> sys_sync_file_range(int, loff_t, loff_t, int) was therefore wasting an
> argument register for padding after the first integer. Since we don't
> normally have more than 6 arguments for system calls, that left no room
> for the final argument on some architectures.
The worse side of this class of problem is that at times new syscalls are
being invoked through pseudo-portable assembler code which on architectures
where where 64-bit values are being passed in an aligned pair frequently
end passing some of the arguments the wrong way. For pread/pwrite this
did result in actual data corruption.
Ralf
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
2007-06-25 11:47 ` Matthew Wilcox
2007-06-25 12:01 ` David Woodhouse
2007-06-25 12:04 ` Segher Boessenkool
@ 2007-06-27 13:34 ` Ralf Baechle
2 siblings, 0 replies; 18+ messages in thread
From: Ralf Baechle @ 2007-06-27 13:34 UTC (permalink / raw)
To: Matthew Wilcox
Cc: David Woodhouse, Russell King, Andrew Morton, torvalds, paulus,
linux-arch, drepper
On Mon, Jun 25, 2007 at 05:47:42AM -0600, Matthew Wilcox wrote:
> Also, you might want to put something in the syscall file about signed
> vs unsigned arguments and how they behave with 32-on-64 systems.
Another fine can of worms. On some architectures function arguments are
getting extended to full register size by the caller, on others it just
doesn't matter. This can have very subtle effects. MIPS belongs into the
first class which forces me to doublecheck each compat syscall entrypoint
for sign/unsigned extension issues. Ick.
Ralf
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2007-06-27 19:35 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-06-25 8:49 Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM David Woodhouse
2007-06-25 9:11 ` Andrew Morton
2007-06-25 9:48 ` David Woodhouse
2007-06-25 10:24 ` Matthew Wilcox
2007-06-25 10:35 ` Matthew Wilcox
2007-06-25 11:09 ` Russell King
2007-06-25 11:37 ` David Woodhouse
2007-06-25 11:47 ` Matthew Wilcox
2007-06-25 12:01 ` David Woodhouse
2007-06-25 12:04 ` Segher Boessenkool
2007-06-25 12:34 ` David Woodhouse
2007-06-25 13:10 ` Segher Boessenkool
2007-06-25 13:33 ` David Woodhouse
2007-06-27 13:34 ` Ralf Baechle
2007-06-27 12:23 ` Geert Uytterhoeven
2007-06-25 11:33 ` David Howells
2007-06-25 14:35 ` Kyle McMartin
2007-06-27 13:22 ` Ralf Baechle
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).