From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org,
Masahiro Yamada <yamada.masahiro@socionext.com>,
Nick Desaulniers <ndesaulniers@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Christophe Leroy <christophe.leroy@csgroup.eu>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 5.4 24/53] net: Force inlining of checksum functions in net/checksum.h
Date: Mon, 28 Feb 2022 18:24:22 +0100 [thread overview]
Message-ID: <20220228172250.060108334@linuxfoundation.org> (raw)
In-Reply-To: <20220228172248.232273337@linuxfoundation.org>
From: Christophe Leroy <christophe.leroy@csgroup.eu>
commit 5486f5bf790b5c664913076c3194b8f916a5c7ad upstream.
All functions defined as static inline in net/checksum.h are
meant to be inlined for performance reason.
But since commit ac7c3e4ff401 ("compiler: enable
CONFIG_OPTIMIZE_INLINING forcibly") the compiler is allowed to
uninline functions when it wants.
Fair enough in the general case, but for tiny performance critical
checksum helpers that's counter-productive.
The problem mainly arises when selecting CONFIG_CC_OPTIMISE_FOR_SIZE,
Those helpers being 'static inline' in header files you suddenly find
them duplicated many times in the resulting vmlinux.
Here is a typical exemple when building powerpc pmac32_defconfig
with CONFIG_CC_OPTIMISE_FOR_SIZE. csum_sub() appears 4 times:
c04a23cc <csum_sub>:
c04a23cc: 7c 84 20 f8 not r4,r4
c04a23d0: 7c 63 20 14 addc r3,r3,r4
c04a23d4: 7c 63 01 94 addze r3,r3
c04a23d8: 4e 80 00 20 blr
...
c04a2ce8: 4b ff f6 e5 bl c04a23cc <csum_sub>
...
c04a2d2c: 4b ff f6 a1 bl c04a23cc <csum_sub>
...
c04a2d54: 4b ff f6 79 bl c04a23cc <csum_sub>
...
c04a754c <csum_sub>:
c04a754c: 7c 84 20 f8 not r4,r4
c04a7550: 7c 63 20 14 addc r3,r3,r4
c04a7554: 7c 63 01 94 addze r3,r3
c04a7558: 4e 80 00 20 blr
...
c04ac930: 4b ff ac 1d bl c04a754c <csum_sub>
...
c04ad264: 4b ff a2 e9 bl c04a754c <csum_sub>
...
c04e3b08 <csum_sub>:
c04e3b08: 7c 84 20 f8 not r4,r4
c04e3b0c: 7c 63 20 14 addc r3,r3,r4
c04e3b10: 7c 63 01 94 addze r3,r3
c04e3b14: 4e 80 00 20 blr
...
c04e5788: 4b ff e3 81 bl c04e3b08 <csum_sub>
...
c04e65c8: 4b ff d5 41 bl c04e3b08 <csum_sub>
...
c0512d34 <csum_sub>:
c0512d34: 7c 84 20 f8 not r4,r4
c0512d38: 7c 63 20 14 addc r3,r3,r4
c0512d3c: 7c 63 01 94 addze r3,r3
c0512d40: 4e 80 00 20 blr
...
c0512dfc: 4b ff ff 39 bl c0512d34 <csum_sub>
...
c05138bc: 4b ff f4 79 bl c0512d34 <csum_sub>
...
Restore the expected behaviour by using __always_inline for all
functions defined in net/checksum.h
vmlinux size is even reduced by 256 bytes with this patch:
text data bss dec hex filename
6980022 2515362 194384 9689768 93daa8 vmlinux.before
6979862 2515266 194384 9689512 93d9a8 vmlinux.now
Fixes: ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly")
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/checksum.h | 41 +++++++++++++++++++++--------------------
1 file changed, 21 insertions(+), 20 deletions(-)
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -22,7 +22,7 @@
#include <asm/checksum.h>
#ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
-static inline
+static __always_inline
__wsum csum_and_copy_from_user (const void __user *src, void *dst,
int len, __wsum sum, int *err_ptr)
{
@@ -37,7 +37,7 @@ __wsum csum_and_copy_from_user (const vo
#endif
#ifndef HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user
+static __always_inline __wsum csum_and_copy_to_user
(const void *src, void __user *dst, int len, __wsum sum, int *err_ptr)
{
sum = csum_partial(src, len, sum);
@@ -54,7 +54,7 @@ static __inline__ __wsum csum_and_copy_t
#endif
#ifndef HAVE_ARCH_CSUM_ADD
-static inline __wsum csum_add(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
{
u32 res = (__force u32)csum;
res += (__force u32)addend;
@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csu
}
#endif
-static inline __wsum csum_sub(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
{
return csum_add(csum, ~addend);
}
-static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
{
u16 res = (__force u16)csum;
@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16
return (__force __sum16)(res + (res < (__force u16)addend));
}
-static inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
{
return csum16_add(csum, ~addend);
}
-static inline __wsum
+static __always_inline __wsum
csum_block_add(__wsum csum, __wsum csum2, int offset)
{
u32 sum = (__force u32)csum2;
@@ -92,36 +92,37 @@ csum_block_add(__wsum csum, __wsum csum2
return csum_add(csum, (__force __wsum)sum);
}
-static inline __wsum
+static __always_inline __wsum
csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
{
return csum_block_add(csum, csum2, offset);
}
-static inline __wsum
+static __always_inline __wsum
csum_block_sub(__wsum csum, __wsum csum2, int offset)
{
return csum_block_add(csum, ~csum2, offset);
}
-static inline __wsum csum_unfold(__sum16 n)
+static __always_inline __wsum csum_unfold(__sum16 n)
{
return (__force __wsum)n;
}
-static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
+static __always_inline
+__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
{
return csum_partial(buff, len, sum);
}
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
-static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
{
*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
}
-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
+static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
@@ -134,7 +135,7 @@ static inline void csum_replace4(__sum16
* m : old value of a 16bit field
* m' : new value of a 16bit field
*/
-static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
+static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
{
*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
}
@@ -153,16 +154,16 @@ void inet_proto_csum_replace16(__sum16 *
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
__wsum diff, bool pseudohdr);
-static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
- __be16 from, __be16 to,
- bool pseudohdr)
+static __always_inline
+void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+ __be16 from, __be16 to, bool pseudohdr)
{
inet_proto_csum_replace4(sum, skb, (__force __be32)from,
(__force __be32)to, pseudohdr);
}
-static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
- int start, int offset)
+static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
+ int start, int offset)
{
__sum16 *psum = (__sum16 *)(ptr + offset);
__wsum delta;
@@ -178,7 +179,7 @@ static inline __wsum remcsum_adjust(void
return delta;
}
-static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
+static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
{
*psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
}
next prev parent reply other threads:[~2022-02-28 17:36 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-28 17:23 [PATCH 5.4 00/53] 5.4.182-rc1 review Greg Kroah-Hartman
2022-02-28 17:23 ` [PATCH 5.4 01/53] cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 02/53] clk: jz4725b: fix mmc0 clock gating Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 03/53] vhost/vsock: dont check owner in vhost_vsock_stop() while releasing Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 04/53] parisc/unaligned: Fix fldd and fstd unaligned handlers on 32-bit kernel Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 05/53] parisc/unaligned: Fix ldw() and stw() unalignment handlers Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 06/53] drm/amdgpu: disable MMHUB PG for Picasso Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 07/53] sr9700: sanity check for packet length Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 08/53] USB: zaurus: support another broken Zaurus Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 09/53] netfilter: nf_tables_offload: incorrect flow offload action array size Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 10/53] x86/fpu: Correct pkru/xstate inconsistency Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 11/53] tee: export teedev_open() and teedev_close_context() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 12/53] optee: use driver internal tee_context for some rpc Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 13/53] lan743x: fix deadlock in lan743x_phy_link_status_change() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 14/53] ping: remove pr_err from ping_lookup Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 15/53] perf data: Fix double free in perf_session__delete() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 16/53] bpf: Do not try bpf_msg_push_data with len 0 Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 17/53] net: __pskb_pull_tail() & pskb_carve_frag_list() drop_monitor friends Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 18/53] tipc: Fix end of loop tests for list_for_each_entry() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 19/53] gso: do not skip outer ip header in case of ipip and net_failover Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 20/53] openvswitch: Fix setting ipv6 fields causing hw csum failure Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 21/53] drm/edid: Always set RGB444 Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 22/53] net/mlx5e: Fix wrong return value on ioctl EEPROM query failure Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 23/53] net: ll_temac: check the return value of devm_kmalloc() Greg Kroah-Hartman
2022-02-28 17:24 ` Greg Kroah-Hartman [this message]
2022-02-28 17:24 ` [PATCH 5.4 25/53] nfp: flower: Fix a potential leak in nfp_tunnel_add_shared_mac() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 26/53] netfilter: nf_tables: fix memory leak during stateful obj update Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 27/53] net/mlx5: Fix possible deadlock on rule deletion Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 28/53] net/mlx5: Fix wrong limitation of metadata match on ecpf Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 29/53] spi: spi-zynq-qspi: Fix a NULL pointer dereference in zynq_qspi_exec_mem_op() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 30/53] configfs: fix a race in configfs_{,un}register_subsystem() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 31/53] RDMA/ib_srp: Fix a deadlock Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 32/53] tracing: Have traceon and traceoff trigger honor the instance Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 33/53] iio: adc: men_z188_adc: Fix a resource leak in an error handling path Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 34/53] iio: adc: ad7124: fix mask used for setting AIN_BUFP & AIN_BUFM bits Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 35/53] iio: Fix error handling for PM Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 36/53] ata: pata_hpt37x: disable primary channel on HPT371 Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 37/53] Revert "USB: serial: ch341: add new Product ID for CH341A" Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 38/53] usb: gadget: rndis: add spinlock for rndis response list Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 39/53] USB: gadget: validate endpoint index for xilinx udc Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 40/53] tracefs: Set the group ownership in apply_options() not parse_options() Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 41/53] USB: serial: option: add support for DW5829e Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 42/53] USB: serial: option: add Telit LE910R1 compositions Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 43/53] usb: dwc3: pci: Fix Bay Trail phy GPIO mappings Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 44/53] usb: dwc3: gadget: Let the interrupt handler disable bottom halves Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 45/53] xhci: re-initialize the HC during resume if HCE was set Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 46/53] xhci: Prevent futile URB re-submissions due to incorrect return value Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 47/53] tty: n_gsm: fix encoding of control signal octet bit DV Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 48/53] tty: n_gsm: fix proper link termination after failed open Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 49/53] tty: n_gsm: fix NULL pointer access due to DLCI release Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 50/53] gpio: tegra186: Fix chip_data type confusion Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 51/53] Revert "drm/nouveau/pmu/gm200-: avoid touching PMU outside of DEVINIT/PREOS/ACR" Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 52/53] memblock: use kfree() to release kmalloced memblock regions Greg Kroah-Hartman
2022-02-28 17:24 ` [PATCH 5.4 53/53] fget: clarify and improve __fget_files() implementation Greg Kroah-Hartman
2022-02-28 21:42 ` [PATCH 5.4 00/53] 5.4.182-rc1 review Shuah Khan
2022-02-28 23:12 ` Florian Fainelli
2022-03-01 11:34 ` Sudip Mukherjee
2022-03-01 16:45 ` Naresh Kamboju
2022-03-01 19:13 ` Guenter Roeck
2022-03-02 7:04 ` Slade Watkins
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220228172250.060108334@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=christophe.leroy@csgroup.eu \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=ndesaulniers@google.com \
--cc=stable@vger.kernel.org \
--cc=yamada.masahiro@socionext.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox