public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* memcpy(a,b,CONST) is not inlined by gcc 3.4.1 in Linux kernel
@ 2005-03-29 14:37 Denis Vlasenko
  2005-03-29 15:06 ` Richard Guenther
                   ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Denis Vlasenko @ 2005-03-29 14:37 UTC (permalink / raw)
  To: linux-kernel, gcc

Try the testcase below the signature.

This causes nearly one thousand calls to memcpy in my kernel
(not an allyesconfig one):

# objdump -d vmlinux | grep -F '<memcpy>' | wc -l
    959

# gcc -O2 -c t.c
# objdump -r -d t.o

t.o:     file format elf32-i386

Disassembly of section .text:

00000000 <f3>:
   0:   55                      push   %ebp
   1:   89 e5                   mov    %esp,%ebp
   3:   83 ec 0c                sub    $0xc,%esp
   6:   6a 03                   push   $0x3
   8:   ff 75 0c                pushl  0xc(%ebp)
   b:   ff 75 08                pushl  0x8(%ebp)
   e:   e8 fc ff ff ff          call   f <f3+0xf>
                        f: R_386_PC32   memcpy
  13:   83 c4 10                add    $0x10,%esp
  16:   c9                      leave
  17:   c3                      ret

00000018 <f3b>:
  18:   55                      push   %ebp
  19:   89 e5                   mov    %esp,%ebp
  1b:   8b 55 0c                mov    0xc(%ebp),%edx
  1e:   66 8b 02                mov    (%edx),%ax
  21:   8b 4d 08                mov    0x8(%ebp),%ecx
  24:   66 89 01                mov    %ax,(%ecx)
  27:   8a 42 02                mov    0x2(%edx),%al
  2a:   88 41 02                mov    %al,0x2(%ecx)
  2d:   c9                      leave
  2e:   c3                      ret
  2f:   90                      nop

00000030 <f3k>:
  30:   55                      push   %ebp
  31:   89 e5                   mov    %esp,%ebp
  33:   57                      push   %edi
  34:   56                      push   %esi
  35:   8b 7d 08                mov    0x8(%ebp),%edi
  38:   8b 75 0c                mov    0xc(%ebp),%esi
  3b:   b9 ee 02 00 00          mov    $0x2ee,%ecx
  40:   f3 a5                   repz movsl %ds:(%esi),%es:(%edi)
  42:   5e                      pop    %esi
  43:   5f                      pop    %edi
  44:   c9                      leave
  45:   c3                      ret


--
vda


/* Testcase-local definition of size_t; this snippet includes no headers. */
typedef unsigned int size_t;

/*
 * Copy n bytes from 'from' to 'to' with x86 string instructions, for a
 * count that is not known at compile time.
 *
 * ECX is loaded with n/4 and "rep ; movsl" moves that many 32-bit words
 * from (ESI) to (EDI).  The low byte of n (operand %b4) is then tested:
 * if bit 1 is set one movsw copies two more bytes, and if bit 0 is set
 * one movsb copies the final byte, which together cover the n % 4 tail.
 *
 * Returns 'to'.
 */
static inline void * __memcpy(void * to, const void * from, size_t n)
{
/* Dummy outputs: they tell the compiler that ECX, EDI and ESI are modified. */
int d0, d1, d2;
__asm__ __volatile__(
        "rep ; movsl\n\t"
        "testb $2,%b4\n\t"
        "je 1f\n\t"
        "movsw\n"
        "1:\ttestb $1,%b4\n\t"
        "je 2f\n\t"
        "movsb\n"
        "2:"
        /* outputs (early-clobber): %0 = ECX, %1 = EDI, %2 = ESI */
        : "=&c" (d0), "=&D" (d1), "=&S" (d2)
        /* inputs: %3 = n/4 (tied to %0), %4 = n in a "q" register,
           %5 = to (tied to %1), %6 = from (tied to %2) */
        :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
        /* the asm accesses memory that is not described by its operands */
        : "memory");
return (to);
}

/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as the count is constant.
 *
 * Copy n bytes from 'from' to 'to'; reached from the memcpy() macro when
 * the count is a compile-time constant.
 *
 * Counts of at most 128 bytes are handed to __builtin_memcpy.  Larger
 * counts use "rep ; movsl" for n/4 32-bit words, followed by the
 * movsw/movsb tail selected by n % 4.
 *
 * Returns 'to' on the inline-asm paths and the result of
 * __builtin_memcpy on the small-count path.
 */
static inline void * __constant_memcpy(void * to, const void * from, size_t n)
{
        if (n <= 128)
                return __builtin_memcpy(to, from, n);

/*
 * COMMON(x) emits "rep ; movsl" with the extra instruction text x appended
 * to the asm template.  It uses d0, d1, d2, n, to and from by name from the
 * expansion site: ECX = n/4, EDI = to, ESI = from, with a "memory" clobber.
 * The macro is function-local and is #undef'd before the closing brace.
 */
#define COMMON(x) \
__asm__ __volatile__( \
        "rep ; movsl" \
        x \
        : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
        : "0" (n/4),"1" ((long) to),"2" ((long) from) \
        : "memory");
{
        /* Dummy outputs for the registers the asm modifies. */
        int d0, d1, d2;
        /* Pick the tail copy from the remainder: 0..3 bytes after the words. */
        switch (n % 4) {
                case 0: COMMON(""); return to;
                case 1: COMMON("\n\tmovsb"); return to;
                case 2: COMMON("\n\tmovsw"); return to;
                default: COMMON("\n\tmovsw\n\tmovsb"); return to;
        }
}

#undef COMMON
}

/*
 * memcpy(t, f, n): use __constant_memcpy when the compiler can prove n is a
 * compile-time constant (__builtin_constant_p), otherwise use __memcpy.
 */
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy((t),(f),(n)) : \
 __memcpy((t),(f),(n)))

/*
 * Testcase: memcpy() macro with a small constant count (3 <= 128, so the
 * __builtin_memcpy path).  The objdump listing above shows gcc 3.4.1 -O2
 * emitting an out-of-line call to memcpy for this function.
 * NOTE(review): declared int but has no return statement.
 */
int f3(char *a, char *b) { memcpy(a,b,3); }
/*
 * Testcase: direct __builtin_memcpy with the same count of 3.  The objdump
 * listing above shows it expanded inline as a 16-bit move plus an 8-bit move.
 * NOTE(review): declared int but has no return statement.
 */
int f3b(char *a, char *b) { __builtin_memcpy(a,b,3); }
/*
 * Testcase: memcpy() macro with a large constant count (3000 > 128 and
 * 3000 % 4 == 0, so the plain "rep ; movsl" path).  The objdump listing
 * above shows it inlined with ECX = 0x2ee (750 = 3000/4).
 * NOTE(review): declared int but has no return statement.
 */
int f3k(char *a, char *b) { memcpy(a,b,3000); }


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2005-04-07  7:32 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-03-29 14:37 memcpy(a,b,CONST) is not inlined by gcc 3.4.1 in Linux kernel Denis Vlasenko
2005-03-29 15:06 ` Richard Guenther
2005-03-29 15:08 ` Nathan Sidwell
2005-03-29 15:13 ` Jakub Jelinek
2005-03-29 15:42   ` Andrew Pinski
2005-03-30  2:27     ` Gerold Jury
2005-03-30  6:15       ` Denis Vlasenko
2005-04-01 21:43         ` Jan Hubicka
2005-04-02 12:18           ` Denis Vlasenko
2005-04-02 12:26             ` Denis Vlasenko
2005-04-05 16:34               ` [BUG mm] "fixed" i386 memcpy inlining buggy Christophe Saout
2005-04-06 10:14                 ` Denis Vlasenko
2005-04-06 11:05                   ` Dave Korn
2005-04-06 11:13                     ` Dave Korn
2005-04-06 11:53                       ` Dave Korn
2005-04-06 11:56                         ` Dave Korn
2005-04-06 13:18                         ` Richard B. Johnson
2005-04-06 14:16                           ` Denis Vlasenko
2005-04-06 12:05                   ` Christophe Saout
2005-04-06 12:36                     ` Andrew Haley
2005-04-06 15:18                     ` Paolo Bonzini
2005-04-06 16:11                 ` Denis Vlasenko
2005-03-29 20:22 ` [PATCH] fix i386 memcpy Denis Vlasenko
2005-03-29 20:24   ` Denis Vlasenko

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.