git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
Cc: Junio C Hamano <gitster@pobox.com>, git@vger.kernel.org
Subject: Re: [PATCH] cocci: avoid normalization rules for memcpy
Date: Mon, 11 Jul 2022 19:11:18 +0200	[thread overview]
Message-ID: <e29e424f-c214-a912-fba8-107c5e402b8a@web.de> (raw)
In-Reply-To: <220710.86ilo580mb.gmgdl@evledraar.gmail.com>

Am 10.07.22 um 16:45 schrieb Ævar Arnfjörð Bjarmason:
>
> On Sun, Jul 10 2022, René Scharfe wrote:
>
>> Some of the rules for using COPY_ARRAY instead of memcpy with sizeof are
>> intended to reduce the number of sizeof variants to deal with.  They can
>> have unintended side effects if only they match, but not the one for the
>> COPY_ARRAY conversion at the end.
>
> Since ab/cocci-unused is marked for "next" it would be really nice to
> have this based on top so we can add tests for these transformations
> (the topic adds a way to do that).

Testing semantic patches sounds like a good idea.  We can add tests later,
once that feature landed.

>
> But if you don't feel like  doing that this is fine too.
>
>> diff --git a/contrib/coccinelle/array.cocci b/contrib/coccinelle/array.cocci
>> index 9a4f00cb1b..aa75937950 100644
>> --- a/contrib/coccinelle/array.cocci
>> +++ b/contrib/coccinelle/array.cocci
>> @@ -1,60 +1,58 @@
>> -@@
>> -expression dst, src, n, E;
>> -@@
>> -  memcpy(dst, src, n * sizeof(
>> -- E[...]
>> -+ *(E)
>> -  ))
>> -
>> -@@
>> -type T;
>> -T *ptr;
>> -T[] arr;
>> -expression E, n;
>> -@@
>> -(
>> -  memcpy(ptr, E,
>> -- n * sizeof(*(ptr))
>> -+ n * sizeof(T)
>> -  )
>> -|
>> -  memcpy(arr, E,
>> -- n * sizeof(*(arr))
>> -+ n * sizeof(T)
>> -  )
>> -|
>> -  memcpy(E, ptr,
>> -- n * sizeof(*(ptr))
>> -+ n * sizeof(T)
>> -  )
>> -|
>> -  memcpy(E, arr,
>> -- n * sizeof(*(arr))
>> -+ n * sizeof(T)
>> -  )
>> -)
>> -
>>  @@
>>  type T;
>>  T *dst_ptr;
>>  T *src_ptr;
>> -T[] dst_arr;
>> -T[] src_arr;
>>  expression n;
>>  @@
>> -(
>> -- memcpy(dst_ptr, src_ptr, (n) * sizeof(T))
>> +- memcpy(dst_ptr, src_ptr, (n) * \( sizeof(T)
>> +-                                \| sizeof(*(dst_ptr))
>> +-                                \| sizeof(*(src_ptr))
>> +-                                \| sizeof(dst_ptr[...])
>> +-                                \| sizeof(src_ptr[...])
>> +-                                \) )
>>  + COPY_ARRAY(dst_ptr, src_ptr, n)
>> -|
>> -- memcpy(dst_ptr, src_arr, (n) * sizeof(T))
>> +
>> +@@
>> +type T;
>> +T *dst_ptr;
>> +T[] src_arr;
>> +expression n;
>> +@@
>> +- memcpy(dst_ptr, src_arr, (n) * \( sizeof(T)
>> +-                                \| sizeof(*(dst_ptr))
>> +-                                \| sizeof(*(src_arr))
>> +-                                \| sizeof(dst_ptr[...])
>> +-                                \| sizeof(src_arr[...])
>> +-                                \) )
>>  + COPY_ARRAY(dst_ptr, src_arr, n)
>> -|
>> -- memcpy(dst_arr, src_ptr, (n) * sizeof(T))
>> +
>> +@@
>> +type T;
>> +T[] dst_arr;
>> +T *src_ptr;
>> +expression n;
>> +@@
>> +- memcpy(dst_arr, src_ptr, (n) * \( sizeof(T)
>> +-                                \| sizeof(*(dst_arr))
>> +-                                \| sizeof(*(src_ptr))
>> +-                                \| sizeof(dst_arr[...])
>> +-                                \| sizeof(src_ptr[...])
>> +-                                \) )
>>  + COPY_ARRAY(dst_arr, src_ptr, n)
>> -|
>> -- memcpy(dst_arr, src_arr, (n) * sizeof(T))
>> +
>> +@@
>> +type T;
>> +T[] dst_arr;
>> +T[] src_arr;
>> +expression n;
>> +@@
>> +- memcpy(dst_arr, src_arr, (n) * \( sizeof(T)
>> +-                                \| sizeof(*(dst_arr))
>> +-                                \| sizeof(*(src_arr))
>> +-                                \| sizeof(dst_arr[...])
>> +-                                \| sizeof(src_arr[...])
>> +-                                \) )
>>  + COPY_ARRAY(dst_arr, src_arr, n)
>> -)
>>
>>  @@
>>  type T;
>
> Hrm, this seems like a lot of repetition, it's here in the rules you're
> editing already, but these repeated "sizeof" make it a lot more verbose.
>
> Isn't there a way to avoid this by simply wrapping this across lines, I
> didn't test, but I think you can do this sort of thing in the cocci
> grammar:
>
> - memcpy(
> - COPY_ARRAY(
>   (
>   dst_arr
>   |
>   dst_ptr
>   )
>   ,
>   (
>   src_arr
>   |
>   src_ptr
>   )
>   ,
>   (n) *
> -  [your big sizeof alternate here]
>   )

Hmm, that would match many more combinations, e.g. this one:

void f(int *a, int *b, long n, int c[1]) {
	memcpy(a, b, n * sizeof(*c));
}

The elements of a, b and c have the same type, so replacing c with a
(which a conversion to COPY_ARRAY would do) would produce the same
object code.  I can't come up with a plausible scenario like above and
where a type change of c down the line would cause problems, but I
also can't convince myself that no such thing can exist.  Tricky.

And I can't get it to format the whitespace around the third argument
of COPY_ARRAY nicely in all cases.

And it takes 37% longer on my machine.

But it sure is more compact. :)

@@
type T;
T *dst_ptr;
T *src_ptr;
T[] dst_arr;
T[] src_arr;
expression n;
@@
- memcpy
+ COPY_ARRAY
  (
  \( dst_ptr \| dst_arr \) ,
  \( src_ptr \| src_arr \) ,
- (n) *  \( sizeof(T)
-        \| sizeof(*(dst_ptr))
-        \| sizeof(*(dst_arr))
-        \| sizeof(*(src_ptr))
-        \| sizeof(*(src_arr))
-        \| sizeof(dst_ptr[...])
-        \| sizeof(dst_arr[...])
-        \| sizeof(src_ptr[...])
-        \| sizeof(src_arr[...])
-        \)
+ n
  )

>
> I.e. you want to preserve whatever we match in the 1st and 2nd
> arguments, but only want to munge part of the 3rd argument. The cocci
> grammar can "reach into" lines like that, it doesn't need to be limited
> to line-based diffs.
>
> But I didn't try it in this caes, and maybe there's a good reason for
> why it can't happen in this case...
>
> I also wonder if that won't be a lot faster, i.e. if you can condense
> this all into one rule it won't need to match this N times, but maybe
> the overall complexity of the rules makes it come out to the same thing
> in the end...

  parent reply	other threads:[~2022-07-11 17:11 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-07  2:02 [PATCH] builtin/mv.c: use correct type to compute size of an array element Junio C Hamano
2022-07-07  5:52 ` [PATCH v2] builtin/mv.c: use the MOVE_ARRAY() macro instead of memmove() Junio C Hamano
2022-07-10  1:33   ` [PATCH v3] " Junio C Hamano
2022-07-18 20:30     ` Derrick Stolee
2022-07-07 12:11 ` [PATCH] builtin/mv.c: use correct type to compute size of an array element Ævar Arnfjörð Bjarmason
2022-07-07 18:10   ` Junio C Hamano
2022-07-07 19:11     ` René Scharfe
2022-07-09  8:16       ` René Scharfe
2022-07-10  5:38         ` Junio C Hamano
2022-07-10 10:05           ` [PATCH] cocci: avoid normalization rules for memcpy René Scharfe
2022-07-10 14:45             ` Ævar Arnfjörð Bjarmason
2022-07-10 16:32               ` Ævar Arnfjörð Bjarmason
2022-07-10 19:30               ` Junio C Hamano
2022-07-11 17:11               ` René Scharfe [this message]
2022-07-11 20:05                 ` Ævar Arnfjörð Bjarmason
2022-07-07 18:27   ` [PATCH] builtin/mv.c: use correct type to compute size of an array element René Scharfe
2022-07-07 18:42 ` Jeff King
2022-07-07 20:25   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e29e424f-c214-a912-fba8-107c5e402b8a@web.de \
    --to=l.s.r@web.de \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).