* [PATCH] userdiff: better method/property matching for C# @ 2024-02-25 17:33 Steven Jeuris via GitGitGadget 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget 0 siblings, 1 reply; 14+ messages in thread From: Steven Jeuris via GitGitGadget @ 2024-02-25 17:33 UTC (permalink / raw) To: git; +Cc: Steven Jeuris, Steven Jeuris From: Steven Jeuris <steven.jeuris@3shape.com> - Support multi-line methods by not requiring closing parenthesis. - Support multiple generics (comma was missing before). - Add missing `foreach`, `from`, `lock` and `fixed` keywords to skip over. - Remove `instanceof` keyword, which isn't C#. - Also detect non-method keywords not positioned at the start of a line. - Added tests; none existed before. The overall strategy is to focus more on what isn't expected for method/property definitions, instead of what is, but is fully optional. Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> --- userdiff: better method/property matching for C# Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1682%2FWhathecode%2Fmaster-v1 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1682/Whathecode/master-v1 Pull-Request: https://github.com/git/git/pull/1682 t/t4018/csharp-method | 10 +++ t/t4018/csharp-method-explicit | 12 +++ t/t4018/csharp-method-generics | 11 +++ t/t4018/csharp-method-modifiers | 13 ++++ t/t4018/csharp-method-multiline | 10 +++ t/t4018/csharp-method-params | 10 +++ t/t4018/csharp-method-skip-body | 115 ++++++++++++++++++++++++++++ t/t4018/csharp-method-special-chars | 11 +++ t/t4018/csharp-method-with-spacing | 10 +++ t/t4018/csharp-property | 11 +++ userdiff.c | 16 ++-- 11 files changed, 224 insertions(+), 5 deletions(-) create mode 100644 t/t4018/csharp-method create mode 100644 t/t4018/csharp-method-explicit create mode 100644 t/t4018/csharp-method-generics create mode 100644 t/t4018/csharp-method-modifiers create mode 100644 t/t4018/csharp-method-multiline create mode 100644 t/t4018/csharp-method-params create mode 100644 t/t4018/csharp-method-skip-body create mode 100644 t/t4018/csharp-method-special-chars create mode 100644 t/t4018/csharp-method-with-spacing create mode 100644 t/t4018/csharp-property diff --git a/t/t4018/csharp-method b/t/t4018/csharp-method new file mode 100644 index 00000000000..85ff0cb8b5b --- /dev/null +++ b/t/t4018/csharp-method @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-explicit b/t/t4018/csharp-method-explicit new file mode 100644 index 00000000000..083aa094ce2 --- /dev/null +++ b/t/t4018/csharp-method-explicit @@ -0,0 +1,12 @@ +using System; + +class Example : IDisposable +{ + void IDisposable.Dispose() // RIGHT + { + // Filler + // Filler + + // ChangeMe + } +} diff --git a/t/t4018/csharp-method-generics b/t/t4018/csharp-method-generics new file mode 100644 index 00000000000..c472d4a18df --- /dev/null +++ b/t/t4018/csharp-method-generics @@ -0,0 +1,11 @@ +class Example<T1, T2> +{ + Example<int, string> Method<TA, TB>(TA RIGHT, TB b) + { + // Filler + // Filler + + // ChangeMe + return null; + } +} diff --git a/t/t4018/csharp-method-modifiers b/t/t4018/csharp-method-modifiers new file mode 100644 index 00000000000..f1c008a4749 --- /dev/null +++ b/t/t4018/csharp-method-modifiers @@ -0,0 +1,13 @@ +using System.Threading.Tasks; + +class Example +{ + static internal async Task Method(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + await Task.Delay(1); + } +} diff --git a/t/t4018/csharp-method-multiline b/t/t4018/csharp-method-multiline new file mode 100644 index 00000000000..0a20b0cb49c --- /dev/null +++ b/t/t4018/csharp-method-multiline @@ -0,0 +1,10 @@ +class Example +{ + string Method_RIGHT( + int a, + int b, + int c) + { + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-params b/t/t4018/csharp-method-params new file mode 100644 index 00000000000..18598449008 --- /dev/null +++ b/t/t4018/csharp-method-params @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT, int b, int c = 42) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-skip-body b/t/t4018/csharp-method-skip-body new file mode 100644 index 00000000000..95e93ea995c --- /dev/null +++ b/t/t4018/csharp-method-skip-body @@ -0,0 +1,115 @@ +using System.Linq; +using System; + +class Example : IDisposable +{ + string Method(int RIGHT) + { + // Method calls + MethodCall(); + MethodCall(1, 2); + MethodCall( + 1, 2); + + // Assignments + var constantAssignment = "test"; + var methodAssignment = MethodCall(); + var multiLineMethodAssignment = MethodCall( + ); + + // Initializations/disposal + new Example(); + new Example( + ); + new Example { }; + using (this) + { + } + var def = + this is default( + Example); + + // Iteration statements + do { } while (true); + do MethodCall( + ); while (true); + while (true); + while (true) { + break; + } + for (int i = 0; i < 10; ++i) + { + } + foreach (int i in Enumerable.Range(0, 10)) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + var test = + from num in Numbers( + ) select num; + + // Control + if (false) + { + return "out"; + } + else { } + if (true) MethodCall( + ); + else MethodCall( + ); + switch ("test") + { + case "one": + return MethodCall( + ); + case "two": + break; + } + (int, int) tuple = (1, 4); + switch (tuple) + { + case (1, 4): + MethodCall(); + } + + // Exceptions + try + { + throw new Exception("fail"); + } + catch (Exception) + { + } + finally + { + } + try { } catch (Exception) {} + try + { + throw GetException( + ); + } + catch (Exception) { } + + // Others + lock (this) + { + } + unsafe + { + byte[] bytes = [1, 2, 3]; + fixed (byte* pointerToFirst = bytes) + { + } + } + + return "ChangeMe"; + } + + public void Dispose() {} + + string MethodCall(int a = 0, int b = 0) => "test"; + Exception GetException() => new Exception("fail"); + int[] Numbers() => [0, 1]; +} diff --git a/t/t4018/csharp-method-special-chars b/t/t4018/csharp-method-special-chars new file mode 100644 index 00000000000..ec3565fd000 --- /dev/null +++ b/t/t4018/csharp-method-special-chars @@ -0,0 +1,11 @@ +class @Some_Type +{ + @Some_Type @Method_With_Underscore(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + return new @Some_Type(); + } +} diff --git a/t/t4018/csharp-method-with-spacing b/t/t4018/csharp-method-with-spacing new file mode 100644 index 00000000000..4143929a711 --- /dev/null +++ b/t/t4018/csharp-method-with-spacing @@ -0,0 +1,10 @@ +class Example +{ + string Method ( int RIGHT ) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-property b/t/t4018/csharp-property new file mode 100644 index 00000000000..1792117f964 --- /dev/null +++ b/t/t4018/csharp-property @@ -0,0 +1,11 @@ +class Example +{ + public bool RIGHT + { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/userdiff.c b/userdiff.c index e399543823b..a6a26a97e4c 100644 --- a/userdiff.c +++ b/userdiff.c @@ -89,12 +89,18 @@ PATTERNS("cpp", "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), PATTERNS("csharp", - /* Keywords */ - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" + /* + * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, + * making them look like methods. + */ + "!(^|[ \t]+)(do|while|for|foreach|from|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" + /* Methods/constructors: + * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), + * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. + */ + "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" /* Properties */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + "^[ \t]*((([][[:alnum:]@_<>.,]+)[ \t]+[][[:alnum:]@_]*)+[^=:;,()]*)$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 -- gitgitgadget ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v2] userdiff: better method/property matching for C# 2024-02-25 17:33 [PATCH] userdiff: better method/property matching for C# Steven Jeuris via GitGitGadget @ 2024-03-06 20:21 ` Steven Jeuris via GitGitGadget 2024-03-07 2:11 ` Junio C Hamano ` (4 more replies) 0 siblings, 5 replies; 14+ messages in thread From: Steven Jeuris via GitGitGadget @ 2024-03-06 20:21 UTC (permalink / raw) To: git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Steven Jeuris, Steven Jeuris From: Steven Jeuris <steven.jeuris@3shape.com> - Support multi-line methods by not requiring closing parenthesis. - Support multiple generics (comma was missing before). - Add missing `foreach`, `lock` and `fixed` keywords to skip over. - Remove `instanceof` keyword, which isn't C#. - Also detect non-method keywords not positioned at the start of a line. - Added tests; none existed before. The overall strategy is to focus more on what isn't expected for method/property definitions, instead of what is, but is fully optional. Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> --- userdiff: better method/property matching for C# Change since v1: I removed "from" from the list of keywords to skip. First, I considered adding "await", but I discovered both "await" and "from" are "contextual keywords", which unlike the other keywords currently listed, aren't reserved, and can thus cause false negatives. I.e., it is valid to have a method named "await" or "from". In edge cases, this may lead to false positives, but a different exclusion rule will need to be added to handle these. Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1682%2FWhathecode%2Fmaster-v2 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1682/Whathecode/master-v2 Pull-Request: https://github.com/git/git/pull/1682 Range-diff vs v1: 1: cdd8dd4d871 ! 1: 00315519014 userdiff: better method/property matching for C# @@ Commit message - Support multi-line methods by not requiring closing parenthesis. - Support multiple generics (comma was missing before). - - Add missing `foreach`, `from`, `lock` and `fixed` keywords to skip over. + - Add missing `foreach`, `lock` and `fixed` keywords to skip over. - Remove `instanceof` keyword, which isn't C#. - Also detect non-method keywords not positioned at the start of a line. - Added tests; none existed before. @@ t/t4018/csharp-method-skip-body (new) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; -+ var test = -+ from num in Numbers( -+ ) select num; + + // Control + if (false) @@ userdiff.c: PATTERNS("cpp", + * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, + * making them look like methods. + */ -+ "!(^|[ \t]+)(do|while|for|foreach|from|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" ++ "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" + /* Methods/constructors: + * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), + * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. t/t4018/csharp-method | 10 +++ t/t4018/csharp-method-explicit | 12 +++ t/t4018/csharp-method-generics | 11 +++ t/t4018/csharp-method-modifiers | 13 ++++ t/t4018/csharp-method-multiline | 10 +++ t/t4018/csharp-method-params | 10 +++ t/t4018/csharp-method-skip-body | 112 ++++++++++++++++++++++++++++ t/t4018/csharp-method-special-chars | 11 +++ t/t4018/csharp-method-with-spacing | 10 +++ t/t4018/csharp-property | 11 +++ userdiff.c | 16 ++-- 11 files changed, 221 insertions(+), 5 deletions(-) create mode 100644 t/t4018/csharp-method create mode 100644 t/t4018/csharp-method-explicit create mode 100644 t/t4018/csharp-method-generics create mode 100644 t/t4018/csharp-method-modifiers create mode 100644 t/t4018/csharp-method-multiline create mode 100644 t/t4018/csharp-method-params create mode 100644 t/t4018/csharp-method-skip-body create mode 100644 t/t4018/csharp-method-special-chars create mode 100644 t/t4018/csharp-method-with-spacing create mode 100644 t/t4018/csharp-property diff --git a/t/t4018/csharp-method b/t/t4018/csharp-method new file mode 100644 index 00000000000..85ff0cb8b5b --- /dev/null +++ b/t/t4018/csharp-method @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-explicit b/t/t4018/csharp-method-explicit new file mode 100644 index 00000000000..083aa094ce2 --- /dev/null +++ b/t/t4018/csharp-method-explicit @@ -0,0 +1,12 @@ +using System; + +class Example : IDisposable +{ + void IDisposable.Dispose() // RIGHT + { + // Filler + // Filler + + // ChangeMe + } +} diff --git a/t/t4018/csharp-method-generics b/t/t4018/csharp-method-generics new file mode 100644 index 00000000000..c472d4a18df --- /dev/null +++ b/t/t4018/csharp-method-generics @@ -0,0 +1,11 @@ +class Example<T1, T2> +{ + Example<int, string> Method<TA, TB>(TA RIGHT, TB b) + { + // Filler + // Filler + + // ChangeMe + return null; + } +} diff --git a/t/t4018/csharp-method-modifiers b/t/t4018/csharp-method-modifiers new file mode 100644 index 00000000000..f1c008a4749 --- /dev/null +++ b/t/t4018/csharp-method-modifiers @@ -0,0 +1,13 @@ +using System.Threading.Tasks; + +class Example +{ + static internal async Task Method(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + await Task.Delay(1); + } +} diff --git a/t/t4018/csharp-method-multiline b/t/t4018/csharp-method-multiline new file mode 100644 index 00000000000..0a20b0cb49c --- /dev/null +++ b/t/t4018/csharp-method-multiline @@ -0,0 +1,10 @@ +class Example +{ + string Method_RIGHT( + int a, + int b, + int c) + { + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-params b/t/t4018/csharp-method-params new file mode 100644 index 00000000000..18598449008 --- /dev/null +++ b/t/t4018/csharp-method-params @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT, int b, int c = 42) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-skip-body b/t/t4018/csharp-method-skip-body new file mode 100644 index 00000000000..c8c9621634d --- /dev/null +++ b/t/t4018/csharp-method-skip-body @@ -0,0 +1,112 @@ +using System.Linq; +using System; + +class Example : IDisposable +{ + string Method(int RIGHT) + { + // Method calls + MethodCall(); + MethodCall(1, 2); + MethodCall( + 1, 2); + + // Assignments + var constantAssignment = "test"; + var methodAssignment = MethodCall(); + var multiLineMethodAssignment = MethodCall( + ); + + // Initializations/disposal + new Example(); + new Example( + ); + new Example { }; + using (this) + { + } + var def = + this is default( + Example); + + // Iteration statements + do { } while (true); + do MethodCall( + ); while (true); + while (true); + while (true) { + break; + } + for (int i = 0; i < 10; ++i) + { + } + foreach (int i in Enumerable.Range(0, 10)) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + + // Control + if (false) + { + return "out"; + } + else { } + if (true) MethodCall( + ); + else MethodCall( + ); + switch ("test") + { + case "one": + return MethodCall( + ); + case "two": + break; + } + (int, int) tuple = (1, 4); + switch (tuple) + { + case (1, 4): + MethodCall(); + } + + // Exceptions + try + { + throw new Exception("fail"); + } + catch (Exception) + { + } + finally + { + } + try { } catch (Exception) {} + try + { + throw GetException( + ); + } + catch (Exception) { } + + // Others + lock (this) + { + } + unsafe + { + byte[] bytes = [1, 2, 3]; + fixed (byte* pointerToFirst = bytes) + { + } + } + + return "ChangeMe"; + } + + public void Dispose() {} + + string MethodCall(int a = 0, int b = 0) => "test"; + Exception GetException() => new Exception("fail"); + int[] Numbers() => [0, 1]; +} diff --git a/t/t4018/csharp-method-special-chars b/t/t4018/csharp-method-special-chars new file mode 100644 index 00000000000..ec3565fd000 --- /dev/null +++ b/t/t4018/csharp-method-special-chars @@ -0,0 +1,11 @@ +class @Some_Type +{ + @Some_Type @Method_With_Underscore(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + return new @Some_Type(); + } +} diff --git a/t/t4018/csharp-method-with-spacing b/t/t4018/csharp-method-with-spacing new file mode 100644 index 00000000000..4143929a711 --- /dev/null +++ b/t/t4018/csharp-method-with-spacing @@ -0,0 +1,10 @@ +class Example +{ + string Method ( int RIGHT ) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-property b/t/t4018/csharp-property new file mode 100644 index 00000000000..1792117f964 --- /dev/null +++ b/t/t4018/csharp-property @@ -0,0 +1,11 @@ +class Example +{ + public bool RIGHT + { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/userdiff.c b/userdiff.c index e399543823b..5a9e8a0ef55 100644 --- a/userdiff.c +++ b/userdiff.c @@ -89,12 +89,18 @@ PATTERNS("cpp", "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), PATTERNS("csharp", - /* Keywords */ - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" + /* + * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, + * making them look like methods. + */ + "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" + /* Methods/constructors: + * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), + * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. + */ + "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" /* Properties */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + "^[ \t]*((([][[:alnum:]@_<>.,]+)[ \t]+[][[:alnum:]@_]*)+[^=:;,()]*)$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 -- gitgitgadget ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v2] userdiff: better method/property matching for C# 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget @ 2024-03-07 2:11 ` Junio C Hamano 2024-03-16 18:14 ` Linus Arver ` (3 subsequent siblings) 4 siblings, 0 replies; 14+ messages in thread From: Junio C Hamano @ 2024-03-07 2:11 UTC (permalink / raw) To: Steven Jeuris via GitGitGadget Cc: git, Ævar Arnfjörð Bjarmason, Jeff King, Steven Jeuris, Steven Jeuris "Steven Jeuris via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Steven Jeuris <steven.jeuris@3shape.com> > > - Support multi-line methods by not requiring closing parenthesis. > - Support multiple generics (comma was missing before). > - Add missing `foreach`, `lock` and `fixed` keywords to skip over. > - Remove `instanceof` keyword, which isn't C#. > - Also detect non-method keywords not positioned at the start of a line. > - Added tests; none existed before. > > The overall strategy is to focus more on what isn't expected for > method/property definitions, instead of what is, but is fully optional. > Roughly in other words, we assume that any file the end user throws at us is a well formed program, so instead of enumerating all valid keywords and limit the match exactly to them, use a pattern that would match valid keywords (both currently known ones, and anything the language might add in the future that we do not know about), to match with anything syntactically plausible to be a definition? It does make sense to start by assuming that the end user data is a valid C# program. > Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> > --- > diff --git a/userdiff.c b/userdiff.c > index e399543823b..5a9e8a0ef55 100644 > --- a/userdiff.c > +++ b/userdiff.c > @@ -89,12 +89,18 @@ PATTERNS("cpp", > "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" > "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), > PATTERNS("csharp", > - /* Keywords */ > - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" > - /* Methods and constructors */ > - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" > + /* > + * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, > + * making them look like methods. > + */ Overly long comments (I'll wrap them while queuing). > + "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" > + /* Methods/constructors: > + * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), > + * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. > + */ > + "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" > /* Properties */ > - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" > + "^[ \t]*((([][[:alnum:]@_<>.,]+)[ \t]+[][[:alnum:]@_]*)+[^=:;,()]*)$\n" > /* Type definitions */ > "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" > /* Namespace */ > > base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v2] userdiff: better method/property matching for C# 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget 2024-03-07 2:11 ` Junio C Hamano @ 2024-03-16 18:14 ` Linus Arver 2024-03-26 21:38 ` Junio C Hamano ` (2 subsequent siblings) 4 siblings, 0 replies; 14+ messages in thread From: Linus Arver @ 2024-03-16 18:14 UTC (permalink / raw) To: Steven Jeuris via GitGitGadget, git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Steven Jeuris, Steven Jeuris "Steven Jeuris via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Steven Jeuris <steven.jeuris@3shape.com> > > - Support multi-line methods by not requiring closing parenthesis. > - Support multiple generics (comma was missing before). > - Add missing `foreach`, `lock` and `fixed` keywords to skip over. > - Remove `instanceof` keyword, which isn't C#. > - Also detect non-method keywords not positioned at the start of a line. > - Added tests; none existed before. > > The overall strategy is to focus more on what isn't expected for > method/property definitions, instead of what is, but is fully optional. It would make it easier to review if you broke this patch up into smaller chunks. For example, you could do it like this: (1) add tests to capture existing behavior (2a) change method/property matching behavior, one thing at a time (2b) adjust or add tests along the way, to account for the change in (2a) (3) repeat step (2) as needed for each of the bullet points you've outlined already Not being familiar with this area, I don't know if (2b) is easily doable. But if it is, I think the overall effort of breaking this patch down would be worthwhile. Of course, nothing stops other reviewers (who are more familiar with this area) from chiming in and LGTM'ing this patch as is. Thanks. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v2] userdiff: better method/property matching for C# 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget 2024-03-07 2:11 ` Junio C Hamano 2024-03-16 18:14 ` Linus Arver @ 2024-03-26 21:38 ` Junio C Hamano 2024-03-27 8:40 ` Jeff King 2024-03-27 7:30 ` Johannes Sixt 2024-03-28 8:07 ` [PATCH v3] " Steven Jeuris via GitGitGadget 4 siblings, 1 reply; 14+ messages in thread From: Junio C Hamano @ 2024-03-26 21:38 UTC (permalink / raw) To: git Cc: Steven Jeuris via GitGitGadget, Ævar Arnfjörð Bjarmason, Jeff King, Steven Jeuris, Steven Jeuris "Steven Jeuris via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Steven Jeuris <steven.jeuris@3shape.com> > > - Support multi-line methods by not requiring closing parenthesis. > - Support multiple generics (comma was missing before). > - Add missing `foreach`, `lock` and `fixed` keywords to skip over. > - Remove `instanceof` keyword, which isn't C#. > - Also detect non-method keywords not positioned at the start of a line. > - Added tests; none existed before. > > The overall strategy is to focus more on what isn't expected for > method/property definitions, instead of what is, but is fully optional. > > Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> > --- > userdiff: better method/property matching for C# > > Change since v1: I removed "from" from the list of keywords to skip. > First, I considered adding "await", but I discovered both "await" and > "from" are "contextual keywords", which unlike the other keywords > currently listed, aren't reserved, and can thus cause false negatives. > I.e., it is valid to have a method named "await" or "from". In edge > cases, this may lead to false positives, but a different exclusion rule > will need to be added to handle these. It seems that this has seen no reviews. I somehow find it doubtful that Ævar or Peff would be writing too much C# to be familiar with the language to judge the quality of the patch, but can somebody with C# background (I hear that its most common use is for developing Windows applications etc. there) chip in? Thanks. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v2] userdiff: better method/property matching for C# 2024-03-26 21:38 ` Junio C Hamano @ 2024-03-27 8:40 ` Jeff King 0 siblings, 0 replies; 14+ messages in thread From: Jeff King @ 2024-03-27 8:40 UTC (permalink / raw) To: Junio C Hamano Cc: git, Steven Jeuris via GitGitGadget, Ævar Arnfjörð Bjarmason, Steven Jeuris, Steven Jeuris On Tue, Mar 26, 2024 at 02:38:41PM -0700, Junio C Hamano wrote: > > userdiff: better method/property matching for C# > > > > Change since v1: I removed "from" from the list of keywords to skip. > > First, I considered adding "await", but I discovered both "await" and > > "from" are "contextual keywords", which unlike the other keywords > > currently listed, aren't reserved, and can thus cause false negatives. > > I.e., it is valid to have a method named "await" or "from". In edge > > cases, this may lead to false positives, but a different exclusion rule > > will need to be added to handle these. > > It seems that this has seen no reviews. I somehow find it doubtful > that Ævar or Peff would be writing too much C# to be familiar with > the language to judge the quality of the patch, but can somebody > with C# background (I hear that its most common use is for > developing Windows applications etc. there) chip in? Yeah, sorry, I have never written a line of C# in my life, so I have been dutifully ignoring this series. :) -Peff ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v2] userdiff: better method/property matching for C# 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget ` (2 preceding siblings ...) 2024-03-26 21:38 ` Junio C Hamano @ 2024-03-27 7:30 ` Johannes Sixt 2024-03-28 8:07 ` [PATCH v3] " Steven Jeuris via GitGitGadget 4 siblings, 0 replies; 14+ messages in thread From: Johannes Sixt @ 2024-03-27 7:30 UTC (permalink / raw) To: Steven Jeuris via GitGitGadget Cc: Ævar Arnfjörð Bjarmason, Jeff King, Steven Jeuris, Steven Jeuris, git Am 06.03.24 um 21:21 schrieb Steven Jeuris via GitGitGadget: > From: Steven Jeuris <steven.jeuris@3shape.com> > > - Support multi-line methods by not requiring closing parenthesis. > - Support multiple generics (comma was missing before). > - Add missing `foreach`, `lock` and `fixed` keywords to skip over. > - Remove `instanceof` keyword, which isn't C#. > - Also detect non-method keywords not positioned at the start of a line. > - Added tests; none existed before. > > The overall strategy is to focus more on what isn't expected for > method/property definitions, instead of what is, but is fully optional. > > Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> > --- I like the comprehensive test cases that are added. However, I found one major point in the patterns that must be considered. See below. > userdiff: better method/property matching for C# > > Change since v1: I removed "from" from the list of keywords to skip. > First, I considered adding "await", but I discovered both "await" and > "from" are "contextual keywords", which unlike the other keywords > currently listed, aren't reserved, and can thus cause false negatives. > I.e., it is valid to have a method named "await" or "from". In edge > cases, this may lead to false positives, but a different exclusion rule > will need to be added to handle these. So, this patch makes the choice to have some false positives instead of some false negatives. I have no experience in C#, so I trust your judgement that users are better served with this choice rather than the opposite. > > Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1682%2FWhathecode%2Fmaster-v2 > Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1682/Whathecode/master-v2 > Pull-Request: https://github.com/git/git/pull/1682 > > Range-diff vs v1: > > 1: cdd8dd4d871 ! 1: 00315519014 userdiff: better method/property matching for C# > @@ Commit message > > - Support multi-line methods by not requiring closing parenthesis. > - Support multiple generics (comma was missing before). > - - Add missing `foreach`, `from`, `lock` and `fixed` keywords to skip over. > + - Add missing `foreach`, `lock` and `fixed` keywords to skip over. > - Remove `instanceof` keyword, which isn't C#. > - Also detect non-method keywords not positioned at the start of a line. > - Added tests; none existed before. > @@ t/t4018/csharp-method-skip-body (new) > + { > + } > + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; > -+ var test = > -+ from num in Numbers( > -+ ) select num; > + > + // Control > + if (false) > @@ userdiff.c: PATTERNS("cpp", > + * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, > + * making them look like methods. > + */ > -+ "!(^|[ \t]+)(do|while|for|foreach|from|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" > ++ "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" > + /* Methods/constructors: > + * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), > + * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. > > > t/t4018/csharp-method | 10 +++ > t/t4018/csharp-method-explicit | 12 +++ > t/t4018/csharp-method-generics | 11 +++ > t/t4018/csharp-method-modifiers | 13 ++++ > t/t4018/csharp-method-multiline | 10 +++ > t/t4018/csharp-method-params | 10 +++ > t/t4018/csharp-method-skip-body | 112 ++++++++++++++++++++++++++++ > t/t4018/csharp-method-special-chars | 11 +++ > t/t4018/csharp-method-with-spacing | 10 +++ > t/t4018/csharp-property | 11 +++ > userdiff.c | 16 ++-- > 11 files changed, 221 insertions(+), 5 deletions(-) > create mode 100644 t/t4018/csharp-method > create mode 100644 t/t4018/csharp-method-explicit > create mode 100644 t/t4018/csharp-method-generics > create mode 100644 t/t4018/csharp-method-modifiers > create mode 100644 t/t4018/csharp-method-multiline > create mode 100644 t/t4018/csharp-method-params > create mode 100644 t/t4018/csharp-method-skip-body > create mode 100644 t/t4018/csharp-method-special-chars > create mode 100644 t/t4018/csharp-method-with-spacing > create mode 100644 t/t4018/csharp-property > > diff --git a/t/t4018/csharp-method b/t/t4018/csharp-method > new file mode 100644 > index 00000000000..85ff0cb8b5b > --- /dev/null > +++ b/t/t4018/csharp-method > @@ -0,0 +1,10 @@ > +class Example > +{ > + string Method(int RIGHT) > + { > + // Filler > + // Filler > + > + return "ChangeMe"; > + } > +} > diff --git a/t/t4018/csharp-method-explicit b/t/t4018/csharp-method-explicit > new file mode 100644 > index 00000000000..083aa094ce2 > --- /dev/null > +++ b/t/t4018/csharp-method-explicit > @@ -0,0 +1,12 @@ > +using System; > + > +class Example : IDisposable > +{ > + void IDisposable.Dispose() // RIGHT > + { > + // Filler > + // Filler > + > + // ChangeMe > + } > +} > diff --git a/t/t4018/csharp-method-generics b/t/t4018/csharp-method-generics > new file mode 100644 > index 00000000000..c472d4a18df > --- /dev/null > +++ b/t/t4018/csharp-method-generics > @@ -0,0 +1,11 @@ > +class Example<T1, T2> > +{ > + Example<int, string> Method<TA, TB>(TA RIGHT, TB b) > + { > + // Filler > + // Filler > + > + // ChangeMe > + return null; > + } > +} > diff --git a/t/t4018/csharp-method-modifiers b/t/t4018/csharp-method-modifiers > new file mode 100644 > index 00000000000..f1c008a4749 > --- /dev/null > +++ b/t/t4018/csharp-method-modifiers > @@ -0,0 +1,13 @@ > +using System.Threading.Tasks; > + > +class Example > +{ > + static internal async Task Method(int RIGHT) > + { > + // Filler > + // Filler > + > + // ChangeMe > + await Task.Delay(1); > + } > +} > diff --git a/t/t4018/csharp-method-multiline b/t/t4018/csharp-method-multiline > new file mode 100644 > index 00000000000..0a20b0cb49c > --- /dev/null > +++ b/t/t4018/csharp-method-multiline > @@ -0,0 +1,10 @@ > +class Example > +{ > + string Method_RIGHT( > + int a, > + int b, > + int c) > + { > + return "ChangeMe"; > + } > +} > diff --git a/t/t4018/csharp-method-params b/t/t4018/csharp-method-params > new file mode 100644 > index 00000000000..18598449008 > --- /dev/null > +++ b/t/t4018/csharp-method-params > @@ -0,0 +1,10 @@ > +class Example > +{ > + string Method(int RIGHT, int b, int c = 42) > + { > + // Filler > + // Filler > + > + return "ChangeMe"; > + } > +} > diff --git a/t/t4018/csharp-method-skip-body b/t/t4018/csharp-method-skip-body > new file mode 100644 > index 00000000000..c8c9621634d > --- /dev/null > +++ b/t/t4018/csharp-method-skip-body > @@ -0,0 +1,112 @@ > +using System.Linq; > +using System; > + > +class Example : IDisposable > +{ > + string Method(int RIGHT) > + { > + // Method calls > + MethodCall(); > + MethodCall(1, 2); > + MethodCall( > + 1, 2); > + > + // Assignments > + var constantAssignment = "test"; > + var methodAssignment = MethodCall(); > + var multiLineMethodAssignment = MethodCall( > + ); > + > + // Initializations/disposal > + new Example(); > + new Example( > + ); > + new Example { }; > + using (this) > + { > + } > + var def = > + this is default( > + Example); > + > + // Iteration statements > + do { } while (true); > + do MethodCall( > + ); while (true); > + while (true); > + while (true) { > + break; > + } > + for (int i = 0; i < 10; ++i) > + { > + } > + foreach (int i in Enumerable.Range(0, 10)) > + { > + } > + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; > + > + // Control > + if (false) > + { > + return "out"; > + } > + else { } > + if (true) MethodCall( > + ); > + else MethodCall( > + ); > + switch ("test") > + { > + case "one": > + return MethodCall( > + ); > + case "two": > + break; > + } > + (int, int) tuple = (1, 4); > + switch (tuple) > + { > + case (1, 4): > + MethodCall(); > + } > + > + // Exceptions > + try > + { > + throw new Exception("fail"); > + } > + catch (Exception) > + { > + } > + finally > + { > + } > + try { } catch (Exception) {} > + try > + { > + throw GetException( > + ); > + } > + catch (Exception) { } > + > + // Others > + lock (this) > + { > + } > + unsafe > + { > + byte[] bytes = [1, 2, 3]; > + fixed (byte* pointerToFirst = bytes) > + { > + } > + } > + > + return "ChangeMe"; > + } Nice! This tests the exlusion patterns. > + > + public void Dispose() {} > + > + string MethodCall(int a = 0, int b = 0) => "test"; > + Exception GetException() => new Exception("fail"); > + int[] Numbers() => [0, 1]; Are these also patterns that should not be picked up? If so, should the "ChangeMe" not be later in the file? As written, they would never be candidates because there is no diff hunk later in the file. > +} > diff --git a/t/t4018/csharp-method-special-chars b/t/t4018/csharp-method-special-chars > new file mode 100644 > index 00000000000..ec3565fd000 > --- /dev/null > +++ b/t/t4018/csharp-method-special-chars > @@ -0,0 +1,11 @@ > +class @Some_Type > +{ > + @Some_Type @Method_With_Underscore(int RIGHT) > + { > + // Filler > + // Filler > + > + // ChangeMe > + return new @Some_Type(); > + } > +} > diff --git a/t/t4018/csharp-method-with-spacing b/t/t4018/csharp-method-with-spacing > new file mode 100644 > index 00000000000..4143929a711 > --- /dev/null > +++ b/t/t4018/csharp-method-with-spacing > @@ -0,0 +1,10 @@ > +class Example > +{ > + string Method ( int RIGHT ) > + { > + // Filler > + // Filler > + > + return "ChangeMe"; > + } > +} > diff --git a/t/t4018/csharp-property b/t/t4018/csharp-property > new file mode 100644 > index 00000000000..1792117f964 > --- /dev/null > +++ b/t/t4018/csharp-property > @@ -0,0 +1,11 @@ > +class Example > +{ > + public bool RIGHT > + { > + get { return true; } > + set > + { > + // ChangeMe > + } > + } > +} > diff --git a/userdiff.c b/userdiff.c > index e399543823b..5a9e8a0ef55 100644 > --- a/userdiff.c > +++ b/userdiff.c > @@ -89,12 +89,18 @@ PATTERNS("cpp", > "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" > "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), > PATTERNS("csharp", > - /* Keywords */ > - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" > - /* Methods and constructors */ > - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" > + /* > + * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, > + * making them look like methods. > + */ > + "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" > + /* Methods/constructors: > + * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), > + * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. > + */ I would have appreciated if the comment lines were not so long. We already have many long lines of code in this file, but comment lines generally stay below the 80 character limit. Also watch out for the multi-line comment style (put the opening /* on a line by itself, like you did in the comment above). > + "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" I am not 100% sure about the meaning of this expression. But would it not match this line, which looks like part of a multi-line expression: + func(x) and produce a false positive? [Testing myself...] Yes it does. I do not know if it is worth fixing as I do not do C#. But if this were patterns for C or C++, I would consider it a major problem. > /* Properties */ > - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" > + "^[ \t]*((([][[:alnum:]@_<>.,]+)[ \t]+[][[:alnum:]@_]*)+[^=:;,()]*)$\n" OK. Since the second of the two words is optional and almost the same as the first, I have a gut feeling that the pattern can be expensive to match. But since I do not have data to back up my suspicion, let's leave it as it is. > /* Type definitions */ > "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" > /* Namespace */> > base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 -- Hannes ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v3] userdiff: better method/property matching for C# 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget ` (3 preceding siblings ...) 2024-03-27 7:30 ` Johannes Sixt @ 2024-03-28 8:07 ` Steven Jeuris via GitGitGadget 2024-03-28 19:14 ` [PATCH v4] " Steven Jeuris via GitGitGadget 4 siblings, 1 reply; 14+ messages in thread From: Steven Jeuris via GitGitGadget @ 2024-03-28 8:07 UTC (permalink / raw) To: git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Johannes Sixt, Steven Jeuris, Steven Jeuris From: Steven Jeuris <steven.jeuris@3shape.com> - Support multi-line methods by not requiring closing parenthesis. - Support multiple generics (comma was missing before). - Add missing `foreach`, `lock` and `fixed` keywords to skip over. - Remove `instanceof` keyword, which isn't C#. - Also detect non-method keywords not positioned at the start of a line. - Added tests; none existed before. The overall strategy is to focus more on what isn't expected for method/property definitions, instead of what is, but is fully optional. Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> --- userdiff: better method/property matching for C# Change since v1: I removed "from" from the list of keywords to skip. First, I considered adding "await", but I discovered both "await" and "from" are "contextual keywords", which unlike the other keywords currently listed, aren't reserved, and can thus cause false negatives. I.e., it is valid to have a method named "await" or "from". In edge cases, this may lead to false positives, but a different exclusion rule will need to be added to handle these. Change since v2: * Corrected comment formatting. * Added csharp-property-skip-body test. * Added comments in test code to explain sections not part of the test. * Elaborated regex comments. * Excluded math operators (+-*/%) in method pattern to not catch multiline operations, and tested for this in the -skip-body tests. Catching "-" only worked when it was defined at the end of the exclusion block for some reason. The regex matcher seems quite bugged. Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1682%2FWhathecode%2Fmaster-v3 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1682/Whathecode/master-v3 Pull-Request: https://github.com/git/git/pull/1682 Range-diff vs v2: 1: 00315519014 ! 1: 9b6c76f5342 userdiff: better method/property matching for C# @@ t/t4018/csharp-method (new) @@ +class Example +{ -+ string Method(int RIGHT) -+ { -+ // Filler -+ // Filler -+ -+ return "ChangeMe"; -+ } ++ string Method(int RIGHT) ++ { ++ // Filler ++ // Filler ++ ++ return "ChangeMe"; ++ } +} ## t/t4018/csharp-method-explicit (new) ## @@ t/t4018/csharp-method-explicit (new) + +class Example : IDisposable +{ -+ void IDisposable.Dispose() // RIGHT -+ { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ } ++ void IDisposable.Dispose() // RIGHT ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ } +} ## t/t4018/csharp-method-generics (new) ## @@ +class Example<T1, T2> +{ -+ Example<int, string> Method<TA, TB>(TA RIGHT, TB b) -+ { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ return null; -+ } ++ Example<int, string> Method<TA, TB>(TA RIGHT, TB b) ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ return null; ++ } +} ## t/t4018/csharp-method-modifiers (new) ## @@ t/t4018/csharp-method-modifiers (new) + +class Example +{ -+ static internal async Task Method(int RIGHT) -+ { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ await Task.Delay(1); -+ } ++ static internal async Task Method(int RIGHT) ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ await Task.Delay(1); ++ } +} ## t/t4018/csharp-method-multiline (new) ## @@ +class Example +{ -+ string Method_RIGHT( -+ int a, -+ int b, -+ int c) -+ { -+ return "ChangeMe"; -+ } ++ string Method_RIGHT( ++ int a, ++ int b, ++ int c) ++ { ++ return "ChangeMe"; ++ } +} ## t/t4018/csharp-method-params (new) ## @@ +class Example +{ -+ string Method(int RIGHT, int b, int c = 42) -+ { -+ // Filler -+ // Filler -+ -+ return "ChangeMe"; -+ } ++ string Method(int RIGHT, int b, int c = 42) ++ { ++ // Filler ++ // Filler ++ ++ return "ChangeMe"; ++ } +} ## t/t4018/csharp-method-skip-body (new) ## @@ t/t4018/csharp-method-skip-body (new) + +class Example : IDisposable +{ -+ string Method(int RIGHT) -+ { -+ // Method calls -+ MethodCall(); -+ MethodCall(1, 2); -+ MethodCall( -+ 1, 2); -+ -+ // Assignments -+ var constantAssignment = "test"; -+ var methodAssignment = MethodCall(); -+ var multiLineMethodAssignment = MethodCall( -+ ); -+ -+ // Initializations/disposal -+ new Example(); -+ new Example( -+ ); -+ new Example { }; -+ using (this) -+ { -+ } -+ var def = -+ this is default( -+ Example); -+ -+ // Iteration statements -+ do { } while (true); -+ do MethodCall( -+ ); while (true); -+ while (true); -+ while (true) { -+ break; -+ } -+ for (int i = 0; i < 10; ++i) -+ { -+ } -+ foreach (int i in Enumerable.Range(0, 10)) -+ { -+ } -+ int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; -+ -+ // Control -+ if (false) -+ { -+ return "out"; -+ } -+ else { } -+ if (true) MethodCall( -+ ); -+ else MethodCall( -+ ); -+ switch ("test") -+ { -+ case "one": -+ return MethodCall( -+ ); -+ case "two": -+ break; -+ } -+ (int, int) tuple = (1, 4); -+ switch (tuple) -+ { -+ case (1, 4): -+ MethodCall(); -+ } -+ -+ // Exceptions -+ try -+ { -+ throw new Exception("fail"); -+ } -+ catch (Exception) -+ { -+ } -+ finally -+ { -+ } -+ try { } catch (Exception) {} -+ try -+ { -+ throw GetException( -+ ); -+ } -+ catch (Exception) { } -+ -+ // Others -+ lock (this) -+ { -+ } -+ unsafe -+ { -+ byte[] bytes = [1, 2, 3]; -+ fixed (byte* pointerToFirst = bytes) -+ { -+ } -+ } -+ -+ return "ChangeMe"; -+ } -+ -+ public void Dispose() {} -+ -+ string MethodCall(int a = 0, int b = 0) => "test"; -+ Exception GetException() => new Exception("fail"); -+ int[] Numbers() => [0, 1]; ++ string Method(int RIGHT) ++ { ++ // Method calls ++ MethodCall(); ++ MethodCall(1, 2); ++ MethodCall( ++ 1, 2); ++ ++ // Assignments ++ var constantAssignment = "test"; ++ var methodAssignment = MethodCall(); ++ var multiLineMethodAssignment = MethodCall( ++ ); ++ var multiLine = "first" ++ + MethodCall() ++ - MethodCall() ++ + MethodCall(); ++ ++ // Initializations/disposal ++ new Example(); ++ new Example( ++ ); ++ new Example { }; ++ using (this) ++ { ++ } ++ var def = ++ this is default( ++ Example); ++ ++ // Iteration statements ++ do { } while (true); ++ do MethodCall( ++ ); while (true); ++ while (true); ++ while (true) { ++ break; ++ } ++ for (int i = 0; i < 10; ++i) ++ { ++ } ++ foreach (int i in Enumerable.Range(0, 10)) ++ { ++ } ++ int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; ++ ++ // Control ++ if (false) ++ { ++ return "out"; ++ } ++ else { } ++ if (true) MethodCall( ++ ); ++ else MethodCall( ++ ); ++ switch ("test") ++ { ++ case "one": ++ return MethodCall( ++ ); ++ case "two": ++ break; ++ } ++ (int, int) tuple = (1, 4); ++ switch (tuple) ++ { ++ case (1, 4): ++ MethodCall(); ++ } ++ ++ // Exceptions ++ try ++ { ++ throw new Exception("fail"); ++ } ++ catch (Exception) ++ { ++ } ++ finally ++ { ++ } ++ try { } catch (Exception) {} ++ try ++ { ++ throw GetException( ++ ); ++ } ++ catch (Exception) { } ++ ++ // Others ++ lock (this) ++ { ++ } ++ unsafe ++ { ++ byte[] bytes = [1, 2, 3]; ++ fixed (byte* pointerToFirst = bytes) ++ { ++ } ++ } ++ ++ return "ChangeMe"; ++ } ++ ++ // Supporting methods to make the tested method above valid C#. ++ public void Dispose() {} ++ string MethodCall(int a = 0, int b = 0) => "test"; ++ Exception GetException() => new Exception("fail"); ++ int[] Numbers() => [0, 1]; +} ## t/t4018/csharp-method-special-chars (new) ## @@ +class @Some_Type +{ -+ @Some_Type @Method_With_Underscore(int RIGHT) -+ { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ return new @Some_Type(); -+ } ++ @Some_Type @Method_With_Underscore(int RIGHT) ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ return new @Some_Type(); ++ } +} ## t/t4018/csharp-method-with-spacing (new) ## @@ t/t4018/csharp-method-with-spacing (new) +{ + string Method ( int RIGHT ) + { -+ // Filler -+ // Filler ++ // Filler ++ // Filler + -+ return "ChangeMe"; -+ } ++ return "ChangeMe"; ++ } +} ## t/t4018/csharp-property (new) ## @@ +class Example +{ -+ public bool RIGHT ++ public bool RIGHT + { + get { return true; } + set @@ t/t4018/csharp-property (new) + // ChangeMe + } + } ++} + + ## t/t4018/csharp-property-skip-body (new) ## +@@ ++using System.Linq; ++using System; ++ ++class Example : IDisposable ++{ ++ public string RIGHT ++ { ++ get ++ { ++ // Method calls ++ MethodCall(); ++ MethodCall(1, 2); ++ MethodCall( ++ 1, 2); ++ ++ // Assignments ++ var constantAssignment = "test"; ++ var methodAssignment = MethodCall(); ++ var multiLineMethodAssignment = MethodCall( ++ ); ++ var multiLine = "first" ++ + MethodCall() ++ - MethodCall() ++ + MethodCall(); ++ ++ // Initializations/disposal ++ new Example(); ++ new Example( ++ ); ++ new Example { }; ++ using (this) ++ { ++ } ++ var def = ++ this is default( ++ Example); ++ ++ // Iteration statements ++ do { } while (true); ++ do MethodCall( ++ ); while (true); ++ while (true); ++ while (true) { ++ break; ++ } ++ for (int i = 0; i < 10; ++i) ++ { ++ } ++ foreach (int i in Enumerable.Range(0, 10)) ++ { ++ } ++ int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; ++ ++ // Control ++ if (false) ++ { ++ return "out"; ++ } ++ else { } ++ if (true) MethodCall( ++ ); ++ else MethodCall( ++ ); ++ switch ("test") ++ { ++ case "one": ++ return MethodCall( ++ ); ++ case "two": ++ break; ++ } ++ (int, int) tuple = (1, 4); ++ switch (tuple) ++ { ++ case (1, 4): ++ MethodCall(); ++ } ++ ++ // Exceptions ++ try ++ { ++ throw new Exception("fail"); ++ } ++ catch (Exception) ++ { ++ } ++ finally ++ { ++ } ++ try { } catch (Exception) {} ++ try ++ { ++ throw GetException( ++ ); ++ } ++ catch (Exception) { } ++ ++ // Others ++ lock (this) ++ { ++ } ++ unsafe ++ { ++ byte[] bytes = [1, 2, 3]; ++ fixed (byte* pointerToFirst = bytes) ++ { ++ } ++ } ++ ++ return "ChangeMe"; ++ } ++ set { } ++ } ++ ++ // Supporting methods to make the tested property above valid C#. ++ public void Dispose() {} ++ string MethodCall(int a = 0, int b = 0) => "test"; ++ Exception GetException() => new Exception("fail"); ++ int[] Numbers() => [0, 1]; +} ## userdiff.c ## @@ userdiff.c: PATTERNS("cpp", - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" +- /* Properties */ +- "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* -+ * Jump over keywords not used by methods which can be followed by parentheses without special characters in between, ++ * Jump over reserved keywords which are illegal method names, but which ++ * can be followed by parentheses without special characters in between, + * making them look like methods. + */ + "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" -+ /* Methods/constructors: -+ * the strategy is to identify a minimum of two groups (any combination of keywords/type/name), -+ * without intermediate or final characters which can't be part of method definitions before the opening parenthesis. ++ /* ++ * Methods/constructors: ++ * The strategy is to identify a minimum of two groups (any combination ++ * of keywords/type/name), without intermediate characters which can't be ++ * part of method definitions before the opening parenthesis, and without ++ * final unexpected characters, normally only used in ordinary statements. ++ * "=" is excluded to ignore assignments, but as a result rules out ++ * methods with expression bodies. However, since those fit on 1-2 lines, ++ * a chunk header isn't useful either way. + */ -+ "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" - /* Properties */ -- "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" -+ "^[ \t]*((([][[:alnum:]@_<>.,]+)[ \t]+[][[:alnum:]@_]*)+[^=:;,()]*)$\n" ++ "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t+*%\\/\\-][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" ++ /* ++ * Properties: ++ * As with methods, expect a minimum of two groups. And, the vast majority ++ * of properties long enough to be worth showing a chunk header for don't ++ * include "=:;,()" on the line they are defined. ++ */ ++ "^[ \t]*([][[:alnum:]@_<>.,]+[ \t]+[][[:alnum:]@_.]+[^=:;,()]*)$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ t/t4018/csharp-method | 10 +++ t/t4018/csharp-method-explicit | 12 +++ t/t4018/csharp-method-generics | 11 +++ t/t4018/csharp-method-modifiers | 13 +++ t/t4018/csharp-method-multiline | 10 +++ t/t4018/csharp-method-params | 10 +++ t/t4018/csharp-method-skip-body | 116 +++++++++++++++++++++++++++ t/t4018/csharp-method-special-chars | 11 +++ t/t4018/csharp-method-with-spacing | 10 +++ t/t4018/csharp-property | 11 +++ t/t4018/csharp-property-skip-body | 120 ++++++++++++++++++++++++++++ userdiff.c | 30 +++++-- 12 files changed, 358 insertions(+), 6 deletions(-) create mode 100644 t/t4018/csharp-method create mode 100644 t/t4018/csharp-method-explicit create mode 100644 t/t4018/csharp-method-generics create mode 100644 t/t4018/csharp-method-modifiers create mode 100644 t/t4018/csharp-method-multiline create mode 100644 t/t4018/csharp-method-params create mode 100644 t/t4018/csharp-method-skip-body create mode 100644 t/t4018/csharp-method-special-chars create mode 100644 t/t4018/csharp-method-with-spacing create mode 100644 t/t4018/csharp-property create mode 100644 t/t4018/csharp-property-skip-body diff --git a/t/t4018/csharp-method b/t/t4018/csharp-method new file mode 100644 index 00000000000..16b367aca2b --- /dev/null +++ b/t/t4018/csharp-method @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-explicit b/t/t4018/csharp-method-explicit new file mode 100644 index 00000000000..5a710116cc4 --- /dev/null +++ b/t/t4018/csharp-method-explicit @@ -0,0 +1,12 @@ +using System; + +class Example : IDisposable +{ + void IDisposable.Dispose() // RIGHT + { + // Filler + // Filler + + // ChangeMe + } +} diff --git a/t/t4018/csharp-method-generics b/t/t4018/csharp-method-generics new file mode 100644 index 00000000000..b3216bfb2a7 --- /dev/null +++ b/t/t4018/csharp-method-generics @@ -0,0 +1,11 @@ +class Example<T1, T2> +{ + Example<int, string> Method<TA, TB>(TA RIGHT, TB b) + { + // Filler + // Filler + + // ChangeMe + return null; + } +} diff --git a/t/t4018/csharp-method-modifiers b/t/t4018/csharp-method-modifiers new file mode 100644 index 00000000000..caefa8ee99c --- /dev/null +++ b/t/t4018/csharp-method-modifiers @@ -0,0 +1,13 @@ +using System.Threading.Tasks; + +class Example +{ + static internal async Task Method(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + await Task.Delay(1); + } +} diff --git a/t/t4018/csharp-method-multiline b/t/t4018/csharp-method-multiline new file mode 100644 index 00000000000..3983ff42f51 --- /dev/null +++ b/t/t4018/csharp-method-multiline @@ -0,0 +1,10 @@ +class Example +{ + string Method_RIGHT( + int a, + int b, + int c) + { + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-params b/t/t4018/csharp-method-params new file mode 100644 index 00000000000..3f00410ba1f --- /dev/null +++ b/t/t4018/csharp-method-params @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT, int b, int c = 42) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-skip-body b/t/t4018/csharp-method-skip-body new file mode 100644 index 00000000000..b3c7194eb74 --- /dev/null +++ b/t/t4018/csharp-method-skip-body @@ -0,0 +1,116 @@ +using System.Linq; +using System; + +class Example : IDisposable +{ + string Method(int RIGHT) + { + // Method calls + MethodCall(); + MethodCall(1, 2); + MethodCall( + 1, 2); + + // Assignments + var constantAssignment = "test"; + var methodAssignment = MethodCall(); + var multiLineMethodAssignment = MethodCall( + ); + var multiLine = "first" + + MethodCall() + - MethodCall() + + MethodCall(); + + // Initializations/disposal + new Example(); + new Example( + ); + new Example { }; + using (this) + { + } + var def = + this is default( + Example); + + // Iteration statements + do { } while (true); + do MethodCall( + ); while (true); + while (true); + while (true) { + break; + } + for (int i = 0; i < 10; ++i) + { + } + foreach (int i in Enumerable.Range(0, 10)) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + + // Control + if (false) + { + return "out"; + } + else { } + if (true) MethodCall( + ); + else MethodCall( + ); + switch ("test") + { + case "one": + return MethodCall( + ); + case "two": + break; + } + (int, int) tuple = (1, 4); + switch (tuple) + { + case (1, 4): + MethodCall(); + } + + // Exceptions + try + { + throw new Exception("fail"); + } + catch (Exception) + { + } + finally + { + } + try { } catch (Exception) {} + try + { + throw GetException( + ); + } + catch (Exception) { } + + // Others + lock (this) + { + } + unsafe + { + byte[] bytes = [1, 2, 3]; + fixed (byte* pointerToFirst = bytes) + { + } + } + + return "ChangeMe"; + } + + // Supporting methods to make the tested method above valid C#. + public void Dispose() {} + string MethodCall(int a = 0, int b = 0) => "test"; + Exception GetException() => new Exception("fail"); + int[] Numbers() => [0, 1]; +} diff --git a/t/t4018/csharp-method-special-chars b/t/t4018/csharp-method-special-chars new file mode 100644 index 00000000000..e6c7bc01a18 --- /dev/null +++ b/t/t4018/csharp-method-special-chars @@ -0,0 +1,11 @@ +class @Some_Type +{ + @Some_Type @Method_With_Underscore(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + return new @Some_Type(); + } +} diff --git a/t/t4018/csharp-method-with-spacing b/t/t4018/csharp-method-with-spacing new file mode 100644 index 00000000000..da0c9b7e60c --- /dev/null +++ b/t/t4018/csharp-method-with-spacing @@ -0,0 +1,10 @@ +class Example +{ + string Method ( int RIGHT ) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-property b/t/t4018/csharp-property new file mode 100644 index 00000000000..e56dfce34c1 --- /dev/null +++ b/t/t4018/csharp-property @@ -0,0 +1,11 @@ +class Example +{ + public bool RIGHT + { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/t/t4018/csharp-property-skip-body b/t/t4018/csharp-property-skip-body new file mode 100644 index 00000000000..ad9d96007a8 --- /dev/null +++ b/t/t4018/csharp-property-skip-body @@ -0,0 +1,120 @@ +using System.Linq; +using System; + +class Example : IDisposable +{ + public string RIGHT + { + get + { + // Method calls + MethodCall(); + MethodCall(1, 2); + MethodCall( + 1, 2); + + // Assignments + var constantAssignment = "test"; + var methodAssignment = MethodCall(); + var multiLineMethodAssignment = MethodCall( + ); + var multiLine = "first" + + MethodCall() + - MethodCall() + + MethodCall(); + + // Initializations/disposal + new Example(); + new Example( + ); + new Example { }; + using (this) + { + } + var def = + this is default( + Example); + + // Iteration statements + do { } while (true); + do MethodCall( + ); while (true); + while (true); + while (true) { + break; + } + for (int i = 0; i < 10; ++i) + { + } + foreach (int i in Enumerable.Range(0, 10)) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + + // Control + if (false) + { + return "out"; + } + else { } + if (true) MethodCall( + ); + else MethodCall( + ); + switch ("test") + { + case "one": + return MethodCall( + ); + case "two": + break; + } + (int, int) tuple = (1, 4); + switch (tuple) + { + case (1, 4): + MethodCall(); + } + + // Exceptions + try + { + throw new Exception("fail"); + } + catch (Exception) + { + } + finally + { + } + try { } catch (Exception) {} + try + { + throw GetException( + ); + } + catch (Exception) { } + + // Others + lock (this) + { + } + unsafe + { + byte[] bytes = [1, 2, 3]; + fixed (byte* pointerToFirst = bytes) + { + } + } + + return "ChangeMe"; + } + set { } + } + + // Supporting methods to make the tested property above valid C#. + public void Dispose() {} + string MethodCall(int a = 0, int b = 0) => "test"; + Exception GetException() => new Exception("fail"); + int[] Numbers() => [0, 1]; +} diff --git a/userdiff.c b/userdiff.c index e399543823b..5440ccf2de5 100644 --- a/userdiff.c +++ b/userdiff.c @@ -89,12 +89,30 @@ PATTERNS("cpp", "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), PATTERNS("csharp", - /* Keywords */ - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" - /* Properties */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* + * Jump over reserved keywords which are illegal method names, but which + * can be followed by parentheses without special characters in between, + * making them look like methods. + */ + "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" + /* + * Methods/constructors: + * The strategy is to identify a minimum of two groups (any combination + * of keywords/type/name), without intermediate characters which can't be + * part of method definitions before the opening parenthesis, and without + * final unexpected characters, normally only used in ordinary statements. + * "=" is excluded to ignore assignments, but as a result rules out + * methods with expression bodies. However, since those fit on 1-2 lines, + * a chunk header isn't useful either way. + */ + "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t+*%\\/\\-][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" + /* + * Properties: + * As with methods, expect a minimum of two groups. And, the vast majority + * of properties long enough to be worth showing a chunk header for don't + * include "=:;,()" on the line they are defined. + */ + "^[ \t]*([][[:alnum:]@_<>.,]+[ \t]+[][[:alnum:]@_.]+[^=:;,()]*)$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 -- gitgitgadget ^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH v4] userdiff: better method/property matching for C# 2024-03-28 8:07 ` [PATCH v3] " Steven Jeuris via GitGitGadget @ 2024-03-28 19:14 ` Steven Jeuris via GitGitGadget 2024-03-28 19:33 ` Junio C Hamano ` (2 more replies) 0 siblings, 3 replies; 14+ messages in thread From: Steven Jeuris via GitGitGadget @ 2024-03-28 19:14 UTC (permalink / raw) To: git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Johannes Sixt, Steven Jeuris, Steven Jeuris From: Steven Jeuris <steven.jeuris@3shape.com> - Support multi-line methods by not requiring closing parenthesis. - Support multiple generics (comma was missing before). - Add missing `foreach`, `lock` and `fixed` keywords to skip over. - Remove `instanceof` keyword, which isn't C#. - Also detect non-method keywords not positioned at the start of a line. - Added tests; none existed before. The overall strategy is to focus more on what isn't expected for method/property definitions, instead of what is, but is fully optional. Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> --- userdiff: better method/property matching for C# Change since v1: I removed "from" from the list of keywords to skip. First, I considered adding "await", but I discovered both "await" and "from" are "contextual keywords", which unlike the other keywords currently listed, aren't reserved, and can thus cause false negatives. I.e., it is valid to have a method named "await" or "from". In edge cases, this may lead to false positives, but a different exclusion rule will need to be added to handle these. Change since v2: * Corrected comment formatting. * Added csharp-property-skip-body test. * Added comments in test code to explain sections not part of the test. * Elaborated regex comments. * Excluded math operators (+-*/%) in method pattern to not catch multiline operations, and tested for this in the -skip-body tests. Catching "-" only worked when it was defined at the end of the exclusion block for some reason. The regex matcher seems quite bugged. Change since v3: * Changed regex to better handle whitespace in types, making use of the fact that it only appears after commas. * Split regex into multiple lines with comments explaining structure. * Split the "skip body" tests into more narrow csharp-exclude- tests. * Added a test for generic methods: csharp-exclude-generic-method-calls. * Added a test for array types used in methods: csharp-method-array. * Added an addition property test: csharp-property-braces-same-line. * Included a test for "( func(x)" case identified by Johannes in csharp-exclude-assignments. As before, I ran into many regex limitations (no possessive quantifiers, no lookahead). It also seems different regex evaluators are used on different test runs. Which one does git diff use? Maybe it is about time to update this? E.g., if speed is a concern, possessive quantifiers can speed up search. Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1682%2FWhathecode%2Fmaster-v4 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1682/Whathecode/master-v4 Pull-Request: https://github.com/git/git/pull/1682 Range-diff vs v3: 1: 9b6c76f5342 ! 1: 2feb84beaa0 userdiff: better method/property matching for C# @@ Commit message Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> - ## t/t4018/csharp-method (new) ## + ## t/t4018/csharp-exclude-assignments (new) ## @@ +class Example +{ + string Method(int RIGHT) + { -+ // Filler -+ // Filler ++ var constantAssignment = "test"; ++ var methodAssignment = MethodCall(); ++ var multiLineMethodAssignment = MethodCall( ++ ); ++ var multiLine = "first" ++ + MethodCall() ++ + ++ ( MethodCall() ++ ) ++ + MethodCall(); + + return "ChangeMe"; + } -+} - - ## t/t4018/csharp-method-explicit (new) ## -@@ -+using System; + -+class Example : IDisposable -+{ -+ void IDisposable.Dispose() // RIGHT -+ { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ } -+} - - ## t/t4018/csharp-method-generics (new) ## -@@ -+class Example<T1, T2> -+{ -+ Example<int, string> Method<TA, TB>(TA RIGHT, TB b) -+ { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ return null; -+ } ++ string MethodCall(int a = 0, int b = 0) => "test"; +} - ## t/t4018/csharp-method-modifiers (new) ## + ## t/t4018/csharp-exclude-control-statements (new) ## @@ -+using System.Threading.Tasks; -+ +class Example +{ -+ static internal async Task Method(int RIGHT) ++ string Method(int RIGHT) + { -+ // Filler -+ // Filler -+ -+ // ChangeMe -+ await Task.Delay(1); ++ if (false) ++ { ++ return "out"; ++ } ++ else { } ++ if (true) MethodCall( ++ ); ++ else MethodCall( ++ ); ++ switch ("test") ++ { ++ case "one": ++ return MethodCall( ++ ); ++ case "two": ++ break; ++ } ++ (int, int) tuple = (1, 4); ++ switch (tuple) ++ { ++ case (1, 4): ++ MethodCall(); ++ break; ++ } ++ ++ return "ChangeMe"; + } ++ ++ string MethodCall(int a = 0, int b = 0) => "test"; +} - ## t/t4018/csharp-method-multiline (new) ## + ## t/t4018/csharp-exclude-exceptions (new) ## @@ ++using System; ++ +class Example +{ -+ string Method_RIGHT( -+ int a, -+ int b, -+ int c) ++ string Method(int RIGHT) + { ++ try ++ { ++ throw new Exception("fail"); ++ } ++ catch (Exception) ++ { ++ } ++ finally ++ { ++ } ++ try { } catch (Exception) {} ++ try ++ { ++ throw GetException( ++ ); ++ } ++ catch (Exception) { } ++ + return "ChangeMe"; + } ++ ++ Exception GetException() => new Exception("fail"); +} - ## t/t4018/csharp-method-params (new) ## + ## t/t4018/csharp-exclude-generic-method-calls (new) ## @@ +class Example +{ -+ string Method(int RIGHT, int b, int c = 42) ++ string Method(int RIGHT) + { -+ // Filler -+ // Filler -+ ++ GenericMethodCall<int, int>( ++ ); ++ + return "ChangeMe"; + } ++ ++ string GenericMethodCall<T, T2>() => "test"; +} - ## t/t4018/csharp-method-skip-body (new) ## + ## t/t4018/csharp-exclude-init-dispose (new) ## @@ -+using System.Linq; +using System; + +class Example : IDisposable +{ + string Method(int RIGHT) + { -+ // Method calls -+ MethodCall(); -+ MethodCall(1, 2); -+ MethodCall( -+ 1, 2); -+ -+ // Assignments -+ var constantAssignment = "test"; -+ var methodAssignment = MethodCall(); -+ var multiLineMethodAssignment = MethodCall( -+ ); -+ var multiLine = "first" -+ + MethodCall() -+ - MethodCall() -+ + MethodCall(); -+ -+ // Initializations/disposal + new Example(); + new Example( + ); @@ t/t4018/csharp-method-skip-body (new) + this is default( + Example); + -+ // Iteration statements ++ return "ChangeMe"; ++ } ++ ++ public void Dispose() {} ++} + + ## t/t4018/csharp-exclude-iterations (new) ## +@@ ++using System.Linq; ++ ++class Example ++{ ++ string Method(int RIGHT) ++ { + do { } while (true); + do MethodCall( + ); while (true); @@ t/t4018/csharp-method-skip-body (new) + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + -+ // Control -+ if (false) -+ { -+ return "out"; -+ } -+ else { } -+ if (true) MethodCall( -+ ); -+ else MethodCall( -+ ); -+ switch ("test") -+ { -+ case "one": -+ return MethodCall( -+ ); -+ case "two": -+ break; -+ } -+ (int, int) tuple = (1, 4); -+ switch (tuple) -+ { -+ case (1, 4): -+ MethodCall(); -+ } ++ return "ChangeMe"; ++ } + -+ // Exceptions -+ try -+ { -+ throw new Exception("fail"); -+ } -+ catch (Exception) -+ { -+ } -+ finally -+ { -+ } -+ try { } catch (Exception) {} -+ try -+ { -+ throw GetException( -+ ); -+ } -+ catch (Exception) { } ++ string MethodCall(int a = 0, int b = 0) => "test"; ++} + + ## t/t4018/csharp-exclude-method-calls (new) ## +@@ ++class Example ++{ ++ string Method(int RIGHT) ++ { ++ MethodCall(); ++ MethodCall(1, 2); ++ MethodCall( ++ 1, 2); + -+ // Others ++ return "ChangeMe"; ++ } ++ ++ string MethodCall(int a = 0, int b = 0) => "test"; ++ string GenericMethodCall<T, T2>() => "test"; ++} + + ## t/t4018/csharp-exclude-other (new) ## +@@ ++class Example ++{ ++ string Method(int RIGHT) ++ { + lock (this) + { + } @@ t/t4018/csharp-method-skip-body (new) + + return "ChangeMe"; + } ++} + + ## t/t4018/csharp-method (new) ## +@@ ++class Example ++{ ++ string Method(int RIGHT) ++ { ++ // Filler ++ // Filler + -+ // Supporting methods to make the tested method above valid C#. -+ public void Dispose() {} -+ string MethodCall(int a = 0, int b = 0) => "test"; -+ Exception GetException() => new Exception("fail"); -+ int[] Numbers() => [0, 1]; ++ return "ChangeMe"; ++ } ++} + + ## t/t4018/csharp-method-array (new) ## +@@ ++class Example ++{ ++ string[] Method(int RIGHT) ++ { ++ // Filler ++ // Filler ++ ++ return ["ChangeMe"]; ++ } ++} + + ## t/t4018/csharp-method-explicit (new) ## +@@ ++using System; ++ ++class Example : IDisposable ++{ ++ void IDisposable.Dispose() // RIGHT ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ } ++} + + ## t/t4018/csharp-method-generics (new) ## +@@ ++class Example<T1, T2> ++{ ++ Example<int, string> Method<TA, TB>(TA RIGHT, TB b) ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ return null; ++ } ++} + + ## t/t4018/csharp-method-modifiers (new) ## +@@ ++using System.Threading.Tasks; ++ ++class Example ++{ ++ static internal async Task Method(int RIGHT) ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ await Task.Delay(1); ++ } ++} + + ## t/t4018/csharp-method-multiline (new) ## +@@ ++class Example ++{ ++ string Method_RIGHT( ++ int a, ++ int b, ++ int c) ++ { ++ return "ChangeMe"; ++ } ++} + + ## t/t4018/csharp-method-params (new) ## +@@ ++class Example ++{ ++ string Method(int RIGHT, int b, int c = 42) ++ { ++ // Filler ++ // Filler ++ ++ return "ChangeMe"; ++ } +} ## t/t4018/csharp-method-special-chars (new) ## @@ t/t4018/csharp-method-with-spacing (new) @@ +class Example +{ -+ string Method ( int RIGHT ) -+ { ++ string Method ( int RIGHT ) ++ { + // Filler + // Filler -+ ++ + return "ChangeMe"; + } +} @@ t/t4018/csharp-property (new) + } +} - ## t/t4018/csharp-property-skip-body (new) ## + ## t/t4018/csharp-property-braces-same-line (new) ## @@ -+using System.Linq; -+using System; -+ -+class Example : IDisposable ++class Example +{ -+ public string RIGHT -+ { -+ get ++ public bool RIGHT { ++ get { return true; } ++ set + { -+ // Method calls -+ MethodCall(); -+ MethodCall(1, 2); -+ MethodCall( -+ 1, 2); -+ -+ // Assignments -+ var constantAssignment = "test"; -+ var methodAssignment = MethodCall(); -+ var multiLineMethodAssignment = MethodCall( -+ ); -+ var multiLine = "first" -+ + MethodCall() -+ - MethodCall() -+ + MethodCall(); -+ -+ // Initializations/disposal -+ new Example(); -+ new Example( -+ ); -+ new Example { }; -+ using (this) -+ { -+ } -+ var def = -+ this is default( -+ Example); -+ -+ // Iteration statements -+ do { } while (true); -+ do MethodCall( -+ ); while (true); -+ while (true); -+ while (true) { -+ break; -+ } -+ for (int i = 0; i < 10; ++i) -+ { -+ } -+ foreach (int i in Enumerable.Range(0, 10)) -+ { -+ } -+ int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; -+ -+ // Control -+ if (false) -+ { -+ return "out"; -+ } -+ else { } -+ if (true) MethodCall( -+ ); -+ else MethodCall( -+ ); -+ switch ("test") -+ { -+ case "one": -+ return MethodCall( -+ ); -+ case "two": -+ break; -+ } -+ (int, int) tuple = (1, 4); -+ switch (tuple) -+ { -+ case (1, 4): -+ MethodCall(); -+ } -+ -+ // Exceptions -+ try -+ { -+ throw new Exception("fail"); -+ } -+ catch (Exception) -+ { -+ } -+ finally -+ { -+ } -+ try { } catch (Exception) {} -+ try -+ { -+ throw GetException( -+ ); -+ } -+ catch (Exception) { } -+ -+ // Others -+ lock (this) -+ { -+ } -+ unsafe -+ { -+ byte[] bytes = [1, 2, 3]; -+ fixed (byte* pointerToFirst = bytes) -+ { -+ } -+ } -+ -+ return "ChangeMe"; ++ // ChangeMe + } -+ set { } + } -+ -+ // Supporting methods to make the tested property above valid C#. -+ public void Dispose() {} -+ string MethodCall(int a = 0, int b = 0) => "test"; -+ Exception GetException() => new Exception("fail"); -+ int[] Numbers() => [0, 1]; +} ## userdiff.c ## @@ userdiff.c: PATTERNS("cpp", + * can be followed by parentheses without special characters in between, + * making them look like methods. + */ -+ "!(^|[ \t]+)(do|while|for|foreach|if|else|new|default|return|switch|case|throw|catch|using|lock|fixed)([ \t(]+|$)\n" ++ "!(^|[ \t]+)" /* Start of line or whitespace. */ ++ "(do|while|for|foreach|if|else|new|default|return|switch|case|throw" ++ "|catch|using|lock|fixed)" ++ "([ \t(]+|$)\n" /* Whitespace, "(", or end of line. */ + /* + * Methods/constructors: + * The strategy is to identify a minimum of two groups (any combination -+ * of keywords/type/name), without intermediate characters which can't be -+ * part of method definitions before the opening parenthesis, and without ++ * of keywords/type/name) before the opening parenthesis, and without + * final unexpected characters, normally only used in ordinary statements. -+ * "=" is excluded to ignore assignments, but as a result rules out -+ * methods with expression bodies. However, since those fit on 1-2 lines, -+ * a chunk header isn't useful either way. + */ -+ "^[ \t]*(([][[:alnum:]@_<>.,]*[^=:{ \t+*%\\/\\-][ \t]+[][[:alnum:]@_<>.,]*)+\\([^;]*)$\n" ++ "^[ \t]*" /* Remove leading whitespace. */ ++ "(" /* Start chunk header capture. */ ++ "(" /* Group of keywords/type/names. */ ++ "([][[:alnum:]@_<>.]|, [ |\t]*)+" /* Space only allowed after ",". */ ++ "[ \t]+" /* One required space forces a minimum of two items. */ ++ "([][[:alnum:]@_<>.]|, [ |\t]*)+" ++ "[ \t]*" /* Optional space before parameters start. */ ++ ")+" ++ "\\(" /* Start of method parameters. */ ++ "[^;]*" /* Allow complex parameters, but exclude statements (;). */ ++ ")$\n" /* Close chunk header capture. */ + /* + * Properties: -+ * As with methods, expect a minimum of two groups. And, the vast majority -+ * of properties long enough to be worth showing a chunk header for don't -+ * include "=:;,()" on the line they are defined. ++ * As with methods, expect a minimum of two groups. But, more trivial than ++ * methods, the vast majority of properties long enough to be worth ++ * showing a chunk header for don't include "=:;,()" on the line they are ++ * defined, since they don't have a parameter list. + */ -+ "^[ \t]*([][[:alnum:]@_<>.,]+[ \t]+[][[:alnum:]@_.]+[^=:;,()]*)$\n" ++ "^[ \t]*(" ++ "(" ++ "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]+" ++ "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]*" ++ ")+" /* Up to here, same as methods regex. */ ++ "[^;=:,()]*" /* Compared to methods, no parameter list allowed. */ ++ ")$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ t/t4018/csharp-exclude-assignments | 20 +++++++++ t/t4018/csharp-exclude-control-statements | 34 +++++++++++++++ t/t4018/csharp-exclude-exceptions | 29 +++++++++++++ t/t4018/csharp-exclude-generic-method-calls | 12 ++++++ t/t4018/csharp-exclude-init-dispose | 22 ++++++++++ t/t4018/csharp-exclude-iterations | 26 ++++++++++++ t/t4018/csharp-exclude-method-calls | 15 +++++++ t/t4018/csharp-exclude-other | 18 ++++++++ t/t4018/csharp-method | 10 +++++ t/t4018/csharp-method-array | 10 +++++ t/t4018/csharp-method-explicit | 12 ++++++ t/t4018/csharp-method-generics | 11 +++++ t/t4018/csharp-method-modifiers | 13 ++++++ t/t4018/csharp-method-multiline | 10 +++++ t/t4018/csharp-method-params | 10 +++++ t/t4018/csharp-method-special-chars | 11 +++++ t/t4018/csharp-method-with-spacing | 10 +++++ t/t4018/csharp-property | 11 +++++ t/t4018/csharp-property-braces-same-line | 10 +++++ userdiff.c | 46 ++++++++++++++++++--- 20 files changed, 334 insertions(+), 6 deletions(-) create mode 100644 t/t4018/csharp-exclude-assignments create mode 100644 t/t4018/csharp-exclude-control-statements create mode 100644 t/t4018/csharp-exclude-exceptions create mode 100644 t/t4018/csharp-exclude-generic-method-calls create mode 100644 t/t4018/csharp-exclude-init-dispose create mode 100644 t/t4018/csharp-exclude-iterations create mode 100644 t/t4018/csharp-exclude-method-calls create mode 100644 t/t4018/csharp-exclude-other create mode 100644 t/t4018/csharp-method create mode 100644 t/t4018/csharp-method-array create mode 100644 t/t4018/csharp-method-explicit create mode 100644 t/t4018/csharp-method-generics create mode 100644 t/t4018/csharp-method-modifiers create mode 100644 t/t4018/csharp-method-multiline create mode 100644 t/t4018/csharp-method-params create mode 100644 t/t4018/csharp-method-special-chars create mode 100644 t/t4018/csharp-method-with-spacing create mode 100644 t/t4018/csharp-property create mode 100644 t/t4018/csharp-property-braces-same-line diff --git a/t/t4018/csharp-exclude-assignments b/t/t4018/csharp-exclude-assignments new file mode 100644 index 00000000000..239f312963b --- /dev/null +++ b/t/t4018/csharp-exclude-assignments @@ -0,0 +1,20 @@ +class Example +{ + string Method(int RIGHT) + { + var constantAssignment = "test"; + var methodAssignment = MethodCall(); + var multiLineMethodAssignment = MethodCall( + ); + var multiLine = "first" + + MethodCall() + + + ( MethodCall() + ) + + MethodCall(); + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; +} diff --git a/t/t4018/csharp-exclude-control-statements b/t/t4018/csharp-exclude-control-statements new file mode 100644 index 00000000000..3a0f404ee10 --- /dev/null +++ b/t/t4018/csharp-exclude-control-statements @@ -0,0 +1,34 @@ +class Example +{ + string Method(int RIGHT) + { + if (false) + { + return "out"; + } + else { } + if (true) MethodCall( + ); + else MethodCall( + ); + switch ("test") + { + case "one": + return MethodCall( + ); + case "two": + break; + } + (int, int) tuple = (1, 4); + switch (tuple) + { + case (1, 4): + MethodCall(); + break; + } + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; +} diff --git a/t/t4018/csharp-exclude-exceptions b/t/t4018/csharp-exclude-exceptions new file mode 100644 index 00000000000..b1e64256cfe --- /dev/null +++ b/t/t4018/csharp-exclude-exceptions @@ -0,0 +1,29 @@ +using System; + +class Example +{ + string Method(int RIGHT) + { + try + { + throw new Exception("fail"); + } + catch (Exception) + { + } + finally + { + } + try { } catch (Exception) {} + try + { + throw GetException( + ); + } + catch (Exception) { } + + return "ChangeMe"; + } + + Exception GetException() => new Exception("fail"); +} diff --git a/t/t4018/csharp-exclude-generic-method-calls b/t/t4018/csharp-exclude-generic-method-calls new file mode 100644 index 00000000000..31af546665d --- /dev/null +++ b/t/t4018/csharp-exclude-generic-method-calls @@ -0,0 +1,12 @@ +class Example +{ + string Method(int RIGHT) + { + GenericMethodCall<int, int>( + ); + + return "ChangeMe"; + } + + string GenericMethodCall<T, T2>() => "test"; +} diff --git a/t/t4018/csharp-exclude-init-dispose b/t/t4018/csharp-exclude-init-dispose new file mode 100644 index 00000000000..2bc8e194e20 --- /dev/null +++ b/t/t4018/csharp-exclude-init-dispose @@ -0,0 +1,22 @@ +using System; + +class Example : IDisposable +{ + string Method(int RIGHT) + { + new Example(); + new Example( + ); + new Example { }; + using (this) + { + } + var def = + this is default( + Example); + + return "ChangeMe"; + } + + public void Dispose() {} +} diff --git a/t/t4018/csharp-exclude-iterations b/t/t4018/csharp-exclude-iterations new file mode 100644 index 00000000000..960aa182ae2 --- /dev/null +++ b/t/t4018/csharp-exclude-iterations @@ -0,0 +1,26 @@ +using System.Linq; + +class Example +{ + string Method(int RIGHT) + { + do { } while (true); + do MethodCall( + ); while (true); + while (true); + while (true) { + break; + } + for (int i = 0; i < 10; ++i) + { + } + foreach (int i in Enumerable.Range(0, 10)) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; +} diff --git a/t/t4018/csharp-exclude-method-calls b/t/t4018/csharp-exclude-method-calls new file mode 100644 index 00000000000..8afe2a54638 --- /dev/null +++ b/t/t4018/csharp-exclude-method-calls @@ -0,0 +1,15 @@ +class Example +{ + string Method(int RIGHT) + { + MethodCall(); + MethodCall(1, 2); + MethodCall( + 1, 2); + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; + string GenericMethodCall<T, T2>() => "test"; +} diff --git a/t/t4018/csharp-exclude-other b/t/t4018/csharp-exclude-other new file mode 100644 index 00000000000..4d5581cf3e1 --- /dev/null +++ b/t/t4018/csharp-exclude-other @@ -0,0 +1,18 @@ +class Example +{ + string Method(int RIGHT) + { + lock (this) + { + } + unsafe + { + byte[] bytes = [1, 2, 3]; + fixed (byte* pointerToFirst = bytes) + { + } + } + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method b/t/t4018/csharp-method new file mode 100644 index 00000000000..16b367aca2b --- /dev/null +++ b/t/t4018/csharp-method @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-array b/t/t4018/csharp-method-array new file mode 100644 index 00000000000..1126de8201d --- /dev/null +++ b/t/t4018/csharp-method-array @@ -0,0 +1,10 @@ +class Example +{ + string[] Method(int RIGHT) + { + // Filler + // Filler + + return ["ChangeMe"]; + } +} diff --git a/t/t4018/csharp-method-explicit b/t/t4018/csharp-method-explicit new file mode 100644 index 00000000000..5a710116cc4 --- /dev/null +++ b/t/t4018/csharp-method-explicit @@ -0,0 +1,12 @@ +using System; + +class Example : IDisposable +{ + void IDisposable.Dispose() // RIGHT + { + // Filler + // Filler + + // ChangeMe + } +} diff --git a/t/t4018/csharp-method-generics b/t/t4018/csharp-method-generics new file mode 100644 index 00000000000..b3216bfb2a7 --- /dev/null +++ b/t/t4018/csharp-method-generics @@ -0,0 +1,11 @@ +class Example<T1, T2> +{ + Example<int, string> Method<TA, TB>(TA RIGHT, TB b) + { + // Filler + // Filler + + // ChangeMe + return null; + } +} diff --git a/t/t4018/csharp-method-modifiers b/t/t4018/csharp-method-modifiers new file mode 100644 index 00000000000..caefa8ee99c --- /dev/null +++ b/t/t4018/csharp-method-modifiers @@ -0,0 +1,13 @@ +using System.Threading.Tasks; + +class Example +{ + static internal async Task Method(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + await Task.Delay(1); + } +} diff --git a/t/t4018/csharp-method-multiline b/t/t4018/csharp-method-multiline new file mode 100644 index 00000000000..3983ff42f51 --- /dev/null +++ b/t/t4018/csharp-method-multiline @@ -0,0 +1,10 @@ +class Example +{ + string Method_RIGHT( + int a, + int b, + int c) + { + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-params b/t/t4018/csharp-method-params new file mode 100644 index 00000000000..3f00410ba1f --- /dev/null +++ b/t/t4018/csharp-method-params @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT, int b, int c = 42) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-special-chars b/t/t4018/csharp-method-special-chars new file mode 100644 index 00000000000..e6c7bc01a18 --- /dev/null +++ b/t/t4018/csharp-method-special-chars @@ -0,0 +1,11 @@ +class @Some_Type +{ + @Some_Type @Method_With_Underscore(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + return new @Some_Type(); + } +} diff --git a/t/t4018/csharp-method-with-spacing b/t/t4018/csharp-method-with-spacing new file mode 100644 index 00000000000..233bb976cc2 --- /dev/null +++ b/t/t4018/csharp-method-with-spacing @@ -0,0 +1,10 @@ +class Example +{ + string Method ( int RIGHT ) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-property b/t/t4018/csharp-property new file mode 100644 index 00000000000..e56dfce34c1 --- /dev/null +++ b/t/t4018/csharp-property @@ -0,0 +1,11 @@ +class Example +{ + public bool RIGHT + { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/t/t4018/csharp-property-braces-same-line b/t/t4018/csharp-property-braces-same-line new file mode 100644 index 00000000000..608131d3d31 --- /dev/null +++ b/t/t4018/csharp-property-braces-same-line @@ -0,0 +1,10 @@ +class Example +{ + public bool RIGHT { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/userdiff.c b/userdiff.c index e399543823b..35e0e1183f7 100644 --- a/userdiff.c +++ b/userdiff.c @@ -89,12 +89,46 @@ PATTERNS("cpp", "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), PATTERNS("csharp", - /* Keywords */ - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" - /* Properties */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* + * Jump over reserved keywords which are illegal method names, but which + * can be followed by parentheses without special characters in between, + * making them look like methods. + */ + "!(^|[ \t]+)" /* Start of line or whitespace. */ + "(do|while|for|foreach|if|else|new|default|return|switch|case|throw" + "|catch|using|lock|fixed)" + "([ \t(]+|$)\n" /* Whitespace, "(", or end of line. */ + /* + * Methods/constructors: + * The strategy is to identify a minimum of two groups (any combination + * of keywords/type/name) before the opening parenthesis, and without + * final unexpected characters, normally only used in ordinary statements. + */ + "^[ \t]*" /* Remove leading whitespace. */ + "(" /* Start chunk header capture. */ + "(" /* Group of keywords/type/names. */ + "([][[:alnum:]@_<>.]|, [ |\t]*)+" /* Space only allowed after ",". */ + "[ \t]+" /* One required space forces a minimum of two items. */ + "([][[:alnum:]@_<>.]|, [ |\t]*)+" + "[ \t]*" /* Optional space before parameters start. */ + ")+" + "\\(" /* Start of method parameters. */ + "[^;]*" /* Allow complex parameters, but exclude statements (;). */ + ")$\n" /* Close chunk header capture. */ + /* + * Properties: + * As with methods, expect a minimum of two groups. But, more trivial than + * methods, the vast majority of properties long enough to be worth + * showing a chunk header for don't include "=:;,()" on the line they are + * defined, since they don't have a parameter list. + */ + "^[ \t]*(" + "(" + "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]+" + "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]*" + ")+" /* Up to here, same as methods regex. */ + "[^;=:,()]*" /* Compared to methods, no parameter list allowed. */ + ")$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 -- gitgitgadget ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v4] userdiff: better method/property matching for C# 2024-03-28 19:14 ` [PATCH v4] " Steven Jeuris via GitGitGadget @ 2024-03-28 19:33 ` Junio C Hamano 2024-03-30 18:49 ` Johannes Sixt 2024-04-03 21:42 ` [PATCH v5] " Steven Jeuris via GitGitGadget 2 siblings, 0 replies; 14+ messages in thread From: Junio C Hamano @ 2024-03-28 19:33 UTC (permalink / raw) To: Steven Jeuris via GitGitGadget Cc: git, Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Johannes Sixt, Steven Jeuris, Steven Jeuris "Steven Jeuris via GitGitGadget" <gitgitgadget@gmail.com> writes: > As before, I ran into many regex limitations (no possessive quantifiers, > no lookahead). It also seems different regex evaluators are used on > different test runs. Which one does git diff use? Maybe it is about time > to update this? E.g., if speed is a concern, possessive quantifiers can > speed up search. When you make regcomp(3) and regexec(3) calls, you'll be using the system library that supplies them. IOW, we use whatever is on the system. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4] userdiff: better method/property matching for C# 2024-03-28 19:14 ` [PATCH v4] " Steven Jeuris via GitGitGadget 2024-03-28 19:33 ` Junio C Hamano @ 2024-03-30 18:49 ` Johannes Sixt 2024-04-03 21:42 ` [PATCH v5] " Steven Jeuris via GitGitGadget 2 siblings, 0 replies; 14+ messages in thread From: Johannes Sixt @ 2024-03-30 18:49 UTC (permalink / raw) To: Steven Jeuris via GitGitGadget, git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Steven Jeuris, Steven Jeuris Am 28.03.24 um 20:14 schrieb Steven Jeuris via GitGitGadget: > diff --git a/userdiff.c b/userdiff.c > index e399543823b..35e0e1183f7 100644 > --- a/userdiff.c > +++ b/userdiff.c > @@ -89,12 +89,46 @@ PATTERNS("cpp", > "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" > "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), > PATTERNS("csharp", > - /* Keywords */ > - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" > - /* Methods and constructors */ > - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" > - /* Properties */ > - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" > + /* > + * Jump over reserved keywords which are illegal method names, but which > + * can be followed by parentheses without special characters in between, > + * making them look like methods. > + */ > + "!(^|[ \t]+)" /* Start of line or whitespace. */ > + "(do|while|for|foreach|if|else|new|default|return|switch|case|throw" > + "|catch|using|lock|fixed)" > + "([ \t(]+|$)\n" /* Whitespace, "(", or end of line. */ > + /* > + * Methods/constructors: > + * The strategy is to identify a minimum of two groups (any combination > + * of keywords/type/name) before the opening parenthesis, and without > + * final unexpected characters, normally only used in ordinary statements. > + */ > + "^[ \t]*" /* Remove leading whitespace. */ > + "(" /* Start chunk header capture. */ > + "(" /* Group of keywords/type/names. */ > + "([][[:alnum:]@_<>.]|, [ |\t]*)+" /* Space only allowed after ",". */ Mental note: This pattern means: a sequence of at least one of ( one character of a certain set OR a comma followed by SP optionally followed by whitespace or | ) > + "[ \t]+" /* One required space forces a minimum of two items. */ OK. > + "([][[:alnum:]@_<>.]|, [ |\t]*)+" Same here. Is it the case of t4018/csharp-method-generics? Example<int, string> Method<TA, TB>(TA RIGHT, TB b) Let me see if I make sense of this. The idea is to treat the ', ' sequence as a single "character" so that the SP in this sequence does not count as the word separator that we otherwise have between two plain words. I am unsure whether this is a workable solution. The set of symbols that can occur before a method definition overlaps too much with the symbols that can occur elsewhere. For example, if you have these lines of code: something(arg, meth, func(x), prop, y, more); you have a false positive in the lines in the middle. I have the feeling that it is impossible to distinguish method definitions from other code reliably. We can go back and forth a while, but at some point we have to stop and accept that there are false positives. Where that point is, I cannot tell, because I do not know what is common and what is uncommon in typical C# code. It is your call. > + "[ \t]*" /* Optional space before parameters start. */ > + ")+" > + "\\(" /* Start of method parameters. */ > + "[^;]*" /* Allow complex parameters, but exclude statements (;). */ > + ")$\n" /* Close chunk header capture. */ > + /* > + * Properties: > + * As with methods, expect a minimum of two groups. But, more trivial than > + * methods, the vast majority of properties long enough to be worth > + * showing a chunk header for don't include "=:;,()" on the line they are > + * defined, since they don't have a parameter list. > + */ > + "^[ \t]*(" > + "(" > + "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]+" > + "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]*" > + ")+" /* Up to here, same as methods regex. */ Is the intent to match pairs of words? Or is the intent to find at least two words? If the latter, then the preceding 4 lines are better written as "([][[:alnum:]@_<>.]|,[ \t]+)+" "([ \t]+([][[:alnum:]@_<>.]|,[ \t]+)+)+" If the former, then the space after the second word must not be optional, because it would match a bc d as two pairs (a b)(c d) BTW that there is an | symbol in the potential space after the comma is certainly an oversight, right? And, why is a tab after the comma not permitted? When you construct regular expressions, make sure that repeated parts always begin with a mandatory part and that this mandatory part cannot be an optional part at the end of the preceding or the repeated pattern. Otherwise, in a non-matching case you send the matcher into an expensive backtracking loop. > + "[^;=:,()]*" /* Compared to methods, no parameter list allowed. */ > + ")$\n" > /* Type definitions */ > "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" > /* Namespace */ > > base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 ^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v5] userdiff: better method/property matching for C# 2024-03-28 19:14 ` [PATCH v4] " Steven Jeuris via GitGitGadget 2024-03-28 19:33 ` Junio C Hamano 2024-03-30 18:49 ` Johannes Sixt @ 2024-04-03 21:42 ` Steven Jeuris via GitGitGadget 2024-04-05 22:02 ` Johannes Sixt 2 siblings, 1 reply; 14+ messages in thread From: Steven Jeuris via GitGitGadget @ 2024-04-03 21:42 UTC (permalink / raw) To: git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Johannes Sixt, Steven Jeuris, Steven Jeuris From: Steven Jeuris <steven.jeuris@3shape.com> - Support multi-line methods by not requiring closing parenthesis. - Support multiple generics (comma was missing before). - Add missing `foreach`, `lock` and `fixed` keywords to skip over. - Remove `instanceof` keyword, which isn't C#. - Also detect non-method keywords not positioned at the start of a line. - Added tests; none existed before. The overall strategy is to focus more on what isn't expected for method/property definitions, instead of what is, but is fully optional. Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> --- userdiff: better method/property matching for C# Change since v1: I removed "from" from the list of keywords to skip. First, I considered adding "await", but I discovered both "await" and "from" are "contextual keywords", which unlike the other keywords currently listed, aren't reserved, and can thus cause false negatives. I.e., it is valid to have a method named "await" or "from". In edge cases, this may lead to false positives, but a different exclusion rule will need to be added to handle these. Change since v2: * Corrected comment formatting. * Added csharp-property-skip-body test. * Added comments in test code to explain sections not part of the test. * Elaborated regex comments. * Excluded math operators (+-*/%) in method pattern to not catch multiline operations, and tested for this in the -skip-body tests. Catching "-" only worked when it was defined at the end of the exclusion block for some reason. The regex matcher seems quite bugged. Change since v3: * Changed regex to better handle whitespace in types, making use of the fact that it only appears after commas. * Split regex into multiple lines with comments explaining structure. * Split the "skip body" tests into more narrow csharp-exclude- tests. * Added a test for generic methods: csharp-exclude-generic-method-calls. * Added a test for array types used in methods: csharp-method-array. * Added an addition property test: csharp-property-braces-same-line. * Included a test for "( func(x)" case identified by Johannes in csharp-exclude-assignments. As before, I ran into many regex limitations (no possessive quantifiers, no lookahead). It also seems different regex evaluators are used on different test runs. Which one does git diff use? Maybe it is about time to update this? E.g., if speed is a concern, possessive quantifiers can speed up search. Change since v4: * Better matching of at least two "words". * Better handling of generics by restricting commas within < ... >. * Allow any spaces around commas in generics. * Because of stricter use of comma, Johannes' identified failing cases now pass. * Updated tests to cover all of the above. Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1682%2FWhathecode%2Fmaster-v5 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1682/Whathecode/master-v5 Pull-Request: https://github.com/git/git/pull/1682 Range-diff vs v4: 1: 2feb84beaa0 ! 1: 00191ef695a userdiff: better method/property matching for C# @@ t/t4018/csharp-exclude-method-calls (new) + MethodCall(1, 2); + MethodCall( + 1, 2); ++ MethodCall( ++ 1, 2, ++ 3); ++ MethodCall( ++ 1, MethodCall(), ++ 2); + + return "ChangeMe"; + } + -+ string MethodCall(int a = 0, int b = 0) => "test"; -+ string GenericMethodCall<T, T2>() => "test"; ++ int MethodCall(int a = 0, int b = 0, int c = 0) => 42; +} ## t/t4018/csharp-exclude-other (new) ## @@ t/t4018/csharp-method-generics (new) + // ChangeMe + return null; + } ++} + + ## t/t4018/csharp-method-generics-alternate-spaces (new) ## +@@ ++class Example<T1, T2> ++{ ++ Example<int,string> Method<TA ,TB>(TA RIGHT, TB b) ++ { ++ // Filler ++ // Filler ++ ++ // ChangeMe ++ return null; ++ } +} ## t/t4018/csharp-method-modifiers (new) ## @@ userdiff.c: PATTERNS("cpp", + */ + "^[ \t]*" /* Remove leading whitespace. */ + "(" /* Start chunk header capture. */ -+ "(" /* Group of keywords/type/names. */ -+ "([][[:alnum:]@_<>.]|, [ |\t]*)+" /* Space only allowed after ",". */ -+ "[ \t]+" /* One required space forces a minimum of two items. */ -+ "([][[:alnum:]@_<>.]|, [ |\t]*)+" -+ "[ \t]*" /* Optional space before parameters start. */ ++ "(" /* First group. */ ++ "[][[:alnum:]@_.]" /* Name. */ ++ "(<[][[:alnum:]@_, \t<>]+>)?" /* Optional generic parameters. */ + ")+" ++ "([ \t]+" /* Subsequent groups, prepended with space. */ ++ "([][[:alnum:]@_.](<[][[:alnum:]@_, \t<>]+>)?)+" ++ ")+" ++ "[ \t]*" /* Optional space before parameters start. */ + "\\(" /* Start of method parameters. */ + "[^;]*" /* Allow complex parameters, but exclude statements (;). */ + ")$\n" /* Close chunk header capture. */ @@ userdiff.c: PATTERNS("cpp", + * defined, since they don't have a parameter list. + */ + "^[ \t]*(" -+ "(" -+ "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]+" -+ "([][[:alnum:]@_<>.]|, [ |\t]*)+[ \t]*" ++ "([][[:alnum:]@_.](<[][[:alnum:]@_, \t<>]+>)?)+" ++ "([ \t]+" ++ "([][[:alnum:]@_.](<[][[:alnum:]@_, \t<>]+>)?)+" + ")+" /* Up to here, same as methods regex. */ + "[^;=:,()]*" /* Compared to methods, no parameter list allowed. */ + ")$\n" t/t4018/csharp-exclude-assignments | 20 ++++++++ t/t4018/csharp-exclude-control-statements | 34 +++++++++++++ t/t4018/csharp-exclude-exceptions | 29 +++++++++++ t/t4018/csharp-exclude-generic-method-calls | 12 +++++ t/t4018/csharp-exclude-init-dispose | 22 +++++++++ t/t4018/csharp-exclude-iterations | 26 ++++++++++ t/t4018/csharp-exclude-method-calls | 20 ++++++++ t/t4018/csharp-exclude-other | 18 +++++++ t/t4018/csharp-method | 10 ++++ t/t4018/csharp-method-array | 10 ++++ t/t4018/csharp-method-explicit | 12 +++++ t/t4018/csharp-method-generics | 11 +++++ .../csharp-method-generics-alternate-spaces | 11 +++++ t/t4018/csharp-method-modifiers | 13 +++++ t/t4018/csharp-method-multiline | 10 ++++ t/t4018/csharp-method-params | 10 ++++ t/t4018/csharp-method-special-chars | 11 +++++ t/t4018/csharp-method-with-spacing | 10 ++++ t/t4018/csharp-property | 11 +++++ t/t4018/csharp-property-braces-same-line | 10 ++++ userdiff.c | 48 ++++++++++++++++--- 21 files changed, 352 insertions(+), 6 deletions(-) create mode 100644 t/t4018/csharp-exclude-assignments create mode 100644 t/t4018/csharp-exclude-control-statements create mode 100644 t/t4018/csharp-exclude-exceptions create mode 100644 t/t4018/csharp-exclude-generic-method-calls create mode 100644 t/t4018/csharp-exclude-init-dispose create mode 100644 t/t4018/csharp-exclude-iterations create mode 100644 t/t4018/csharp-exclude-method-calls create mode 100644 t/t4018/csharp-exclude-other create mode 100644 t/t4018/csharp-method create mode 100644 t/t4018/csharp-method-array create mode 100644 t/t4018/csharp-method-explicit create mode 100644 t/t4018/csharp-method-generics create mode 100644 t/t4018/csharp-method-generics-alternate-spaces create mode 100644 t/t4018/csharp-method-modifiers create mode 100644 t/t4018/csharp-method-multiline create mode 100644 t/t4018/csharp-method-params create mode 100644 t/t4018/csharp-method-special-chars create mode 100644 t/t4018/csharp-method-with-spacing create mode 100644 t/t4018/csharp-property create mode 100644 t/t4018/csharp-property-braces-same-line diff --git a/t/t4018/csharp-exclude-assignments b/t/t4018/csharp-exclude-assignments new file mode 100644 index 00000000000..239f312963b --- /dev/null +++ b/t/t4018/csharp-exclude-assignments @@ -0,0 +1,20 @@ +class Example +{ + string Method(int RIGHT) + { + var constantAssignment = "test"; + var methodAssignment = MethodCall(); + var multiLineMethodAssignment = MethodCall( + ); + var multiLine = "first" + + MethodCall() + + + ( MethodCall() + ) + + MethodCall(); + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; +} diff --git a/t/t4018/csharp-exclude-control-statements b/t/t4018/csharp-exclude-control-statements new file mode 100644 index 00000000000..3a0f404ee10 --- /dev/null +++ b/t/t4018/csharp-exclude-control-statements @@ -0,0 +1,34 @@ +class Example +{ + string Method(int RIGHT) + { + if (false) + { + return "out"; + } + else { } + if (true) MethodCall( + ); + else MethodCall( + ); + switch ("test") + { + case "one": + return MethodCall( + ); + case "two": + break; + } + (int, int) tuple = (1, 4); + switch (tuple) + { + case (1, 4): + MethodCall(); + break; + } + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; +} diff --git a/t/t4018/csharp-exclude-exceptions b/t/t4018/csharp-exclude-exceptions new file mode 100644 index 00000000000..b1e64256cfe --- /dev/null +++ b/t/t4018/csharp-exclude-exceptions @@ -0,0 +1,29 @@ +using System; + +class Example +{ + string Method(int RIGHT) + { + try + { + throw new Exception("fail"); + } + catch (Exception) + { + } + finally + { + } + try { } catch (Exception) {} + try + { + throw GetException( + ); + } + catch (Exception) { } + + return "ChangeMe"; + } + + Exception GetException() => new Exception("fail"); +} diff --git a/t/t4018/csharp-exclude-generic-method-calls b/t/t4018/csharp-exclude-generic-method-calls new file mode 100644 index 00000000000..31af546665d --- /dev/null +++ b/t/t4018/csharp-exclude-generic-method-calls @@ -0,0 +1,12 @@ +class Example +{ + string Method(int RIGHT) + { + GenericMethodCall<int, int>( + ); + + return "ChangeMe"; + } + + string GenericMethodCall<T, T2>() => "test"; +} diff --git a/t/t4018/csharp-exclude-init-dispose b/t/t4018/csharp-exclude-init-dispose new file mode 100644 index 00000000000..2bc8e194e20 --- /dev/null +++ b/t/t4018/csharp-exclude-init-dispose @@ -0,0 +1,22 @@ +using System; + +class Example : IDisposable +{ + string Method(int RIGHT) + { + new Example(); + new Example( + ); + new Example { }; + using (this) + { + } + var def = + this is default( + Example); + + return "ChangeMe"; + } + + public void Dispose() {} +} diff --git a/t/t4018/csharp-exclude-iterations b/t/t4018/csharp-exclude-iterations new file mode 100644 index 00000000000..960aa182ae2 --- /dev/null +++ b/t/t4018/csharp-exclude-iterations @@ -0,0 +1,26 @@ +using System.Linq; + +class Example +{ + string Method(int RIGHT) + { + do { } while (true); + do MethodCall( + ); while (true); + while (true); + while (true) { + break; + } + for (int i = 0; i < 10; ++i) + { + } + foreach (int i in Enumerable.Range(0, 10)) + { + } + int[] numbers = [5, 4, 1, 3, 9, 8, 6, 7, 2, 0]; + + return "ChangeMe"; + } + + string MethodCall(int a = 0, int b = 0) => "test"; +} diff --git a/t/t4018/csharp-exclude-method-calls b/t/t4018/csharp-exclude-method-calls new file mode 100644 index 00000000000..51e2dc20407 --- /dev/null +++ b/t/t4018/csharp-exclude-method-calls @@ -0,0 +1,20 @@ +class Example +{ + string Method(int RIGHT) + { + MethodCall(); + MethodCall(1, 2); + MethodCall( + 1, 2); + MethodCall( + 1, 2, + 3); + MethodCall( + 1, MethodCall(), + 2); + + return "ChangeMe"; + } + + int MethodCall(int a = 0, int b = 0, int c = 0) => 42; +} diff --git a/t/t4018/csharp-exclude-other b/t/t4018/csharp-exclude-other new file mode 100644 index 00000000000..4d5581cf3e1 --- /dev/null +++ b/t/t4018/csharp-exclude-other @@ -0,0 +1,18 @@ +class Example +{ + string Method(int RIGHT) + { + lock (this) + { + } + unsafe + { + byte[] bytes = [1, 2, 3]; + fixed (byte* pointerToFirst = bytes) + { + } + } + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method b/t/t4018/csharp-method new file mode 100644 index 00000000000..16b367aca2b --- /dev/null +++ b/t/t4018/csharp-method @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-array b/t/t4018/csharp-method-array new file mode 100644 index 00000000000..1126de8201d --- /dev/null +++ b/t/t4018/csharp-method-array @@ -0,0 +1,10 @@ +class Example +{ + string[] Method(int RIGHT) + { + // Filler + // Filler + + return ["ChangeMe"]; + } +} diff --git a/t/t4018/csharp-method-explicit b/t/t4018/csharp-method-explicit new file mode 100644 index 00000000000..5a710116cc4 --- /dev/null +++ b/t/t4018/csharp-method-explicit @@ -0,0 +1,12 @@ +using System; + +class Example : IDisposable +{ + void IDisposable.Dispose() // RIGHT + { + // Filler + // Filler + + // ChangeMe + } +} diff --git a/t/t4018/csharp-method-generics b/t/t4018/csharp-method-generics new file mode 100644 index 00000000000..b3216bfb2a7 --- /dev/null +++ b/t/t4018/csharp-method-generics @@ -0,0 +1,11 @@ +class Example<T1, T2> +{ + Example<int, string> Method<TA, TB>(TA RIGHT, TB b) + { + // Filler + // Filler + + // ChangeMe + return null; + } +} diff --git a/t/t4018/csharp-method-generics-alternate-spaces b/t/t4018/csharp-method-generics-alternate-spaces new file mode 100644 index 00000000000..95836217430 --- /dev/null +++ b/t/t4018/csharp-method-generics-alternate-spaces @@ -0,0 +1,11 @@ +class Example<T1, T2> +{ + Example<int,string> Method<TA ,TB>(TA RIGHT, TB b) + { + // Filler + // Filler + + // ChangeMe + return null; + } +} diff --git a/t/t4018/csharp-method-modifiers b/t/t4018/csharp-method-modifiers new file mode 100644 index 00000000000..caefa8ee99c --- /dev/null +++ b/t/t4018/csharp-method-modifiers @@ -0,0 +1,13 @@ +using System.Threading.Tasks; + +class Example +{ + static internal async Task Method(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + await Task.Delay(1); + } +} diff --git a/t/t4018/csharp-method-multiline b/t/t4018/csharp-method-multiline new file mode 100644 index 00000000000..3983ff42f51 --- /dev/null +++ b/t/t4018/csharp-method-multiline @@ -0,0 +1,10 @@ +class Example +{ + string Method_RIGHT( + int a, + int b, + int c) + { + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-params b/t/t4018/csharp-method-params new file mode 100644 index 00000000000..3f00410ba1f --- /dev/null +++ b/t/t4018/csharp-method-params @@ -0,0 +1,10 @@ +class Example +{ + string Method(int RIGHT, int b, int c = 42) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-method-special-chars b/t/t4018/csharp-method-special-chars new file mode 100644 index 00000000000..e6c7bc01a18 --- /dev/null +++ b/t/t4018/csharp-method-special-chars @@ -0,0 +1,11 @@ +class @Some_Type +{ + @Some_Type @Method_With_Underscore(int RIGHT) + { + // Filler + // Filler + + // ChangeMe + return new @Some_Type(); + } +} diff --git a/t/t4018/csharp-method-with-spacing b/t/t4018/csharp-method-with-spacing new file mode 100644 index 00000000000..233bb976cc2 --- /dev/null +++ b/t/t4018/csharp-method-with-spacing @@ -0,0 +1,10 @@ +class Example +{ + string Method ( int RIGHT ) + { + // Filler + // Filler + + return "ChangeMe"; + } +} diff --git a/t/t4018/csharp-property b/t/t4018/csharp-property new file mode 100644 index 00000000000..e56dfce34c1 --- /dev/null +++ b/t/t4018/csharp-property @@ -0,0 +1,11 @@ +class Example +{ + public bool RIGHT + { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/t/t4018/csharp-property-braces-same-line b/t/t4018/csharp-property-braces-same-line new file mode 100644 index 00000000000..608131d3d31 --- /dev/null +++ b/t/t4018/csharp-property-braces-same-line @@ -0,0 +1,10 @@ +class Example +{ + public bool RIGHT { + get { return true; } + set + { + // ChangeMe + } + } +} diff --git a/userdiff.c b/userdiff.c index e399543823b..0d667a1f5a6 100644 --- a/userdiff.c +++ b/userdiff.c @@ -89,12 +89,48 @@ PATTERNS("cpp", "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"), PATTERNS("csharp", - /* Keywords */ - "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" - /* Methods and constructors */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe|async)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" - /* Properties */ - "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* + * Jump over reserved keywords which are illegal method names, but which + * can be followed by parentheses without special characters in between, + * making them look like methods. + */ + "!(^|[ \t]+)" /* Start of line or whitespace. */ + "(do|while|for|foreach|if|else|new|default|return|switch|case|throw" + "|catch|using|lock|fixed)" + "([ \t(]+|$)\n" /* Whitespace, "(", or end of line. */ + /* + * Methods/constructors: + * The strategy is to identify a minimum of two groups (any combination + * of keywords/type/name) before the opening parenthesis, and without + * final unexpected characters, normally only used in ordinary statements. + */ + "^[ \t]*" /* Remove leading whitespace. */ + "(" /* Start chunk header capture. */ + "(" /* First group. */ + "[][[:alnum:]@_.]" /* Name. */ + "(<[][[:alnum:]@_, \t<>]+>)?" /* Optional generic parameters. */ + ")+" + "([ \t]+" /* Subsequent groups, prepended with space. */ + "([][[:alnum:]@_.](<[][[:alnum:]@_, \t<>]+>)?)+" + ")+" + "[ \t]*" /* Optional space before parameters start. */ + "\\(" /* Start of method parameters. */ + "[^;]*" /* Allow complex parameters, but exclude statements (;). */ + ")$\n" /* Close chunk header capture. */ + /* + * Properties: + * As with methods, expect a minimum of two groups. But, more trivial than + * methods, the vast majority of properties long enough to be worth + * showing a chunk header for don't include "=:;,()" on the line they are + * defined, since they don't have a parameter list. + */ + "^[ \t]*(" + "([][[:alnum:]@_.](<[][[:alnum:]@_, \t<>]+>)?)+" + "([ \t]+" + "([][[:alnum:]@_.](<[][[:alnum:]@_, \t<>]+>)?)+" + ")+" /* Up to here, same as methods regex. */ + "[^;=:,()]*" /* Compared to methods, no parameter list allowed. */ + ")$\n" /* Type definitions */ "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct|record)[ \t]+.*)$\n" /* Namespace */ base-commit: f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11 -- gitgitgadget ^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v5] userdiff: better method/property matching for C# 2024-04-03 21:42 ` [PATCH v5] " Steven Jeuris via GitGitGadget @ 2024-04-05 22:02 ` Johannes Sixt 2024-04-05 22:10 ` Junio C Hamano 0 siblings, 1 reply; 14+ messages in thread From: Johannes Sixt @ 2024-04-05 22:02 UTC (permalink / raw) To: Steven Jeuris via GitGitGadget, git Cc: Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Steven Jeuris, Steven Jeuris Am 03.04.24 um 23:42 schrieb Steven Jeuris via GitGitGadget: > From: Steven Jeuris <steven.jeuris@3shape.com> > > - Support multi-line methods by not requiring closing parenthesis. > - Support multiple generics (comma was missing before). > - Add missing `foreach`, `lock` and `fixed` keywords to skip over. > - Remove `instanceof` keyword, which isn't C#. > - Also detect non-method keywords not positioned at the start of a line. > - Added tests; none existed before. > > The overall strategy is to focus more on what isn't expected for > method/property definitions, instead of what is, but is fully optional. > > Signed-off-by: Steven Jeuris <steven.jeuris@gmail.com> > Change since v4: > > * Better matching of at least two "words". > * Better handling of generics by restricting commas within < ... >. > * Allow any spaces around commas in generics. > * Because of stricter use of comma, Johannes' identified failing cases > now pass. > * Updated tests to cover all of the above. The proposed patterns look reasonable and are an improvement over the existing patterns, so I think we can move this patch forward. Thank you, -- Hannes ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v5] userdiff: better method/property matching for C# 2024-04-05 22:02 ` Johannes Sixt @ 2024-04-05 22:10 ` Junio C Hamano 0 siblings, 0 replies; 14+ messages in thread From: Junio C Hamano @ 2024-04-05 22:10 UTC (permalink / raw) To: Johannes Sixt Cc: Steven Jeuris via GitGitGadget, git, Ævar Arnfjörð Bjarmason, Jeff King, Linus Arver, Steven Jeuris, Steven Jeuris Johannes Sixt <j6t@kdbg.org> writes: > Am 03.04.24 um 23:42 schrieb Steven Jeuris via GitGitGadget: >> From: Steven Jeuris <steven.jeuris@3shape.com> >> ... > The proposed patterns look reasonable and are an improvement over the > existing patterns, so I think we can move this patch forward. Thanks, both. Let me mark the topic for 'next'. ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2024-04-05 22:10 UTC | newest] Thread overview: 14+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2024-02-25 17:33 [PATCH] userdiff: better method/property matching for C# Steven Jeuris via GitGitGadget 2024-03-06 20:21 ` [PATCH v2] " Steven Jeuris via GitGitGadget 2024-03-07 2:11 ` Junio C Hamano 2024-03-16 18:14 ` Linus Arver 2024-03-26 21:38 ` Junio C Hamano 2024-03-27 8:40 ` Jeff King 2024-03-27 7:30 ` Johannes Sixt 2024-03-28 8:07 ` [PATCH v3] " Steven Jeuris via GitGitGadget 2024-03-28 19:14 ` [PATCH v4] " Steven Jeuris via GitGitGadget 2024-03-28 19:33 ` Junio C Hamano 2024-03-30 18:49 ` Johannes Sixt 2024-04-03 21:42 ` [PATCH v5] " Steven Jeuris via GitGitGadget 2024-04-05 22:02 ` Johannes Sixt 2024-04-05 22:10 ` Junio C Hamano
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).