* [PATCH] unicode: update the width tables to Unicode 16
@ 2024-09-12 20:40 Beat Bolli
2024-09-17 12:37 ` Johannes Schindelin
0 siblings, 1 reply; 5+ messages in thread
From: Beat Bolli @ 2024-09-12 20:40 UTC (permalink / raw)
To: git; +Cc: gitster, Beat Bolli
Unicode 16 has been announced on 2024-09-10 [0], so update the character
width tables to the new version.
[0] https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html
Signed-off-by: Beat Bolli <dev+git@drbeat.li>
---
unicode-width.h | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/unicode-width.h b/unicode-width.h
index be5bf8c4f2..3ffee123a0 100644
--- a/unicode-width.h
+++ b/unicode-width.h
@@ -27,7 +27,7 @@ static const struct interval zero_width[] = {
{ 0x0829, 0x082D },
{ 0x0859, 0x085B },
{ 0x0890, 0x0891 },
-{ 0x0898, 0x089F },
+{ 0x0897, 0x089F },
{ 0x08CA, 0x0902 },
{ 0x093A, 0x093A },
{ 0x093C, 0x093C },
@@ -227,8 +227,9 @@ static const struct interval zero_width[] = {
{ 0x10A3F, 0x10A3F },
{ 0x10AE5, 0x10AE6 },
{ 0x10D24, 0x10D27 },
+{ 0x10D69, 0x10D6D },
{ 0x10EAB, 0x10EAC },
-{ 0x10EFD, 0x10EFF },
+{ 0x10EFC, 0x10EFF },
{ 0x10F46, 0x10F50 },
{ 0x10F82, 0x10F85 },
{ 0x11001, 0x11001 },
@@ -261,6 +262,11 @@ static const struct interval zero_width[] = {
{ 0x11340, 0x11340 },
{ 0x11366, 0x1136C },
{ 0x11370, 0x11374 },
+{ 0x113BB, 0x113C0 },
+{ 0x113CE, 0x113CE },
+{ 0x113D0, 0x113D0 },
+{ 0x113D2, 0x113D2 },
+{ 0x113E1, 0x113E2 },
{ 0x11438, 0x1143F },
{ 0x11442, 0x11444 },
{ 0x11446, 0x11446 },
@@ -280,7 +286,8 @@ static const struct interval zero_width[] = {
{ 0x116AD, 0x116AD },
{ 0x116B0, 0x116B5 },
{ 0x116B7, 0x116B7 },
-{ 0x1171D, 0x1171F },
+{ 0x1171D, 0x1171D },
+{ 0x1171F, 0x1171F },
{ 0x11722, 0x11725 },
{ 0x11727, 0x1172B },
{ 0x1182F, 0x11837 },
@@ -319,8 +326,11 @@ static const struct interval zero_width[] = {
{ 0x11F36, 0x11F3A },
{ 0x11F40, 0x11F40 },
{ 0x11F42, 0x11F42 },
+{ 0x11F5A, 0x11F5A },
{ 0x13430, 0x13440 },
{ 0x13447, 0x13455 },
+{ 0x1611E, 0x16129 },
+{ 0x1612D, 0x1612F },
{ 0x16AF0, 0x16AF4 },
{ 0x16B30, 0x16B36 },
{ 0x16F4F, 0x16F4F },
@@ -351,6 +361,7 @@ static const struct interval zero_width[] = {
{ 0x1E2AE, 0x1E2AE },
{ 0x1E2EC, 0x1E2EF },
{ 0x1E4EC, 0x1E4EF },
+{ 0x1E5EE, 0x1E5EF },
{ 0x1E8D0, 0x1E8D6 },
{ 0x1E944, 0x1E94A },
{ 0xE0001, 0xE0001 },
@@ -366,8 +377,10 @@ static const struct interval double_width[] = {
{ 0x23F3, 0x23F3 },
{ 0x25FD, 0x25FE },
{ 0x2614, 0x2615 },
+{ 0x2630, 0x2637 },
{ 0x2648, 0x2653 },
{ 0x267F, 0x267F },
+{ 0x268A, 0x268F },
{ 0x2693, 0x2693 },
{ 0x26A1, 0x26A1 },
{ 0x26AA, 0x26AB },
@@ -401,11 +414,10 @@ static const struct interval double_width[] = {
{ 0x3099, 0x30FF },
{ 0x3105, 0x312F },
{ 0x3131, 0x318E },
-{ 0x3190, 0x31E3 },
+{ 0x3190, 0x31E5 },
{ 0x31EF, 0x321E },
{ 0x3220, 0x3247 },
-{ 0x3250, 0x4DBF },
-{ 0x4E00, 0xA48C },
+{ 0x3250, 0xA48C },
{ 0xA490, 0xA4C6 },
{ 0xA960, 0xA97C },
{ 0xAC00, 0xD7A3 },
@@ -420,7 +432,7 @@ static const struct interval double_width[] = {
{ 0x16FF0, 0x16FF1 },
{ 0x17000, 0x187F7 },
{ 0x18800, 0x18CD5 },
-{ 0x18D00, 0x18D08 },
+{ 0x18CFF, 0x18D08 },
{ 0x1AFF0, 0x1AFF3 },
{ 0x1AFF5, 0x1AFFB },
{ 0x1AFFD, 0x1AFFE },
@@ -430,6 +442,8 @@ static const struct interval double_width[] = {
{ 0x1B155, 0x1B155 },
{ 0x1B164, 0x1B167 },
{ 0x1B170, 0x1B2FB },
+{ 0x1D300, 0x1D356 },
+{ 0x1D360, 0x1D376 },
{ 0x1F004, 0x1F004 },
{ 0x1F0CF, 0x1F0CF },
{ 0x1F18E, 0x1F18E },
@@ -470,11 +484,10 @@ static const struct interval double_width[] = {
{ 0x1F93C, 0x1F945 },
{ 0x1F947, 0x1F9FF },
{ 0x1FA70, 0x1FA7C },
-{ 0x1FA80, 0x1FA88 },
-{ 0x1FA90, 0x1FABD },
-{ 0x1FABF, 0x1FAC5 },
-{ 0x1FACE, 0x1FADB },
-{ 0x1FAE0, 0x1FAE8 },
+{ 0x1FA80, 0x1FA89 },
+{ 0x1FA8F, 0x1FAC6 },
+{ 0x1FACE, 0x1FADC },
+{ 0x1FADF, 0x1FAE9 },
{ 0x1FAF0, 0x1FAF8 },
{ 0x20000, 0x2FFFD },
{ 0x30000, 0x3FFFD }
--
2.45.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] unicode: update the width tables to Unicode 16
2024-09-12 20:40 [PATCH] unicode: update the width tables to Unicode 16 Beat Bolli
@ 2024-09-17 12:37 ` Johannes Schindelin
2024-09-17 21:54 ` Junio C Hamano
0 siblings, 1 reply; 5+ messages in thread
From: Johannes Schindelin @ 2024-09-17 12:37 UTC (permalink / raw)
To: Beat Bolli; +Cc: git, gitster
Hi Beat,
On Thu, 12 Sep 2024, Beat Bolli wrote:
> Unicode 16 has been announced on 2024-09-10 [0], so update the character
> width tables to the new version.
>
> [0] https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html
I can confirm that the output is identical to the result of running
./contrib/update-unicode/update_unicode.sh.
Maybe we should add an automated, scheduled workflow for these updates?
Ciao,
Johannes
>
> Signed-off-by: Beat Bolli <dev+git@drbeat.li>
> ---
> unicode-width.h | 37 +++++++++++++++++++++++++------------
> 1 file changed, 25 insertions(+), 12 deletions(-)
>
> diff --git a/unicode-width.h b/unicode-width.h
> index be5bf8c4f2..3ffee123a0 100644
> --- a/unicode-width.h
> +++ b/unicode-width.h
> @@ -27,7 +27,7 @@ static const struct interval zero_width[] = {
> { 0x0829, 0x082D },
> { 0x0859, 0x085B },
> { 0x0890, 0x0891 },
> -{ 0x0898, 0x089F },
> +{ 0x0897, 0x089F },
> { 0x08CA, 0x0902 },
> { 0x093A, 0x093A },
> { 0x093C, 0x093C },
> @@ -227,8 +227,9 @@ static const struct interval zero_width[] = {
> { 0x10A3F, 0x10A3F },
> { 0x10AE5, 0x10AE6 },
> { 0x10D24, 0x10D27 },
> +{ 0x10D69, 0x10D6D },
> { 0x10EAB, 0x10EAC },
> -{ 0x10EFD, 0x10EFF },
> +{ 0x10EFC, 0x10EFF },
> { 0x10F46, 0x10F50 },
> { 0x10F82, 0x10F85 },
> { 0x11001, 0x11001 },
> @@ -261,6 +262,11 @@ static const struct interval zero_width[] = {
> { 0x11340, 0x11340 },
> { 0x11366, 0x1136C },
> { 0x11370, 0x11374 },
> +{ 0x113BB, 0x113C0 },
> +{ 0x113CE, 0x113CE },
> +{ 0x113D0, 0x113D0 },
> +{ 0x113D2, 0x113D2 },
> +{ 0x113E1, 0x113E2 },
> { 0x11438, 0x1143F },
> { 0x11442, 0x11444 },
> { 0x11446, 0x11446 },
> @@ -280,7 +286,8 @@ static const struct interval zero_width[] = {
> { 0x116AD, 0x116AD },
> { 0x116B0, 0x116B5 },
> { 0x116B7, 0x116B7 },
> -{ 0x1171D, 0x1171F },
> +{ 0x1171D, 0x1171D },
> +{ 0x1171F, 0x1171F },
> { 0x11722, 0x11725 },
> { 0x11727, 0x1172B },
> { 0x1182F, 0x11837 },
> @@ -319,8 +326,11 @@ static const struct interval zero_width[] = {
> { 0x11F36, 0x11F3A },
> { 0x11F40, 0x11F40 },
> { 0x11F42, 0x11F42 },
> +{ 0x11F5A, 0x11F5A },
> { 0x13430, 0x13440 },
> { 0x13447, 0x13455 },
> +{ 0x1611E, 0x16129 },
> +{ 0x1612D, 0x1612F },
> { 0x16AF0, 0x16AF4 },
> { 0x16B30, 0x16B36 },
> { 0x16F4F, 0x16F4F },
> @@ -351,6 +361,7 @@ static const struct interval zero_width[] = {
> { 0x1E2AE, 0x1E2AE },
> { 0x1E2EC, 0x1E2EF },
> { 0x1E4EC, 0x1E4EF },
> +{ 0x1E5EE, 0x1E5EF },
> { 0x1E8D0, 0x1E8D6 },
> { 0x1E944, 0x1E94A },
> { 0xE0001, 0xE0001 },
> @@ -366,8 +377,10 @@ static const struct interval double_width[] = {
> { 0x23F3, 0x23F3 },
> { 0x25FD, 0x25FE },
> { 0x2614, 0x2615 },
> +{ 0x2630, 0x2637 },
> { 0x2648, 0x2653 },
> { 0x267F, 0x267F },
> +{ 0x268A, 0x268F },
> { 0x2693, 0x2693 },
> { 0x26A1, 0x26A1 },
> { 0x26AA, 0x26AB },
> @@ -401,11 +414,10 @@ static const struct interval double_width[] = {
> { 0x3099, 0x30FF },
> { 0x3105, 0x312F },
> { 0x3131, 0x318E },
> -{ 0x3190, 0x31E3 },
> +{ 0x3190, 0x31E5 },
> { 0x31EF, 0x321E },
> { 0x3220, 0x3247 },
> -{ 0x3250, 0x4DBF },
> -{ 0x4E00, 0xA48C },
> +{ 0x3250, 0xA48C },
> { 0xA490, 0xA4C6 },
> { 0xA960, 0xA97C },
> { 0xAC00, 0xD7A3 },
> @@ -420,7 +432,7 @@ static const struct interval double_width[] = {
> { 0x16FF0, 0x16FF1 },
> { 0x17000, 0x187F7 },
> { 0x18800, 0x18CD5 },
> -{ 0x18D00, 0x18D08 },
> +{ 0x18CFF, 0x18D08 },
> { 0x1AFF0, 0x1AFF3 },
> { 0x1AFF5, 0x1AFFB },
> { 0x1AFFD, 0x1AFFE },
> @@ -430,6 +442,8 @@ static const struct interval double_width[] = {
> { 0x1B155, 0x1B155 },
> { 0x1B164, 0x1B167 },
> { 0x1B170, 0x1B2FB },
> +{ 0x1D300, 0x1D356 },
> +{ 0x1D360, 0x1D376 },
> { 0x1F004, 0x1F004 },
> { 0x1F0CF, 0x1F0CF },
> { 0x1F18E, 0x1F18E },
> @@ -470,11 +484,10 @@ static const struct interval double_width[] = {
> { 0x1F93C, 0x1F945 },
> { 0x1F947, 0x1F9FF },
> { 0x1FA70, 0x1FA7C },
> -{ 0x1FA80, 0x1FA88 },
> -{ 0x1FA90, 0x1FABD },
> -{ 0x1FABF, 0x1FAC5 },
> -{ 0x1FACE, 0x1FADB },
> -{ 0x1FAE0, 0x1FAE8 },
> +{ 0x1FA80, 0x1FA89 },
> +{ 0x1FA8F, 0x1FAC6 },
> +{ 0x1FACE, 0x1FADC },
> +{ 0x1FADF, 0x1FAE9 },
> { 0x1FAF0, 0x1FAF8 },
> { 0x20000, 0x2FFFD },
> { 0x30000, 0x3FFFD }
> --
> 2.45.2
>
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] unicode: update the width tables to Unicode 16
2024-09-17 12:37 ` Johannes Schindelin
@ 2024-09-17 21:54 ` Junio C Hamano
2024-09-29 18:58 ` Johannes Schindelin
0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2024-09-17 21:54 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Beat Bolli, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> Hi Beat,
>
> On Thu, 12 Sep 2024, Beat Bolli wrote:
>
>> Unicode 16 has been announced on 2024-09-10 [0], so update the character
>> width tables to the new version.
>>
>> [0] https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html
>
> I can confirm that the output is identical to the result of running
> ./contrib/update-unicode/update_unicode.sh.
Thanks for double checking. I did the same when I queued the patch
and it indeed looked good.
> Maybe we should add an automated, scheduled workflow for these updates?
We could, but the consortium aims to issue major updates once a year
in September, and says that minor versions and updates "will be avoided", so
we may need to devise automation that makes better use of resources
than to scrape http://www.unicode.org/Public/UCD/latest/ucd/ daily.
44dc651132 2024-09-12T22:40:47+02:00 unicode: update the width tables to Unicode 16
872976c37e 2023-09-25T21:07:04+02:00 unicode: update the width tables to Unicode 15.1
b10cbdac4c 2023-03-30T21:15:17+02:00 unicode: update the width tables to Unicode 15
187fc8b8b6 2021-09-17T12:19:20-07:00 unicode: update the width tables to Unicode 14
65588b0b2e 2020-03-17T16:36:05+01:00 unicode: update the width tables to Unicode 13.0
5817f9caa3 2019-05-29T22:50:45+02:00 unicode: update the width tables to Unicode 12.1
584b62c37b 2019-03-21T22:06:17+01:00 unicode: update the width tables to Unicode 12
570951eea2 2018-07-09T21:44:52+02:00 unicode: update the width tables to Unicode 11
e233bef43e 2018-04-10T14:26:17-07:00 unicode_width.h: rename to use dash in file name
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] unicode: update the width tables to Unicode 16
2024-09-17 21:54 ` Junio C Hamano
@ 2024-09-29 18:58 ` Johannes Schindelin
2024-09-30 18:12 ` Junio C Hamano
0 siblings, 1 reply; 5+ messages in thread
From: Johannes Schindelin @ 2024-09-29 18:58 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Beat Bolli, git
Hi Junio,
On Tue, 17 Sep 2024, Junio C Hamano wrote:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
> > On Thu, 12 Sep 2024, Beat Bolli wrote:
> >
> >> Unicode 16 has been announced on 2024-09-10 [0], so update the character
> >> width tables to the new version.
> >>
> >> [0] https://blog.unicode.org/2024/09/announcing-unicode-standard-version-160.html
> >
> > I can confirm that the output is identical to the result of running
> > ./contrib/update-unicode/update_unicode.sh.
>
> Thanks for double checking. I did the same when I queued the patch
> and it indeed looked good.
>
> > Maybe we should add an automated, scheduled workflow for these updates?
>
> We could, but the consortium aims to issue major updates once a year
> in September, and says that minor versions and updates "will be avoided", so
> we may need to devise automation that makes better use of resources
> than to scrape http://www.unicode.org/Public/UCD/latest/ucd/ daily.
Oh, but I obviously was not suggesting as crude a thing as to scrape it
unconditionally, and certainly not daily. No, I was thinking about
something checking the `Last-Modified:` header and only acting upon
updated Unicode definitions, and checking for updates only on a weekly
basis. Something along these lines:
```yml
name: update Unicode definitions
on:
schedule:
- cron: '1 15 * * 4' # 3:01pm UTC on Thursdays
workflow_dispatch:
jobs:
update-repo-variable:
if: vars.UNICODE_LAST_MODIFIED != ''
runs-on: ubuntu-latest
steps:
- id: check
run: |
set -x
latest_update="$(curl -I https://www.unicode.org/Public/UCD/latest/ucd/UCD.zip |
sed -n 's/^Last-Modified: //p')" &&
if test '${{ vars.UNICODE_LAST_MODIFIED }}' = "$latest_update"
then
echo "result=skip" >>$GITHUB_OUTPUT
exit 0
fi
echo "result=$latest_update" >>$GITHUB_OUTPUT
- if: steps.check.outputs.result != 'skip'
run: echo ::notice::_Now_ we scrape and do stuff
- if: steps.check.outputs.result != 'skip'
env:
GH_TOKEN: ${{ secrets.UNICODE_LAST_MODIFIED_PAT }}
run: |
gh api -X PATCH \
repos/$GITHUB_REPOSITORY/actions/variables/UNICODE_LAST_MODIFIED \
-f value='${{ steps.check.outputs.result }}'
```
This would use the repository variable `UNICODE_LAST_MODIFIED` to store
the `Last-Modified:` value that was last seen (and implicitly act as the
knob to prevent running in forks: if the variable is not yet set, the job
will be skipped).
Sadly, to update the repository variable, we cannot use `permissions:`
because the workflow syntax does not offer the `variables` scope.
Therefore a Personal Access Token would need to be stored as a repository
secret. I used a fine-grained token in my tests whose scope was
Repository > Variables: read-write.
Ciao,
Johannes
>
> 44dc651132 2024-09-12T22:40:47+02:00 unicode: update the width tables to Unicode 16
> 872976c37e 2023-09-25T21:07:04+02:00 unicode: update the width tables to Unicode 15.1
> b10cbdac4c 2023-03-30T21:15:17+02:00 unicode: update the width tables to Unicode 15
> 187fc8b8b6 2021-09-17T12:19:20-07:00 unicode: update the width tables to Unicode 14
> 65588b0b2e 2020-03-17T16:36:05+01:00 unicode: update the width tables to Unicode 13.0
> 5817f9caa3 2019-05-29T22:50:45+02:00 unicode: update the width tables to Unicode 12.1
> 584b62c37b 2019-03-21T22:06:17+01:00 unicode: update the width tables to Unicode 12
> 570951eea2 2018-07-09T21:44:52+02:00 unicode: update the width tables to Unicode 11
> e233bef43e 2018-04-10T14:26:17-07:00 unicode_width.h: rename to use dash in file name
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] unicode: update the width tables to Unicode 16
2024-09-29 18:58 ` Johannes Schindelin
@ 2024-09-30 18:12 ` Junio C Hamano
0 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2024-09-30 18:12 UTC (permalink / raw)
To: Johannes Schindelin; +Cc: Beat Bolli, git
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> Oh, but I obviously was not suggesting as crude a thing as to scrape it
> unconditionally, and certainly not daily. No, I was thinking about
> something checking the `Last-Modified:` header and only acting upon
> updated Unicode definitions, and checking for updates only on a weekly
> basis. Something along these lines:
> ...
> Sadly, to update the repository variable, we cannot use `permissions:`
> because the workflow syntax does not offer the `variables` scope.
> Therefore a Personal Access Token would need to be stored as a repository
> secret. I used a fine-grained token in my tests whose scope was
> Repository > Variables: read-write.
And it can make a patch and send it to the list and get reviewed the
usual way. It is a bit curious whose Sob should be on such a patch,
but we can work out the details, if we were seriously to automate
it. It all seems workable.
You may have already noticed, but I am lazy and tolerate manual
tasks if they do not come more often than once per quarter ;-)
Thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-09-30 18:12 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2024-09-12 20:40 [PATCH] unicode: update the width tables to Unicode 16 Beat Bolli
2024-09-17 12:37 ` Johannes Schindelin
2024-09-17 21:54 ` Junio C Hamano
2024-09-29 18:58 ` Johannes Schindelin
2024-09-30 18:12 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).