* [PATCH] Add support for UTF-8 in file context labels
@ 2026-03-03 12:23 Petr Lautrbach
2026-03-04 0:22 ` Thiébaud Weksteen
0 siblings, 1 reply; 3+ messages in thread
From: Petr Lautrbach @ 2026-03-03 12:23 UTC (permalink / raw)
To: selinux; +Cc: Petr Lautrbach
- libselinux to be able to read UTF-8 spec entries
- libselinux to compile regexes with UTF strings and unicode properties
- setfiles, sefcontext_compile, semodule: initialize the locale at startup
- semanage_exec_prog: execute external programs with LC_CTYPE set to the
calling process's current LC_CTYPE locale
Fixes:
# cat unicode.cil
(filecon "/opt/žluťoučký(/.*)?" any (system_u object_r user_home_t ((s0) (s0))))
# semodule -i unicode.cil
/sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts: line 2145 error due to: Non-ASCII characters found
/sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts: line 2145 error due to: Non-ASCII characters found
/var/lib/selinux/final/targeted/contexts/files/file_contexts: Invalid argument
libsemanage.semanage_validate_and_compile_fcontexts: setfiles returned error code 1.
semodule: Failed!
# semanage fcontext --add -t user_home_t "/opt/žluťoučký(/.*)?"
/sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts.local: line 4 error due to: Non-ASCII characters found
/sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts.local: line 4 error due to: Non-ASCII characters found
/var/lib/selinux/final/targeted/contexts/files/file_contexts: Invalid argument
libsemanage.semanage_validate_and_compile_fcontexts: setfiles returned error code 1.
OSError: Error
Signed-off-by: Petr Lautrbach <lautrbach@redhat.com>
---
libselinux/src/label_support.c | 15 +++++++++++----
libselinux/src/regex.c | 8 +++++---
libselinux/utils/sefcontext_compile.c | 5 +++++
libsemanage/src/semanage_store.c | 10 +++++++++-
policycoreutils/semodule/semodule.c | 5 +++++
policycoreutils/setfiles/setfiles.c | 4 ++++
6 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/libselinux/src/label_support.c b/libselinux/src/label_support.c
index 57e191c8eb77..2eed5b8cea51 100644
--- a/libselinux/src/label_support.c
+++ b/libselinux/src/label_support.c
@@ -11,6 +11,7 @@
#include <string.h>
#include <stdio.h>
#include <errno.h>
+#include <wchar.h>
#include "label_internal.h"
/*
@@ -35,13 +36,19 @@ static inline int read_spec_entry(char **entry, const char **ptr, size_t *len, c
*len = 0;
while (!isspace((unsigned char)**ptr) && **ptr != '\0') {
- if (!isascii((unsigned char)**ptr)) {
+ size_t char_len;
+
+ char_len = mbrtowc(NULL, *ptr, MB_CUR_MAX, NULL);
+
+ if ((char_len == (size_t) -1) || (char_len == (size_t) -2)) {
errno = EINVAL;
- *errbuf = "Non-ASCII characters found";
+ *errbuf = "Invalid UTF-8 encoding";
return -1;
}
- (*ptr)++;
- (*len)++;
+
+ *ptr += char_len;
+ *len += char_len;
+
}
if (*len) {
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
index 976f00d4806d..a6e3e899f397 100644
--- a/libselinux/src/regex.c
+++ b/libselinux/src/regex.c
@@ -85,9 +85,11 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
if (!(*regex))
return -1;
- (*regex)->regex = pcre2_compile(
- (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
- &errordata->error_code, &errordata->error_offset, NULL);
+ (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
+ PCRE2_ZERO_TERMINATED,
+ PCRE2_DOTALL | PCRE2_UTF | PCRE2_UCP,
+ &errordata->error_code,
+ &errordata->error_offset, NULL);
if (!(*regex)->regex) {
goto err;
}
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index 811b2a1a6a98..fb848ff112ca 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -1,6 +1,7 @@
#include <endian.h>
#include <errno.h>
#include <getopt.h>
+#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
@@ -551,6 +552,10 @@ int main(int argc, char *argv[])
struct spec_node *root = NULL;
struct sidtab stab = {};
+ /* Initialize locale for UTF-8 support */
+ setlocale(LC_ALL, "");
+
+
if (argc < 2)
usage(argv[0]);
diff --git a/libsemanage/src/semanage_store.c b/libsemanage/src/semanage_store.c
index e3048c08df0e..21ee6dfb78b0 100644
--- a/libsemanage/src/semanage_store.c
+++ b/libsemanage/src/semanage_store.c
@@ -57,6 +57,7 @@ typedef struct dbase_policydb dbase_t;
#include <sys/wait.h>
#include <limits.h>
#include <libgen.h>
+#include <locale.h>
#include "debug.h"
#include "utilities.h"
@@ -1479,6 +1480,11 @@ static int semanage_exec_prog(semanage_handle_t * sh,
char **argv;
pid_t forkval;
int status = 0;
+ char *envp[] = { NULL, NULL};
+
+ /* we expect that locales are already initialized */
+ if (asprintf(&envp[0], "LC_CTYPE=%s", setlocale(LC_CTYPE, NULL)) == -1)
+ envp[0] = NULL;
argv = split_args(e->path, e->args, new_name, old_name);
if (argv == NULL) {
@@ -1492,11 +1498,13 @@ static int semanage_exec_prog(semanage_handle_t * sh,
if (forkval == 0) {
/* child process. file descriptors will be closed
* because they were set as close-on-exec. */
- execve(e->path, argv, NULL);
+ execve(e->path, argv, envp);
_exit(EXIT_FAILURE); /* if execve() failed */
}
free_argv(argv);
+ if (envp[0])
+ free(envp[0]);
if (forkval == -1) {
ERR(sh, "Error while forking process.");
diff --git a/policycoreutils/semodule/semodule.c b/policycoreutils/semodule/semodule.c
index ab5168ebdc00..aea22f6e94e8 100644
--- a/policycoreutils/semodule/semodule.c
+++ b/policycoreutils/semodule/semodule.c
@@ -21,6 +21,7 @@
#include <sys/types.h>
#include <libgen.h>
#include <limits.h>
+#include <locale.h>
#include <sepol/cil/cil.h>
#include <semanage/modules.h>
@@ -418,6 +419,10 @@ int main(int argc, char *argv[])
int i, commit = 0;
int result;
int status = EXIT_FAILURE;
+
+ /* Initialize locale for UTF-8 support */
+ setlocale(LC_ALL, "");
+
const char *genhomedirconargv[] = { "genhomedircon", "-B", "-n" };
create_signal_handlers();
if (strcmp(basename(argv[0]), "genhomedircon") == 0) {
diff --git a/policycoreutils/setfiles/setfiles.c b/policycoreutils/setfiles/setfiles.c
index 351940f33a9b..7824cffe460e 100644
--- a/policycoreutils/setfiles/setfiles.c
+++ b/policycoreutils/setfiles/setfiles.c
@@ -7,6 +7,7 @@
#include <regex.h>
#include <sys/vfs.h>
#include <libgen.h>
+#include <locale.h>
#ifdef USE_AUDIT
#include <libaudit.h>
@@ -153,6 +154,9 @@ int main(int argc, char **argv)
long unsigned skipped_errors;
long unsigned relabeled_files;
+ /* Initialize locale for UTF-8 support */
+ setlocale(LC_ALL, "");
+
/* Initialize variables */
memset(&r_opts, 0, sizeof(r_opts));
altpath = NULL;
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] Add support for UTF-8 in file context labels
2026-03-03 12:23 [PATCH] Add support for UTF-8 in file context labels Petr Lautrbach
@ 2026-03-04 0:22 ` Thiébaud Weksteen
2026-03-06 8:48 ` Petr Lautrbach
0 siblings, 1 reply; 3+ messages in thread
From: Thiébaud Weksteen @ 2026-03-04 0:22 UTC (permalink / raw)
To: Petr Lautrbach; +Cc: selinux
On Tue, Mar 3, 2026 at 11:39 PM Petr Lautrbach <lautrbach@redhat.com> wrote:
>
> - libselinux to be able to read UTF-8 spec entries
> - libselinux to compile regexes with UTF strings and unicode properties
Thanks Petr. I'm a bit worried about the performance impact here of
always enabling UTF support. Were you able to run benchmarks on this
change? Android's file_contexts [1] could be useful data here. If the
impact is non-negligible, could this feature be behind an option for
selabel_file, something like SELABEL_OPT_UTF?
[1] https://cs.android.com/android/platform/superproject/+/android-latest-release:system/sepolicy/private/file_contexts
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Add support for UTF-8 in file context labels
2026-03-04 0:22 ` Thiébaud Weksteen
@ 2026-03-06 8:48 ` Petr Lautrbach
0 siblings, 0 replies; 3+ messages in thread
From: Petr Lautrbach @ 2026-03-06 8:48 UTC (permalink / raw)
To: selinux; +Cc: Thiébaud Weksteen
Thiébaud Weksteen <tweek@google.com> writes:
> On Tue, Mar 3, 2026 at 11:39 PM Petr Lautrbach <lautrbach@redhat.com> wrote:
>>
>> - libselinux to be able to read UTF-8 spec entries
>> - libselinux to compile regexes with UTF strings and unicode properties
>
> Thanks Petr. I'm a bit worried about the performance impact here of
> always enabling UTF support. Were you able to run benchmarks on this
> change? Android's file_contexts [1] could be useful data here. If the
> impact is non-negligible, could this feature be behind an option for
> selabel_file, something like SELABEL_OPT_UTF?
>
> [1] https://cs.android.com/android/platform/superproject/+/android-latest-release:system/sepolicy/private/file_contexts
I've run perf on
`setfiles /etc/selinux/targeted/contexts/files/file_contexts /` and, if I
read the results correctly, there's about 5% overhead in libpcre2 when
it's used with PCRE2_UTF.
I'll prepare another patch in which UTF-8 support can be disabled at
build time, e.g.
make LIBDIR=/usr/lib64 SHLIBDIR=/lib64 DISABLE_UTF=y install
Petr
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-03-06 8:48 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed
-- links below jump to the message on this page --)
2026-03-03 12:23 [PATCH] Add support for UTF-8 in file context labels Petr Lautrbach
2026-03-04 0:22 ` Thiébaud Weksteen
2026-03-06 8:48 ` Petr Lautrbach
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.