public inbox for selinux@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Add support for UTF-8 in file context labels
@ 2026-03-03 12:23 Petr Lautrbach
  2026-03-04  0:22 ` Thiébaud Weksteen
  0 siblings, 1 reply; 3+ messages in thread
From: Petr Lautrbach @ 2026-03-03 12:23 UTC (permalink / raw)
  To: selinux; +Cc: Petr Lautrbach

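- libselinux: accept valid multibyte (UTF-8) characters when reading
  spec entries instead of rejecting every non-ASCII byte
- libselinux: compile regexes with PCRE2_UTF and PCRE2_UCP so that
  patterns are handled as UTF-8 strings with Unicode properties
- setfiles, sefcontext_compile, semodule: initialize the locale at startup
- libsemanage: semanage_exec_prog() now executes external programs with
  LC_CTYPE set to the current locale of the calling process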

This fixes the following failures with non-ASCII file context paths:
    # cat unicode.cil
    (filecon "/opt/žluťoučký(/.*)?" any (system_u object_r user_home_t ((s0) (s0))))

    # semodule -i unicode.cil
    /sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts:  line 2145 error due to: Non-ASCII characters found
    /sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts:  line 2145 error due to: Non-ASCII characters found
    /var/lib/selinux/final/targeted/contexts/files/file_contexts: Invalid argument
    libsemanage.semanage_validate_and_compile_fcontexts: setfiles returned error code 1.
    semodule:  Failed!

    # semanage fcontext --add -t user_home_t "/opt/žluťoučký(/.*)?"
    /sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts.local:  line 4 error due to: Non-ASCII characters found
    /sbin/setfiles: /var/lib/selinux/final/targeted/contexts/files/file_contexts.local:  line 4 error due to: Non-ASCII characters found
    /var/lib/selinux/final/targeted/contexts/files/file_contexts: Invalid argument
    libsemanage.semanage_validate_and_compile_fcontexts: setfiles returned error code 1.
    OSError: Error

Signed-off-by: Petr Lautrbach <lautrbach@redhat.com>
---
 libselinux/src/label_support.c        | 15 +++++++++++----
 libselinux/src/regex.c                |  8 +++++---
 libselinux/utils/sefcontext_compile.c |  5 +++++
 libsemanage/src/semanage_store.c      | 10 +++++++++-
 policycoreutils/semodule/semodule.c   |  5 +++++
 policycoreutils/setfiles/setfiles.c   |  4 ++++
 6 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/libselinux/src/label_support.c b/libselinux/src/label_support.c
index 57e191c8eb77..2eed5b8cea51 100644
--- a/libselinux/src/label_support.c
+++ b/libselinux/src/label_support.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <errno.h>
+#include <wchar.h>
 #include "label_internal.h"
 
 /*
@@ -35,13 +36,19 @@ static inline int read_spec_entry(char **entry, const char **ptr, size_t *len, c
 	*len = 0;
 
 	while (!isspace((unsigned char)**ptr) && **ptr != '\0') {
-		if (!isascii((unsigned char)**ptr)) {
+		size_t char_len;
+
+		char_len = mbrtowc(NULL, *ptr, MB_CUR_MAX, NULL);
+
+		if ((char_len == (size_t) -1) || (char_len == (size_t) -2)) {
 			errno = EINVAL;
-			*errbuf = "Non-ASCII characters found";
+			*errbuf = "Invalid UTF-8 encoding";
 			return -1;
 		}
-		(*ptr)++;
-		(*len)++;
+
+		*ptr += char_len;
+		*len += char_len;
+
 	}
 
 	if (*len) {
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
index 976f00d4806d..a6e3e899f397 100644
--- a/libselinux/src/regex.c
+++ b/libselinux/src/regex.c
@@ -85,9 +85,11 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
 	if (!(*regex))
 		return -1;
 
-	(*regex)->regex = pcre2_compile(
-	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
-	    &errordata->error_code, &errordata->error_offset, NULL);
+	(*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
+					PCRE2_ZERO_TERMINATED,
+					PCRE2_DOTALL | PCRE2_UTF | PCRE2_UCP,
+					&errordata->error_code,
+					&errordata->error_offset, NULL);
 	if (!(*regex)->regex) {
 		goto err;
 	}
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index 811b2a1a6a98..fb848ff112ca 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -1,6 +1,7 @@
 #include <endian.h>
 #include <errno.h>
 #include <getopt.h>
+#include <locale.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -551,6 +552,10 @@ int main(int argc, char *argv[])
 	struct spec_node *root = NULL;
 	struct sidtab stab = {};
 
+	/* Initialize locale for UTF-8 support */
+	setlocale(LC_ALL, "");
+
+
 	if (argc < 2)
 		usage(argv[0]);
 
diff --git a/libsemanage/src/semanage_store.c b/libsemanage/src/semanage_store.c
index e3048c08df0e..21ee6dfb78b0 100644
--- a/libsemanage/src/semanage_store.c
+++ b/libsemanage/src/semanage_store.c
@@ -57,6 +57,7 @@ typedef struct dbase_policydb dbase_t;
 #include <sys/wait.h>
 #include <limits.h>
 #include <libgen.h>
+#include <locale.h>
 
 #include "debug.h"
 #include "utilities.h"
@@ -1479,6 +1480,11 @@ static int semanage_exec_prog(semanage_handle_t * sh,
 	char **argv;
 	pid_t forkval;
 	int status = 0;
+	char *envp[] = { NULL, NULL};
+
+	/* we expect that locales are already initialized */
+	if (asprintf(&envp[0], "LC_CTYPE=%s", setlocale(LC_CTYPE, NULL)) == -1)
+		envp[0] = NULL;
 
 	argv = split_args(e->path, e->args, new_name, old_name);
 	if (argv == NULL) {
@@ -1492,11 +1498,13 @@ static int semanage_exec_prog(semanage_handle_t * sh,
 	if (forkval == 0) {
 		/* child process.  file descriptors will be closed
 		 * because they were set as close-on-exec. */
-		execve(e->path, argv, NULL);
+		execve(e->path, argv, envp);
 		_exit(EXIT_FAILURE);	/* if execve() failed */
 	}
 
 	free_argv(argv);
+	if (envp[0])
+		free(envp[0]);
 
 	if (forkval == -1) {
 		ERR(sh, "Error while forking process.");
diff --git a/policycoreutils/semodule/semodule.c b/policycoreutils/semodule/semodule.c
index ab5168ebdc00..aea22f6e94e8 100644
--- a/policycoreutils/semodule/semodule.c
+++ b/policycoreutils/semodule/semodule.c
@@ -21,6 +21,7 @@
 #include <sys/types.h>
 #include <libgen.h>
 #include <limits.h>
+#include <locale.h>
 
 #include <sepol/cil/cil.h>
 #include <semanage/modules.h>
@@ -418,6 +419,10 @@ int main(int argc, char *argv[])
 	int i, commit = 0;
 	int result;
 	int status = EXIT_FAILURE;
+
+	/* Initialize locale for UTF-8 support */
+	setlocale(LC_ALL, "");
+
 	const char *genhomedirconargv[] = { "genhomedircon", "-B", "-n" };
 	create_signal_handlers();
 	if (strcmp(basename(argv[0]), "genhomedircon") == 0) {
diff --git a/policycoreutils/setfiles/setfiles.c b/policycoreutils/setfiles/setfiles.c
index 351940f33a9b..7824cffe460e 100644
--- a/policycoreutils/setfiles/setfiles.c
+++ b/policycoreutils/setfiles/setfiles.c
@@ -7,6 +7,7 @@
 #include <regex.h>
 #include <sys/vfs.h>
 #include <libgen.h>
+#include <locale.h>
 #ifdef USE_AUDIT
 #include <libaudit.h>
 
@@ -153,6 +154,9 @@ int main(int argc, char **argv)
 	long unsigned skipped_errors;
 	long unsigned relabeled_files;
 
+	/* Initialize locale for UTF-8 support */
+	setlocale(LC_ALL, "");
+
 	/* Initialize variables */
 	memset(&r_opts, 0, sizeof(r_opts));
 	altpath = NULL;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-03-06  8:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-03 12:23 [PATCH] Add support for UTF-8 in file context labels Petr Lautrbach
2026-03-04  0:22 ` Thiébaud Weksteen
2026-03-06  8:48   ` Petr Lautrbach

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox