* [PATCH v2 RESEND] libselinux: Workaround for heap overhead of pcre
@ 2023-01-12 9:14 Inseob Kim
2023-01-15 22:17 ` Jason Zaman
0 siblings, 1 reply; 2+ messages in thread
From: Inseob Kim @ 2023-01-12 9:14 UTC (permalink / raw)
To: selinux; +Cc: Inseob Kim
pcre's behavior is changed so that pcre2_match always allocates heap for
match_data, rather than stack, regardless of size. The heap isn't freed
until explicitly calling pcre2_match_data_free. This new behavior may
result in heap overhead, which may increase the peak memory usage about
a few megabytes. It's because regex_match is first called for regex_data
objects, and then regex_data objects are freed at once.
To workaround it, free match_data as soon as we call regex_match. It's
fine because libselinux currently doesn't use match_data, but use only
the return value.
Signed-off-by: Inseob Kim <inseob@google.com>
---
v2:
- add AGGRESSIVE_FREE_AFTER_REGEX_MATCH macro
- remove match_data from struct regex_data
---
libselinux/src/regex.c | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
index 149a7973..4b4b9f08 100644
--- a/libselinux/src/regex.c
+++ b/libselinux/src/regex.c
@@ -60,11 +60,13 @@ char const *regex_arch_string(void)
struct regex_data {
pcre2_code *regex; /* compiled regular expression */
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
/*
* match data block required for the compiled
* pattern in pcre2
*/
pcre2_match_data *match_data;
+#endif
pthread_mutex_t match_mutex;
};
@@ -84,11 +86,13 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
goto err;
}
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
(*regex)->match_data =
pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
if (!(*regex)->match_data) {
goto err;
}
+#endif
return 0;
err:
@@ -138,10 +142,12 @@ int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
if (rc != 1)
goto err;
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
(*regex)->match_data =
pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
if (!(*regex)->match_data)
goto err;
+#endif
*regex_compiled = true;
}
@@ -203,8 +209,12 @@ void regex_data_free(struct regex_data *regex)
if (regex) {
if (regex->regex)
pcre2_code_free(regex->regex);
+
+#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
if (regex->match_data)
pcre2_match_data_free(regex->match_data);
+#endif
+
__pthread_mutex_destroy(®ex->match_mutex);
free(regex);
}
@@ -213,10 +223,30 @@ void regex_data_free(struct regex_data *regex)
int regex_match(struct regex_data *regex, char const *subject, int partial)
{
int rc;
+ pcre2_match_data *match_data;
__pthread_mutex_lock(®ex->match_mutex);
+
+#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
+ match_data = pcre2_match_data_create_from_pattern(
+ regex->regex, NULL);
+ if (match_data == NULL) {
+ __pthread_mutex_unlock(®ex->match_mutex);
+ return REGEX_ERROR;
+ }
+#else
+ match_data = regex->match_data;
+#endif
+
rc = pcre2_match(
regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
- partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
+ partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL);
+
+#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
+ // pcre2_match allocates heap and it won't be freed until
+ // pcre2_match_data_free, resulting in heap overhead.
+ pcre2_match_data_free(match_data);
+#endif
+
__pthread_mutex_unlock(®ex->match_mutex);
if (rc > 0)
return REGEX_MATCH;
--
2.39.0.314.g84b9a713c41-goog
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [PATCH v2 RESEND] libselinux: Workaround for heap overhead of pcre
2023-01-12 9:14 [PATCH v2 RESEND] libselinux: Workaround for heap overhead of pcre Inseob Kim
@ 2023-01-15 22:17 ` Jason Zaman
0 siblings, 0 replies; 2+ messages in thread
From: Jason Zaman @ 2023-01-15 22:17 UTC (permalink / raw)
To: Inseob Kim; +Cc: selinux
On Thu, Jan 12, 2023 at 06:14:09PM +0900, Inseob Kim wrote:
> pcre's behavior is changed so that pcre2_match always allocates heap for
> match_data, rather than stack, regardless of size. The heap isn't freed
> until explicitly calling pcre2_match_data_free. This new behavior may
> result in heap overhead, which may increase the peak memory usage about
> a few megabytes. It's because regex_match is first called for regex_data
> objects, and then regex_data objects are freed at once.
>
> To workaround it, free match_data as soon as we call regex_match. It's
> fine because libselinux currently doesn't use match_data, but use only
> the return value.
>
> Signed-off-by: Inseob Kim <inseob@google.com>
Acked-by: Jason Zaman <jason@perfinion.com>
Merged,
Thanks!
>
> ---
> v2:
> - add AGGRESSIVE_FREE_AFTER_REGEX_MATCH macro
> - remove match_data from struct regex_data
> ---
> libselinux/src/regex.c | 32 +++++++++++++++++++++++++++++++-
> 1 file changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> index 149a7973..4b4b9f08 100644
> --- a/libselinux/src/regex.c
> +++ b/libselinux/src/regex.c
> @@ -60,11 +60,13 @@ char const *regex_arch_string(void)
>
> struct regex_data {
> pcre2_code *regex; /* compiled regular expression */
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> /*
> * match data block required for the compiled
> * pattern in pcre2
> */
> pcre2_match_data *match_data;
> +#endif
> pthread_mutex_t match_mutex;
> };
>
> @@ -84,11 +86,13 @@ int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
> goto err;
> }
>
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> (*regex)->match_data =
> pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> if (!(*regex)->match_data) {
> goto err;
> }
> +#endif
> return 0;
>
> err:
> @@ -138,10 +142,12 @@ int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
> if (rc != 1)
> goto err;
>
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> (*regex)->match_data =
> pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> if (!(*regex)->match_data)
> goto err;
> +#endif
>
> *regex_compiled = true;
> }
> @@ -203,8 +209,12 @@ void regex_data_free(struct regex_data *regex)
> if (regex) {
> if (regex->regex)
> pcre2_code_free(regex->regex);
> +
> +#ifndef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> if (regex->match_data)
> pcre2_match_data_free(regex->match_data);
> +#endif
> +
> __pthread_mutex_destroy(®ex->match_mutex);
> free(regex);
> }
> @@ -213,10 +223,30 @@ void regex_data_free(struct regex_data *regex)
> int regex_match(struct regex_data *regex, char const *subject, int partial)
> {
> int rc;
> + pcre2_match_data *match_data;
> __pthread_mutex_lock(®ex->match_mutex);
> +
> +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> + match_data = pcre2_match_data_create_from_pattern(
> + regex->regex, NULL);
> + if (match_data == NULL) {
> + __pthread_mutex_unlock(®ex->match_mutex);
> + return REGEX_ERROR;
> + }
> +#else
> + match_data = regex->match_data;
> +#endif
> +
> rc = pcre2_match(
> regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> - partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
> + partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL);
> +
> +#ifdef AGGRESSIVE_FREE_AFTER_REGEX_MATCH
> + // pcre2_match allocates heap and it won't be freed until
> + // pcre2_match_data_free, resulting in heap overhead.
> + pcre2_match_data_free(match_data);
> +#endif
> +
> __pthread_mutex_unlock(®ex->match_mutex);
> if (rc > 0)
> return REGEX_MATCH;
> --
> 2.39.0.314.g84b9a713c41-goog
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-01-15 22:17 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-01-12 9:14 [PATCH v2 RESEND] libselinux: Workaround for heap overhead of pcre Inseob Kim
2023-01-15 22:17 ` Jason Zaman
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.