* Incorrect automatic ALSA card ID when unicode is at play
@ 2024-10-02 16:54 Barnabás Pőcze
2024-10-02 18:00 ` Jaroslav Kysela
0 siblings, 1 reply; 3+ messages in thread
From: Barnabás Pőcze @ 2024-10-02 16:54 UTC (permalink / raw)
To: linux-sound@vger.kernel.org, Jaroslav Kysela, Takashi Iwai
Hi
When `snd_card::id` is not specified explicitly it is determined automatically
in `sound/core/init.c:snd_card_register()` as follows:
if (*card->id) {
// ...
} else {
/* create an id from either shortname or longname */
const char *src;
src = *card->shortname ? card->shortname : card->longname;
snd_card_set_id_no_lock(card, src,
retrieve_id_from_card_name(src));
}
However, `snd_card_set_id_no_lock()`, or more specifically the `copy_valid_id_string()`
function that it calls does not seem to copy very well with utf-8.
For example, based on the report at https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4135
where `card->shortname="Redmi 电脑音箱"`, `card->id` will be set to \xE7\xE8\xE9\xE7,
which are the first bytes of the symbols in the suffix "电脑音箱" because only those
bytes of the string satisfy the `isalnum()` check in `copy_valid_id_string()`.
I am not sure what kind of 8-bit character set `isalnum()` is basing these results on, but
it certainly does not mix well with utf-8 in this scenario at least.
Regards,
Barnabás Pőcze
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: Incorrect automatic ALSA card ID when unicode is at play
2024-10-02 16:54 Incorrect automatic ALSA card ID when unicode is at play Barnabás Pőcze
@ 2024-10-02 18:00 ` Jaroslav Kysela
2024-10-02 19:36 ` Takashi Iwai
0 siblings, 1 reply; 3+ messages in thread
From: Jaroslav Kysela @ 2024-10-02 18:00 UTC (permalink / raw)
To: Barnabás Pőcze, linux-sound@vger.kernel.org,
Takashi Iwai
On 02. 10. 24 18:54, Barnabás Pőcze wrote:
> Hi
>
>
> When `snd_card::id` is not specified explicitly it is determined automatically
> in `sound/core/init.c:snd_card_register()` as follows:
>
> if (*card->id) {
> // ...
> } else {
> /* create an id from either shortname or longname */
> const char *src;
>
> src = *card->shortname ? card->shortname : card->longname;
> snd_card_set_id_no_lock(card, src,
> retrieve_id_from_card_name(src));
> }
>
> However, `snd_card_set_id_no_lock()`, or more specifically the `copy_valid_id_string()`
> function that it calls does not seem to copy very well with utf-8.
>
> For example, based on the report at https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4135
> where `card->shortname="Redmi 电脑音箱"`, `card->id` will be set to \xE7\xE8\xE9\xE7,
> which are the first bytes of the symbols in the suffix "电脑音箱" because only those
> bytes of the string satisfy the `isalnum()` check in `copy_valid_id_string()`.
>
> I am not sure what kind of 8-bit character set `isalnum()` is basing these results on, but
> it certainly does not mix well with utf-8 in this scenario at least.
It seems that isalnum() also returns true for characters above 0x80 (see
lib/ctype.c):
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
We should probably add isacii() check like:
diff --git a/sound/core/init.c b/sound/core/init.c
index b92aa7103589..114fb87de990 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -654,13 +654,19 @@ void snd_card_free(struct snd_card *card)
}
EXPORT_SYMBOL(snd_card_free);
+/* check, if the character is in the valid ASCII range */
+static inline bool safe_ascii_char(char c)
+{
+ return isascii(c) && isalnum(c);
+}
+
/* retrieve the last word of shortname or longname */
static const char *retrieve_id_from_card_name(const char *name)
{
const char *spos = name;
while (*name) {
- if (isspace(*name) && isalnum(name[1]))
+ if (isspace(*name) && safe_ascii_char(name[1]))
spos = name + 1;
name++;
}
@@ -687,12 +693,12 @@ static void copy_valid_id_string(struct snd_card *card,
const char *src,
{
char *id = card->id;
- while (*nid && !isalnum(*nid))
+ while (*nid && !safe_ascii_char(*nid))
nid++;
if (isdigit(*nid))
*id++ = isalpha(*src) ? *src : 'D';
while (*nid && (size_t)(id - card->id) < sizeof(card->id) - 1) {
- if (isalnum(*nid))
+ if (safe_ascii_char(*nid))
*id++ = *nid;
nid++;
}
@@ -787,7 +793,7 @@ static ssize_t id_store(struct device *dev, struct
device_attribute *attr,
for (idx = 0; idx < copy; idx++) {
c = buf[idx];
- if (!isalnum(c) && c != '_' && c != '-')
+ if (!safe_ascii_char(c) && c != '_' && c != '-')
return -EINVAL;
}
memcpy(buf1, buf, copy);
Takashi, do you have a better idea?
Jaroslav
--
Jaroslav Kysela <perex@perex.cz>
Linux Sound Maintainer; ALSA Project; Red Hat, Inc.
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: Incorrect automatic ALSA card ID when unicode is at play
2024-10-02 18:00 ` Jaroslav Kysela
@ 2024-10-02 19:36 ` Takashi Iwai
0 siblings, 0 replies; 3+ messages in thread
From: Takashi Iwai @ 2024-10-02 19:36 UTC (permalink / raw)
To: Jaroslav Kysela
Cc: Barnabás Pőcze, linux-sound@vger.kernel.org,
Takashi Iwai
On Wed, 02 Oct 2024 20:00:15 +0200,
Jaroslav Kysela wrote:
>
> On 02. 10. 24 18:54, Barnabás Pőcze wrote:
> > Hi
> >
> >
> > When `snd_card::id` is not specified explicitly it is determined automatically
> > in `sound/core/init.c:snd_card_register()` as follows:
> >
> > if (*card->id) {
> > // ...
> > } else {
> > /* create an id from either shortname or longname */
> > const char *src;
> >
> > src = *card->shortname ? card->shortname : card->longname;
> > snd_card_set_id_no_lock(card, src,
> > retrieve_id_from_card_name(src));
> > }
> >
> > However, `snd_card_set_id_no_lock()`, or more specifically the `copy_valid_id_string()`
> > function that it calls does not seem to copy very well with utf-8.
> >
> > For example, based on the report at https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4135
> > where `card->shortname="Redmi 电脑音箱"`, `card->id` will be set to \xE7\xE8\xE9\xE7,
> > which are the first bytes of the symbols in the suffix "电脑音箱" because only those
> > bytes of the string satisfy the `isalnum()` check in `copy_valid_id_string()`.
> >
> > I am not sure what kind of 8-bit character set `isalnum()` is basing these results on, but
> > it certainly does not mix well with utf-8 in this scenario at least.
>
> It seems that isalnum() also returns true for characters above 0x80
> (see lib/ctype.c):
>
> 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
> 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
> _S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
> _P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
> _U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
> _U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
> _L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
> _L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
>
> We should probably add isacii() check like:
>
> diff --git a/sound/core/init.c b/sound/core/init.c
> index b92aa7103589..114fb87de990 100644
> --- a/sound/core/init.c
> +++ b/sound/core/init.c
> @@ -654,13 +654,19 @@ void snd_card_free(struct snd_card *card)
> }
> EXPORT_SYMBOL(snd_card_free);
>
> +/* check, if the character is in the valid ASCII range */
> +static inline bool safe_ascii_char(char c)
> +{
> + return isascii(c) && isalnum(c);
> +}
> +
> /* retrieve the last word of shortname or longname */
> static const char *retrieve_id_from_card_name(const char *name)
> {
> const char *spos = name;
>
> while (*name) {
> - if (isspace(*name) && isalnum(name[1]))
> + if (isspace(*name) && safe_ascii_char(name[1]))
> spos = name + 1;
> name++;
> }
> @@ -687,12 +693,12 @@ static void copy_valid_id_string(struct snd_card
> *card, const char *src,
> {
> char *id = card->id;
>
> - while (*nid && !isalnum(*nid))
> + while (*nid && !safe_ascii_char(*nid))
> nid++;
> if (isdigit(*nid))
> *id++ = isalpha(*src) ? *src : 'D';
> while (*nid && (size_t)(id - card->id) < sizeof(card->id) - 1) {
> - if (isalnum(*nid))
> + if (safe_ascii_char(*nid))
> *id++ = *nid;
> nid++;
> }
> @@ -787,7 +793,7 @@ static ssize_t id_store(struct device *dev, struct
> device_attribute *attr,
>
> for (idx = 0; idx < copy; idx++) {
> c = buf[idx];
> - if (!isalnum(c) && c != '_' && c != '-')
> + if (!safe_ascii_char(c) && c != '_' && c != '-')
> return -EINVAL;
> }
> memcpy(buf1, buf, copy);
>
>
> Takashi, do you have a better idea?
I believe your change is fine. Care to submit a proper fix patch?
thanks,
Takashi
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-10-02 19:35 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-02 16:54 Incorrect automatic ALSA card ID when unicode is at play Barnabás Pőcze
2024-10-02 18:00 ` Jaroslav Kysela
2024-10-02 19:36 ` Takashi Iwai
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox