From: "Alexander E. Patrakov" <patrakov@ums.usu.ru>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Jan Engelhardt <jengelh@linux01.gwdg.de>
Subject: Re: [PATCH] Fix console utf8 composing
Date: Sun, 25 Dec 2005 10:15:50 +0500 [thread overview]
Message-ID: <43AE2B06.4010906@ums.usu.ru> (raw)
In-Reply-To: <Pine.LNX.4.61.0512242300360.29877@yvahk01.tjqt.qr>
Jan Engelhardt wrote:
> Hi,
>
>
> the following patch had been posted around spring 2005, but has not since
> been included. It is about that hitting the keys <Compose><o><e> for example
> did not produce the utf8 sequence for o-with-diaeresis, but the ASCII/ISO8859-1
> char for o-w-diaeresis, as I seem to recall.
> http://groups.google.de/group/linux.kernel/browse_thread/thread/e60f286969e83d99/55688a5aab326aa7?tvc=2&q=chris+(+%22utf-8%22+OR+%22utf8%22+)+console#55688a5aab326aa7
>
> I am posting an updated version that I hope applies to 2.6.15-rc6.
> Not sure what the correct X-Y-by: procedure is for this, as it originally
> is not my work (see URL).
Looks like I did the same backporting independently. Please look at
http://www.linuxfromscratch.org/~alexander/patches/linux-2.6.14-utf8_input-1.patch
(that does apply to the latest kernels if you use patch -Np1 -l -i
linux-2.6.14-utf8_input-1.patch). Differences between our versions are
described below.
> diff -Ppru linux-2.6.15-rc6-20051219230006/drivers/char/consolemap.c linux-2.6-AS22/drivers/char/consolemap.c
> --- linux-2.6.15-rc6-20051219230006/drivers/char/consolemap.c 2005-12-11 13:42:23.000000000 +0100
> +++ linux-2.6-AS22/drivers/char/consolemap.c 2005-12-19 21:53:25.000000000 +0100
> @@ -178,6 +178,7 @@ struct uni_pagedir {
> unsigned long refcount;
> unsigned long sum;
> unsigned char *inverse_translations[4];
> + u16 *inverse_trans_unicode;
> int readonly;
> };
>
> @@ -208,6 +209,40 @@ static void set_inverse_transl(struct vc
> }
> }
>
> +static void set_inverse_trans_unicode(struct vc_data *conp,
> + struct uni_pagedir *p)
> +{
> + int i, j, k, glyph;
> + u16 **p1, *p2;
> + u16 *q;
> +
I'd insert the "if (!p) return;" statement here, for consistency with
non-Unicode path.
> + q = p->inverse_trans_unicode;
> + if (!q) {
> + q = p->inverse_trans_unicode =
> + kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
> + if (!q)
> + return;
> + }
> + memset(q, 0, MAX_GLYPH * sizeof(u16));
> +
> + for (i = 0; i < 32; i++) {
> + p1 = p->uni_pgdir[i];
> + if (!p1)
> + continue;
> + for (j = 0; j < 32; j++) {
> + p2 = p1[j];
> + if (!p2)
> + continue;
> + for (k = 0; k < 64; k++) {
> + glyph = p2[k];
> + if (glyph >= 0 && glyph < MAX_GLYPH
> + && q[glyph] < 32)
> + q[glyph] = (i << 11) + (j << 6) + k;
> + }
> + }
> + }
> +}
> +
> unsigned short *set_translate(int m, struct vc_data *vc)
> {
> inv_translate[vc->vc_num] = m;
> @@ -218,19 +253,29 @@ unsigned short *set_translate(int m, str
> * Inverse translation is impossible for several reasons:
> * 1. The font<->character maps are not 1-1.
> * 2. The text may have been written while a different translation map
> - * was active, or using Unicode.
> + * was active.
> * Still, it is now possible to a certain extent to cut and paste non-ASCII.
> */
> -unsigned char inverse_translate(struct vc_data *conp, int glyph)
> +u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
> {
> struct uni_pagedir *p;
> + int m;
> if (glyph < 0 || glyph >= MAX_GLYPH)
> return 0;
> - else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
> - !p->inverse_translations[inv_translate[conp->vc_num]])
> + else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
> return glyph;
> - else
> - return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
> + else if (use_unicode) {
> + if (!p->inverse_trans_unicode)
> + return glyph;
> + else
> + return p->inverse_trans_unicode[glyph];
> + } else {
> + m = inv_translate[conp->vc_num];
> + if (!p->inverse_translations[m])
> + return glyph;
> + else
> + return p->inverse_translations[m][glyph];
> + }
> }
>
> static void update_user_maps(void)
There is probably a missed set_inverse_trans_unicode(vc_cons[i].d, p);
in update_user_maps() just after set_inverse_transl(vc_cons[i].d, p,
USER_MAP);
> @@ -354,6 +399,10 @@ static void con_release_unimap(struct un
> kfree(p->inverse_translations[i]);
> p->inverse_translations[i] = NULL;
> }
> + if (p->inverse_trans_unicode) {
> + kfree(p->inverse_trans_unicode);
> + p->inverse_trans_unicode = NULL;
> + }
> }
>
> void con_free_unimap(struct vc_data *vc)
> @@ -512,6 +561,7 @@ int con_set_unimap(struct vc_data *vc, u
>
> for (i = 0; i <= 3; i++)
> set_inverse_transl(vc, p, i); /* Update all inverse translations */
> + set_inverse_trans_unicode(vc, p);
>
> return err;
> }
> @@ -562,6 +612,7 @@ int con_set_default_unimap(struct vc_dat
>
> for (i = 0; i <= 3; i++)
> set_inverse_transl(vc, p, i); /* Update all inverse translations */
> + set_inverse_trans_unicode(vc, p);
> dflt = p;
> return err;
> }
> @@ -618,6 +669,19 @@ void con_protect_unimap(struct vc_data *
> p->readonly = rdonly;
> }
>
> +/* may be called during an interrupt */
> +u32 conv_8bit_to_uni(unsigned char c)
> +{
> + /*
> + * Always use USER_MAP. This function is used by the keyboard,
> + * which shouldn't be affected by G0/G1 switching, etc.
> + * If the user map still contains default values, i.e. the
> + * direct-to-font mapping, then assume user is using Latin1.
> + */
> + unsigned short uni = translations[USER_MAP][c];
> + return uni == (0xf000 | c) ? c : uni;
> +}
> +
> int
> conv_uni_to_pc(struct vc_data *conp, long ucs)
> {
> diff -Ppru linux-2.6.15-rc6-20051219230006/drivers/char/keyboard.c linux-2.6-AS22/drivers/char/keyboard.c
> --- linux-2.6.15-rc6-20051219230006/drivers/char/keyboard.c 2005-12-19 21:53:05.000000000 +0100
> +++ linux-2.6-AS22/drivers/char/keyboard.c 2005-12-19 21:53:25.000000000 +0100
> @@ -34,6 +34,7 @@
> #include <linux/init.h>
> #include <linux/slab.h>
>
> +#include <linux/consolemap.h>
> #include <linux/kbd_kern.h>
> #include <linux/kbd_diacr.h>
> #include <linux/vt_kern.h>
> @@ -353,6 +354,15 @@ static void to_utf8(struct vc_data *vc,
My version of to_utf8() takes uint as a second argument and handles
values beyonf 0xffff.
> }
> }
>
> +static void put_8bit(struct vc_data *vc, u8 c)
> +{
> + if (kbd->kbdmode != VC_UNICODE || c < 32 || c == 127)
> + /* Don't translate control chars */
> + put_queue(vc, c);
> + else
> + to_utf8(vc, conv_8bit_to_uni(c));
> +}
> +
> /*
> * Called after returning from RAW mode or when changing consoles - recompute
> * shift_down[] and shift_state from key_down[] maybe called when keymap is
> @@ -413,7 +423,7 @@ static unsigned char handle_diacr(struct
> if (ch == ' ' || ch == d)
> return d;
>
> - put_queue(vc, d);
> + put_8bit(vc, d);
> return ch;
> }
>
> @@ -423,7 +433,7 @@ static unsigned char handle_diacr(struct
> static void fn_enter(struct vc_data *vc, struct pt_regs *regs)
> {
> if (diacr) {
> - put_queue(vc, diacr);
> + put_8bit(vc, diacr);
> diacr = 0;
> }
> put_queue(vc, 13);
> @@ -632,7 +642,7 @@ static void k_self(struct vc_data *vc, u
> diacr = value;
> return;
> }
> - put_queue(vc, value);
> + put_8bit(vc, value);
> }
>
> /*
In k_shift(), there is a difference in the second argument of to_utf8().
My version doesn't have "& 0xffff" because of the abovementioned changes
to this function.
> diff -Ppru linux-2.6.15-rc6-20051219230006/drivers/char/selection.c linux-2.6-AS22/drivers/char/selection.c
> --- linux-2.6.15-rc6-20051219230006/drivers/char/selection.c 2005-12-11 13:42:24.000000000 +0100
> +++ linux-2.6-AS22/drivers/char/selection.c 2005-12-19 21:53:25.000000000 +0100
> @@ -20,6 +20,7 @@
>
> #include <asm/uaccess.h>
>
> +#include <linux/kbd_kern.h>
> #include <linux/vt_kern.h>
> #include <linux/consolemap.h>
> #include <linux/selection.h>
> @@ -34,6 +35,7 @@ extern void poke_blanked_console(void);
> /* Variables for selection control. */
> /* Use a dynamic buffer, instead of static (Dec 1994) */
> struct vc_data *sel_cons; /* must not be disallocated */
> +static int use_unicode;
> static volatile int sel_start = -1; /* cleared by clear_selection */
> static int sel_end;
> static int sel_buffer_lth;
> @@ -54,10 +56,8 @@ static inline void highlight_pointer(con
> complement_pos(sel_cons, where);
> }
>
> -static unsigned char
> -sel_pos(int n)
> -{
> - return inverse_translate(sel_cons, screen_glyph(sel_cons, n));
> +static u16 sel_pos(int n) {
> + return inverse_translate(sel_cons, screen_glyph(sel_cons, n), use_unicode);
> }
>
> /* remove the current selection highlight, if any,
> @@ -86,8 +86,8 @@ static u32 inwordLut[8]={
> 0xFF7FFFFF /* latin-1 accented letters, not division sign */
> };
>
> -static inline int inword(const unsigned char c) {
> - return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
> +static inline int inword(const u16 c) {
> + return c > 0xff || ((inwordLut[c >> 5] >> (c & 0x1F)) & 1);
> }
>
> /* set inwordLut contents. Invoked by ioctl(). */
> @@ -108,13 +108,35 @@ static inline unsigned short limit(const
> return (v > u) ? u : v;
> }
>
> +/* stores the char in UTF8 and returns the number of bytes used (1-3) */
> +int store_utf8(u16 c, char *p) {
> + if (c < 0x80) {
> + /* 0******* */
> + p[0] = c;
> + return 1;
> + } else if (c < 0x800) {
> + /* 110***** 10****** */
> + p[0] = 0xc0 | (c >> 6);
> + p[1] = 0x80 | (c & 0x3f);
> + return 2;
> + } else {
> + /* 1110**** 10****** 10****** */
> + p[0] = 0xe0 | (c >> 12);
> + p[1] = 0x80 | ((c >> 6) & 0x3f);
> + p[2] = 0x80 | (c & 0x3f);
> + return 3;
> + }
> +}
> +
> /* set the current selection. Invoked by ioctl() or by kernel code. */
> int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty)
> {
> struct vc_data *vc = vc_cons[fg_console].d;
> int sel_mode, new_sel_start, new_sel_end, spc;
> char *bp, *obp;
> - int i, ps, pe;
> + int i, ps, pe, multiplier;
> + u16 c;
> + struct kbd_struct *kbd = kbd_table + fg_console;
>
> poke_blanked_console();
>
> @@ -158,6 +180,7 @@ int set_selection(const struct tiocl_sel
> clear_selection();
> sel_cons = vc_cons[fg_console].d;
> }
> + use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
>
> switch (sel_mode)
> {
> @@ -240,7 +263,8 @@ int set_selection(const struct tiocl_sel
> sel_end = new_sel_end;
>
> /* Allocate a new buffer before freeing the old one ... */
> - bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
> + multiplier = use_unicode ? 3 : 1; /* chars can take up to 3 bytes */
> + bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
> if (!bp) {
> printk(KERN_WARNING "selection: kmalloc() failed\n");
> clear_selection();
> @@ -251,8 +275,12 @@ int set_selection(const struct tiocl_sel
>
> obp = bp;
> for (i = sel_start; i <= sel_end; i += 2) {
> - *bp = sel_pos(i);
> - if (!isspace(*bp++))
> + c = sel_pos(i);
> + if (use_unicode)
> + bp += store_utf8(c, bp);
> + else
> + *bp++ = c;
> + if (!isspace(c))
> obp = bp;
> if (! ((i + 2) % vc->vc_size_row)) {
> /* strip trailing blanks from line and add newline,
> diff -Ppru linux-2.6.15-rc6-20051219230006/include/linux/consolemap.h linux-2.6-AS22/include/linux/consolemap.h
> --- linux-2.6.15-rc6-20051219230006/include/linux/consolemap.h 2005-10-28 02:02:08.000000000 +0200
> +++ linux-2.6-AS22/include/linux/consolemap.h 2005-12-19 21:53:25.000000000 +0100
> @@ -10,6 +10,7 @@
>
> struct vc_data;
>
> -extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
> +extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
> extern unsigned short *set_translate(int m, struct vc_data *vc);
> extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
> +extern u32 conv_8bit_to_uni(unsigned char c);
> #eof
>
>
>
> Jan Engelhardt
next prev parent reply other threads:[~2005-12-25 5:16 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-12-24 22:07 [PATCH] Fix console utf8 composing Jan Engelhardt
2005-12-25 5:15 ` Alexander E. Patrakov [this message]
2005-12-25 21:13 ` Jan Engelhardt
2005-12-26 5:26 ` Alexander E. Patrakov
2006-01-30 9:26 ` Jan Engelhardt
[not found] <Pine.LNX.4.61.0604022005290.12603@yvahk01.tjqt.qr>
2006-04-03 1:57 ` [PATCH] Fix console utf8 composing (F) (fwd) Alexander E. Patrakov
2006-04-03 8:48 ` Jan Engelhardt
2006-05-08 20:13 ` [PATCH] Fix console utf8 composing Jan Engelhardt
2006-05-09 7:49 ` Alexander E. Patrakov
2006-05-09 23:31 ` Ingo Oeser
2006-05-10 7:05 ` Jan Engelhardt
2006-05-10 9:51 ` Alexander E. Patrakov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=43AE2B06.4010906@ums.usu.ru \
--to=patrakov@ums.usu.ru \
--cc=jengelh@linux01.gwdg.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.