From: "Adam Tlałka" <atlka@pg.gda.pl>
To: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH]console utf-8 mode fixes
Date: Fri, 04 Aug 2006 12:15:13 +0200 [thread overview]
Message-ID: <44D31E31.70009@pg.gda.pl> (raw)
[-- Attachment #1: Type: text/plain, Size: 889 bytes --]
Description: patch for drivers/char/vt.c
Fixed utf-8 mode so that alternate charset modes always work according
to the control sequences interpreted in the do_con_trol function,
preserving backward US-ASCII and VT100 semigraphics compatibility.
Malformed utf-8 sequences are represented as sequences of replacement
glyphs, original codes or '?' as a last resort.
unicode-xterm, gnome-terminal, konsole and other terminal emulators
in utf-8 mode respect the acsc, enacs and rmacs sequences. I also found that
some important system programs (from the Debian distro) use acsc in utf-8
mode - dselect, aptitude and w3m, for example.
Signed-off-by: Adam Tla/lka <atlka@pg.gda.pl>
Regards
--
Adam Tlałka mailto:atlka@pg.gda.pl ^v^ ^v^ ^v^
System & Network Administration Group - - - ~~~~~~
Computer Center, Gdańsk University of Technology, Poland
PGP public key: finger atlka@sunrise.pg.gda.pl
[-- Attachment #2: vt.c.patch --]
[-- Type: text/x-patch, Size: 4502 bytes --]
--- vt_orig.c 2006-08-03 08:34:40.000000000 +0200
+++ vt.c 2006-08-03 09:12:21.000000000 +0200
@@ -63,6 +63,13 @@
*
* Removed console_lock, enabled interrupts across all console operations
* 13 March 2001, Andrew Morton
+ *
+ * Fixed UTF-8 mode so alternate charset modes always work according
+ * to control sequences interpreted in do_con_trol function
+ * preserving backward VT100 semigraphics compatibility,
+ * malformed UTF sequences represented as sequences of replacement glyphs,
+ * original codes or '?' as a last resort if replacement glyph is undefined
+ * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
*/
#include <linux/module.h>
@@ -1991,17 +1998,23 @@ static int do_con_write(struct tty_struc
/* Do no translation at all in control states */
if (vc->vc_state != ESnormal) {
tc = c;
- } else if (vc->vc_utf) {
+ } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
/* Combine UTF-8 into Unicode */
- /* Incomplete characters silently ignored */
+ /* Malformed sequences as sequences of replacement glyphs */
+rescan_last_byte:
if(c > 0x7f) {
- if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) {
- vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
- vc->vc_utf_count--;
- if (vc->vc_utf_count == 0)
- tc = c = vc->vc_utf_char;
- else continue;
+ if (vc->vc_utf_count) {
+ if ((c & 0xc0) == 0x80) {
+ vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
+ if (--vc->vc_utf_count) {
+ vc->vc_npar++;
+ continue;
+ }
+ tc = c = vc->vc_utf_char;
+ } else
+ goto replacement_glyph;
} else {
+ vc->vc_npar = 0;
if ((c & 0xe0) == 0xc0) {
vc->vc_utf_count = 1;
vc->vc_utf_char = (c & 0x1f);
@@ -2018,14 +2031,15 @@ static int do_con_write(struct tty_struc
vc->vc_utf_count = 5;
vc->vc_utf_char = (c & 0x01);
} else
- vc->vc_utf_count = 0;
+ goto replacement_glyph;
continue;
}
} else {
+ if (vc->vc_utf_count)
+ goto replacement_glyph;
tc = c;
- vc->vc_utf_count = 0;
}
- } else { /* no utf */
+ } else { /* no utf or alternate charset mode */
tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
}
@@ -2040,31 +2054,33 @@ static int do_con_write(struct tty_struc
* direct-to-font zone in UTF-8 mode.
*/
ok = tc && (c >= 32 ||
- (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS
- : CTRL_ACTION) >> c) & 1)))
+ !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
+ vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
&& (c != 127 || vc->vc_disp_ctrl)
&& (c != 128+27);
if (vc->vc_state == ESnormal && ok) {
/* Now try to find out how to display it */
tc = conv_uni_to_pc(vc, tc);
- if ( tc == -4 ) {
+ if (tc & ~charmask) {
+ if ( tc == -4 ) {
/* If we got -4 (not found) then see if we have
defined a replacement character (U+FFFD) */
- tc = conv_uni_to_pc(vc, 0xfffd);
-
- /* One reason for the -4 can be that we just
- did a clear_unimap();
- try at least to show something. */
- if (tc == -4)
- tc = c;
- } else if ( tc == -3 ) {
- /* Bad hash table -- hope for the best */
- tc = c;
- }
- if (tc & ~charmask)
- continue; /* Conversion failed */
+replacement_glyph:
+ tc = conv_uni_to_pc(vc, 0xfffd);
+ if (!(tc & ~charmask))
+ goto display_glyph;
+ } else if ( tc != -3 )
+ continue; /* nothing to display */
+ /* no hash table or no replacement --
+ * hope for the best */
+ if ( c & ~charmask )
+ tc = '?';
+ else
+ tc = c;
+ }
+display_glyph:
if (vc->vc_need_wrap || vc->vc_decim)
FLUSH
if (vc->vc_need_wrap) {
@@ -2088,6 +2104,15 @@ static int do_con_write(struct tty_struc
vc->vc_x++;
draw_to = (vc->vc_pos += 2);
}
+ if (vc->vc_utf_count) {
+ if (vc->vc_npar) {
+ vc->vc_npar--;
+ goto display_glyph;
+ }
+ vc->vc_utf_count = 0;
+ c = orig;
+ goto rescan_last_byte;
+ }
continue;
}
FLUSH
next reply other threads:[~2006-08-04 10:12 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-08-04 10:15 Adam Tlałka [this message]
2006-08-04 11:08 ` [PATCH]console utf-8 mode fixes Alan Cox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=44D31E31.70009@pg.gda.pl \
--to=atlka@pg.gda.pl \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.