[PATCH 1/4] Add so called reduced charset support to SMS

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/4] Add so called reduced charset support to SMS
@ 2010-09-02  7:52 Aki Niemi
  2010-09-02  7:52 ` [PATCH 2/4] Enable alphabets in smsutils Aki Niemi
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Aki Niemi @ 2010-09-02  7:52 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 13913 bytes --]

This "reduced" charset support is available in some current phones to
reduce the number of segments that a message takes.

Normally, when characters are encountered that don't have a
representation in the GSM 7 bit alphabet, the encoding switches to
UCS-2, which takes roughly twice as much space.

This reduced charset feature transliterates the input string, so that
more Unicode characters fit the GSM alphabet. The obvious downside is
that transliterating loses information, i.e., the text gets dumbed
down, and what the recipient receives is not the original text.

Nevertheless, in some regions, this is a must-have feature.
---
 src/util.c       |  137 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 src/util.h       |    4 ++
 unit/test-util.c |   18 +++++++
 3 files changed, 152 insertions(+), 7 deletions(-)

diff --git a/src/util.c b/src/util.c
index fd8b305..4eeb060 100644
--- a/src/util.c
+++ b/src/util.c
@@ -3,6 +3,7 @@
  *  oFono - Open Source Telephony
  *
  *  Copyright (C) 2008-2010  Intel Corporation. All rights reserved.
+ *  Copyright (C) 2009-2010  Nokia Corporation and/or its subsidiary(-ies).
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -78,6 +79,7 @@ struct alphabet_conversion_table {
 	const struct codepoint *togsm_single_shift;
 	unsigned int togsm_single_shift_len;
 	const struct codepoint *tounicode_locking_shift;
+	unsigned int tounicode_locking_shift_len;
 	const struct codepoint *tounicode_single_shift;
 	unsigned int tounicode_single_shift_len;
 };
@@ -300,6 +302,7 @@ static const unsigned short def_gsm[] = {
 	0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0  /* 0x7F */
 };
 
+/* Used for conversion of Unicode to GSM */
 static const struct codepoint def_unicode[] = {
 	{ 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
 	{ 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
@@ -445,19 +448,127 @@ static const struct codepoint por_unicode[] = {
 	{ 0x00FC, 0x7E }, { 0x0394, 0x10 }, { 0x20AC, 0x18 }, { 0x221E, 0x15 }
 };
 
+/* Reduced character set pseudo dialect.
+ *
+ * This table includes the default Unicode to GSM table, and in
+ * addition entries for transliterating additional unicode characters
+ * to the GSM alphabet.  Certain mathematical symbols and accented
+ * characters are mapped to their closest-looking GSM 7bit character.
+ */
+static const struct codepoint reduced_unicode[] = {
+	{ 0x000A, 0x0A }, { 0x000D, 0x0D }, { 0x0020, 0x20 }, { 0x0021, 0x21 },
+	{ 0x0022, 0x22 }, { 0x0023, 0x23 }, { 0x0024, 0x02 }, { 0x0025, 0x25 },
+	{ 0x0026, 0x26 }, { 0x0027, 0x27 }, { 0x0028, 0x28 }, { 0x0029, 0x29 },
+	{ 0x002A, 0x2A }, { 0x002B, 0x2B }, { 0x002C, 0x2C }, { 0x002D, 0x2D },
+	{ 0x002E, 0x2E }, { 0x002F, 0x2F }, { 0x0030, 0x30 }, { 0x0031, 0x31 },
+	{ 0x0032, 0x32 }, { 0x0033, 0x33 }, { 0x0034, 0x34 }, { 0x0035, 0x35 },
+	{ 0x0036, 0x36 }, { 0x0037, 0x37 }, { 0x0038, 0x38 }, { 0x0039, 0x39 },
+	{ 0x003A, 0x3A }, { 0x003B, 0x3B }, { 0x003C, 0x3C }, { 0x003D, 0x3D },
+	{ 0x003E, 0x3E }, { 0x003F, 0x3F }, { 0x0040, 0x00 }, { 0x0041, 0x41 },
+	{ 0x0042, 0x42 }, { 0x0043, 0x43 }, { 0x0044, 0x44 }, { 0x0045, 0x45 },
+	{ 0x0046, 0x46 }, { 0x0047, 0x47 }, { 0x0048, 0x48 }, { 0x0049, 0x49 },
+	{ 0x004A, 0x4A }, { 0x004B, 0x4B }, { 0x004B, 0x4B }, { 0x004C, 0x4C },
+	{ 0x004D, 0x4D }, { 0x004E, 0x4E }, { 0x004F, 0x4F }, { 0x0050, 0x50 },
+	{ 0x0051, 0x51 }, { 0x0052, 0x52 }, { 0x0053, 0x53 }, { 0x0054, 0x54 },
+	{ 0x0055, 0x55 }, { 0x0056, 0x56 }, { 0x0057, 0x57 }, { 0x0058, 0x58 },
+	{ 0x0059, 0x59 }, { 0x005A, 0x5A }, { 0x005F, 0x11 }, { 0x005F, 0x11 },
+	{ 0x0061, 0x61 }, { 0x0062, 0x62 }, { 0x0063, 0x63 }, { 0x0064, 0x64 },
+	{ 0x0065, 0x65 }, { 0x0066, 0x66 }, { 0x0067, 0x67 }, { 0x0068, 0x68 },
+	{ 0x0069, 0x69 }, { 0x006A, 0x6A }, { 0x006B, 0x6B }, { 0x006C, 0x6C },
+	{ 0x006D, 0x6D }, { 0x006E, 0x6E }, { 0x006F, 0x6F }, { 0x0070, 0x70 },
+	{ 0x0071, 0x71 }, { 0x0072, 0x72 }, { 0x0072, 0x72 }, { 0x0073, 0x73 },
+	{ 0x0074, 0x74 }, { 0x0075, 0x75 }, { 0x0076, 0x76 }, { 0x0077, 0x77 },
+	{ 0x0078, 0x78 }, { 0x0079, 0x79 }, { 0x007A, 0x7A }, { 0x00A0, 0x20 },
+	{ 0x00A1, 0x40 }, { 0x00A3, 0x01 }, { 0x00A4, 0x24 }, { 0x00A5, 0x03 },
+	{ 0x00A7, 0x5F }, { 0x00BF, 0x60 }, { 0x00C0, 0x41 }, { 0x00C1, 0x41 },
+	{ 0x00C2, 0x41 }, { 0x00C3, 0x41 }, { 0x00C4, 0x5B }, { 0x00C5, 0x0E },
+	{ 0x00C6, 0x1C }, { 0x00C7, 0x09 }, { 0x00C8, 0x45 }, { 0x00C9, 0x1F },
+	{ 0x00CA, 0x45 }, { 0x00CB, 0x45 }, { 0x00CC, 0x49 }, { 0x00CD, 0x49 },
+	{ 0x00CE, 0x49 }, { 0x00CF, 0x49 }, { 0x00D0, 0x44 }, { 0x00D1, 0x5D },
+	{ 0x00D2, 0x4F }, { 0x00D3, 0x4F }, { 0x00D4, 0x4F }, { 0x00D5, 0x4F },
+	{ 0x00D6, 0x5C }, { 0x00D7, 0x2A }, { 0x00D8, 0x0B }, { 0x00D9, 0x55 },
+	{ 0x00DA, 0x55 }, { 0x00DB, 0x55 }, { 0x00DC, 0x5E }, { 0x00DD, 0x59 },
+	{ 0x00DE, 0x54 }, { 0x00DF, 0x1E }, { 0x00E0, 0x7F }, { 0x00E1, 0x61 },
+	{ 0x00E2, 0x61 }, { 0x00E3, 0x61 }, { 0x00E4, 0x7B }, { 0x00E5, 0x0F },
+	{ 0x00E6, 0x1D }, { 0x00E7, 0x63 }, { 0x00E8, 0x04 }, { 0x00E9, 0x05 },
+	{ 0x00EA, 0x65 }, { 0x00EB, 0x65 }, { 0x00EC, 0x07 }, { 0x00ED, 0x69 },
+	{ 0x00EE, 0x69 }, { 0x00EF, 0x69 }, { 0x00F0, 0x64 }, { 0x00F1, 0x7D },
+	{ 0x00F2, 0x08 }, { 0x00F3, 0x6F }, { 0x00F4, 0x6F }, { 0x00F5, 0x6F },
+	{ 0x00F6, 0x7C }, { 0x00F7, 0x6F }, { 0x00F8, 0x0C }, { 0x00F9, 0x06 },
+	{ 0x00FA, 0x75 }, { 0x00FB, 0x75 }, { 0x00FC, 0x7E }, { 0x00FD, 0x79 },
+	{ 0x00FE, 0x74 }, { 0x00FF, 0x79 }, { 0x0100, 0x41 }, { 0x0101, 0x61 },
+	{ 0x0102, 0x41 }, { 0x0103, 0x61 }, { 0x0104, 0x41 }, { 0x0105, 0x61 },
+	{ 0x0106, 0x43 }, { 0x0107, 0x63 }, { 0x0108, 0x43 }, { 0x0109, 0x63 },
+	{ 0x010A, 0x43 }, { 0x010B, 0x63 }, { 0x010C, 0x43 }, { 0x010D, 0x63 },
+	{ 0x010E, 0x44 }, { 0x010F, 0x64 }, { 0x0110, 0x44 }, { 0x0111, 0x64 },
+	{ 0x0112, 0x45 }, { 0x0113, 0x65 }, { 0x0114, 0x45 }, { 0x0115, 0x65 },
+	{ 0x0116, 0x45 }, { 0x0117, 0x65 }, { 0x0118, 0x45 }, { 0x0119, 0x65 },
+	{ 0x011A, 0x45 }, { 0x011B, 0x65 }, { 0x011C, 0x47 }, { 0x011D, 0x67 },
+	{ 0x011E, 0x47 }, { 0x011F, 0x67 }, { 0x0120, 0x47 }, { 0x0121, 0x67 },
+	{ 0x0122, 0x47 }, { 0x0123, 0x67 }, { 0x0124, 0x48 }, { 0x0125, 0x68 },
+	{ 0x0126, 0x48 }, { 0x0127, 0x68 }, { 0x0128, 0x49 }, { 0x0129, 0x69 },
+	{ 0x012A, 0x49 }, { 0x012B, 0x69 }, { 0x012C, 0x49 }, { 0x012D, 0x69 },
+	{ 0x012E, 0x49 }, { 0x012F, 0x69 }, { 0x0130, 0x49 }, { 0x0131, 0x69 },
+	{ 0x0134, 0x4A }, { 0x0135, 0x6A }, { 0x0136, 0x4B }, { 0x0137, 0x6B },
+	{ 0x0139, 0x4C }, { 0x013A, 0x6C }, { 0x013B, 0x4C }, { 0x013C, 0x6C },
+	{ 0x013D, 0x4C }, { 0x013E, 0x6C }, { 0x013F, 0x4C }, { 0x0140, 0x6C },
+	{ 0x0141, 0x4C }, { 0x0142, 0x6C }, { 0x0143, 0x4E }, { 0x0144, 0x6E },
+	{ 0x0145, 0x4E }, { 0x0146, 0x6E }, { 0x0147, 0x4E }, { 0x0148, 0x6E },
+	{ 0x014C, 0x4F }, { 0x014D, 0x6F }, { 0x014E, 0x4F }, { 0x014F, 0x6F },
+	{ 0x0150, 0x4F }, { 0x0151, 0x6F }, { 0x0154, 0x52 }, { 0x0155, 0x72 },
+	{ 0x0156, 0x52 }, { 0x0157, 0x72 }, { 0x0158, 0x52 }, { 0x0159, 0x72 },
+	{ 0x015A, 0x53 }, { 0x015B, 0x73 }, { 0x015C, 0x53 }, { 0x015D, 0x73 },
+	{ 0x015E, 0x53 }, { 0x015F, 0x73 }, { 0x0160, 0x53 }, { 0x0161, 0x73 },
+	{ 0x0162, 0x54 }, { 0x0163, 0x74 }, { 0x0164, 0x54 }, { 0x0165, 0x74 },
+	{ 0x0166, 0x54 }, { 0x0167, 0x74 }, { 0x0168, 0x55 }, { 0x0169, 0x75 },
+	{ 0x016A, 0x55 }, { 0x016B, 0x75 }, { 0x016C, 0x55 }, { 0x016D, 0x75 },
+	{ 0x016E, 0x55 }, { 0x016F, 0x75 }, { 0x0170, 0x55 }, { 0x0171, 0x75 },
+	{ 0x0172, 0x55 }, { 0x0173, 0x75 }, { 0x0179, 0x5A }, { 0x017A, 0x7A },
+	{ 0x017B, 0x5A }, { 0x017C, 0x7A }, { 0x017D, 0x5A }, { 0x017E, 0x7A },
+	{ 0x0386, 0x41 }, { 0x0388, 0x45 }, { 0x0389, 0x48 }, { 0x038A, 0x49 },
+	{ 0x038C, 0x4F }, { 0x038E, 0x59 }, { 0x038F, 0x15 }, { 0x0390, 0x49 },
+	{ 0x0391, 0x41 }, { 0x0392, 0x42 }, { 0x0393, 0x13 }, { 0x0394, 0x10 },
+	{ 0x0395, 0x45 }, { 0x0396, 0x5A }, { 0x0397, 0x48 }, { 0x0398, 0x19 },
+	{ 0x0399, 0x49 }, { 0x039A, 0x4B }, { 0x039B, 0x14 }, { 0x039C, 0x4D },
+	{ 0x039D, 0x4E }, { 0x039E, 0x1A }, { 0x039F, 0x4F }, { 0x03A0, 0x16 },
+	{ 0x03A1, 0x50 }, { 0x03A3, 0x18 }, { 0x03A4, 0x54 }, { 0x03A5, 0x59 },
+	{ 0x03A6, 0x12 }, { 0x03A7, 0x58 }, { 0x03A8, 0x17 }, { 0x03A9, 0x15 },
+	{ 0x03AA, 0x49 }, { 0x03AB, 0x59 }, { 0x03AC, 0x41 }, { 0x03AD, 0x45 },
+	{ 0x03AE, 0x48 }, { 0x03AF, 0x49 }, { 0x03B0, 0x59 }, { 0x03B1, 0x41 },
+	{ 0x03B2, 0x42 }, { 0x03B3, 0x13 }, { 0x03B4, 0x10 }, { 0x03B5, 0x45 },
+	{ 0x03B6, 0x5A }, { 0x03B7, 0x48 }, { 0x03B8, 0x19 }, { 0x03B9, 0x49 },
+	{ 0x03BA, 0x4B }, { 0x03BB, 0x14 }, { 0x03BC, 0x4D }, { 0x03BD, 0x4E },
+	{ 0x03BE, 0x1A }, { 0x03BF, 0x4F }, { 0x03C0, 0x16 }, { 0x03C1, 0x50 },
+	{ 0x03C2, 0x18 }, { 0x03C3, 0x18 }, { 0x03C4, 0x54 }, { 0x03C5, 0x59 },
+	{ 0x03C6, 0x12 }, { 0x03C7, 0x58 }, { 0x03C8, 0x17 }, { 0x03C9, 0x15 },
+	{ 0x03CA, 0x49 }, { 0x03CB, 0x59 }, { 0x03CC, 0x4F }, { 0x03CD, 0x59 },
+	{ 0x03CE, 0x15 }
+};
+
 static const struct alphabet_conversion_table alphabet_lookup[] = {
 	/* Default GSM 7 bit */
 	{ def_gsm, def_ext_gsm, TABLE_SIZE(def_ext_gsm),
-		def_unicode, def_ext_unicode, TABLE_SIZE(def_ext_unicode) },
+		def_unicode, TABLE_SIZE(def_unicode),
+		def_ext_unicode, TABLE_SIZE(def_ext_unicode) },
 	/* Turkish GSM dialect */
 	{ tur_gsm, tur_ext_gsm, TABLE_SIZE(tur_ext_gsm),
-		tur_unicode, tur_ext_unicode, TABLE_SIZE(tur_ext_unicode) },
+		tur_unicode, TABLE_SIZE(tur_unicode),
+		tur_ext_unicode, TABLE_SIZE(tur_ext_unicode) },
 	/* Spanish GSM dialect, note that this one only has extension table */
 	{ def_gsm, spa_ext_gsm, TABLE_SIZE(spa_ext_gsm),
-		def_unicode, spa_ext_unicode, TABLE_SIZE(spa_ext_unicode)  },
+		def_unicode, TABLE_SIZE(def_unicode),
+		spa_ext_unicode, TABLE_SIZE(spa_ext_unicode)  },
 	/* Portuguese GSM dialect */
 	{ por_gsm, por_ext_gsm, TABLE_SIZE(por_ext_gsm),
-		por_unicode, por_ext_unicode, TABLE_SIZE(por_ext_unicode) },
+		por_unicode, TABLE_SIZE(por_unicode),
+		por_ext_unicode, TABLE_SIZE(por_ext_unicode) },
+
+	/* End of valid real dialects */
+
+	/* Reduced character set pseudo dialect: use only for encoding */
+	{ NULL, NULL, 0,
+		reduced_unicode, TABLE_SIZE(reduced_unicode),
+		def_ext_unicode, TABLE_SIZE(def_ext_unicode) }
 };
 
 static int compare_codepoints(const void *a, const void *b)
@@ -504,9 +615,10 @@ static unsigned short unicode_locking_shift_lookup(unsigned short k,
 {
 	struct codepoint key = { k, 0 };
 	const struct codepoint *table;
-	unsigned int len = 128;
+	unsigned int len;
 
 	table = alphabet_lookup[lang].tounicode_locking_shift;
+	len = alphabet_lookup[lang].tounicode_locking_shift_len;
 
 	return codepoint_lookup(&key, table, len);
 }
@@ -655,10 +767,10 @@ unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len,
 	long res_len;
 	long i;
 
-	if (locking_lang >= GSM_DIALECT_INVALID)
+	if (locking_lang > GSM_DIALECT_INVALID)
 		return NULL;
 
-	if (single_lang >= GSM_DIALECT_INVALID)
+	if (single_lang > GSM_DIALECT_INVALID)
 		return NULL;
 
 	in = text;
@@ -744,6 +856,17 @@ unsigned char *convert_utf8_to_gsm(const char *text, long len,
 						GSM_DIALECT_DEFAULT);
 }
 
+unsigned char *convert_utf8_to_gsm_with_translit(const char *text, long len,
+					long *items_read, long *items_written,
+					unsigned char terminator)
+{
+	return convert_utf8_to_gsm_with_lang(text, len, items_read,
+						items_written,
+						terminator,
+						GSM_DIALECT_INVALID,
+						GSM_DIALECT_INVALID);
+}
+
 /*!
  * Decodes the hex encoded data and converts to a byte array.  If terminator
  * is not 0, the terminator character is appended to the end of the result.
diff --git a/src/util.h b/src/util.h
index 9da81aa..ddaee4e 100644
--- a/src/util.h
+++ b/src/util.h
@@ -38,6 +38,10 @@ char *convert_gsm_to_utf8_with_lang(const unsigned char *text, long len, long *i
 unsigned char *convert_utf8_to_gsm(const char *text, long len, long *items_read,
 				long *items_written, unsigned char terminator);
 
+unsigned char *convert_utf8_to_gsm_with_translit(const char *text, long len,
+				long *items_read, long *items_written,
+				unsigned char terminator);
+
 unsigned char *convert_utf8_to_gsm_with_lang(const char *text, long len, long *items_read,
 				long *items_written, unsigned char terminator,
 				enum gsm_dialect locking_shift_lang,
diff --git a/unit/test-util.c b/unit/test-util.c
index de62848..03728db 100644
--- a/unit/test-util.c
+++ b/unit/test-util.c
@@ -486,6 +486,23 @@ static void test_valid_turkish()
 	}
 }
 
+static void test_encode_translit()
+{
+	const char *utf8 = "��������������������������������������";
+	const char *expect = "ceAAuoiOOAaoOoaUAEa";
+	long nwritten;
+	long nread;
+	unsigned char *res;
+
+	res = convert_utf8_to_gsm_with_translit(utf8, sizeof(utf8), &nread, &nwritten, 0);
+
+	g_assert(res);
+	g_assert(nread == sizeof(utf8));
+	g_assert(memcmp(res, expect, nwritten) == 0);
+
+	g_free(res);
+}
+
 static const char hex_packed[] = "493A283D0795C3F33C88FE06C9CB6132885EC6D34"
 					"1EDF27C1E3E97E7207B3A0C0A5241E377BB1D"
 					"7693E72E";
@@ -912,6 +929,7 @@ int main(int argc, char **argv)
 	g_test_add_func("/testutil/Invalid Conversions", test_invalid);
 	g_test_add_func("/testutil/Valid Conversions", test_valid);
 	g_test_add_func("/testutil/Valid Turkish National Variant Conversions", test_valid_turkish);
+	g_test_add_func("/testutil/Encode with transliteration", test_encode_translit);
 	g_test_add_func("/testutil/Decode Encode", test_decode_encode);
 	g_test_add_func("/testutil/Pack Size", test_pack_size);
 	g_test_add_func("/testutil/CBS CR Handling", test_cr_handling);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/4] Enable alphabets in smsutils
  2010-09-02  7:52 [PATCH 1/4] Add so called reduced charset support to SMS Aki Niemi
@ 2010-09-02  7:52 ` Aki Niemi
  2010-09-02 16:55   ` Denis Kenzior
  2010-09-03 14:40   ` Pekka Pessi
  2010-09-02  7:52 ` [PATCH 3/4] Add alphabet support to SMS atom Aki Niemi
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 16+ messages in thread
From: Aki Niemi @ 2010-09-02  7:52 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 5906 bytes --]

Add a new function that prepares a message list using a given alphabet.
---
 src/smsutil.c |  108 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 src/smsutil.h |   14 +++++++
 2 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/src/smsutil.c b/src/smsutil.c
index 0de420b..7da7022 100644
--- a/src/smsutil.c
+++ b/src/smsutil.c
@@ -2987,9 +2987,10 @@ static inline GSList *sms_list_append(GSList *l, const struct sms *in)
  * @use_delivery_reports: value for the Status-Report-Request field
  *     (23.040 3.2.9, 9.2.2.2)
  */
-GSList *sms_text_prepare(const char *utf8, guint16 ref,
-				gboolean use_16bit, int *ref_offset,
-				gboolean use_delivery_reports)
+GSList *sms_text_prepare_with_alphabet(const char *utf8, guint16 ref,
+					gboolean use_16bit, int *ref_offset,
+					gboolean use_delivery_reports,
+					enum sms_alphabet alphabet)
 {
 	struct sms template;
 	int offset = 0;
@@ -3008,9 +3009,93 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
 	template.submit.srr = use_delivery_reports;
 	template.submit.mr = 0;
 	template.submit.vp.relative = 0xA7; /* 24 Hours */
+	template.submit.udhi = FALSE;
+
+	/* UDHI, UDL, UD and DCS actually depend on what we have in
+	 * the text.  For the different GSM dialects, we use only
+	 * matching locking and single shift tables.  For example,
+	 * turkish locking shift with spanish single shift is not
+	 * supported. */
+	switch (alphabet) {
+
+	case SMS_ALPHABET_REDUCED:
+		gsm_encoded = convert_utf8_to_gsm_with_translit(utf8, -1, NULL,
+								&written, 0);
+		break;
+
+	case SMS_ALPHABET_TURKISH:
+		gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
+							&written, 0,
+							GSM_DIALECT_TURKISH,
+							GSM_DIALECT_TURKISH);
+
+		if (!gsm_encoded)
+			 break;
+
+		if (offset == 0)
+			offset = 1;
+
+		template.submit.udhi = TRUE;
+		template.submit.ud[0] += 6;
+		template.submit.ud[offset] = SMS_IEI_NATIONAL_LANGUAGE_SINGLE_SHIFT;
+		template.submit.ud[offset + 1] = 1;
+		template.submit.ud[offset + 2] = GSM_DIALECT_TURKISH;
+		template.submit.ud[offset + 3] = SMS_IEI_NATIONAL_LANGUAGE_LOCKING_SHIFT;
+		template.submit.ud[offset + 4] = 1;
+		template.submit.ud[offset + 5] = GSM_DIALECT_TURKISH;
+
+		offset += 6;
+		break;
+
+	case SMS_ALPHABET_SPANISH:
+		gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
+							&written, 0,
+							GSM_DIALECT_DEFAULT,
+							GSM_DIALECT_SPANISH);
+
+		if (!gsm_encoded)
+			 break;
+
+		if (offset == 0)
+			offset = 1;
+
+		template.submit.udhi = TRUE;
+		template.submit.ud[0] += 3;
+		template.submit.ud[offset] = SMS_IEI_NATIONAL_LANGUAGE_SINGLE_SHIFT;
+		template.submit.ud[offset + 1] = 1;
+		template.submit.ud[offset + 2] = GSM_DIALECT_SPANISH;
+
+		offset += 3;
+		break;
 
-	/* UDHI, UDL, UD and DCS actually depend on what we have in the text */
-	gsm_encoded = convert_utf8_to_gsm(utf8, -1, NULL, &written, 0);
+	case SMS_ALPHABET_PORTUGUESE:
+		gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
+							&written, 0,
+							GSM_DIALECT_PORTUGUESE,
+							GSM_DIALECT_PORTUGUESE);
+
+		if (!gsm_encoded)
+			 break;
+
+		if (offset == 0)
+			offset = 1;
+
+		template.submit.udhi = TRUE;
+		template.submit.ud[0] += 6;
+		template.submit.ud[offset] = SMS_IEI_NATIONAL_LANGUAGE_SINGLE_SHIFT;
+		template.submit.ud[offset + 1] = 1;
+		template.submit.ud[offset + 2] = GSM_DIALECT_PORTUGUESE;
+		template.submit.ud[offset + 3] = SMS_IEI_NATIONAL_LANGUAGE_LOCKING_SHIFT;
+		template.submit.ud[offset + 4] = 1;
+		template.submit.ud[offset + 5] = GSM_DIALECT_PORTUGUESE;
+
+		offset += 6;
+		break;
+
+	case SMS_ALPHABET_DEFAULT:
+	default:
+		gsm_encoded = convert_utf8_to_gsm(utf8, -1, NULL, &written, 0);
+	}
 
 	if (!gsm_encoded) {
 		gsize converted;
@@ -3028,9 +3113,6 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
 	else
 		template.submit.dcs = 0x08; /* Class Unspecified, UCS2 */
 
-	if (offset != 0)
-		template.submit.udhi = FALSE;
-
 	if (gsm_encoded && (written <= sms_text_capacity_gsm(160, offset))) {
 		if (ref_offset)
 			*ref_offset = 0;
@@ -3056,7 +3138,7 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
 
 	template.submit.udhi = TRUE;
 
-	if (!offset)
+	if (offset == 0)
 		offset = 1;
 
 	if (ref_offset)
@@ -3150,6 +3232,14 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
 	return r;
 }
 
+GSList *sms_text_prepare(const char *utf8, guint16 ref,
+				gboolean use_16bit, int *ref_offset,
+				gboolean use_delivery_reports)
+{
+	return sms_text_prepare_with_alphabet(utf8, ref, use_16bit, ref_offset,
+				use_delivery_reports, SMS_ALPHABET_DEFAULT);
+}
+
 gboolean cbs_dcs_decode(guint8 dcs, gboolean *udhi, enum sms_class *cls,
 			enum sms_charset *charset, gboolean *compressed,
 			enum cbs_language *language, gboolean *iso639)
diff --git a/src/smsutil.h b/src/smsutil.h
index 3c6b3ae..e58332c 100644
--- a/src/smsutil.h
+++ b/src/smsutil.h
@@ -153,6 +153,15 @@ enum sms_charset {
 	SMS_CHARSET_UCS2 = 2,
 };
 
+enum sms_alphabet {
+	SMS_ALPHABET_DEFAULT = 0,
+	SMS_ALPHABET_TURKISH,
+	SMS_ALPHABET_SPANISH,
+	SMS_ALPHABET_PORTUGUESE,
+	SMS_ALPHABET_REDUCED,
+	SMS_ALHPABET_INVALID,
+};
+
 enum sms_mwi_type {
 	SMS_MWI_TYPE_VOICE = 0,
 	SMS_MWI_TYPE_FAX = 1,
@@ -516,6 +525,11 @@ void status_report_assembly_expire(struct status_report_assembly *assembly,
 					time_t before, GFunc foreach_func,
 					gpointer data);
 
+GSList *sms_text_prepare_with_alphabet(const char *utf8, guint16 ref,
+					gboolean use_16bit, int *ref_offset,
+					gboolean use_delivery_reports,
+					enum sms_alphabet alphabet);
+
 GSList *sms_text_prepare(const char *utf8, guint16 ref,
 				gboolean use_16bit, int *ref_offset,
 				gboolean use_delivery_reports);
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-02  7:52 ` [PATCH 2/4] Enable alphabets in smsutils Aki Niemi
@ 2010-09-02 16:55   ` Denis Kenzior
  2010-09-03 12:15     ` Aki Niemi
  2010-09-03 14:40   ` Pekka Pessi
  1 sibling, 1 reply; 16+ messages in thread
From: Denis Kenzior @ 2010-09-02 16:55 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 6739 bytes --]

Hi Aki,

On 09/02/2010 02:52 AM, Aki Niemi wrote:
> Add a new function allowing preparing a message list using alphabets.
> ---
>  src/smsutil.c |  108 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  src/smsutil.h |   14 +++++++
>  2 files changed, 113 insertions(+), 9 deletions(-)
> 
> diff --git a/src/smsutil.c b/src/smsutil.c
> index 0de420b..7da7022 100644
> --- a/src/smsutil.c
> +++ b/src/smsutil.c
> @@ -2987,9 +2987,10 @@ static inline GSList *sms_list_append(GSList *l, const struct sms *in)
>   * @use_delivery_reports: value for the Status-Report-Request field
>   *     (23.040 3.2.9, 9.2.2.2)
>   */
> -GSList *sms_text_prepare(const char *utf8, guint16 ref,
> -				gboolean use_16bit, int *ref_offset,
> -				gboolean use_delivery_reports)
> +GSList *sms_text_prepare_with_alphabet(const char *utf8, guint16 ref,
> +					gboolean use_16bit, int *ref_offset,
> +					gboolean use_delivery_reports,
> +					enum sms_alphabet alphabet)
>  {
>  	struct sms template;
>  	int offset = 0;
> @@ -3008,9 +3009,93 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
>  	template.submit.srr = use_delivery_reports;
>  	template.submit.mr = 0;
>  	template.submit.vp.relative = 0xA7; /* 24 Hours */
> +	template.submit.udhi = FALSE;
> +
> +	/* UDHI, UDL, UD and DCS actually depend on what we have in
> +	 * the text.  For the different GSM dialects, we use only
> +	 * matching locking and single shift tables.  For example,
> +	 * turkish locking shift with spanish single shift is not
> +	 * supported. */
> +	switch (alphabet) {
> +

An empty line is not really needed here
> +	case SMS_ALPHABET_REDUCED:
> +		gsm_encoded = convert_utf8_to_gsm_with_translit(utf8, -1, NULL,
> +								&written, 0);
> +		break;
> +
> +	case SMS_ALPHABET_TURKISH:
> +		gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
> +							&written, 0,
> +							GSM_DIALECT_TURKISH,
> +							GSM_DIALECT_TURKISH);
> +
> +		if (!gsm_encoded)
> +			 break;
> +
> +		if (offset == 0)
> +			offset = 1;
> +
> +		template.submit.udhi = TRUE;
> +		template.submit.ud[0] += 6;
> +		template.submit.ud[offset] = SMS_IEI_NATIONAL_LANGUAGE_SINGLE_SHIFT;
> +		template.submit.ud[offset + 1] = 1;
> +		template.submit.ud[offset + 2] = GSM_DIALECT_TURKISH;
> +		template.submit.ud[offset + 3] = SMS_IEI_NATIONAL_LANGUAGE_LOCKING_SHIFT;
> +		template.submit.ud[offset + 4] = 1;
> +		template.submit.ud[offset + 5] = GSM_DIALECT_TURKISH;
> +
> +		offset += 6;
> +		break;
> +
> +	case SMS_ALPHABET_SPANISH:
> +		gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
> +							&written, 0,
> +							GSM_DIALECT_DEFAULT,
> +							GSM_DIALECT_SPANISH);
> +
> +		if (!gsm_encoded)
> +			 break;
> +
> +		if (offset == 0)
> +			offset = 1;
> +
> +		template.submit.udhi = TRUE;
> +		template.submit.ud[0] += 3;
> +		template.submit.ud[offset] = SMS_IEI_NATIONAL_LANGUAGE_SINGLE_SHIFT;
> +		template.submit.ud[offset + 1] = 1;
> +		template.submit.ud[offset + 2] = GSM_DIALECT_SPANISH;
> +
> +		offset += 3;
> +		break;
>  
> -	/* UDHI, UDL, UD and DCS actually depend on what we have in the text */
> -	gsm_encoded = convert_utf8_to_gsm(utf8, -1, NULL, &written, 0);
> +	case SMS_ALPHABET_PORTUGUESE:
> +		gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
> +							&written, 0,
> +							GSM_DIALECT_PORTUGUESE,
> +							GSM_DIALECT_PORTUGUESE);
> +
> +		if (!gsm_encoded)
> +			 break;
> +
> +		if (offset == 0)
> +			offset = 1;
> +
> +		template.submit.udhi = TRUE;
> +		template.submit.ud[0] += 6;
> +		template.submit.ud[offset] = SMS_IEI_NATIONAL_LANGUAGE_SINGLE_SHIFT;
> +		template.submit.ud[offset + 1] = 1;
> +		template.submit.ud[offset + 2] = GSM_DIALECT_PORTUGUESE;
> +		template.submit.ud[offset + 3] = SMS_IEI_NATIONAL_LANGUAGE_LOCKING_SHIFT;
> +		template.submit.ud[offset + 4] = 1;
> +		template.submit.ud[offset + 5] = GSM_DIALECT_PORTUGUESE;
> +
> +		offset += 6;
> +		break;
> +
> +	case SMS_ALPHABET_DEFAULT:
> +	default:

Marcel likes it when the compiler warns of missing enum handlers.  So
the default statement should be removed.

> +		gsm_encoded = convert_utf8_to_gsm(utf8, -1, NULL, &written, 0);
> +	}
>  
>  	if (!gsm_encoded) {
>  		gsize converted;
> @@ -3028,9 +3113,6 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
>  	else
>  		template.submit.dcs = 0x08; /* Class Unspecified, UCS2 */
>  
> -	if (offset != 0)
> -		template.submit.udhi = FALSE;
> -

This part doesn't look right.  The check should actually set udhi to
TRUE, since we can be issuing a return in the next if statement...

>  	if (gsm_encoded && (written <= sms_text_capacity_gsm(160, offset))) {
>  		if (ref_offset)
>  			*ref_offset = 0;
> @@ -3056,7 +3138,7 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
>  
>  	template.submit.udhi = TRUE;
>  
> -	if (!offset)
> +	if (offset == 0)
>  		offset = 1;
>  
>  	if (ref_offset)
> @@ -3150,6 +3232,14 @@ GSList *sms_text_prepare(const char *utf8, guint16 ref,
>  	return r;
>  }
>  
> +GSList *sms_text_prepare(const char *utf8, guint16 ref,
> +				gboolean use_16bit, int *ref_offset,
> +				gboolean use_delivery_reports)
> +{
> +	return sms_text_prepare_with_alphabet(utf8, ref, use_16bit, ref_offset,
> +				use_delivery_reports, SMS_ALPHABET_DEFAULT);
> +}
> +
>  gboolean cbs_dcs_decode(guint8 dcs, gboolean *udhi, enum sms_class *cls,
>  			enum sms_charset *charset, gboolean *compressed,
>  			enum cbs_language *language, gboolean *iso639)
> diff --git a/src/smsutil.h b/src/smsutil.h
> index 3c6b3ae..e58332c 100644
> --- a/src/smsutil.h
> +++ b/src/smsutil.h
> @@ -153,6 +153,15 @@ enum sms_charset {
>  	SMS_CHARSET_UCS2 = 2,
>  };
>  
> +enum sms_alphabet {
> +	SMS_ALPHABET_DEFAULT = 0,
> +	SMS_ALPHABET_TURKISH,
> +	SMS_ALPHABET_SPANISH,
> +	SMS_ALPHABET_PORTUGUESE,
> +	SMS_ALPHABET_REDUCED,
> +	SMS_ALHPABET_INVALID,

Why is the INVALID part needed at all?

> +};
> +
>  enum sms_mwi_type {
>  	SMS_MWI_TYPE_VOICE = 0,
>  	SMS_MWI_TYPE_FAX = 1,
> @@ -516,6 +525,11 @@ void status_report_assembly_expire(struct status_report_assembly *assembly,
>  					time_t before, GFunc foreach_func,
>  					gpointer data);
>  
> +GSList *sms_text_prepare_with_alphabet(const char *utf8, guint16 ref,
> +					gboolean use_16bit, int *ref_offset,
> +					gboolean use_delivery_reports,
> +					enum sms_alphabet alphabet);
> +
>  GSList *sms_text_prepare(const char *utf8, guint16 ref,
>  				gboolean use_16bit, int *ref_offset,
>  				gboolean use_delivery_reports);

Do you have time to write unit tests for this as well?

Regards,
-Denis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-02 16:55   ` Denis Kenzior
@ 2010-09-03 12:15     ` Aki Niemi
  2010-09-03 14:05       ` Denis Kenzior
  0 siblings, 1 reply; 16+ messages in thread
From: Aki Niemi @ 2010-09-03 12:15 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1441 bytes --]

On Thu, 2010-09-02 at 18:55 +0200, ext Denis Kenzior wrote:
> > -	if (offset != 0)
> > -		template.submit.udhi = FALSE;
> > -
> 
> This part doesn't look right.  The check should actually set udhi to
> TRUE, since we can be issuing a return in the next if statement...

That's why it's removed. This was doing nothing before, as offset at
this point was always zero, and the template had anyway been memset to
zero.

> > +enum sms_alphabet {
> > +	SMS_ALPHABET_DEFAULT = 0,
> > +	SMS_ALPHABET_TURKISH,
> > +	SMS_ALPHABET_SPANISH,
> > +	SMS_ALPHABET_PORTUGUESE,
> > +	SMS_ALPHABET_REDUCED,
> > +	SMS_ALHPABET_INVALID,
> 
> Why is the INVALID part needed at all?

Removed.

> > +};
> > +
> >  enum sms_mwi_type {
> >  	SMS_MWI_TYPE_VOICE = 0,
> >  	SMS_MWI_TYPE_FAX = 1,
> > @@ -516,6 +525,11 @@ void status_report_assembly_expire(struct status_report_assembly *assembly,
> >  					time_t before, GFunc foreach_func,
> >  					gpointer data);
> >  
> > +GSList *sms_text_prepare_with_alphabet(const char *utf8, guint16 ref,
> > +					gboolean use_16bit, int *ref_offset,
> > +					gboolean use_delivery_reports,
> > +					enum sms_alphabet alphabet);
> > +
> >  GSList *sms_text_prepare(const char *utf8, guint16 ref,
> >  				gboolean use_16bit, int *ref_offset,
> >  				gboolean use_delivery_reports);
> 
> Do you have time to write unit tests for this as well?

I'll do that.

Cheers,
Aki


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-03 12:15     ` Aki Niemi
@ 2010-09-03 14:05       ` Denis Kenzior
  2010-09-03 14:12         ` Aki Niemi
  0 siblings, 1 reply; 16+ messages in thread
From: Denis Kenzior @ 2010-09-03 14:05 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 664 bytes --]

Hi Aki,

On 09/03/2010 07:15 AM, Aki Niemi wrote:
> On Thu, 2010-09-02 at 18:55 +0200, ext Denis Kenzior wrote:
>>> -	if (offset != 0)
>>> -		template.submit.udhi = FALSE;
>>> -
>>
>> This part doesn't look right.  The check should actually set udhi to
>> TRUE, since we can be issuing a return in the next if statement...
> 
> That's why it's removed. This was doing nothing before, as offset at
> this point was always zero, and the template had anyway been memset to
> zero.
> 

I see what you're doing now.  I'd rather you left out setting udhi in
the dialect switch/case labels and changed this line to set UDHI to TRUE.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-03 14:05       ` Denis Kenzior
@ 2010-09-03 14:12         ` Aki Niemi
  0 siblings, 0 replies; 16+ messages in thread
From: Aki Niemi @ 2010-09-03 14:12 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 321 bytes --]

On Fri, 2010-09-03 at 16:05 +0200, ext Denis Kenzior wrote:
> I see what you're doing now.  I'd rather you left out setting udhi in
> the dialect switch/case labels and changed this line to set UDHI to TRUE.

Ah, then I missed your point. Yes, it's better to set UDHI outside the
switch, I agree.

Cheers,
Aki


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-02  7:52 ` [PATCH 2/4] Enable alphabets in smsutils Aki Niemi
  2010-09-02 16:55   ` Denis Kenzior
@ 2010-09-03 14:40   ` Pekka Pessi
  2010-09-03 15:08     ` Denis Kenzior
  2010-09-03 17:09     ` Aki Niemi
  1 sibling, 2 replies; 16+ messages in thread
From: Pekka Pessi @ 2010-09-03 14:40 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1036 bytes --]

Morjes Aki,

2010/9/2 Aki Niemi <aki.niemi@nokia.com>:
> +       case SMS_ALPHABET_TURKISH:
> +               gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
> +                                                       &written, 0,
> +                                                       GSM_DIALECT_TURKISH,
> +                                                       GSM_DIALECT_TURKISH);

If you look at the tables, the idea is normally to use either Turkish
locking shift with default single shift, or default locking shift with
Turkish single shift, never both.

Also, if the message fits into one segment using only the national
single shift table, single shift should be used (if the receiver
does not support national variants, the result is much less garbled).

I just peeked into 23.038 from Release 9, and it seems to me that we
are bound to get much more entertainment.

--Pekka

-- 
Pekka.Pessi mail at nokia.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-03 14:40   ` Pekka Pessi
@ 2010-09-03 15:08     ` Denis Kenzior
  2010-09-03 17:09     ` Aki Niemi
  1 sibling, 0 replies; 16+ messages in thread
From: Denis Kenzior @ 2010-09-03 15:08 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1390 bytes --]

Hi Pekka,

On 09/03/2010 09:40 AM, Pekka Pessi wrote:
> Morjes Aki,
> 
> 2010/9/2 Aki Niemi <aki.niemi@nokia.com>:
>> +       case SMS_ALPHABET_TURKISH:
>> +               gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
>> +                                                       &written, 0,
>> +                                                       GSM_DIALECT_TURKISH,
>> +                                                       GSM_DIALECT_TURKISH);
> 
> If you look at the tables,  the idea is normally to use either locking
> Turkish and default single, or default locking and Turkish single
> shift, never both.

The spec also says: "however it is possible to use both single shift and
locking shift with the corresponding tables in a single message."

So how exactly do we know which combination to try? Is it language
dependent?

> 
> Also, if it is possible to fit the message to one segment only using
> national single shift table, single shift should be used (if receiver
> does not support national variants, the result is much less garbled).
> 

So what you're saying is that we should try these three combinations:

default locking, default single shift
default locking, language single shift
language locking shift, language single shift

Trying language locking, default single shift does not seem useful.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] Enable alphabets in smsutils
  2010-09-03 14:40   ` Pekka Pessi
  2010-09-03 15:08     ` Denis Kenzior
@ 2010-09-03 17:09     ` Aki Niemi
  1 sibling, 0 replies; 16+ messages in thread
From: Aki Niemi @ 2010-09-03 17:09 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1850 bytes --]

Hi,

2010/9/3 Pekka Pessi <ppessi@gmail.com>:
> Morjes Aki,
>
> 2010/9/2 Aki Niemi <aki.niemi@nokia.com>:
>> +       case SMS_ALPHABET_TURKISH:
>> +               gsm_encoded = convert_utf8_to_gsm_with_lang(utf8, -1, NULL,
>> +                                                       &written, 0,
>> +                                                       GSM_DIALECT_TURKISH,
>> +                                                       GSM_DIALECT_TURKISH);
>
> If you look at the tables,  the idea is normally to use either locking
> Turkish and default single, or default locking and Turkish single
> shift, never both.

I suppose I should've looked at the tables, then.

Anyway, the best way to use the dialects would obviously be to find
the optimal encoding, and use as few extension tables as possible.
That is missing currently.

> Also, if it is possible to fit the message to one segment only using
> national single shift table, single shift should be used (if receiver
> does not support national variants, the result is much less garbled).

Sure, you could even try any combination of dialects to find the
optimal encoding.

If you look at the Turkish tables, they try to align with the default
tables. That is, if the recipient doesn't understand the extensions,
they will use a rather similar looking character from the default
table.

That principle is gone with Portuguese and later extensions.

> I just peeked in the 23.038 from release 9, it seems to me that we are
> bound to get much more entertainment.

The funny thing is that the new tables seem to pack a lot of new
characters into single shift. Which, compared with UCS-2, is of course
only 2 bits per character more efficient.

Cheers,
Aki

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 3/4] Add alphabet support to SMS atom
  2010-09-02  7:52 [PATCH 1/4] Add so called reduced charset support to SMS Aki Niemi
  2010-09-02  7:52 ` [PATCH 2/4] Enable alphabets in smsutils Aki Niemi
@ 2010-09-02  7:52 ` Aki Niemi
  2010-09-02 17:00   ` Denis Kenzior
  2010-09-02  7:52 ` [PATCH 4/4] Add documentation for the Alphabet property Aki Niemi
  2010-09-02 16:43 ` [PATCH 1/4] Add so called reduced charset support to SMS Denis Kenzior
  3 siblings, 1 reply; 16+ messages in thread
From: Aki Niemi @ 2010-09-02  7:52 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 4752 bytes --]

---
 src/sms.c |   87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/sms.c b/src/sms.c
index 2012fe5..8262d0c 100644
--- a/src/sms.c
+++ b/src/sms.c
@@ -65,6 +65,7 @@ struct ofono_sms {
 	GKeyFile *settings;
 	char *imsi;
 	int bearer;
+	int alphabet;
 	const struct ofono_sms_driver *driver;
 	void *driver_data;
 	struct ofono_atom *atom;
@@ -121,6 +122,39 @@ static int sms_bearer_from_string(const char *str)
 	return -1;
 }
 
+static const char *sms_alphabet_to_string(int alphabet)
+{
+	switch (alphabet) {
+	case SMS_ALPHABET_TURKISH:
+		return "turkish";
+	case SMS_ALPHABET_SPANISH:
+		return "spanish";
+	case SMS_ALPHABET_PORTUGUESE:
+		return "portuguese";
+	case SMS_ALPHABET_REDUCED:
+		return "reduced";
+	case SMS_ALPHABET_DEFAULT:
+	default:
+		return "default";
+	}
+}
+
+static int sms_alphabet_from_string(const char *str)
+{
+	if (g_str_equal(str, "default"))
+		return SMS_ALPHABET_DEFAULT;
+	else if (g_str_equal(str, "turkish"))
+		return SMS_ALPHABET_TURKISH;
+	else if (g_str_equal(str, "spanish"))
+		return SMS_ALPHABET_SPANISH;
+	else if (g_str_equal(str, "portuguese"))
+		return SMS_ALPHABET_PORTUGUESE;
+	else if (g_str_equal(str, "reduced"))
+		return SMS_ALPHABET_REDUCED;
+
+	return -1;
+}
+
 static void set_bearer(struct ofono_sms *sms, int bearer)
 {
 	DBusConnection *conn = ofono_dbus_get_connection();
@@ -140,6 +174,25 @@ static void set_bearer(struct ofono_sms *sms, int bearer)
 						DBUS_TYPE_STRING, &value);
 }
 
+static void set_alphabet(struct ofono_sms *sms, int alphabet)
+{
+	DBusConnection *conn = ofono_dbus_get_connection();
+	const char *path = __ofono_atom_get_path(sms->atom);
+	const char *value;
+
+	if (sms->alphabet == alphabet)
+		return;
+
+	sms->alphabet = alphabet;
+
+	value = sms_alphabet_to_string(sms->alphabet);
+
+	ofono_dbus_signal_property_changed(conn, path,
+						OFONO_MESSAGE_MANAGER_INTERFACE,
+						"Alphabet",
+						DBUS_TYPE_STRING, &value);
+}
+
 static void set_sca(struct ofono_sms *sms,
 			const struct ofono_phone_number *sca)
 {
@@ -171,6 +224,7 @@ static DBusMessage *generate_get_properties_reply(struct ofono_sms *sms,
 	DBusMessageIter dict;
 	const char *sca;
 	const char *bearer;
+	const char *alphabet;
 
 	reply = dbus_message_new_method_return(msg);
 
@@ -194,6 +248,9 @@ static DBusMessage *generate_get_properties_reply(struct ofono_sms *sms,
 	bearer = sms_bearer_to_string(sms->bearer);
 	ofono_dbus_dict_append(&dict, "Bearer", DBUS_TYPE_STRING, &bearer);
 
+	alphabet = sms_alphabet_to_string(sms->alphabet);
+	ofono_dbus_dict_append(&dict, "Alphabet", DBUS_TYPE_STRING, &alphabet);
+
 	dbus_message_iter_close_container(&iter, &dict);
 
 	return reply;
@@ -403,6 +460,25 @@ static DBusMessage *sms_set_property(DBusConnection *conn, DBusMessage *msg,
 		return NULL;
 	}
 
+	if (!strcmp(property, "Alphabet")) {
+		const char *value;
+		int alphabet;
+
+		if (dbus_message_iter_get_arg_type(&var) != DBUS_TYPE_STRING)
+			return __ofono_error_invalid_args(msg);
+
+		dbus_message_iter_get_basic(&var, &value);
+
+		alphabet = sms_alphabet_from_string(value);
+		if (alphabet < 0)
+			return __ofono_error_invalid_format(msg);
+
+		set_alphabet(sms, alphabet);
+
+		g_dbus_send_reply(conn, msg, DBUS_TYPE_INVALID);
+		return NULL;
+	}
+
 	return __ofono_error_invalid_args(msg);
 }
 
@@ -620,8 +696,10 @@ static DBusMessage *sms_send_message(DBusConnection *conn, DBusMessage *msg,
 	if (valid_phone_number_format(to) == FALSE)
 		return __ofono_error_invalid_format(msg);
 
-	msg_list = sms_text_prepare(text, 0, TRUE, &ref_offset,
-					sms->use_delivery_reports);
+	msg_list = sms_text_prepare_with_alphabet(text, 0, TRUE,
+						&ref_offset,
+						sms->use_delivery_reports,
+						sms->alphabet);
 
 	if (!msg_list)
 		return __ofono_error_invalid_format(msg);
@@ -1146,6 +1224,8 @@ static void sms_remove(struct ofono_atom *atom)
 					sms->use_delivery_reports);
 		g_key_file_set_integer(sms->settings, SETTINGS_GROUP,
 					"Bearer", sms->bearer);
+		g_key_file_set_integer(sms->settings, SETTINGS_GROUP,
+					"Alphabet", sms->alphabet);
 
 		storage_close(sms->imsi, SETTINGS_STORE, sms->settings, TRUE);
 
@@ -1251,6 +1331,9 @@ static void sms_load_settings(struct ofono_sms *sms, const char *imsi)
 							"Bearer", &error);
 	if (error)
 		sms->bearer = 3; /* Default to CS then PS */
+
+	sms->alphabet = g_key_file_get_integer(sms->settings, SETTINGS_GROUP,
+						"Alphabet", &error);
 }
 
 static void bearer_init_callback(const struct ofono_error *error, void *data)
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] Add alphabet support to SMS atom
  2010-09-02  7:52 ` [PATCH 3/4] Add alphabet support to SMS atom Aki Niemi
@ 2010-09-02 17:00   ` Denis Kenzior
  0 siblings, 0 replies; 16+ messages in thread
From: Denis Kenzior @ 2010-09-02 17:00 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 5342 bytes --]

Hi Aki,

On 09/02/2010 02:52 AM, Aki Niemi wrote:
> ---
>  src/sms.c |   87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 85 insertions(+), 2 deletions(-)
> 
> diff --git a/src/sms.c b/src/sms.c
> index 2012fe5..8262d0c 100644
> --- a/src/sms.c
> +++ b/src/sms.c
> @@ -65,6 +65,7 @@ struct ofono_sms {
>  	GKeyFile *settings;
>  	char *imsi;
>  	int bearer;
> +	int alphabet;

You might want to use the enum here

>  	const struct ofono_sms_driver *driver;
>  	void *driver_data;
>  	struct ofono_atom *atom;
> @@ -121,6 +122,39 @@ static int sms_bearer_from_string(const char *str)
>  	return -1;
>  }
>  
> +static const char *sms_alphabet_to_string(int alphabet)
> +{
> +	switch (alphabet) {
> +	case SMS_ALPHABET_TURKISH:
> +		return "turkish";
> +	case SMS_ALPHABET_SPANISH:
> +		return "spanish";
> +	case SMS_ALPHABET_PORTUGUESE:
> +		return "portuguese";
> +	case SMS_ALPHABET_REDUCED:
> +		return "reduced";
> +	case SMS_ALPHABET_DEFAULT:
> +	default:
> +		return "default";
> +	}

I suggest dropping the default label and returning NULL here.

> +}
> +
> +static int sms_alphabet_from_string(const char *str)
> +{
> +	if (g_str_equal(str, "default"))
> +		return SMS_ALPHABET_DEFAULT;
> +	else if (g_str_equal(str, "turkish"))
> +		return SMS_ALPHABET_TURKISH;
> +	else if (g_str_equal(str, "spanish"))
> +		return SMS_ALPHABET_SPANISH;
> +	else if (g_str_equal(str, "portuguese"))
> +		return SMS_ALPHABET_PORTUGUESE;
> +	else if (g_str_equal(str, "reduced"))
> +		return SMS_ALPHABET_REDUCED;
> +
> +	return -1;
> +}
> +

You might want to do:
static gboolean sms_alphabet_from_string(const char *str,
						enum *alphabet)

>  static void set_bearer(struct ofono_sms *sms, int bearer)
>  {
>  	DBusConnection *conn = ofono_dbus_get_connection();
> @@ -140,6 +174,25 @@ static void set_bearer(struct ofono_sms *sms, int bearer)
>  						DBUS_TYPE_STRING, &value);
>  }
>  
> +static void set_alphabet(struct ofono_sms *sms, int alphabet)
> +{
> +	DBusConnection *conn = ofono_dbus_get_connection();
> +	const char *path = __ofono_atom_get_path(sms->atom);
> +	const char *value;
> +
> +	if (sms->alphabet == alphabet)
> +		return;
> +
> +	sms->alphabet = alphabet;
> +
> +	value = sms_alphabet_to_string(sms->alphabet);
> +
> +	ofono_dbus_signal_property_changed(conn, path,
> +						OFONO_MESSAGE_MANAGER_INTERFACE,
> +						"Alphabet",
> +						DBUS_TYPE_STRING, &value);
> +}
> +
>  static void set_sca(struct ofono_sms *sms,
>  			const struct ofono_phone_number *sca)
>  {
> @@ -171,6 +224,7 @@ static DBusMessage *generate_get_properties_reply(struct ofono_sms *sms,
>  	DBusMessageIter dict;
>  	const char *sca;
>  	const char *bearer;
> +	const char *alphabet;
>  
>  	reply = dbus_message_new_method_return(msg);
>  
> @@ -194,6 +248,9 @@ static DBusMessage *generate_get_properties_reply(struct ofono_sms *sms,
>  	bearer = sms_bearer_to_string(sms->bearer);
>  	ofono_dbus_dict_append(&dict, "Bearer", DBUS_TYPE_STRING, &bearer);
>  
> +	alphabet = sms_alphabet_to_string(sms->alphabet);
> +	ofono_dbus_dict_append(&dict, "Alphabet", DBUS_TYPE_STRING, &alphabet);
> +
>  	dbus_message_iter_close_container(&iter, &dict);
>  
>  	return reply;
> @@ -403,6 +460,25 @@ static DBusMessage *sms_set_property(DBusConnection *conn, DBusMessage *msg,
>  		return NULL;
>  	}
>  
> +	if (!strcmp(property, "Alphabet")) {
> +		const char *value;
> +		int alphabet;
> +
> +		if (dbus_message_iter_get_arg_type(&var) != DBUS_TYPE_STRING)
> +			return __ofono_error_invalid_args(msg);
> +
> +		dbus_message_iter_get_basic(&var, &value);
> +
> +		alphabet = sms_alphabet_from_string(value);
> +		if (alphabet < 0)
> +			return __ofono_error_invalid_format(msg);
> +
> +		set_alphabet(sms, alphabet);
> +
> +		g_dbus_send_reply(conn, msg, DBUS_TYPE_INVALID);
> +		return NULL;
> +	}
> +
>  	return __ofono_error_invalid_args(msg);
>  }
>  
> @@ -620,8 +696,10 @@ static DBusMessage *sms_send_message(DBusConnection *conn, DBusMessage *msg,
>  	if (valid_phone_number_format(to) == FALSE)
>  		return __ofono_error_invalid_format(msg);
>  
> -	msg_list = sms_text_prepare(text, 0, TRUE, &ref_offset,
> -					sms->use_delivery_reports);
> +	msg_list = sms_text_prepare_with_alphabet(text, 0, TRUE,
> +						&ref_offset,
> +						sms->use_delivery_reports,
> +						sms->alphabet);
>  
>  	if (!msg_list)
>  		return __ofono_error_invalid_format(msg);
> @@ -1146,6 +1224,8 @@ static void sms_remove(struct ofono_atom *atom)
>  					sms->use_delivery_reports);
>  		g_key_file_set_integer(sms->settings, SETTINGS_GROUP,
>  					"Bearer", sms->bearer);
> +		g_key_file_set_integer(sms->settings, SETTINGS_GROUP,
> +					"Alphabet", sms->alphabet);
>  
>  		storage_close(sms->imsi, SETTINGS_STORE, sms->settings, TRUE);
>  
> @@ -1251,6 +1331,9 @@ static void sms_load_settings(struct ofono_sms *sms, const char *imsi)
>  							"Bearer", &error);
>  	if (error)
>  		sms->bearer = 3; /* Default to CS then PS */
> +
> +	sms->alphabet = g_key_file_get_integer(sms->settings, SETTINGS_GROUP,
> +						"Alphabet", &error);
>  }
>  
>  static void bearer_init_callback(const struct ofono_error *error, void *data)

Regards,
-Denis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 4/4] Add documentation for the Alphabet property
  2010-09-02  7:52 [PATCH 1/4] Add so called reduced charset support to SMS Aki Niemi
  2010-09-02  7:52 ` [PATCH 2/4] Enable alphabets in smsutils Aki Niemi
  2010-09-02  7:52 ` [PATCH 3/4] Add alphabet support to SMS atom Aki Niemi
@ 2010-09-02  7:52 ` Aki Niemi
  2010-09-02 17:01   ` Denis Kenzior
  2010-09-02 16:43 ` [PATCH 1/4] Add so called reduced charset support to SMS Denis Kenzior
  3 siblings, 1 reply; 16+ messages in thread
From: Aki Niemi @ 2010-09-02  7:52 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 877 bytes --]

---
 doc/message-api.txt |   15 +++++++++++++++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/doc/message-api.txt b/doc/message-api.txt
index 693a111..9e8ea33 100644
--- a/doc/message-api.txt
+++ b/doc/message-api.txt
@@ -64,3 +64,18 @@ Properties	string ServiceCenterAddress
 				"ps-preferred" - Use CS if PS is unavailable
 
 			By default oFono uses "cs-preferred" setting.
+
+		string Alphabet
+
+			Contains the alphabet setting for outgoing SMSs.
+			Possible values are:
+
+				"default" - Default GSM alphabet
+				"turkish" - Turkish alphabet
+				"spanish" - Spanish alphabet
+				"portuguese" - Portuguese alphabet
+				"reduced" - Non-standard extension that adds
+					translit support to the default alphabet
+
+			The standard, language-specific alphabets are defined in
+			3GPP TS23.038, Annex A.
-- 
1.7.0.4


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] Add documentation for the Alphabet property
  2010-09-02  7:52 ` [PATCH 4/4] Add documentation for the Alphabet property Aki Niemi
@ 2010-09-02 17:01   ` Denis Kenzior
  2010-09-02 18:19     ` Marcel Holtmann
  0 siblings, 1 reply; 16+ messages in thread
From: Denis Kenzior @ 2010-09-02 17:01 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1127 bytes --]

Hi Aki,

On 09/02/2010 02:52 AM, Aki Niemi wrote:
> ---
>  doc/message-api.txt |   15 +++++++++++++++
>  1 files changed, 15 insertions(+), 0 deletions(-)
> 
> diff --git a/doc/message-api.txt b/doc/message-api.txt
> index 693a111..9e8ea33 100644
> --- a/doc/message-api.txt
> +++ b/doc/message-api.txt
> @@ -64,3 +64,18 @@ Properties	string ServiceCenterAddress
>  				"ps-preferred" - Use CS if PS is unavailable
>  
>  			By default oFono uses "cs-preferred" setting.
> +
> +		string Alphabet
> +
> +			Contains the alphabet setting for outgoing SMSs.
> +			Possible values are:
> +
> +				"default" - Default GSM alphabet
> +				"turkish" - Turkish alphabet
> +				"spanish" - Spanish alphabet
> +				"portuguese" - Portuguese alphabet
> +				"reduced" - Non-standard extension that adds
> +					translit support to the default alphabet
> +
> +			The standard, language-specific alphabets are defined in
> +			3GPP TS23.038, Annex A.

I want to hear everyone's thoughts on whether this should be in
ofono/main.conf or an actual property.  We still haven't decided this...

Regards,
-Denis

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] Add documentation for the Alphabet property
  2010-09-02 17:01   ` Denis Kenzior
@ 2010-09-02 18:19     ` Marcel Holtmann
  2010-09-02 19:08       ` Aki Niemi
  0 siblings, 1 reply; 16+ messages in thread
From: Marcel Holtmann @ 2010-09-02 18:19 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1611 bytes --]

Hi Denis,

> >  doc/message-api.txt |   15 +++++++++++++++
> >  1 files changed, 15 insertions(+), 0 deletions(-)
> > 
> > diff --git a/doc/message-api.txt b/doc/message-api.txt
> > index 693a111..9e8ea33 100644
> > --- a/doc/message-api.txt
> > +++ b/doc/message-api.txt
> > @@ -64,3 +64,18 @@ Properties	string ServiceCenterAddress
> >  				"ps-preferred" - Use CS if PS is unavailable
> >  
> >  			By default oFono uses "cs-preferred" setting.
> > +
> > +		string Alphabet
> > +
> > +			Contains the alphabet setting for outgoing SMSs.
> > +			Possible values are:
> > +
> > +				"default" - Default GSM alphabet
> > +				"turkish" - Turkish alphabet
> > +				"spanish" - Spanish alphabet
> > +				"portuguese" - Portuguese alphabet
> > +				"reduced" - Non-standard extension that adds
> > +					translit support to the default alphabet
> > +
> > +			The standard, language-specific alphabets are defined in
> > +			3GPP TS23.038, Annex A.
> 
> > I want to hear everyone's thoughts on whether this should be in
> > ofono/main.conf or an actual property.  We still haven't decided this...

so the general rule is that if it is exposed in the UI, then it needs to
be a D-Bus API. If it is manufacturer/integrator specific then this
should be in main.conf (to be created of course). There might be
exceptions here and there, but that is the general rule.

If we think we wanna expose this inside the UI, then of course we have
to ask ourselves if this is something a user should be really exposed
to. So this needs use cases and justification for it.

Regards

Marcel



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/4] Add documentation for the Alphabet property
  2010-09-02 18:19     ` Marcel Holtmann
@ 2010-09-02 19:08       ` Aki Niemi
  0 siblings, 0 replies; 16+ messages in thread
From: Aki Niemi @ 2010-09-02 19:08 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 694 bytes --]

Hi,

2010/9/2 Marcel Holtmann <marcel@holtmann.org>:
> If we think we wanna expose this inside the UI, then of course we have
> to ask ourselves if this is something a user should be really exposed
> to. So this needs use cases and justification for it.

Ideally, you'd want none of these parameters exposed to the user. I
mean seriously, do you know your SMSC address, or anyone who does? Or
care about the SMS bearer? I sure don't... But if you're a user in
Spain, it sure would be nice to change the factory default of
"reduced" alphabet, so that you don't look like an idiot when bragging
to your colleagues at work about sitting on the beach sipping Curaçao.

Cheers,
Aki

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] Add so called reduced charset support to SMS
  2010-09-02  7:52 [PATCH 1/4] Add so called reduced charset support to SMS Aki Niemi
                   ` (2 preceding siblings ...)
  2010-09-02  7:52 ` [PATCH 4/4] Add documentation for the Alphabet property Aki Niemi
@ 2010-09-02 16:43 ` Denis Kenzior
  3 siblings, 0 replies; 16+ messages in thread
From: Denis Kenzior @ 2010-09-02 16:43 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1694 bytes --]

Hi Aki,

On 09/02/2010 02:52 AM, Aki Niemi wrote:
> This "reduced" charset support is available in some current phones to
> reduce the number of segments that a message takes.
> 
> Normally, when characters are encountered that don't have a
> representation in the GSM 7 bit alphabet, the encoding switches to
> UCS-2 that takes roughly twice as much space.
> 
> This reduced charset feature transliterates the input string, so that
> more unicode characters fit the GSM alphabet. The obvious downside is
> that transliterating loses information, i.e., the text gets dumbed
> down, and what the recipient receives is not the original text.
> 
> Nevertheless, in some regions, this is a must-have feature.
> ---
>  src/util.c       |  137 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  src/util.h       |    4 ++
>  unit/test-util.c |   18 +++++++

Please break this up into two patches.  By convention patches that touch
multiple directories should be broken up, unless needed for compilation
to succeed.

>  3 files changed, 152 insertions(+), 7 deletions(-)

<snip>

> 
> -	if (locking_lang >= GSM_DIALECT_INVALID)
> +	if (locking_lang > GSM_DIALECT_INVALID)
>  		return NULL;
>  
> -	if (single_lang >= GSM_DIALECT_INVALID)
> +	if (single_lang > GSM_DIALECT_INVALID)
>  		return NULL;
>  

I think this is simply too evil, can't we just change
unicode_single_shift_lookup and gsm_locking_shift_lookup to take the
codepoint table and length directly?  Then breakout the core of
convert_gsm_to_utf8_with_lang into a separate function and pass the
table + len to it.  Should be cleaner and more readable in my opinion.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2010-09-03 17:09 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-02  7:52 [PATCH 1/4] Add so called reduced charset support to SMS Aki Niemi
2010-09-02  7:52 ` [PATCH 2/4] Enable alphabets in smsutils Aki Niemi
2010-09-02 16:55   ` Denis Kenzior
2010-09-03 12:15     ` Aki Niemi
2010-09-03 14:05       ` Denis Kenzior
2010-09-03 14:12         ` Aki Niemi
2010-09-03 14:40   ` Pekka Pessi
2010-09-03 15:08     ` Denis Kenzior
2010-09-03 17:09     ` Aki Niemi
2010-09-02  7:52 ` [PATCH 3/4] Add alphabet support to SMS atom Aki Niemi
2010-09-02 17:00   ` Denis Kenzior
2010-09-02  7:52 ` [PATCH 4/4] Add documentation for the Alphabet property Aki Niemi
2010-09-02 17:01   ` Denis Kenzior
2010-09-02 18:19     ` Marcel Holtmann
2010-09-02 19:08       ` Aki Niemi
2010-09-02 16:43 ` [PATCH 1/4] Add so called reduced charset support to SMS Denis Kenzior

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.