linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] unicode: Expose available encodings in sysfs
@ 2020-04-11 23:58 Gabriel Krisman Bertazi
  2020-04-12 12:01 ` kbuild test robot
  2020-04-12 12:01 ` [RFC PATCH] unicode: ucd_init() can be static kbuild test robot
  0 siblings, 2 replies; 3+ messages in thread
From: Gabriel Krisman Bertazi @ 2020-04-11 23:58 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: linux-ext4, Gabriel Krisman Bertazi, kernel, Theodore Ts'o,
	Jaegeuk Kim

A filesystem configuration utility has no way to detect which filename
encodings are supported by the running kernel.  This means, for
instance, mkfs has no way to tell if the generated filesystem will be
mountable in the current kernel or not.  Also, users have no easy way to
know if they can update the encoding in their filesystems and still have
something functional in the end.

This exposes details of the encodings available in the unicode
subsystem, to fill that gap.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
---
 Documentation/ABI/testing/sysfs-fs-unicode | 13 +++++
 fs/unicode/utf8-core.c                     | 64 ++++++++++++++++++++++
 fs/unicode/utf8-norm.c                     | 18 ++++++
 fs/unicode/utf8n.h                         |  5 ++
 4 files changed, 100 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-fs-unicode

diff --git a/Documentation/ABI/testing/sysfs-fs-unicode b/Documentation/ABI/testing/sysfs-fs-unicode
new file mode 100644
index 000000000000..15c63367bb8e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-unicode
@@ -0,0 +1,13 @@
+What:		/sys/fs/unicode/latest
+Date:		April 2020
+Contact:	Gabriel Krisman Bertazi <krisman@collabora.com>
+Description:
+		The latest version of the Unicode Standard supported by
+		this kernel, reported as "UTF-8 <major>.<minor>.<rev>".
+
+What:		/sys/fs/unicode/encodings
+Date:		April 2020
+Contact:	Gabriel Krisman Bertazi <krisman@collabora.com>
+Description:
+		List of encodings and corresponding versions supported by
+		this kernel, as "UTF-8: <version> ...", latest first.
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115..7e0282707435 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
 #include <linux/parser.h>
 #include <linux/errno.h>
 #include <linux/unicode.h>
+#include <linux/fs.h>
 
 #include "utf8n.h"
 
@@ -212,4 +213,67 @@ void utf8_unload(struct unicode_map *um)
 }
 EXPORT_SYMBOL(utf8_unload);
 
+static ssize_t latest_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	int l = utf8version_latest();
+
+	return snprintf(buf, PAGE_SIZE, "UTF-8 %d.%d.%d\n", UNICODE_AGE_MAJ(l),
+			UNICODE_AGE_MIN(l), UNICODE_AGE_REV(l));
+
+}
+static ssize_t encodings_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	int n;
+
+	n = snprintf(buf, PAGE_SIZE, "UTF-8:");
+	n += utf8version_list(buf + n, PAGE_SIZE - n);
+	n += snprintf(buf+n, PAGE_SIZE-n, "\n");
+
+	return n;
+}
+
+#define UCD_ATTR(x) \
+	static struct kobj_attribute x ## _attr = __ATTR_RO(x)
+
+UCD_ATTR(latest);
+UCD_ATTR(encodings);
+
+static struct attribute *ucd_attrs[] = {
+	&latest_attr.attr,
+	&encodings_attr.attr,
+	NULL,
+};
+static const struct attribute_group ucd_attr_group = {
+	.attrs = ucd_attrs,
+};
+static struct kobject *ucd_root;
+
+int __init ucd_init(void)
+{
+	int ret;
+
+	ucd_root = kobject_create_and_add("unicode", fs_kobj);
+	if (!ucd_root)
+		return -ENOMEM;
+
+	ret = sysfs_create_group(ucd_root, &ucd_attr_group);
+	if (ret) {
+		kobject_put(ucd_root);
+		ucd_root = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+void __exit ucd_exit(void)
+{
+	kobject_put(ucd_root);
+}
+
+module_init(ucd_init);
+module_exit(ucd_exit)
+
 MODULE_LICENSE("GPL v2");
diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c
index 1d2d2e5b906a..f9ebba89a138 100644
--- a/fs/unicode/utf8-norm.c
+++ b/fs/unicode/utf8-norm.c
@@ -35,6 +35,24 @@ int utf8version_latest(void)
 }
 EXPORT_SYMBOL(utf8version_latest);
 
+int utf8version_list(char *buf, int len)
+{
+	int i = ARRAY_SIZE(utf8agetab) - 1;
+	int ret = 0;
+
+	/*
+	 * Print most relevant (latest) first.  No filesystem uses
+	 * unicode older than 12.0.0, so don't expose those to userspace.
+	 */
+	for (; utf8agetab[i] >= UNICODE_AGE(12, 0, 0); i--) {
+		ret += snprintf(buf+ret, len-ret, " %d.%d.%d",
+				UNICODE_AGE_MAJ(utf8agetab[i]),
+				UNICODE_AGE_MIN(utf8agetab[i]),
+				UNICODE_AGE_REV(utf8agetab[i]));
+	}
+	return ret;
+}
+
 /*
  * UTF-8 valid ranges.
  *
diff --git a/fs/unicode/utf8n.h b/fs/unicode/utf8n.h
index 0acd530c2c79..5dea2c4af1f3 100644
--- a/fs/unicode/utf8n.h
+++ b/fs/unicode/utf8n.h
@@ -21,9 +21,14 @@
 	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |	\
 	 ((unsigned int)(REV)))
 
+#define UNICODE_AGE_MAJ(x) ((x) >> UNICODE_MAJ_SHIFT & 0xff)
+#define UNICODE_AGE_MIN(x) ((x) >> UNICODE_MIN_SHIFT & 0xff)
+#define UNICODE_AGE_REV(x) ((x) & 0xff)
+
 /* Highest unicode version supported by the data tables. */
 extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
 extern int utf8version_latest(void);
+extern int utf8version_list(char *buf, int len);
 
 /*
  * Look for the correct const struct utf8data for a unicode version.
-- 
2.26.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] unicode: Expose available encodings in sysfs
  2020-04-11 23:58 [PATCH] unicode: Expose available encodings in sysfs Gabriel Krisman Bertazi
@ 2020-04-12 12:01 ` kbuild test robot
  2020-04-12 12:01 ` [RFC PATCH] unicode: ucd_init() can be static kbuild test robot
  1 sibling, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2020-04-12 12:01 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi
  Cc: kbuild-all, linux-fsdevel, linux-ext4, Gabriel Krisman Bertazi,
	kernel, Theodore Ts'o, Jaegeuk Kim

Hi Gabriel,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.6 next-20200412]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest using the '--base' option to specify the
base tree in git format-patch; please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Gabriel-Krisman-Bertazi/unicode-Expose-available-encodings-in-sysfs/20200412-080010
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git b032227c62939b5481bcd45442b36dfa263f4a7c
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.1-188-g79f7ac98-dirty
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> fs/unicode/utf8-core.c:253:12: sparse: sparse: symbol 'ucd_init' was not declared. Should it be static?
>> fs/unicode/utf8-core.c:271:13: sparse: sparse: symbol 'ucd_exit' was not declared. Should it be static?

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [RFC PATCH] unicode: ucd_init() can be static
  2020-04-11 23:58 [PATCH] unicode: Expose available encodings in sysfs Gabriel Krisman Bertazi
  2020-04-12 12:01 ` kbuild test robot
@ 2020-04-12 12:01 ` kbuild test robot
  1 sibling, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2020-04-12 12:01 UTC (permalink / raw)
  To: Gabriel Krisman Bertazi
  Cc: kbuild-all, linux-fsdevel, linux-ext4, Gabriel Krisman Bertazi,
	kernel, Theodore Ts'o, Jaegeuk Kim


Signed-off-by: kbuild test robot <lkp@intel.com>
---
 utf8-core.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 7e02827074356..b48e13e823a5a 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -250,7 +250,7 @@ static const struct attribute_group ucd_attr_group = {
 };
 static struct kobject *ucd_root;
 
-int __init ucd_init(void)
+static int __init ucd_init(void)
 {
 	int ret;
 
@@ -268,7 +268,7 @@ int __init ucd_init(void)
 	return 0;
 }
 
-void __exit ucd_exit(void)
+static void __exit ucd_exit(void)
 {
 	kobject_put(ucd_root);
 }

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-04-12 12:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-11 23:58 [PATCH] unicode: Expose available encodings in sysfs Gabriel Krisman Bertazi
2020-04-12 12:01 ` kbuild test robot
2020-04-12 12:01 ` [RFC PATCH] unicode: ucd_init() can be static kbuild test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).