qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Hervé Poussineau" <hpoussin@reactos.org>
To: qemu-devel@nongnu.org
Cc: "Kevin Wolf" <kwolf@redhat.com>, "Max Reitz" <mreitz@redhat.com>,
	qemu-block@nongnu.org, "Hervé Poussineau" <hpoussin@reactos.org>
Subject: [Qemu-devel] [PATCH 08/13] vvfat: correctly create long names for non-ASCII filenames
Date: Mon, 15 May 2017 22:31:08 +0200	[thread overview]
Message-ID: <20170515203114.9477-9-hpoussin@reactos.org> (raw)
In-Reply-To: <20170515203114.9477-1-hpoussin@reactos.org>

Assume that the input filename is encoded as UTF-8, and convert it correctly to UTF-16.
Reuse the long_file_name structure to return the generated long name to the caller.
It will be used in the next commit to transform the long file name into a short file name.

Reference: http://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
Signed-off-by: Hervé Poussineau <hpoussin@reactos.org>
---
 block/vvfat.c | 132 ++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 97 insertions(+), 35 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index 7da07068b8..5f6356c834 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -357,6 +357,23 @@ typedef struct BDRVVVFATState {
     Error *migration_blocker;
 } BDRVVVFATState;
 
+typedef struct {
+    /*
+     * Since the sequence number is at most 0x3f, and the filename
+     * length is at most 13 times the sequence number, the maximal
+     * filename length is 0x3f * 13 bytes.
+     */
+    unsigned char name[0x3f * 13 + 1];
+    int checksum, len;
+    int sequence_number;
+} long_file_name;
+
+static void lfn_init(long_file_name *lfn)
+{
+   lfn->sequence_number = lfn->len = 0;
+   lfn->checksum = 0x100;
+}
+
 /* take the sector position spos and convert it to Cylinder/Head/Sector position
  * if the position is outside the specified geometry, fill maximum value for CHS
  * and return 1 to signal overflow.
@@ -418,29 +435,90 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs)
 
 /* direntry functions */
 
-/* dest is assumed to hold 258 bytes, and pads with 0xffff up to next multiple of 26 */
-static inline int short2long_name(char* dest,const char* src)
-{
-    int i;
-    int len;
-    for(i=0;i<129 && src[i];i++) {
-        dest[2*i]=src[i];
-        dest[2*i+1]=0;
+/* fills lfn with UTF-16 representation of src filename */
+/* return true if src is valid UTF-8 string, false otherwise */
+static bool filename2long_name(long_file_name *lfn, const char* src)
+{
+    uint8_t *dest = lfn->name;
+    int i = 0, j;
+    int len = 0;
+    while (src[i]) {
+        uint32_t uni = 0;
+        size_t todo;
+        uint8_t ch = src[i++];
+        if (ch <= 0x7f) {
+            uni = ch;
+            todo = 0;
+        } else if (ch <= 0xbf) {
+            return false;
+        } else if (ch <= 0xdf) {
+            uni = ch & 0x1f;
+            todo = 1;
+        } else if (ch <= 0xef) {
+            uni = ch & 0x0f;
+            todo = 2;
+        } else if (ch <= 0xf7) {
+            uni = ch & 0x07;
+            todo = 3;
+        } else {
+            return false;
+        }
+        for (j = 0; j < todo; j++) {
+            uint8_t ch;
+            if (src[i] == '\0') {
+                return false;
+            }
+            ch = src[i++];
+            if (ch < 0x80 || ch >= 0xbf) {
+                return false;
+            }
+            uni <<= 6;
+            uni += ch & 0x3f;
+        }
+        if (uni >= 0xd800 && uni <= 0xdfff) {
+            return false;
+        } else if (uni >= 0x10ffff) {
+            return false;
+        }
+        if (uni <= 0xffff) {
+            dest[len++] = uni & 0xff;
+            dest[len++] = uni >> 8;
+        } else {
+            uint16_t w;
+            uni -= 0x10000;
+            w = (uni >> 10) + 0xd800;
+            dest[len++] = w & 0xff;
+            dest[len++] = w >> 8;
+            w = (uni & 0x3ff) + 0xdc00;
+            dest[len++] = w & 0xff;
+            dest[len++] = w >> 8;
+        }
+    }
+    dest[len++] = 0;
+    dest[len++] = 0;
+    while (len % 26 != 0) {
+        dest[len++] = 0xff;
     }
-    len=2*i;
-    dest[2*i]=dest[2*i+1]=0;
-    for(i=2*i+2;(i%26);i++)
-        dest[i]=0xff;
-    return len;
+    lfn->len = len;
+    return true;
 }
 
-static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename)
+static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename,
+                                        long_file_name *lfn)
 {
-    char buffer[258];
-    int length=short2long_name(buffer,filename),
-        number_of_entries=(length+25)/26,i;
+    uint8_t *buffer;
+    int length, number_of_entries, i;
     direntry_t* entry;
 
+    lfn_init(lfn);
+    if (!filename2long_name(lfn, filename)) {
+        fprintf(stderr, "vvfat: invalid UTF-8 name: %s\n", filename);
+        return NULL;
+    }
+    buffer = lfn->name;
+    length = lfn->len;
+    number_of_entries = (length + 25) / 26;
+
     for(i=0;i<number_of_entries;i++) {
         entry=array_get_next(&(s->directory));
         entry->attributes=0xf;
@@ -612,6 +690,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
     int i,j,long_index=s->directory.next;
     direntry_t* entry = NULL;
     direntry_t* entry_long = NULL;
+    long_file_name lfn;
 
     if(is_dot) {
         entry=array_get_next(&(s->directory));
@@ -620,7 +699,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
         return entry;
     }
 
-    entry_long=create_long_filename(s,filename);
+    entry_long = create_long_filename(s, filename, &lfn);
 
     i = strlen(filename);
     for(j = i - 1; j>0  && filename[j]!='.';j--);
@@ -1575,23 +1654,6 @@ static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path)
     commit->action = ACTION_MKDIR;
 }
 
-typedef struct {
-    /*
-     * Since the sequence number is at most 0x3f, and the filename
-     * length is at most 13 times the sequence number, the maximal
-     * filename length is 0x3f * 13 bytes.
-     */
-    unsigned char name[0x3f * 13 + 1];
-    int checksum, len;
-    int sequence_number;
-} long_file_name;
-
-static void lfn_init(long_file_name* lfn)
-{
-   lfn->sequence_number = lfn->len = 0;
-   lfn->checksum = 0x100;
-}
-
 /* return 0 if parsed successfully, > 0 if no long name, < 0 if error */
 static int parse_long_name(long_file_name* lfn,
         const direntry_t* direntry)
-- 
2.11.0

  parent reply	other threads:[~2017-05-15 20:31 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-15 20:31 [Qemu-devel] [PATCH 00/13] vvfat: misc fixes for read-only mode Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 01/13] vvfat: fix qemu-img map and qemu-img convert Hervé Poussineau
2017-05-15 20:42   ` [Qemu-devel] [Qemu-block] " Eric Blake
2017-05-16 13:17     ` Kevin Wolf
2017-05-15 20:31 ` [Qemu-devel] [PATCH 02/13] vvfat: replace tabs by 8 spaces Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 03/13] vvfat: fix typos Hervé Poussineau
2017-05-16 13:21   ` Kevin Wolf
2017-05-17  5:15     ` Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 04/13] vvfat: rename useless enumeration values Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 05/13] vvfat: introduce offset_to_bootsector, offset_to_fat and offset_to_root_dir Hervé Poussineau
2017-05-16 14:16   ` Kevin Wolf
2017-05-16 15:05     ` Eric Blake
2017-05-16 15:51       ` Kevin Wolf
2017-05-17  5:23     ` Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 06/13] vvfat: fix field names in FAT12/FAT16 boot sector Hervé Poussineau
2017-05-16 14:39   ` Kevin Wolf
2017-05-17  5:28     ` Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 07/13] vvfat: always create . and .. entries at first and in that order Hervé Poussineau
2017-05-15 20:31 ` Hervé Poussineau [this message]
2017-05-16 15:33   ` [Qemu-devel] [PATCH 08/13] vvfat: correctly create long names for non-ASCII filenames Kevin Wolf
2017-05-15 20:31 ` [Qemu-devel] [PATCH 09/13] vvfat: correctly create base short " Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 10/13] vvfat: correctly generate numeric-tail of short file names Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 11/13] vvfat: limit number of entries in root directory in FAT12/FAT16 Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 12/13] vvfat: handle KANJI lead byte 0xe5 Hervé Poussineau
2017-05-15 20:31 ` [Qemu-devel] [PATCH 13/13] vvfat: change OEM name to 'MSWIN4.1' Hervé Poussineau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170515203114.9477-9-hpoussin@reactos.org \
    --to=hpoussin@reactos.org \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).