From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: sandeen@redhat.com, darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 05/14] xfs_scrub: make name_entry a first class structure
Date: Tue, 20 Mar 2018 20:40:10 -0700 [thread overview]
Message-ID: <152160361091.8288.13615401988451480485.stgit@magnolia> (raw)
In-Reply-To: <152160358015.8288.2700156777231657519.stgit@magnolia>
From: Darrick J. Wong <darrick.wong@oracle.com>
Instead of open-coding the construction and hashtable insertion of name
entries, make name_entry a first class object. This means that we now
have name_entry_ prefix functions that take care of computing Unicode
normalized names as part of name_entry construction, and we pass around
the name_entries when we're looking for suspicious characters and
identically rendering names.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
scrub/unicrash.c | 264 ++++++++++++++++++++++++++++++++----------------------
1 file changed, 157 insertions(+), 107 deletions(-)
diff --git a/scrub/unicrash.c b/scrub/unicrash.c
index 3538a60..51da32c 100644
--- a/scrub/unicrash.c
+++ b/scrub/unicrash.c
@@ -61,9 +61,16 @@
struct name_entry {
struct name_entry *next;
+
+ /* NFKC normalized name */
+ uint8_t *normstr;
+ size_t normstrlen;
+
xfs_ino_t ino;
- size_t uninamelen;
- uint8_t uniname[0];
+
+ /* Raw UTF8 name */
+ size_t namelen;
+ char name[0];
};
#define NAME_ENTRY_SZ(nl) (sizeof(struct name_entry) + 1 + \
(nl * sizeof(uint8_t)))
@@ -119,6 +126,120 @@ is_utf8_locale(void)
return answer;
}
+/*
+ * Generate the NFKC normalized form of the name and attach it to the entry.
+ *
+ * On success, entry->normstr points to a heap buffer now owned by the entry,
+ * entry->normstrlen holds the length reported by u8_normalize(), and true is
+ * returned.  If the allocation or the normalization fails, nothing is
+ * attached to the entry and false is returned.  If this fails, just forget
+ * everything; this is an advisory checker.
+ *
+ * @uc is not used here.
+ */
+static bool
+name_entry_compute_checknames(
+	struct unicrash		*uc,
+	struct name_entry	*entry)
+{
+	uint8_t			*normstr;
+	size_t			normstrlen;
+
+	/* Zeroed buffer: twice the raw name length, plus one spare byte. */
+	normstrlen = (entry->namelen * 2) + 1;
+	normstr = calloc(normstrlen, sizeof(uint8_t));
+	if (!normstr)
+		return false;
+
+	/*
+	 * Bail out only when normalization really fails; the condition must
+	 * guard the goto, otherwise every name would be discarded.
+	 *
+	 * NOTE(review): u8_normalize() appears to hand back a freshly
+	 * allocated array when the result does not fit in the buffer we pass
+	 * in; that case is not handled here -- confirm against libunistring.
+	 */
+	if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
+			entry->namelen, normstr, &normstrlen)) {
+		goto out_normstr;
+	}
+
+	entry->normstr = normstr;
+	entry->normstrlen = normstrlen;
+	return true;
+out_normstr:
+	free(normstr);
+	return false;
+}
+
+/*
+ * Build a name entry for @name and @ino, including its normalized form.
+ *
+ * On success the new entry is handed back through @entry and true is
+ * returned.  If the allocation or the normalization fails, @entry is left
+ * untouched and false is returned.
+ */
+static bool
+name_entry_create(
+	struct unicrash		*uc,
+	const char		*name,
+	xfs_ino_t		ino,
+	struct name_entry	**entry)
+{
+	size_t			namelen = strlen(name);
+	struct name_entry	*ne;
+
+	/* A single zeroed allocation holds the header and the raw name. */
+	ne = calloc(NAME_ENTRY_SZ(namelen), 1);
+	if (!ne)
+		return false;
+
+	ne->next = NULL;
+	ne->ino = ino;
+	ne->namelen = namelen;
+	memcpy(ne->name, name, namelen);
+	ne->name[namelen] = 0;
+
+	/* Normalize name to find collisions; give up on the entry if we can't. */
+	if (!name_entry_compute_checknames(uc, ne)) {
+		free(ne);
+		return false;
+	}
+
+	*entry = ne;
+	return true;
+}
+
+/*
+ * Release a name entry along with the normalized string hanging off it.
+ * @entry must not be NULL, since it is dereferenced.
+ */
+static void
+name_entry_free(
+	struct name_entry	*entry)
+{
+	uint8_t			*normstr = entry->normstr;
+
+	free(normstr);
+	free(entry);
+}
+
+/* Adapt the dirhash function from libxfs, avoid linking with libxfs. */
+
+/*
+ * Rotate x left by y bits within a 32-bit word.  The callers below only
+ * pass y = 7, 14, 21 or 28.
+ * NOTE(review): assumes x is an unsigned 32-bit value (xfs_dahash_t is
+ * presumably uint32_t) -- confirm.
+ */
+#define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ *
+ * The bytes hashed are the entry's normalized name (normstr, normstrlen),
+ * not the raw name, so two entries whose normalized bytes are identical
+ * produce the same hash value.
+ */
+static xfs_dahash_t
+name_entry_hash(
+ struct name_entry *entry)
+{
+ uint8_t *name;
+ size_t namelen;
+ xfs_dahash_t hash;
+
+ /* Walk the normalized string; entry itself is not modified. */
+ name = entry->normstr;
+ namelen = entry->normstrlen;
+
+ /*
+ * Do four characters at a time as long as we can.
+ */
+ for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
+ hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
+ (name[3] << 0) ^ rol32(hash, 7 * 4);
+
+ /*
+ * Now do the rest of the characters.
+ * At this point namelen is 0..3 and name points at the leftover bytes.
+ */
+ switch (namelen) {
+ case 3:
+ return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
+ rol32(hash, 7 * 3);
+ case 2:
+ return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
+ case 1:
+ return (name[0] << 0) ^ rol32(hash, 7 * 1);
+ default: /* case 0: */
+ return hash;
+ }
+}
+
/* Initialize the collision detector. */
static bool
unicrash_init(
@@ -190,89 +311,28 @@ unicrash_free(
for (i = 0; i < uc->nr_buckets; i++) {
for (ne = uc->buckets[i]; ne != NULL; ne = x) {
x = ne->next;
- free(ne);
+ name_entry_free(ne);
}
}
free(uc);
}
-/* Steal the dirhash function from libxfs, avoid linking with libxfs. */
-
-#define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
-
-/*
- * Implement a simple hash on a character string.
- * Rotate the hash value by 7 bits, then XOR each character in.
- * This is implemented with some source-level loop unrolling.
- */
-static xfs_dahash_t
-unicrash_hashname(
- const uint8_t *name,
- size_t namelen)
-{
- xfs_dahash_t hash;
-
- /*
- * Do four characters at a time as long as we can.
- */
- for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
- hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
- (name[3] << 0) ^ rol32(hash, 7 * 4);
-
- /*
- * Now do the rest of the characters.
- */
- switch (namelen) {
- case 3:
- return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
- rol32(hash, 7 * 3);
- case 2:
- return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
- case 1:
- return (name[0] << 0) ^ rol32(hash, 7 * 1);
- default: /* case 0: */
- return hash;
- }
-}
-
-/*
- * Normalize a name according to Unicode NFKC normalization rules.
- * Returns true if the name was already normalized.
- */
-static bool
-unicrash_normalize(
- const char *in,
- uint8_t *out,
- size_t outlen)
-{
- size_t inlen = strlen(in);
-
- assert(inlen <= outlen);
- if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)in, inlen,
- out, &outlen)) {
- /* Didn't normalize, just return the same buffer. */
- memcpy(out, in, inlen + 1);
- return true;
- }
- out[outlen] = 0;
- return outlen == inlen ? memcmp(in, out, inlen) == 0 : false;
-}
-
/* Complain about Unicode problems. */
static void
unicrash_complain(
struct unicrash *uc,
const char *descr,
const char *what,
+ struct name_entry *entry,
unsigned int badflags,
- const char *name,
- uint8_t *uniname)
+ struct name_entry *dup_entry)
{
char *bad1 = NULL;
char *bad2 = NULL;
- bad1 = string_escape(name);
- bad2 = string_escape((char *)uniname);
+ bad1 = string_escape(entry->name);
+ if (dup_entry)
+ bad2 = string_escape(dup_entry->name);
/*
* Two names that normalize to the same string will render
@@ -294,52 +354,39 @@ _("Unicode name \"%s\" in %s renders identically to \"%s\"."),
/*
* Try to add a name -> ino entry to the collision detector. The name
- * must be normalized according to Unicode NFKC normalization rules to
- * detect byte-unique names that map to the same sequence of Unicode
- * code points.
- *
- * This function returns true either if there was no previous mapping or
- * there was a mapping that matched exactly. It returns false if
- * there is already a record with that name pointing to a different
- * inode.
+ * must be normalized according to Unicode NFKC rules to detect names that
+ * could be confused with each other.
*/
static bool
unicrash_add(
struct unicrash *uc,
- uint8_t *uniname,
- xfs_ino_t ino,
- unsigned int *badflags)
+ struct name_entry *new_entry,
+ unsigned int *badflags,
+ struct name_entry **existing_entry)
{
- struct name_entry *ne;
- struct name_entry *x;
- struct name_entry **nep;
- size_t uninamelen = u8_strlen(uniname);
+ struct name_entry *entry;
size_t bucket;
xfs_dahash_t hash;
- /* Do we already know about that name? */
- hash = unicrash_hashname(uniname, uninamelen);
+ /* Store name in hashtable. */
+ hash = name_entry_hash(new_entry);
bucket = hash % uc->nr_buckets;
- for (nep = &uc->buckets[bucket], ne = *nep; ne != NULL; ne = x) {
- if (u8_strcmp(uniname, ne->uniname) == 0 &&
- (uc->compare_ino ? ino != ne->ino : true)) {
+ entry = uc->buckets[bucket];
+ new_entry->next = entry;
+ uc->buckets[bucket] = new_entry;
+
+ while (entry != NULL) {
+ /* Same normalization? */
+ if (new_entry->normstrlen == entry->normstrlen &&
+ !u8_strcmp(new_entry->normstr, entry->normstr) &&
+ (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
*badflags |= UNICRASH_NOT_UNIQUE;
+ *existing_entry = entry;
return true;
}
- nep = &ne->next;
- x = ne->next;
+ entry = entry->next;
}
- /* Remember that name. */
- x = malloc(NAME_ENTRY_SZ(uninamelen));
- if (!x)
- return false;
- x->next = NULL;
- x->ino = ino;
- x->uninamelen = uninamelen;
- memcpy(x->uniname, uniname, uninamelen + 1);
- *nep = x;
-
return true;
}
@@ -352,19 +399,22 @@ __unicrash_check_name(
const char *name,
xfs_ino_t ino)
{
- uint8_t uniname[(NAME_MAX * 2) + 1];
+ struct name_entry *dup_entry = NULL;
+ struct name_entry *new_entry;
unsigned int badflags = 0;
bool moveon;
- memset(uniname, 0, (NAME_MAX * 2) + 1);
- unicrash_normalize(name, uniname, NAME_MAX * 2);
- moveon = unicrash_add(uc, uniname, ino, &badflags);
+ /* If we can't create entry data, just skip it. */
+ if (!name_entry_create(uc, name, ino, &new_entry))
+ return true;
+
+ moveon = unicrash_add(uc, new_entry, &badflags, &dup_entry);
if (!moveon)
return false;
if (badflags)
- unicrash_complain(uc, descr, namedescr, badflags, name,
- uniname);
+ unicrash_complain(uc, descr, namedescr, new_entry, badflags,
+ dup_entry);
return true;
}
next prev parent reply other threads:[~2018-03-21 3:40 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-21 3:39 [PATCH 00/14] xfsprogs: online scrub fixes Darrick J. Wong
2018-03-21 3:39 ` [PATCH 01/14] xfs_scrub: avoid buffer overflow when scanning attributes Darrick J. Wong
2018-04-03 17:30 ` Eric Sandeen
2018-04-05 3:57 ` Darrick J. Wong
2018-04-11 0:20 ` Darrick J. Wong
2018-04-11 0:27 ` [PATCH v2 " Darrick J. Wong
2018-03-21 3:39 ` [PATCH 02/14] xfs_scrub: only run ascii name checks if unicode name checker Darrick J. Wong
2018-04-03 17:49 ` Eric Sandeen
2018-03-21 3:39 ` [PATCH 03/14] xfs_scrub: don't complain about different normalization Darrick J. Wong
2018-04-10 23:37 ` Eric Sandeen
2018-03-21 3:40 ` [PATCH 04/14] xfs_scrub: communicate name problems via flagset instead of booleans Darrick J. Wong
2018-04-10 23:46 ` Eric Sandeen
2018-03-21 3:40 ` Darrick J. Wong [this message]
2018-03-21 3:40 ` [PATCH 06/14] xfs_scrub: transition from libunistring to libicu for Unicode processing Darrick J. Wong
2018-03-21 3:40 ` [PATCH 07/14] xfs_scrub: check name for suspicious characters Darrick J. Wong
2018-03-21 3:40 ` [PATCH 08/14] xfs_scrub: use Unicode skeleton function to find confusing names Darrick J. Wong
2018-03-26 19:58 ` [PATCH v2 " Darrick J. Wong
2018-03-21 3:40 ` [PATCH 09/14] xfs_scrub: don't warn about confusing names if dir/file only writable by root Darrick J. Wong
2018-03-26 19:59 ` [PATCH v2 " Darrick J. Wong
2018-03-21 3:40 ` [PATCH 10/14] xfs_scrub: refactor mountpoint finding code to use libfrog path code Darrick J. Wong
2018-04-11 1:48 ` Eric Sandeen
2018-03-21 3:40 ` [PATCH 11/14] xfs_scrub_all: report version Darrick J. Wong
2018-04-11 0:28 ` Eric Sandeen
2018-03-21 3:40 ` [PATCH 12/14] xfs_scrub: disable private /tmp for scrub service Darrick J. Wong
2018-04-11 1:45 ` Eric Sandeen
2018-04-11 1:49 ` Darrick J. Wong
2018-04-11 1:53 ` [PATCH v2 " Darrick J. Wong
2018-03-21 3:41 ` [PATCH 13/14] xfs_scrub_all: escape paths being passed to systemd service instances Darrick J. Wong
2018-04-11 1:31 ` Eric Sandeen
2018-03-21 3:41 ` [PATCH 14/14] xfs_scrub_all: use system encoding for lsblk output decoding Darrick J. Wong
2018-04-11 1:35 ` Eric Sandeen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=152160361091.8288.13615401988451480485.stgit@magnolia \
--to=darrick.wong@oracle.com \
--cc=linux-xfs@vger.kernel.org \
--cc=sandeen@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox