From: "Darrick J. Wong" <djwong@kernel.org>
To: cem@kernel.org
Cc: linux-xfs@vger.kernel.org, david@fromorbit.com,
Christoph Hellwig <hch@infradead.org>
Subject: [RFC PATCH 8/6] xfs_db: create dirents and xattrs with colliding names
Date: Thu, 13 Apr 2023 08:20:14 -0700 [thread overview]
Message-ID: <20230413152014.GX360889@frogsfrogsfrogs> (raw)
In-Reply-To: <168073977341.1656666.5994535770114245232.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Create a new debugger command that will create dirent and xattr names
that induce dahash collisions. This is the driver program that xfs/861
uses to reproduce dabtree node block checking errors.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
db/hash.c | 376 +++++++++++++++++++++++++++++++++++++++++++++++++++++
man/man8/xfs_db.8 | 31 ++++
2 files changed, 407 insertions(+)
diff --git a/db/hash.c b/db/hash.c
index 68c53e7f9bc..79a250526e9 100644
--- a/db/hash.c
+++ b/db/hash.c
@@ -5,12 +5,15 @@
*/
#include "libxfs.h"
+#include "init.h"
#include "addr.h"
#include "command.h"
#include "type.h"
#include "io.h"
#include "output.h"
#include "hash.h"
+#include "obfuscate.h"
+#include <sys/xattr.h>
static int hash_f(int argc, char **argv);
static void hash_help(void);
@@ -46,8 +49,381 @@ hash_f(
return 0;
}
+/* Print the long help text for the hashcoll command. */
+static void
+hashcoll_help(void)
+{
+	/* Value substituted for %d in the description of -i below. */
+	const int	max_stdin_bytes = MAXNAMELEN - 1;
+
+	printf(_(
+"\n"
+" Generate obfuscated variants of the provided name. Each variant will have\n"
+" the same dahash value. Names are written to stdout with a NULL separating\n"
+" each name.\n"
+"\n"
+" -a -- create extended attributes.\n"
+" -i -- read standard input for the name, up to %d bytes.\n"
+" -n -- create this many names.\n"
+" -p -- create directory entries or extended attributes in this file.\n"
+" -s -- seed the rng with this value.\n"
+"\n"),
+			max_stdin_bytes);
+}
+
+/*
+ * One remembered name.  Entries that fall into the same hash bucket are
+ * chained together through @next.
+ */
+struct name_dup {
+	struct name_dup	*next;		/* next entry in the same bucket */
+	uint32_t	crc;		/* crc32c of the name bytes */
+	uint8_t		namelen;	/* number of bytes in name[] */
+	uint8_t		name[];		/* name bytes, no NUL terminator */
+};
+
+/* Bytes needed for a struct name_dup that carries a @namelen byte name. */
+static inline size_t
+name_dup_sizeof(
+	unsigned int	namelen)
+{
+	size_t		bytes = sizeof(struct name_dup);
+
+	bytes += namelen;
+	return bytes;
+}
+
+/* Upper bound on the number of buckets in a dup_table. */
+#define MAX_DUP_TABLE_BUCKETS	(1048575)
+
+/* Chained hash table of the names that have already been handed out. */
+struct dup_table {
+	unsigned int	nr_buckets;	/* number of slots in buckets[] */
+	struct name_dup	*buckets[];	/* head of each collision chain */
+};
+
+/* Bytes needed for a struct dup_table holding @nr_buckets bucket heads. */
+static inline size_t
+dup_table_sizeof(
+	unsigned int	nr_buckets)
+{
+	size_t		heads = nr_buckets * sizeof(struct name_dup *);
+
+	return sizeof(struct dup_table) + heads;
+}
+
+/*
+ * Allocate a table for detecting duplicates among @nr_names names.
+ *
+ * Zero or one name cannot contain a duplicate, so in that case no table is
+ * allocated and *tabp is set to NULL.  Otherwise the table gets one bucket
+ * per name, capped at MAX_DUP_TABLE_BUCKETS.
+ *
+ * Returns 0 on success or ENOMEM if the allocation fails.
+ */
+static int
+dup_table_alloc(
+	unsigned long		nr_names,
+	struct dup_table	**tabp)
+{
+	struct dup_table	*t;
+
+	*tabp = NULL;
+
+	/*
+	 * A table with zero buckets would make the "crc % nr_buckets" bucket
+	 * selection divide by zero, so never build one.
+	 */
+	if (nr_names <= 1)
+		return 0;
+
+	if (nr_names > MAX_DUP_TABLE_BUCKETS)
+		nr_names = MAX_DUP_TABLE_BUCKETS;
+
+	t = calloc(1, dup_table_sizeof(nr_names));
+	if (!t)
+		return ENOMEM;
+
+	t->nr_buckets = nr_names;
+	*tabp = t;
+	return 0;
+}
+
+/* Free every chained entry in @tab and then @tab itself; NULL is a no-op. */
+static void
+dup_table_free(
+	struct dup_table	*tab)
+{
+	unsigned int		bucket;
+
+	if (tab == NULL)
+		return;
+
+	for (bucket = 0; bucket < tab->nr_buckets; bucket++) {
+		struct name_dup	*cur = tab->buckets[bucket];
+
+		while (cur != NULL) {
+			/* Grab the successor before cur goes away. */
+			struct name_dup	*following = cur->next;
+
+			free(cur);
+			cur = following;
+		}
+	}
+
+	free(tab);
+}
+
+/*
+ * Look up the @namelen byte @name in @tab.  Returns the matching entry, or
+ * NULL if the name has not been stored.
+ */
+static struct name_dup *
+dup_table_find(
+	struct dup_table	*tab,
+	unsigned char		*name,
+	size_t			namelen)
+{
+	struct name_dup		*cur;
+	uint32_t		key = crc32c(~0, name, namelen);
+
+	for (cur = tab->buckets[key % tab->nr_buckets]; cur; cur = cur->next) {
+		/* Cheap comparisons first; memcmp only when both agree. */
+		if (cur->crc != key)
+			continue;
+		if (cur->namelen != namelen)
+			continue;
+		if (memcmp(cur->name, name, namelen) == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Record the @namelen byte @name in @tab.
+ *
+ * While the name is already present, find_alternate() is called with an
+ * increasing sequence number until it returns nonzero; the lookup is then
+ * repeated.  (Presumably find_alternate() rewrites @name in place -- confirm
+ * against obfuscate.c.)
+ *
+ * Returns 0 on success, EEXIST if find_alternate() returns a negative
+ * value, or ENOMEM if the new entry cannot be allocated.
+ */
+static int
+dup_table_store(
+	struct dup_table	*tab,
+	unsigned char		*name,
+	size_t			namelen)
+{
+	struct name_dup		*fresh;
+	struct name_dup		**head;
+	uint32_t		key;
+	uint32_t		seq = 1;
+
+	ASSERT(namelen < MAXNAMELEN);
+
+	while (dup_table_find(tab, name, namelen) != NULL) {
+		int		ret;
+
+		for (;;) {
+			ret = find_alternate(namelen, name, seq++);
+			if (ret != 0)
+				break;
+		}
+		if (ret < 0)
+			return EEXIST;
+	}
+
+	fresh = malloc(name_dup_sizeof(namelen));
+	if (fresh == NULL)
+		return ENOMEM;
+
+	key = crc32c(~0, name, namelen);
+	head = &tab->buckets[key % tab->nr_buckets];
+
+	/* Push the new entry onto the front of its bucket's chain. */
+	fresh->crc = key;
+	fresh->namelen = namelen;
+	memcpy(fresh->name, name, namelen);
+	fresh->next = *head;
+	*head = fresh;
+	return 0;
+}
+
+/*
+ * Generate @nr directory entry names that hash to the same value as the
+ * @namelen byte @name.
+ *
+ * With @fd >= 0, @name is created in that directory and every generated name
+ * is hard linked to it; EEXIST from linkat() is ignored.  With @fd < 0 the
+ * names are written to stdout, each followed by a NUL byte, and (for more
+ * than one name) a dup_table is used so that no name is emitted twice.
+ *
+ * Returns 0 on success or a positive errno value.
+ */
+static int
+collide_dirents(
+	unsigned long		nr,
+	const unsigned char	*name,
+	size_t			namelen,
+	int			fd)
+{
+	struct xfs_name		dname = {
+		.name		= name,
+		.len		= namelen,
+	};
+	unsigned char		origname[MAXNAMELEN];
+	unsigned char		direntname[MAXNAMELEN + 1];
+	struct dup_table	*tab = NULL;
+	xfs_dahash_t		old_hash;
+	unsigned long		i;
+	int			error = 0;
+
+	/* Anything longer would overflow the stack buffers above. */
+	if (namelen >= MAXNAMELEN)
+		return ENAMETOOLONG;
+
+	/*
+	 * The caller hands us a pointer and a length, not necessarily a
+	 * NUL-terminated string, but openat(), linkat() and printf("%s")
+	 * all need one.  Work from a terminated private copy.
+	 */
+	memcpy(origname, name, namelen);
+	origname[namelen] = '\0';
+
+	old_hash = libxfs_dir2_hashname(mp, &dname);
+
+	if (fd >= 0) {
+		int		newfd;
+
+		/*
+		 * User passed in a fd, so we'll use the directory to detect
+		 * duplicate names.  First create the name that we are passed
+		 * in; the new names will be hardlinks to the first file.
+		 */
+		newfd = openat(fd, (char *)origname, O_CREAT, 0600);
+		if (newfd < 0)
+			return errno;
+		close(newfd);
+	} else if (nr > 1) {
+		/*
+		 * Track every name we create so that we don't emit duplicates.
+		 */
+		error = dup_table_alloc(nr, &tab);
+		if (error)
+			return error;
+	}
+
+	dname.name = direntname;
+	for (i = 0; i < nr; i++) {
+		/* Start every round from the original name, NUL included. */
+		memcpy(direntname, origname, namelen + 1);
+		obfuscate_name(old_hash, namelen, direntname, true);
+		ASSERT(old_hash == libxfs_dir2_hashname(mp, &dname));
+
+		if (fd >= 0) {
+			/*
+			 * Do not store linkat()'s -1 in error: a tolerated
+			 * EEXIST on the last name must not turn into a bogus
+			 * failure return.
+			 */
+			if (linkat(fd, (char *)origname, fd,
+				   (char *)direntname, 0) < 0 &&
+			    errno != EEXIST)
+				return errno;
+
+			/* don't print names to stdout */
+			continue;
+		}
+
+		if (tab) {
+			error = dup_table_store(tab, direntname, namelen);
+			if (error)
+				break;
+		}
+
+		printf("%s%c", direntname, 0);
+	}
+
+	dup_table_free(tab);
+	return error;
+}
+
+/*
+ * Generate @nr extended attribute names whose hash matches that of the
+ * @namelen byte @name.  Only the part after the "user." prefix is hashed
+ * and obfuscated.
+ *
+ * With @fd >= 0, the original attribute and every generated one are set on
+ * that file with the value "1".  With @fd < 0 the prefixed names are written
+ * to stdout, each followed by a NUL byte, and (for more than one name) a
+ * dup_table is used so that no name is emitted twice.
+ *
+ * Returns 0 on success or a positive errno value.
+ */
+static int
+collide_xattrs(
+	unsigned long		nr,
+	const unsigned char	*name,
+	size_t			namelen,
+	int			fd)
+{
+	unsigned char		xattrname[MAXNAMELEN + 5];
+	unsigned char		*suffix = xattrname + 5;
+	struct dup_table	*tab = NULL;
+	xfs_dahash_t		old_hash;
+	unsigned long		i;
+	int			error = 0;	/* returned as-is if nr <= 1 */
+
+	/* "user." + name + NUL has to fit in xattrname. */
+	if (namelen >= MAXNAMELEN)
+		return ENAMETOOLONG;
+
+	old_hash = libxfs_da_hashname(name, namelen);
+
+	/* The prefix never changes, so write it only once. */
+	memcpy(xattrname, "user.", 5);
+
+	if (fd >= 0) {
+		/*
+		 * User passed in a fd, so we'll use the xattr structure to
+		 * detect duplicate names.  First create the attribute that we
+		 * are passed in.
+		 */
+		memcpy(suffix, name, namelen);
+		suffix[namelen] = '\0';
+		if (fsetxattr(fd, (char *)xattrname, "1", 1, 0) < 0)
+			return errno;
+	} else if (nr > 1) {
+		/*
+		 * Track every name we create so that we don't emit duplicates.
+		 */
+		error = dup_table_alloc(nr, &tab);
+		if (error)
+			return error;
+	}
+
+	for (i = 0; i < nr; i++) {
+		/*
+		 * Copy by length rather than with "%s" so that the caller's
+		 * buffer does not have to be NUL-terminated.
+		 */
+		memcpy(suffix, name, namelen);
+		suffix[namelen] = '\0';
+		obfuscate_name(old_hash, namelen, suffix, false);
+		ASSERT(old_hash == libxfs_da_hashname(suffix, namelen));
+
+		if (fd >= 0) {
+			if (fsetxattr(fd, (char *)xattrname, "1", 1, 0) < 0)
+				return errno;
+
+			/* don't print names to stdout */
+			continue;
+		}
+
+		if (tab) {
+			/*
+			 * Deduplicate on the hashed suffix only.  Every name
+			 * shares the same prefix, so this detects the same
+			 * duplicates, keeps the length within the limit that
+			 * dup_table_store() asserts, and keeps any alternate
+			 * name generation away from the "user." bytes.
+			 */
+			error = dup_table_store(tab, suffix, namelen);
+			if (error)
+				break;
+		}
+
+		printf("%s%c", xattrname, 0);
+	}
+
+	dup_table_free(tab);
+	return error;
+}
+
+/*
+ * Generate @nr colliding names for a single @len byte @name, as dirents or
+ * xattrs, and report any failure on stdout while setting exitcode.
+ */
+static void
+hashcoll_one(
+	bool			create_xattr,
+	unsigned long		nr,
+	const char		*name,
+	size_t			len,
+	int			fd)
+{
+	const unsigned char	*uname = (const unsigned char *)name;
+	int			error;
+
+	/* The generators work in MAXNAMELEN-sized stack buffers. */
+	if (len >= MAXNAMELEN)
+		error = ENAMETOOLONG;
+	else if (create_xattr)
+		error = collide_xattrs(nr, uname, len, fd);
+	else
+		error = collide_dirents(nr, uname, len, fd);
+
+	if (error) {
+		printf(_("hashcoll: %s\n"), strerror(error));
+		exitcode = 1;
+	}
+}
+
+/*
+ * hashcoll command: create names that collide in the dahash.
+ *
+ * Options: -a create xattr names instead of dirent names, -i take the name
+ * from stdin, -n number of names, -p file or directory to create them in,
+ * -s rng seed.  Without -i, each remaining argument is used as a name.
+ *
+ * Always returns 0; failures are reported by setting exitcode to 1.
+ */
+static int
+hashcoll_f(
+	int			argc,
+	char			**argv)
+{
+	const char		*path = NULL;
+	bool			read_stdin = false;
+	bool			create_xattr = false;
+	unsigned long		nr = 1, seed = 0;
+	int			fd = -1;
+	int			c;
+
+	while ((c = getopt(argc, argv, "ain:p:s:")) != EOF) {
+		switch (c) {
+		case 'a':
+			create_xattr = true;
+			break;
+		case 'i':
+			read_stdin = true;
+			break;
+		case 'n':
+			nr = strtoul(optarg, NULL, 10);
+			break;
+		case 'p':
+			path = optarg;
+			break;
+		case 's':
+			seed = strtoul(optarg, NULL, 10);
+			break;
+		default:
+			exitcode = 1;
+			hashcoll_help();
+			return 0;
+		}
+	}
+
+	if (path) {
+		int		oflags = O_RDWR;
+
+		/* Dirents can only be created inside a directory. */
+		if (!create_xattr)
+			oflags = O_RDONLY | O_DIRECTORY;
+
+		fd = open(path, oflags);
+		if (fd < 0) {
+			perror(path);
+			exitcode = 1;
+			return 0;
+		}
+	}
+
+	/* A seed of zero (the default) leaves the rng alone. */
+	if (seed)
+		srandom(seed);
+
+	if (read_stdin) {
+		char		buf[MAXNAMELEN];
+		size_t		len;
+
+		/*
+		 * fread() does not terminate what it reads.  At most
+		 * MAXNAMELEN - 1 bytes are read, so there is always room to
+		 * terminate the buffer and make it safe wherever the name is
+		 * used as a C string.
+		 */
+		len = fread(buf, 1, MAXNAMELEN - 1, stdin);
+		buf[len] = '\0';
+
+		hashcoll_one(create_xattr, nr, buf, len, fd);
+		goto done;
+	}
+
+	for (c = optind; c < argc; c++)
+		hashcoll_one(create_xattr, nr, argv[c], strlen(argv[c]), fd);
+
+done:
+	if (fd >= 0)
+		close(fd);
+	return 0;
+}
+
+/* Command table entry for "hashcoll"; registered by hash_init(). */
+static cmdinfo_t	hashcoll_cmd = {
+	.name		= "hashcoll",
+	.cfunc		= hashcoll_f,
+	.argmin		= 0,
+	.argmax		= -1,
+	.args		= N_("[-a] [-s seed] [-n nr] [-p path] -i|names..."),
+	.oneline	= N_("create names that produce dahash collisions"),
+	.help		= hashcoll_help,
+};
+
void
hash_init(void)
{
add_command(&hash_cmd);
+ add_command(&hashcoll_cmd);
}
diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8
index 43c7db5e225..793d0042319 100644
--- a/man/man8/xfs_db.8
+++ b/man/man8/xfs_db.8
@@ -791,6 +791,37 @@ Prints the hash value of
.I string
using the hash function of the XFS directory and attribute implementation.
.TP
+.BI "hashcoll [-a] [-s seed] [-n " nr "] [-p " path "] -i | " names...
+Create directory entry or extended attribute names that all have the same
+hash value.
+The metadump name obfuscation algorithm is used here.
+Names are written to standard output, with a NUL byte after each name for use
+with xargs -0.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-a
+Create extended attribute names.
+.TP 0.4i
+.B \-i
+Read the first name to create from standard input.
+Up to 255 bytes are read.
+If this option is not specified, first names are taken from the command line.
+.TP 0.4i
+.BI \-n " nr"
+Create this many duplicated names.
+The default is to create one name.
+.TP 0.4i
+.BI \-p " path"
+Create directory entries or extended attributes in this file instead of
+writing the names to standard output.
+.TP 0.4i
+.BI \-s " seed"
+Seed the random number generator with this value.
+.PD
+.RE
+.TP
.BI "help [" command ]
Print help for one or all commands.
.TP
next prev parent reply other threads:[~2023-04-13 15:21 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-06 0:02 [PATCHSET v2 0/4] xfs: fix ascii-ci problems, then kill it Darrick J. Wong
2023-04-06 0:02 ` [PATCH 1/4] xfs: stabilize the dirent name transformation function used for ascii-ci dir hash computation Darrick J. Wong
2023-04-11 4:50 ` Christoph Hellwig
2023-04-06 0:03 ` [PATCH 2/4] xfs: test the ascii case-insensitive hash Darrick J. Wong
2023-04-11 4:50 ` Christoph Hellwig
2023-04-06 0:03 ` [PATCH 3/4] xfs: use the directory name hash function for dir scrubbing Darrick J. Wong
2023-04-11 4:51 ` Christoph Hellwig
2023-04-06 0:03 ` [PATCH 4/4] xfs: deprecate the ascii-ci feature Darrick J. Wong
2023-04-11 4:52 ` Christoph Hellwig
2023-04-06 0:09 ` [PATCHSET v2 0/6] xfsprogs: fix ascii-ci problems, then kill it Darrick J. Wong
2023-04-06 0:09 ` [PATCH 1/6] xfs: stabilize the dirent name transformation function used for ascii-ci dir hash computation Darrick J. Wong
2023-04-06 0:09 ` [PATCH 2/6] xfs: test the ascii case-insensitive hash Darrick J. Wong
2023-04-06 0:09 ` [PATCH 3/6] xfs_db: move obfuscate_name assertion to callers Darrick J. Wong
2023-04-11 4:52 ` Christoph Hellwig
2023-04-06 0:09 ` [PATCH 4/6] xfs_db: fix metadump name obfuscation for ascii-ci filesystems Darrick J. Wong
2023-04-11 4:58 ` Christoph Hellwig
2023-04-11 15:35 ` Darrick J. Wong
2023-04-12 12:09 ` Christoph Hellwig
2023-04-12 22:04 ` Darrick J. Wong
2023-04-06 0:10 ` [PATCH 5/6] mkfs.xfs.8: warn about the version=ci feature Darrick J. Wong
2023-04-11 4:59 ` Christoph Hellwig
2023-04-06 0:10 ` [PATCH 6/6] mkfs: deprecate the ascii-ci feature Darrick J. Wong
2023-04-11 4:59 ` Christoph Hellwig
2023-04-13 15:19 ` [RFC PATCH 7/6] xfs_db: hoist name obfuscation code out of metadump.c Darrick J. Wong
2023-04-13 15:20 ` Darrick J. Wong [this message]
2023-04-06 0:11 ` [PATCH] fstests: add a couple more tests for ascii-ci problems Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230413152014.GX360889@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=cem@kernel.org \
--cc=david@fromorbit.com \
--cc=hch@infradead.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.