From: Saranya Muruganandam <saranyamohan@google.com>
To: linux-ext4@vger.kernel.org, tytso@mit.edu
Cc: adilger.kernel@dilger.ca, Li Xi <lixi@ddn.com>,
Wang Shilong <wshilong@ddn.com>,
Saranya Muruganandam <saranyamohan@google.com>
Subject: [RFC PATCH v3 16/61] e2fsck: optimize the inserting of dir_info_db
Date: Wed, 18 Nov 2020 07:39:02 -0800 [thread overview]
Message-ID: <20201118153947.3394530-17-saranyamohan@google.com> (raw)
In-Reply-To: <20201118153947.3394530-1-saranyamohan@google.com>
From: Li Xi <lixi@ddn.com>
Binary search is now used when inserting a dir info into the array.
memmove() is now used when shifting array entries. Both changes improve
the performance of inserting.
This patch is also a preparation for the merging of two dir db
arrays.
Signed-off-by: Li Xi <lixi@ddn.com>
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Saranya Muruganandam <saranyamohan@google.com>
---
e2fsck/dirinfo.c | 172 ++++++++++++++++++++++++++++++-----------------
1 file changed, 112 insertions(+), 60 deletions(-)
diff --git a/e2fsck/dirinfo.c b/e2fsck/dirinfo.c
index 49d624c5..28baaca2 100644
--- a/e2fsck/dirinfo.c
+++ b/e2fsck/dirinfo.c
@@ -7,6 +7,7 @@
#undef DIRINFO_DEBUG
+#include <assert.h>
#include "config.h"
#include "e2fsck.h"
#include <sys/stat.h>
@@ -122,6 +123,104 @@ static void setup_db(e2fsck_t ctx)
"directory map");
}
+/*
+ * Find the first slot of the sorted dir_info array whose inode number is
+ * greater than or equal to @ino (a "lower bound" binary search).
+ *
+ * @dir_info: database whose ->array has ->count entries sorted by ->ino
+ * @ino:      inode number to look for
+ * @index:    on success, receives the index of the first entry with
+ *            ->ino >= @ino; left untouched on failure
+ *
+ * Returns 0 when such an entry exists, or -ENOENT when the array is empty
+ * or every entry has an inode number smaller than @ino.  Callers that need
+ * an exact match must compare array[*index].ino with @ino themselves.
+ */
+static int
+e2fsck_dir_info_min_larger_equal(struct dir_info_db *dir_info,
+				 ext2_ino_t ino, ext2_ino_t *index)
+{
+	ext2_ino_t low = 0;
+	ext2_ino_t high = dir_info->count;	/* one past the last candidate */
+	ext2_ino_t mid;
+
+	/*
+	 * Invariant: every entry below @low is smaller than @ino, and every
+	 * entry at or above @high is larger than or equal to @ino.
+	 */
+	while (low < high) {
+		/*
+		 * Compute the midpoint from the width of the window instead
+		 * of from low + high: the sum can wrap around in ext2_ino_t
+		 * before any widening cast is applied to it.
+		 */
+		mid = low + (high - low) / 2;
+		if (dir_info->array[mid].ino < ino)
+			low = mid + 1;
+		else
+			high = mid;
+	}
+
+	/* @low == count means no entry is large enough. */
+	if (low == dir_info->count)
+		return -ENOENT;
+
+	*index = low;
+	return 0;
+}
+
+/*
+ * Store (@ino, @parent) in the dir_info array, keeping the array sorted by
+ * inode number.  The caller must make sure the array has room for one more
+ * entry before calling.
+ *
+ * @dir_info: database to insert into
+ * @ino:      inode number of the directory
+ * @parent:   value stored in both the ->dotdot and ->parent fields
+ *
+ * Three outcomes are possible:
+ *  - @ino is larger than every stored inode (or the array is empty): the
+ *    entry is appended.  add_dir_info is normally called with inodes in
+ *    increasing order, so this is by far the most common case.
+ *  - @ino is already present: its entry is overwritten in place.
+ *  - @ino falls between existing entries (e.g. when pass 3 has to recreate
+ *    the root or lost+found directory): the tail of the array is shifted
+ *    up by one slot to open a gap, since get_dir_info() relies on the
+ *    array being sorted.
+ */
+static void e2fsck_insert_dir_info(struct dir_info_db *dir_info, ext2_ino_t ino, ext2_ino_t parent)
+{
+	struct dir_info *entries = dir_info->array;
+	ext2_ino_t nr_entries = dir_info->count;
+	ext2_ino_t pos = nr_entries;	/* default: append */
+	struct dir_info *slot;
+
+	/*
+	 * Only search when @ino does not sort after the current last entry;
+	 * the common sequential insert skips the binary search entirely.
+	 */
+	if (nr_entries != 0 && entries[nr_entries - 1].ino >= ino) {
+		/* Nothing >= @ino found: fall back to appending. */
+		if (e2fsck_dir_info_min_larger_equal(dir_info, ino, &pos) < 0)
+			pos = nr_entries;
+	}
+
+	slot = &entries[pos];
+	if (pos == nr_entries) {
+		/* Append: the new entry takes the first unused slot. */
+		dir_info->count++;
+	} else if (slot->ino != ino) {
+		/* Open a gap at @pos by moving the tail up one entry. */
+		memmove(slot + 1, slot, sizeof(*slot) * (nr_entries - pos));
+		dir_info->count++;
+	}
+	/* else: @ino already has an entry at @pos; just refresh it below. */
+
+	slot->ino = ino;
+	slot->dotdot = parent;
+	slot->parent = parent;
+}
+
/*
* This subroutine is called during pass1 to create a directory info
* entry. During pass1, the passed-in parent is 0; it will get filled
@@ -171,30 +270,7 @@ void e2fsck_add_dir_info(e2fsck_t ctx, ext2_ino_t ino, ext2_ino_t parent)
}
#endif
- /*
- * Normally, add_dir_info is called with each inode in
- * sequential order; but once in a while (like when pass 3
- * needs to recreate the root directory or lost+found
- * directory) it is called out of order. In those cases, we
- * need to move the dir_info entries down to make room, since
- * the dir_info array needs to be sorted by inode number for
- * get_dir_info()'s sake.
- */
- if (ctx->dir_info->count &&
- ctx->dir_info->array[ctx->dir_info->count-1].ino >= ino) {
- for (i = ctx->dir_info->count-1; i > 0; i--)
- if (ctx->dir_info->array[i-1].ino < ino)
- break;
- dir = &ctx->dir_info->array[i];
- if (dir->ino != ino)
- for (j = ctx->dir_info->count++; j > i; j--)
- ctx->dir_info->array[j] = ctx->dir_info->array[j-1];
- } else
- dir = &ctx->dir_info->array[ctx->dir_info->count++];
-
- dir->ino = ino;
- dir->dotdot = parent;
- dir->parent = parent;
+ e2fsck_insert_dir_info(ctx->dir_info, ino, parent);
}
/*
@@ -204,7 +280,8 @@ void e2fsck_add_dir_info(e2fsck_t ctx, ext2_ino_t ino, ext2_ino_t parent)
static struct dir_info *e2fsck_get_dir_info(e2fsck_t ctx, ext2_ino_t ino)
{
struct dir_info_db *db = ctx->dir_info;
- ext2_ino_t low, high, mid;
+ ext2_ino_t index;
+ int err;
if (!db)
return 0;
@@ -245,44 +322,19 @@ static struct dir_info *e2fsck_get_dir_info(e2fsck_t ctx, ext2_ino_t ino)
if (db->last_lookup && db->last_lookup->ino == ino)
return db->last_lookup;
- low = 0;
- high = ctx->dir_info->count - 1;
- if (ino == ctx->dir_info->array[low].ino) {
+ err = e2fsck_dir_info_min_larger_equal(ctx->dir_info, ino, &index);
+ if (err < 0)
+ return NULL;
+ assert(ino <= ctx->dir_info->array[index].ino);
+ if (ino == ctx->dir_info->array[index].ino) {
#ifdef DIRINFO_DEBUG
- printf("(%u,%u,%u)\n", ino,
- ctx->dir_info->array[low].dotdot,
- ctx->dir_info->array[low].parent);
+ printf("(%d,%d,%d)\n", ino,
+ ctx->dir_info->array[index].dotdot,
+ ctx->dir_info->array[index].parent);
#endif
- return &ctx->dir_info->array[low];
+ return &ctx->dir_info->array[index];
}
- if (ino == ctx->dir_info->array[high].ino) {
-#ifdef DIRINFO_DEBUG
- printf("(%u,%u,%u)\n", ino,
- ctx->dir_info->array[high].dotdot,
- ctx->dir_info->array[high].parent);
-#endif
- return &ctx->dir_info->array[high];
- }
-
- while (low < high) {
- /* sum may overflow, but result will fit into mid again */
- mid = (unsigned long long)(low + high) / 2;
- if (mid == low || mid == high)
- break;
- if (ino == ctx->dir_info->array[mid].ino) {
-#ifdef DIRINFO_DEBUG
- printf("(%u,%u,%u)\n", ino,
- ctx->dir_info->array[mid].dotdot,
- ctx->dir_info->array[mid].parent);
-#endif
- return &ctx->dir_info->array[mid];
- }
- if (ino < ctx->dir_info->array[mid].ino)
- high = mid;
- else
- low = mid;
- }
- return 0;
+ return NULL;
}
static void e2fsck_put_dir_info(e2fsck_t ctx EXT2FS_NO_TDB_UNUSED,
--
2.29.2.299.gdc1121823c-goog
next prev parent reply other threads:[~2020-11-18 15:40 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-18 15:38 [RFC PATCH v3 00/61] Introduce parallel fsck to e2fsck pass1 Saranya Muruganandam
2020-11-18 15:38 ` [RFC PATCH v3 01/61] e2fsck: add -m option for multithread Saranya Muruganandam
2020-11-23 19:53 ` harshad shirwadkar
2020-11-23 21:28 ` Theodore Y. Ts'o
2020-11-18 15:38 ` [RFC PATCH v3 02/61] e2fsck: copy context when using multi-thread fsck Saranya Muruganandam
2020-11-23 19:55 ` harshad shirwadkar
2020-11-23 21:38 ` Theodore Y. Ts'o
2020-12-17 23:56 ` Darrick J. Wong
2020-12-18 1:13 ` Wang Shilong
2020-12-18 1:27 ` Darrick J. Wong
2020-11-18 15:38 ` [RFC PATCH v3 03/61] e2fsck: copy fs " Saranya Muruganandam
2020-11-23 22:12 ` Theodore Y. Ts'o
2020-11-18 15:38 ` [RFC PATCH v3 04/61] e2fsck: clear icache " Saranya Muruganandam
2020-11-23 22:27 ` Theodore Y. Ts'o
2020-11-18 15:38 ` [RFC PATCH v3 05/61] e2fsck: add assert when copying context Saranya Muruganandam
2020-11-18 15:38 ` [RFC PATCH v3 06/61] e2fsck: copy bitmaps " Saranya Muruganandam
2020-11-18 15:38 ` [RFC PATCH v3 07/61] e2fsck: copy badblocks when copying fs Saranya Muruganandam
2020-11-18 15:38 ` [RFC PATCH v3 08/61] e2fsck: open io-channel " Saranya Muruganandam
2020-11-23 22:38 ` Theodore Y. Ts'o
2020-11-24 14:17 ` Theodore Y. Ts'o
2020-11-18 15:38 ` [RFC PATCH v3 09/61] e2fsck: create logs for mult-threads Saranya Muruganandam
2020-11-23 23:05 ` Theodore Y. Ts'o
2020-11-18 15:38 ` [RFC PATCH v3 10/61] e2fsck: optionally configure one pfsck thread Saranya Muruganandam
2020-11-23 23:16 ` Theodore Y. Ts'o
2020-11-18 15:38 ` [RFC PATCH v3 11/61] e2fsck: add start/end group for thread Saranya Muruganandam
2020-11-18 15:38 ` [RFC PATCH v3 12/61] e2fsck: split groups to different threads Saranya Muruganandam
2020-11-18 15:38 ` [RFC PATCH v3 13/61] e2fsck: print thread log properly Saranya Muruganandam
2020-11-23 23:40 ` Theodore Y. Ts'o
2020-11-18 15:39 ` [RFC PATCH v3 14/61] e2fsck: merge bitmaps after thread completes Saranya Muruganandam
2020-11-24 2:00 ` Theodore Y. Ts'o
2020-11-18 15:39 ` [RFC PATCH v3 15/61] e2fsck: do not change global variables Saranya Muruganandam
2020-11-18 15:39 ` Saranya Muruganandam [this message]
2020-11-18 15:39 ` [RFC PATCH v3 17/61] e2fsck: merge dir_info after thread finishes Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 18/61] e2fsck: rbtree bitmap for dir Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 19/61] e2fsck: merge badblocks after thread finishes Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 20/61] e2fsck: merge icounts " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 21/61] e2fsck: merge dblist " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 22/61] e2fsck: add debug codes for multiple threads Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 23/61] e2fsck: merge counts after threads finish Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 24/61] e2fsck: merge fs flags when " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 25/61] e2fsck: merge dx_dir_info after " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 26/61] e2fsck: merge dirs_to_hash when " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 27/61] e2fsck: merge context flags properly Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 28/61] e2fsck: merge quota context after threads finish Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 29/61] e2fsck: serialize fix operations Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 30/61] e2fsck: move some fixes out of parallel pthreads Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 31/61] e2fsck: split and merge invalid bitmaps Saranya Muruganandam
2020-12-18 0:05 ` Darrick J. Wong
2020-12-18 1:19 ` Wang Shilong
2020-11-18 15:39 ` [RFC PATCH v3 32/61] e2fsck: merge EA blocks properly Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 33/61] e2fsck: kickoff mutex lock for block found map Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 34/61] e2fsck: allow admin specify number of threads Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 35/61] e2fsck: adjust " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 36/61] e2fsck: fix readahead for pfsck of pass1 Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 37/61] e2fsck: merge options after threads finish Saranya Muruganandam
2020-12-17 23:30 ` Darrick J. Wong
2020-11-18 15:39 ` [RFC PATCH v3 38/61] e2fsck: reset lost_and_found " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 39/61] e2fsck: merge extent depth count " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 40/61] e2fsck: simplify e2fsck context merging codes Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 41/61] e2fsck: set E2F_FLAG_ALLOC_OK after threads Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 42/61] e2fsck: wait fix thread finish before checking Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 43/61] e2fsck: cleanup e2fsck_pass1_thread_join() Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 44/61] e2fsck: avoid too much memory allocation for pfsck Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 45/61] e2fsck: make default smallest RA size to 1M Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 46/61] ext2fs: parallel bitmap loading Saranya Muruganandam
2020-11-24 2:44 ` Theodore Y. Ts'o
2020-11-18 15:39 ` [RFC PATCH v3 47/61] e2fsck: update mmp block in one thread Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 48/61] e2fsck: reset @inodes_to_rebuild if restart Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 49/61] e2fsck: fix build for make rpm Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 50/61] e2fsck: move ext2fs_get_avg_group to rw_bitmaps.c Saranya Muruganandam
2020-11-24 2:12 ` Theodore Y. Ts'o
2020-11-18 15:39 ` [RFC PATCH v3 51/61] configure: enable pfsck by default Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 53/61] e2fsck: fix f_multithread_ok test Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 54/61] e2fsck: fix race in ext2fs_read_bitmaps() Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 55/61] e2fsck: fix readahead for pass1 without pfsck Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 56/61] e2fsck: fix memory leaks with pfsck enabled Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 57/61] ext2fs: fix to set tail flags " Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 58/61] e2fsck: misc cleanups for pfsck Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 59/61] e2fsck: update mmp block race Saranya Muruganandam
2020-11-18 15:39 ` [RFC PATCH v3 60/61] e2fsck: propagate number of threads Saranya Muruganandam
2020-11-24 3:56 ` Theodore Y. Ts'o
2020-11-18 15:39 ` [RFC PATCH v3 61/61] e2fsck: Annotating fields in e2fsck_struct Saranya Muruganandam
2020-11-19 15:58 ` [RFC PATCH v3 00/61] Introduce parallel fsck to e2fsck pass1 Theodore Y. Ts'o
2020-11-23 21:25 ` Theodore Y. Ts'o
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201118153947.3394530-17-saranyamohan@google.com \
--to=saranyamohan@google.com \
--cc=adilger.kernel@dilger.ca \
--cc=linux-ext4@vger.kernel.org \
--cc=lixi@ddn.com \
--cc=tytso@mit.edu \
--cc=wshilong@ddn.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.