Util-Linux package development
 help / color / mirror / Atom feed
From: yumkam@gmail.com (Yuriy M. Kaminskiy)
To: util-linux@vger.kernel.org
Subject: [RFC][WIP][PATCH] fsck -l and lvm/dmcrypt/md/etc
Date: Sat, 09 Apr 2016 00:40:18 +0300	[thread overview]
Message-ID: <m34mbbrd8d.fsf@gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 704 bytes --]

I think this algorithm should work:

1) walk /dev/block/${dev}/slaves recursively (e.g. lvm-over- luks-
over-lvm- over-md, first-level slaves won't work), collect all
whole-disk devices;
2) sort this list by device numbers (to avoid AB/BA deadlocks), remove
duplicates;
3) acquire all locks consequently.

There are some unavoidable flaws (e.g. if someone alters structure while fsck
is running), and some things could be improved, but it should work in
most cases (and if it fails, it is just no worse than current behavior).

I've tested with LVM and LUKS-over-LVM on (debian's) 3.16 kernel, it seems
works as expected.

What is not covered: /dev/loop* (and I have no plans for it).

Comments? NACKs?


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: fsck--l-dm-md.patch --]
[-- Type: text/x-diff, Size: 7033 bytes --]

BEWARE: work-in-progress, and it is full of NIH.

diff --git a/disk-utils/fsck.c b/disk-utils/fsck.c
index 84d2dcc..6bd708f 100644
--- a/disk-utils/fsck.c
+++ b/disk-utils/fsck.c
@@ -100,6 +100,10 @@ struct fsck_fs_data
 			eval_device:1;
 };
 
+union disk_lock {
+	dev_t	dev;
+	int	lock;		/* flock()ed file descriptor or -1 */
+};
 /*
  * Structure to allow exit codes to be stored
  */
@@ -107,8 +111,9 @@ struct fsck_instance {
 	int	pid;
 	int	flags;		/* FLAG_{DONE|PROGRESS} */
 
-	int	lock;		/* flock()ed lockpath file descriptor or -1 */
-	char	*lockpath;	/* /run/fsck/<diskname>.lock or NULL */
+
+	union disk_lock *locks, lock;
+	size_t nlocks;
 
 	int	exit_status;
 	struct timeval start_time;
@@ -335,19 +340,25 @@ static int is_irrotational_disk(dev_t disk)
 	return rc == 1 ? !x : 0;
 }
 
+static int compare_locks(const void *a, const void *b)
+{
+	dev_t a_dev = ((const union disk_lock *)a)->dev;
+	dev_t b_dev = ((const union disk_lock *)b)->dev;
+	return (a_dev > b_dev) - (b_dev > a_dev);
+}
+
 static void lock_disk(struct fsck_instance *inst)
 {
 	dev_t disk = fs_get_disk(inst->fs, 1);
 	char *diskpath = NULL, *diskname;
-
-	inst->lock = -1;
+	union disk_lock *locks = NULL;
+	size_t nlocks = 0;
+	size_t i;
+	char path[PATH_MAX];
+	int rc;
 
 	if (!disk || is_irrotational_disk(disk))
-		goto done;
-
-	diskpath = blkid_devno_to_devname(disk);
-	if (!diskpath)
-		goto done;
+		return;
 
 	if (access(FSCK_RUNTIME_DIRNAME, F_OK) != 0) {
 		int rc = mkdir(FSCK_RUNTIME_DIRNAME,
@@ -357,64 +368,151 @@ static void lock_disk(struct fsck_instance *inst)
 		if (rc && errno != EEXIST) {
 			warn(_("cannot create directory %s"),
 					FSCK_RUNTIME_DIRNAME);
-			goto done;
+			return;
 		}
 	}
 
+	if (fs_is_stacked(inst->fs)) {
+		/* XXX this code (gracefully) ignores errors */
+		size_t allocated = 4;
+	       	locks = malloc(allocated*sizeof(union disk_lock));
+		locks[nlocks++].dev = disk;
+		for(i = 0; i < nlocks; i++) {
+			size_t path_end;
+			DIR *dir;
+			struct dirent *dp;
+			size_t j;
+
+			disk = locks[i].dev;
+			rc = snprintf(path, sizeof(path),
+					"/sys/dev/block/%d:%d/slaves",
+					major(disk), minor(disk));
+			if (rc < 0 || (unsigned int) rc >= sizeof(path))
+				continue;
+			path_end = rc;
+			if ((dir = opendir(path)) == NULL)
+				continue;
+			while((dp = readdir(dir)) != NULL) {
+				FILE *f;
+				int maj, min;
+#ifdef _DIRENT_HAVE_D_TYPE
+				if (dp->d_type != DT_UNKNOWN &&
+				    dp->d_type != DT_LNK)
+					continue;
+#endif
+				if (dp->d_name[0] == '.' &&
+				    ((dp->d_name[1] == 0) ||
+				    ((dp->d_name[1] == '.') && (dp->d_name[2] == 0))))
+					continue;
+				rc = snprintf(path + path_end,
+					       sizeof(path) - path_end,
+					       "/%s/dev", dp->d_name);
+				if (rc < 0 || (unsigned int)rc >= sizeof(path) - path_end)
+					continue;
+
+				if ((f = fopen(path, "r")) == NULL)
+					continue;
+				rc = fscanf(f, "%d:%d", &maj, &min);
+				fclose(f);
+				if (rc != 2)
+					continue;
+
+				disk = makedev(maj, min);
+				if (blkid_devno_to_wholedisk(disk, NULL, 0, &disk))
+					continue;;
+				for (j = nlocks; j > 0; j--) {
+					/* XXX O(n^2) */
+					if (disk == locks[j-1].dev)
+						break;
+				}
+				if (j > 0) /* Don't add duplicates */
+					continue;;
+				if (is_irrotational_disk(disk))
+					continue;;
+				if (allocated >= nlocks) {
+					void *p;
+					if (allocated > INT_MAX/(2*sizeof(locks[0])))
+						continue;;
+					p = realloc(locks, allocated*2*sizeof(locks[0]));
+					if (p == NULL)
+						continue;;
+					locks = p;
+					allocated *= 2;
+				}
+				locks[nlocks++].dev = disk;
+			}
+			closedir(dir);
+		}
+		qsort(locks, nlocks, sizeof(locks[0]), compare_locks);
+		locks = realloc(locks, nlocks*sizeof(locks[0]));
+	} else {
+		inst->lock.dev = disk;
+		locks = &inst->lock;
+		nlocks = 1;
+	}
+	inst->locks = locks;
+	inst->nlocks = nlocks;
+
+	for (i = 0; i < nlocks; i++) {
+		disk = locks[i].dev;
+	diskpath = blkid_devno_to_devname(disk);
+	if (!diskpath) {
+		locks[i].lock = -1;
+		continue;
+	}
+	/* XXX is this necessary? E.g. why not replace with ..."/%d_%d.lock", major(), minor()); */
 	diskname = stripoff_last_component(diskpath);
 	if (!diskname)
 		diskname = diskpath;
 
-	xasprintf(&inst->lockpath, FSCK_RUNTIME_DIRNAME "/%s.lock", diskname);
+	rc = snprintf(path, sizeof(path), FSCK_RUNTIME_DIRNAME "/%s.lock", diskname);
+	if (rc < 0 || (unsigned int)rc >= sizeof(path)) {
+		locks[i].lock = -1;
+		free(diskpath);
+		continue;
+	}
 
 	if (verbose)
-		printf(_("Locking disk by %s ... "), inst->lockpath);
+		printf(_("Locking disk %d:%d by %s ... "),
+			major(disk), minor(disk), path);
 
-	inst->lock = open(inst->lockpath, O_RDONLY|O_CREAT|O_CLOEXEC,
+	locks[i].lock = open(path, O_RDONLY|O_CREAT|O_CLOEXEC,
 				    S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
-	if (inst->lock >= 0) {
+	if (locks[i].lock >= 0) {
 		int rc = -1;
 
 		/* inform users that we're waiting on the lock */
 		if (verbose &&
-		    (rc = flock(inst->lock, LOCK_EX | LOCK_NB)) != 0 &&
+		    (rc = flock(locks[i].lock, LOCK_EX | LOCK_NB)) != 0 &&
 		    errno == EWOULDBLOCK)
 			printf(_("(waiting) "));
 
-		if (rc != 0 && flock(inst->lock, LOCK_EX) != 0) {
-			close(inst->lock);			/* failed */
-			inst->lock = -1;
+		if (rc != 0 && flock(locks[i].lock, LOCK_EX) != 0) {
+			close(locks[i].lock);			/* failed */
+			locks[i].lock = -1;
 		}
 	}
 
 	if (verbose)
 		/* TRANSLATORS: These are followups to "Locking disk...". */
-		printf("%s.\n", inst->lock >= 0 ? _("succeeded") : _("failed"));
+		printf("%s.\n", locks[i].lock >= 0 ? _("succeeded") : _("failed"));
 
-
-done:
-	if (inst->lock < 0) {
-		free(inst->lockpath);
-		inst->lockpath = NULL;
-	}
 	free(diskpath);
+	}
 	return;
 }
 
 static void unlock_disk(struct fsck_instance *inst)
 {
-	if (inst->lock < 0)
-		return;
 
 	if (verbose)
-		printf(_("Unlocking %s.\n"), inst->lockpath);
-
-	close(inst->lock);			/* unlock */
-
-	free(inst->lockpath);
+		printf(_("Unlocking disks.\n"));
 
-	inst->lock = -1;
-	inst->lockpath = NULL;
+	while(inst->nlocks > 0)
+		close(inst->locks[--(inst->nlocks)].lock);	/* unlock */
+	if (inst->locks != &inst->lock)
+		free(inst->locks);
+	inst->locks = NULL;
 }
 
 static void free_instance(struct fsck_instance *i)
@@ -422,7 +520,8 @@ static void free_instance(struct fsck_instance *i)
 	if (lockdisk)
 		unlock_disk(i);
 	free(i->prog);
-	free(i->lockpath);
+	if (i->locks != &i->lock)
+		free(i->locks);
 	mnt_unref_fs(i->fs);
 	free(i);
 	return;
@@ -469,7 +568,7 @@ static void fs_interpret_type(struct libmnt_fs *fs)
 static int parser_errcb(struct libmnt_table *tb __attribute__ ((__unused__)),
 			const char *filename, int line)
 {
-	warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+	warnx(_("%s: parse error at line %d -- ignore"), filename, line);
 	return 1;
 }
 
@@ -666,7 +765,8 @@ static int execute(const char *progname, const char *progpath,
 
 	mnt_ref_fs(fs);
 	inst->fs = fs;
-	inst->lock = -1;
+	inst->locks = NULL;
+	inst->nlocks = 0;
 
 	if (lockdisk)
 		lock_disk(inst);

             reply	other threads:[~2016-04-08 21:40 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-08 21:40 Yuriy M. Kaminskiy [this message]
2016-04-18 10:59 ` [RFC][WIP][PATCH] fsck -l and lvm/dmcrypt/md/etc Karel Zak
2016-04-19 14:42   ` Yuriy M. Kaminskiy
2016-04-19 15:11     ` [RFC][PATCH alt v2] fsck -l: block concurrent fsck for any stacked devices (Re: [RFC][WIP][PATCH] fsck -l and lvm/dmcrypt/md/etc) Yuriy M. Kaminskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m34mbbrd8d.fsf@gmail.com \
    --to=yumkam@gmail.com \
    --cc=util-linux@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox