* [PATCH] libmultipath: don't lock block device but use lock files @ 2015-05-23 21:00 Christian Seiler 2015-05-24 7:07 ` Hannes Reinecke 0 siblings, 1 reply; 5+ messages in thread From: Christian Seiler @ 2015-05-23 21:00 UTC (permalink / raw) To: dm-devel In recent versions udev uses flock() on the device node itself, breaking libmultipath's current locking logic. Since libmultipath doesn't actually modify anything on the device itself (and hence does not actually need an exclusive lock on the device node, unlike e.g. tools that create partitions etc.), and the reason for the lock is to guard against multipath interfering with multipathd, use lock files (named after the devices) in /run/lock/multipath instead. See the discussion under: https://www.redhat.com/archives/dm-devel/2014-December/msg00083.html Especially: https://www.redhat.com/archives/dm-devel/2014-December/msg00117.html Signed-off-by: Christian Seiler <christian@iwakd.de> --- libmultipath/configure.c | 45 +++++++++++++++++++++++++++++++++++++++++---- libmultipath/configure.h | 2 ++ libmultipath/structs.c | 7 +++++++ libmultipath/structs.h | 3 +++ 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/libmultipath/configure.c b/libmultipath/configure.c index 24ad948..f77f97f 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -534,6 +534,8 @@ lock_multipath (struct multipath * mpp, int lock) struct path * pp; int i, j; int x, y; + char *ptr; + size_t len; if (!mpp || !mpp->pg) return 0; @@ -542,11 +544,45 @@ lock_multipath (struct multipath * mpp, int lock) if (!pgp->paths) continue; vector_foreach_slot(pgp->paths, pp, j) { - if (lock && flock(pp->fd, LOCK_EX | LOCK_NB) && + if (!pp->lock_path) { + len = sizeof(CONFIGURE_LOCK_DIRECTORY) + strlen(udev_device_get_devnode(pp->udev)) + 1; + pp->lock_path = MALLOC(len); + if (!pp->lock_path) { + condlog(0, "couldn't allocate lock path"); + goto fail; + } + if (safe_snprintf(pp->lock_path, len, "%s/%s", + CONFIGURE_LOCK_DIRECTORY, udev_device_get_devnode(pp->udev))) { + condlog(0, "couldn't allocate lock path"); + FREE(pp->lock_path); + pp->lock_path = NULL; + goto fail; + } + for (ptr = pp->lock_path + sizeof(CONFIGURE_LOCK_DIRECTORY); *ptr; ptr++) { + if ((*ptr < '0' || *ptr > '9') && (*ptr < 'A' || *ptr > 'Z') && + (*ptr < 'a' || *ptr > 'z') && *ptr != '-' && *ptr != '_' && *ptr != '.') + *ptr = '_'; + } + } + if (pp->lock_fd < 0) { + pp->lock_fd = open(pp->lock_path, O_RDWR | O_CREAT, 0600); + if (pp->lock_fd < 0 && errno == ENOENT) { + if (mkdir(CONFIGURE_LOCK_DIRECTORY, 0700) < 0) { + condlog(0, "%s: couldn't create lock directory", CONFIGURE_LOCK_DIRECTORY); + goto fail; + } + pp->lock_fd = open(pp->lock_path, O_RDWR | O_CREAT, 0600); + } + if (pp->lock_fd < 0) { + condlog(0, "%s: couldn't open lock file", pp->lock_path); + goto fail; + } + } + if (lock && flock(pp->lock_fd, LOCK_EX | LOCK_NB) && errno == EWOULDBLOCK) goto fail; - else if (!lock) - flock(pp->fd, LOCK_UN); + else if (!lock && pp->lock_fd >= 0) + flock(pp->lock_fd, LOCK_UN); } } return 0; @@ -559,7 +595,8 @@ fail: vector_foreach_slot(pgp->paths, pp, y) { if (x == i && y >= j) return 1; - flock(pp->fd, LOCK_UN); + if (pp->lock_fd >= 0) + flock(pp->fd, LOCK_UN); } } return 1; diff --git a/libmultipath/configure.h b/libmultipath/configure.h index c014b55..0415453 100644 --- a/libmultipath/configure.h +++ b/libmultipath/configure.h @@ -24,6 +24,8 @@ enum actions { #define FLUSH_ONE 1 #define FLUSH_ALL 2 +#define CONFIGURE_LOCK_DIRECTORY "/run/lock/multipath" + int setup_map (struct multipath * mpp, char * params, int params_size ); int domap (struct multipath * mpp, char * params); int reinstate_paths (struct multipath *mpp); diff --git a/libmultipath/structs.c b/libmultipath/structs.c index 0538327..ca7bcb5 100644 --- a/libmultipath/structs.c +++ b/libmultipath/structs.c @@ -95,6 +95,7 @@ alloc_path (void) pp->sg_id.scsi_id = -1; pp->sg_id.lun = -1; pp->fd = -1; + pp->lock_fd = -1; pp->priority = PRIO_UNDEF; } return pp; @@ -115,6 +116,12 @@ free_path (struct path * pp) if (pp->fd >= 0) close(pp->fd); + if (pp->lock_fd >= 0) + close(pp->lock_fd); + + if (pp->lock_path) + free(pp->lock_path); + if (pp->udev) { udev_device_unref(pp->udev); pp->udev = NULL; diff --git a/libmultipath/structs.h b/libmultipath/structs.h index c1fce04..916aea5 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -204,6 +204,9 @@ struct path { /* configlet pointers */ struct hwentry * hwe; + + int lock_fd; + char * lock_path; }; typedef int (pgpolicyfn) (struct multipath *); -- 2.1.4 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] libmultipath: don't lock block device but use lock files 2015-05-23 21:00 [PATCH] libmultipath: don't lock block device but use lock files Christian Seiler @ 2015-05-24 7:07 ` Hannes Reinecke 2015-05-24 8:05 ` Christian Seiler 0 siblings, 1 reply; 5+ messages in thread From: Hannes Reinecke @ 2015-05-24 7:07 UTC (permalink / raw) To: Christian Seiler, dm-devel [-- Attachment #1: Type: text/plain, Size: 1136 bytes --] On 05/23/2015 11:00 PM, Christian Seiler wrote: > In recent versions udev uses flock() on the device node itself, > breaking libmultipath's current locking logic. Since libmultipath > doesn't actually modify anything on the device itself (and hence does > not actually need an exclusive lock on the device node, unlike e.g. > tools that create partitions etc.), and the reason for the lock is to > guard against multipath interfering with multipathd, use lock files > (named after the devices) in /run/lock/multipath instead. > > See the discussion under: > https://www.redhat.com/archives/dm-devel/2014-December/msg00083.html > Especially: > https://www.redhat.com/archives/dm-devel/2014-December/msg00117.html > > Signed-off-by: Christian Seiler <christian@iwakd.de> > > --- Well ... couldn't you just use a shared lock here? I thought to have send this patch already; hmm. Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage hare@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) [-- Attachment #2: multipath-shared-lock.patch --] [-- Type: text/x-patch, Size: 1538 bytes --] From 841977fc9c3432702c296d6239e4a54291a6007a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke <hare@suse.de> Date: Tue, 24 Jun 2014 08:49:15 +0200 Subject: [PATCH] libmultipath: use a shared lock to co-operate with udev udev since v214 is placing a shared lock on the device node whenever it's processing the event. This introduces a race condition with multipathd, as multipathd is processing the event for the block device at the same time as udev is processing the events for the partitions. And a lock on the partitions will also be visible on the block device itself, hence multipathd won't be able to lock the device. When multipath manages to take a lock on the device, udev will fail, and consequently ignore this entire event. Which in turn might cause the system to malfunction as it might have been a crucial event like 'remove' or 'link down'. So we should better use LOCK_SH here; with that the flock call in multipathd _and_ udev will succeed and the events can be processed. References: bnc#883878 Signed-off-by: Hannes Reinecke <hare@suse.de> diff --git a/libmultipath/configure.c b/libmultipath/configure.c index 0ddd3d5..dc2ebf0 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -529,7 +529,7 @@ lock_multipath (struct multipath * mpp, int lock) if (!pgp->paths) continue; vector_foreach_slot(pgp->paths, pp, j) { - if (lock && flock(pp->fd, LOCK_EX | LOCK_NB) && + if (lock && flock(pp->fd, LOCK_SH | LOCK_NB) && errno == EWOULDBLOCK) goto fail; else if (!lock) [-- Attachment #3: Type: text/plain, Size: 0 bytes --] ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] libmultipath: don't lock block device but use lock files 2015-05-24 7:07 ` Hannes Reinecke @ 2015-05-24 8:05 ` Christian Seiler 2015-06-02 12:09 ` Christian Seiler 0 siblings, 1 reply; 5+ messages in thread From: Christian Seiler @ 2015-05-24 8:05 UTC (permalink / raw) To: device-mapper development [-- Attachment #1: Type: text/plain, Size: 3132 bytes --] Thanks for your quick response! On 05/24/2015 09:07 AM, Hannes Reinecke wrote: > On 05/23/2015 11:00 PM, Christian Seiler wrote: >> In recent versions udev uses flock() on the device node itself, >> breaking libmultipath's current locking logic. Since libmultipath >> doesn't actually modify anything on the device itself (and hence does >> not actually need an exclusive lock on the device node, unlike e.g. >> tools that create partitions etc.), and the reason for the lock is to >> guard against multipath interfering with multipathd, use lock files >> (named after the devices) in /run/lock/multipath instead. >> >> See the discussion under: >> https://www.redhat.com/archives/dm-devel/2014-December/msg00083.html >> Especially: >> https://www.redhat.com/archives/dm-devel/2014-December/msg00117.html >> >> Signed-off-by: Christian Seiler <christian@iwakd.de> >> >> --- > Well ... couldn't you just use a shared lock here? I think you misunderstand the nature of the lock here: the lock is in place to serialize different libmultipath instances against each other, i.e. two multipath programs running at the same time or multipath called at the same time multipathd is running. As was mentioned in the mail I linked in the commit message of my patch: it was introduced in commit [1] with a description of the problem. So you definitely need an exclusive lock to serialize libmultipath against other instances of it. Because with a shared lock, you don't gain anything, that can always be taken, so it will always succeed. But since udev uses a shared lock itself nowadays, taking it out on the device node itself is not going to work, so that's why my patch resorts to using lock files. Of course, _additionally_ taking out a shared lock on the device might be advisable if you want to protect against tools that modify devices, such as partitioning tools etc. (This is the reason why udev takes out a shared lock: to make sure that when processing events and determining the contents of the device, problems with tools that modify the partition table etc. don't occur.) But that's not really necessary for multipath, because libmultipath only opens the device to do some information gathering on the device (INQUIRY command, determine geometry, etc.), but doesn't really care about the contents (there's never a see/read done on the fd). That said, I've noticed that my patch might cause multipathd to have the lock file fds open unnecessarily, so I've updated it to close them on regular unlocking (but not on failure, because that means there will be a retry). Version 2 is attached. > I thought to have send this patch already; hmm. I didn't see that, sorry. (I started looking at this problem in-depth yesterday, found the thread in December, noticed no followup there in the archive's web interface, noticed no commit in git regarding this issue and hence checked out the git source code for the first time and started working on the patch.) But as I elaborated: I don't think that helps. Christian [1] http://git.opensvc.com/gitweb.cgi?p=multipath-tools/.git;a=commit;h=4bc295cef7a50ea46bfc7b98c65848babf56c822 [-- Warning: decoded text below may be mangled, UTF-8 assumed --] [-- Attachment #2: 0001-libmultipath-don-t-lock-block-device-but-use-lock-fi.patch --] [-- Type: text/x-diff; name="0001-libmultipath-don-t-lock-block-device-but-use-lock-fi.patch", Size: 5209 bytes --] From 75d877f1161d4bd33dea2b57ce2ef371121e94e3 Mon Sep 17 00:00:00 2001 From: Christian Seiler <christian@iwakd.de> Date: Sat, 23 May 2015 22:57:31 +0200 Subject: [PATCH v2] libmultipath: don't lock block device but use lock files In recent versions udev uses flock() on the device node itself, breaking libmultipath's current locking logic. Since libmultipath doesn't actually modify anything on the device itself (and hence does not actually need an exclusive lock on the device node, unlike e.g. tools that create partitions etc.), and the reason for the lock is to guard against multipath interfering with multipathd, use lock files (named after the devices) in /run/lock/multipath instead. See the discussion under: https://www.redhat.com/archives/dm-devel/2014-December/msg00083.html Especially: https://www.redhat.com/archives/dm-devel/2014-December/msg00117.html Signed-off-by: Christian Seiler <christian@iwakd.de> --- v2: don't keep lock fds open after regular unlocking (multipathd is long- lived) libmultipath/configure.c | 50 ++++++++++++++++++++++++++++++++++++++++++++---- libmultipath/configure.h | 2 ++ libmultipath/structs.c | 7 +++++++ libmultipath/structs.h | 3 +++ 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/libmultipath/configure.c b/libmultipath/configure.c index 24ad948..5bcd6d9 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -534,6 +534,8 @@ lock_multipath (struct multipath * mpp, int lock) struct path * pp; int i, j; int x, y; + char *ptr; + size_t len; if (!mpp || !mpp->pg) return 0; @@ -542,11 +544,48 @@ lock_multipath (struct multipath * mpp, int lock) if (!pgp->paths) continue; vector_foreach_slot(pgp->paths, pp, j) { - if (lock && flock(pp->fd, LOCK_EX | LOCK_NB) && + if (!pp->lock_path) { + len = sizeof(CONFIGURE_LOCK_DIRECTORY) + strlen(udev_device_get_devnode(pp->udev)) + 1; + pp->lock_path = MALLOC(len); + if (!pp->lock_path) { + condlog(0, "couldn't allocate lock path"); + goto fail; + } + if (safe_snprintf(pp->lock_path, len, "%s/%s", + CONFIGURE_LOCK_DIRECTORY, udev_device_get_devnode(pp->udev))) { + condlog(0, "couldn't allocate lock path"); + FREE(pp->lock_path); + pp->lock_path = NULL; + goto fail; + } + for (ptr = pp->lock_path + sizeof(CONFIGURE_LOCK_DIRECTORY); *ptr; ptr++) { + if ((*ptr < '0' || *ptr > '9') && (*ptr < 'A' || *ptr > 'Z') && + (*ptr < 'a' || *ptr > 'z') && *ptr != '-' && *ptr != '_' && *ptr != '.') + *ptr = '_'; + } + } + if (pp->lock_fd < 0) { + pp->lock_fd = open(pp->lock_path, O_RDWR | O_CREAT, 0600); + if (pp->lock_fd < 0 && errno == ENOENT) { + if (mkdir(CONFIGURE_LOCK_DIRECTORY, 0700) < 0) { + condlog(0, "%s: couldn't create lock directory", CONFIGURE_LOCK_DIRECTORY); + goto fail; + } + pp->lock_fd = open(pp->lock_path, O_RDWR | O_CREAT, 0600); + } + if (pp->lock_fd < 0) { + condlog(0, "%s: couldn't open lock file", pp->lock_path); + goto fail; + } + } + if (lock && flock(pp->lock_fd, LOCK_EX | LOCK_NB) && errno == EWOULDBLOCK) goto fail; - else if (!lock) - flock(pp->fd, LOCK_UN); + else if (!lock && pp->lock_fd >= 0) { + flock(pp->lock_fd, LOCK_UN); + close(pp->lock_fd); + pp->lock_fd = -1; + } } } return 0; @@ -559,7 +598,10 @@ fail: vector_foreach_slot(pgp->paths, pp, y) { if (x == i && y >= j) return 1; - flock(pp->fd, LOCK_UN); + /* don't close the lock fd in failure case, we will want + * to retry later */ + if (pp->lock_fd >= 0) + flock(pp->fd, LOCK_UN); } } return 1; diff --git a/libmultipath/configure.h b/libmultipath/configure.h index c014b55..0415453 100644 --- a/libmultipath/configure.h +++ b/libmultipath/configure.h @@ -24,6 +24,8 @@ enum actions { #define FLUSH_ONE 1 #define FLUSH_ALL 2 +#define CONFIGURE_LOCK_DIRECTORY "/run/lock/multipath" + int setup_map (struct multipath * mpp, char * params, int params_size ); int domap (struct multipath * mpp, char * params); int reinstate_paths (struct multipath *mpp); diff --git a/libmultipath/structs.c b/libmultipath/structs.c index 0538327..ca7bcb5 100644 --- a/libmultipath/structs.c +++ b/libmultipath/structs.c @@ -95,6 +95,7 @@ alloc_path (void) pp->sg_id.scsi_id = -1; pp->sg_id.lun = -1; pp->fd = -1; + pp->lock_fd = -1; pp->priority = PRIO_UNDEF; } return pp; @@ -115,6 +116,12 @@ free_path (struct path * pp) if (pp->fd >= 0) close(pp->fd); + if (pp->lock_fd >= 0) + close(pp->lock_fd); + + if (pp->lock_path) + free(pp->lock_path); + if (pp->udev) { udev_device_unref(pp->udev); pp->udev = NULL; diff --git a/libmultipath/structs.h b/libmultipath/structs.h index c1fce04..916aea5 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -204,6 +204,9 @@ struct path { /* configlet pointers */ struct hwentry * hwe; + + int lock_fd; + char * lock_path; }; typedef int (pgpolicyfn) (struct multipath *); -- 2.1.4 [-- Attachment #3: Type: text/plain, Size: 0 bytes --] ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] libmultipath: don't lock block device but use lock files 2015-05-24 8:05 ` Christian Seiler @ 2015-06-02 12:09 ` Christian Seiler 2015-06-03 11:22 ` Hannes Reinecke 0 siblings, 1 reply; 5+ messages in thread From: Christian Seiler @ 2015-06-02 12:09 UTC (permalink / raw) To: device-mapper development Am 2015-05-24 10:05, schrieb Christian Seiler: > That said, I've noticed that my patch might cause multipathd to have > the lock file fds open unnecessarily, so I've updated it to close > them > on regular unlocking (but not on failure, because that means there > will > be a retry). Version 2 is attached. Ping? Christian ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] libmultipath: don't lock block device but use lock files 2015-06-02 12:09 ` Christian Seiler @ 2015-06-03 11:22 ` Hannes Reinecke 0 siblings, 0 replies; 5+ messages in thread From: Hannes Reinecke @ 2015-06-03 11:22 UTC (permalink / raw) To: dm-devel On 06/02/2015 02:09 PM, Christian Seiler wrote: > Am 2015-05-24 10:05, schrieb Christian Seiler: >> That said, I've noticed that my patch might cause multipathd to have >> the lock file fds open unnecessarily, so I've updated it to close them >> on regular unlocking (but not on failure, because that means there will >> be a retry). Version 2 is attached. > > Ping? > Probably I'm showing my ignorance here, but do we _really_ need this? Is this really an issue? Personally I've never had any issue with locking multipathd against multipath; plus I'm not aware of any scenario which uses 'multipath' on a regular basis. All scripts I know of rely on multipathd to maintain the path states, and 'multipath' is only ever used to query the state of the devices. Using 'multipath' to maintain the paths themselves are typically used only manually, so any failure can easily be overcome. Do you have an example where this patch is required? Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage hare@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-06-03 11:22 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2015-05-23 21:00 [PATCH] libmultipath: don't lock block device but use lock files Christian Seiler 2015-05-24 7:07 ` Hannes Reinecke 2015-05-24 8:05 ` Christian Seiler 2015-06-02 12:09 ` Christian Seiler 2015-06-03 11:22 ` Hannes Reinecke
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.