From: Christoph Hellwig <hch@infradead.org>
To: xfs@oss.sgi.com
Subject: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
Date: Tue, 15 Nov 2011 16:09:53 -0500 [thread overview]
Message-ID: <20111115210953.GA6670@infradead.org> (raw)
Both the prefetch threads and actual repair processing threads can have
multiple buffers at a time locked, but they do no use a common locker
order, which can lead to ABBA deadlocks while trying to lock the buffers.
Switch the prefetch code to do a trylock and skip buffers that have
already been locked to avoid this deadlock.
Reported-by: Arkadiusz Mi??kiewicz <arekm@maven.pl>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: xfsprogs-dev/include/libxfs.h
===================================================================
--- xfsprogs-dev.orig/include/libxfs.h 2011-11-15 20:43:02.513069998 +0100
+++ xfsprogs-dev/include/libxfs.h 2011-11-15 20:43:16.669736580 +0100
@@ -279,27 +279,41 @@ enum xfs_buf_flags_t { /* b_flags bits *
extern struct cache *libxfs_bcache;
extern struct cache_operations libxfs_bcache_operations;
+#define LIBXFS_GETBUF_TRYLOCK (1 << 0)
+
#ifdef XFS_BUF_TRACING
#define libxfs_readbuf(dev, daddr, len, flags) \
- libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, (dev), (daddr), (len), (flags))
+ libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, \
+ (dev), (daddr), (len), (flags))
#define libxfs_writebuf(buf, flags) \
- libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, (buf), (flags))
+ libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, \
+ (buf), (flags))
#define libxfs_getbuf(dev, daddr, len) \
- libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, (dev), (daddr), (len))
+ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+ (dev), (daddr), (len))
+#define libxfs_getbuf_flags(dev, daddr, len, flags) \
+ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+ (dev), (daddr), (len), (flags))
#define libxfs_putbuf(buf) \
- libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
+ libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
-extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int, int);
-extern int libxfs_trace_writebuf(const char *, const char *, int, xfs_buf_t *, int);
+extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int,
+ dev_t, xfs_daddr_t, int, int);
+extern int libxfs_trace_writebuf(const char *, const char *, int,
+ xfs_buf_t *, int);
extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int);
-extern void libxfs_trace_putbuf (const char *, const char *, int, xfs_buf_t *);
+extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
+ dev_t, xfs_daddr_t, int, unsigned int);
+extern void libxfs_trace_putbuf (const char *, const char *, int,
+ xfs_buf_t *);
#else
extern xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
extern int libxfs_writebuf(xfs_buf_t *, int);
extern xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+extern xfs_buf_t *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int);
extern void libxfs_putbuf (xfs_buf_t *);
#endif
Index: xfsprogs-dev/libxfs/rdwr.c
===================================================================
--- xfsprogs-dev.orig/libxfs/rdwr.c 2011-11-15 20:43:02.503069998 +0100
+++ xfsprogs-dev/libxfs/rdwr.c 2011-11-15 20:43:16.669736580 +0100
@@ -195,6 +195,7 @@ libxfs_log_header(
#undef libxfs_readbuf
#undef libxfs_writebuf
#undef libxfs_getbuf
+#undef libxfs_getbuf_flags
#undef libxfs_putbuf
xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
@@ -238,6 +239,19 @@ libxfs_trace_getbuf(const char *func, co
return bp;
}
+xfs_buf_t *
+libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
+ dev_t device, xfs_daddr_t blkno, int len, unsigned long flags)
+{
+ xfs_buf_t *bp = libxfs_getbuf(device, blkno, len, flags);
+
+ bp->b_func = func;
+ bp->b_file = file;
+ bp->b_line = line;
+
+ return bp;
+}
+
void
libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
{
@@ -380,8 +394,8 @@ int lock_buf_count = 0;
extern int use_xfs_buf_lock;
-xfs_buf_t *
-libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+struct xfs_buf *
+libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags)
{
xfs_buf_t *bp;
xfs_bufkey_t key;
@@ -392,28 +406,48 @@ libxfs_getbuf(dev_t device, xfs_daddr_t
key.bblen = len;
miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp);
- if (bp) {
- if (use_xfs_buf_lock)
+ if (!bp)
+ return NULL;
+
+ if (use_xfs_buf_lock) {
+ if (flags & LIBXFS_GETBUF_TRYLOCK) {
+ int ret;
+
+ ret = pthread_mutex_trylock(&bp->b_lock);
+ if (ret) {
+ ASSERT(ret == EAGAIN);
+ cache_node_put(libxfs_bcache, (struct cache_node *)bp);
+ return NULL;
+ }
+ } else {
pthread_mutex_lock(&bp->b_lock);
- cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
- cache_node_get_priority((struct cache_node *)bp) -
+ }
+ }
+
+ cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
+ cache_node_get_priority((struct cache_node *)bp) -
CACHE_PREFETCH_PRIORITY);
#ifdef XFS_BUF_TRACING
- pthread_mutex_lock(&libxfs_bcache->c_mutex);
- lock_buf_count++;
- list_add(&bp->b_lock_list, &lock_buf_list);
- pthread_mutex_unlock(&libxfs_bcache->c_mutex);
+ pthread_mutex_lock(&libxfs_bcache->c_mutex);
+ lock_buf_count++;
+ list_add(&bp->b_lock_list, &lock_buf_list);
+ pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
#ifdef IO_DEBUG
- printf("%lx %s: %s buffer %p for bno = %llu\n",
- pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
- bp, (long long)LIBXFS_BBTOOFF64(blkno));
+ printf("%lx %s: %s buffer %p for bno = %llu\n",
+ pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
+ bp, (long long)LIBXFS_BBTOOFF64(blkno));
#endif
- }
return bp;
}
+struct xfs_buf *
+libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+{
+ return libxfs_getbuf_flags(device, blkno, len, 0);
+}
+
void
libxfs_putbuf(xfs_buf_t *bp)
{
Index: xfsprogs-dev/repair/prefetch.c
===================================================================
--- xfsprogs-dev.orig/repair/prefetch.c 2011-11-15 20:44:30.903069469 +0100
+++ xfsprogs-dev/repair/prefetch.c 2011-11-15 20:48:23.073068083 +0100
@@ -112,8 +112,17 @@ pf_queue_io(
{
xfs_buf_t *bp;
- bp = libxfs_getbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
- XFS_FSB_TO_BB(mp, blen));
+ /*
+ * Never block on a buffer lock here, given that the actual repair
+ * code might lock buffers in a different order from us. Given that
+ * the lock holder is either reading it from disk himself or
+ * completely overwriting it this behaviour is perfectly fine.
+ */
+ bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+ XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK);
+ if (!bp)
+ return;
+
if (bp->b_flags & LIBXFS_B_UPTODATE) {
if (B_IS_INODE(flag))
pf_read_inode_dirs(args, bp);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next reply other threads:[~2011-11-15 21:09 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-15 21:09 Christoph Hellwig [this message]
2011-11-17 4:25 ` [PATCH] repair: avoid ABBA deadlocks on prefetched buffers Dave Chinner
2011-11-18 8:44 ` Arkadiusz Miśkiewicz
2011-11-22 22:46 ` Christoph Hellwig
2011-11-23 17:27 ` Arkadiusz Miśkiewicz
2012-01-13 20:09 ` Mark Tinguely
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111115210953.GA6670@infradead.org \
--to=hch@infradead.org \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.