* [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
@ 2011-11-15 21:09 Christoph Hellwig
2011-11-17 4:25 ` Dave Chinner
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Christoph Hellwig @ 2011-11-15 21:09 UTC (permalink / raw)
To: xfs
Both the prefetch threads and actual repair processing threads can have
multiple buffers locked at a time, but they do not use a common lock
order, which can lead to ABBA deadlocks while trying to lock the buffers.
Switch the prefetch code to do a trylock and skip buffers that have
already been locked to avoid this deadlock.
Reported-by: Arkadiusz Miśkiewicz <arekm@maven.pl>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: xfsprogs-dev/include/libxfs.h
===================================================================
--- xfsprogs-dev.orig/include/libxfs.h 2011-11-15 20:43:02.513069998 +0100
+++ xfsprogs-dev/include/libxfs.h 2011-11-15 20:43:16.669736580 +0100
@@ -279,27 +279,41 @@ enum xfs_buf_flags_t { /* b_flags bits *
extern struct cache *libxfs_bcache;
extern struct cache_operations libxfs_bcache_operations;
+#define LIBXFS_GETBUF_TRYLOCK (1 << 0)
+
#ifdef XFS_BUF_TRACING
#define libxfs_readbuf(dev, daddr, len, flags) \
- libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, (dev), (daddr), (len), (flags))
+ libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, \
+ (dev), (daddr), (len), (flags))
#define libxfs_writebuf(buf, flags) \
- libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, (buf), (flags))
+ libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, \
+ (buf), (flags))
#define libxfs_getbuf(dev, daddr, len) \
- libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, (dev), (daddr), (len))
+ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+ (dev), (daddr), (len))
+#define libxfs_getbuf_flags(dev, daddr, len, flags) \
+ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+ (dev), (daddr), (len), (flags))
#define libxfs_putbuf(buf) \
- libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
+ libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
-extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int, int);
-extern int libxfs_trace_writebuf(const char *, const char *, int, xfs_buf_t *, int);
+extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int,
+ dev_t, xfs_daddr_t, int, int);
+extern int libxfs_trace_writebuf(const char *, const char *, int,
+ xfs_buf_t *, int);
extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, dev_t, xfs_daddr_t, int);
-extern void libxfs_trace_putbuf (const char *, const char *, int, xfs_buf_t *);
+extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
+ dev_t, xfs_daddr_t, int, unsigned int);
+extern void libxfs_trace_putbuf (const char *, const char *, int,
+ xfs_buf_t *);
#else
extern xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
extern int libxfs_writebuf(xfs_buf_t *, int);
extern xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+extern xfs_buf_t *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int);
extern void libxfs_putbuf (xfs_buf_t *);
#endif
Index: xfsprogs-dev/libxfs/rdwr.c
===================================================================
--- xfsprogs-dev.orig/libxfs/rdwr.c 2011-11-15 20:43:02.503069998 +0100
+++ xfsprogs-dev/libxfs/rdwr.c 2011-11-15 20:43:16.669736580 +0100
@@ -195,6 +195,7 @@ libxfs_log_header(
#undef libxfs_readbuf
#undef libxfs_writebuf
#undef libxfs_getbuf
+#undef libxfs_getbuf_flags
#undef libxfs_putbuf
xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
@@ -238,6 +239,19 @@ libxfs_trace_getbuf(const char *func, co
return bp;
}
+xfs_buf_t *
+libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
+ dev_t device, xfs_daddr_t blkno, int len, unsigned long flags)
+{
+ xfs_buf_t *bp = libxfs_getbuf(device, blkno, len, flags);
+
+ bp->b_func = func;
+ bp->b_file = file;
+ bp->b_line = line;
+
+ return bp;
+}
+
void
libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t *bp)
{
@@ -380,8 +394,8 @@ int lock_buf_count = 0;
extern int use_xfs_buf_lock;
-xfs_buf_t *
-libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+struct xfs_buf *
+libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int flags)
{
xfs_buf_t *bp;
xfs_bufkey_t key;
@@ -392,28 +406,48 @@ libxfs_getbuf(dev_t device, xfs_daddr_t
key.bblen = len;
miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp);
- if (bp) {
- if (use_xfs_buf_lock)
+ if (!bp)
+ return NULL;
+
+ if (use_xfs_buf_lock) {
+ if (flags & LIBXFS_GETBUF_TRYLOCK) {
+ int ret;
+
+ ret = pthread_mutex_trylock(&bp->b_lock);
+ if (ret) {
+ ASSERT(ret == EAGAIN);
+ cache_node_put(libxfs_bcache, (struct cache_node *)bp);
+ return NULL;
+ }
+ } else {
pthread_mutex_lock(&bp->b_lock);
- cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
- cache_node_get_priority((struct cache_node *)bp) -
+ }
+ }
+
+ cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
+ cache_node_get_priority((struct cache_node *)bp) -
CACHE_PREFETCH_PRIORITY);
#ifdef XFS_BUF_TRACING
- pthread_mutex_lock(&libxfs_bcache->c_mutex);
- lock_buf_count++;
- list_add(&bp->b_lock_list, &lock_buf_list);
- pthread_mutex_unlock(&libxfs_bcache->c_mutex);
+ pthread_mutex_lock(&libxfs_bcache->c_mutex);
+ lock_buf_count++;
+ list_add(&bp->b_lock_list, &lock_buf_list);
+ pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
#ifdef IO_DEBUG
- printf("%lx %s: %s buffer %p for bno = %llu\n",
- pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
- bp, (long long)LIBXFS_BBTOOFF64(blkno));
+ printf("%lx %s: %s buffer %p for bno = %llu\n",
+ pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
+ bp, (long long)LIBXFS_BBTOOFF64(blkno));
#endif
- }
return bp;
}
+struct xfs_buf *
+libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+{
+ return libxfs_getbuf_flags(device, blkno, len, 0);
+}
+
void
libxfs_putbuf(xfs_buf_t *bp)
{
Index: xfsprogs-dev/repair/prefetch.c
===================================================================
--- xfsprogs-dev.orig/repair/prefetch.c 2011-11-15 20:44:30.903069469 +0100
+++ xfsprogs-dev/repair/prefetch.c 2011-11-15 20:48:23.073068083 +0100
@@ -112,8 +112,17 @@ pf_queue_io(
{
xfs_buf_t *bp;
- bp = libxfs_getbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
- XFS_FSB_TO_BB(mp, blen));
+ /*
+ * Never block on a buffer lock here, given that the actual repair
+ * code might lock buffers in a different order from us. Given that
+ * the lock holder is either reading it from disk himself or
+ * completely overwriting it this behaviour is perfectly fine.
+ */
+ bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+ XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK);
+ if (!bp)
+ return;
+
if (bp->b_flags & LIBXFS_B_UPTODATE) {
if (B_IS_INODE(flag))
pf_read_inode_dirs(args, bp);
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
2011-11-15 21:09 [PATCH] repair: avoid ABBA deadlocks on prefetched buffers Christoph Hellwig
@ 2011-11-17 4:25 ` Dave Chinner
2011-11-18 8:44 ` Arkadiusz Miśkiewicz
2012-01-13 20:09 ` Mark Tinguely
2 siblings, 0 replies; 6+ messages in thread
From: Dave Chinner @ 2011-11-17 4:25 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tue, Nov 15, 2011 at 04:09:53PM -0500, Christoph Hellwig wrote:
> Both the prefetch threads and actual repair processing threads can have
> multiple buffers at a time locked, but they do no use a common locker
> order, which can lead to ABBA deadlocks while trying to lock the buffers.
>
> Switch the prefetch code to do a trylock and skip buffers that have
> already been locked to avoid this deadlock.
>
> Reported-by: Arkadiusz Miśkiewicz <arekm@maven.pl>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Looks sane to me.
Reviewed-by: Dave Chinner <dchinner@redhat.com>
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
2011-11-15 21:09 [PATCH] repair: avoid ABBA deadlocks on prefetched buffers Christoph Hellwig
2011-11-17 4:25 ` Dave Chinner
@ 2011-11-18 8:44 ` Arkadiusz Miśkiewicz
2011-11-22 22:46 ` Christoph Hellwig
2012-01-13 20:09 ` Mark Tinguely
2 siblings, 1 reply; 6+ messages in thread
From: Arkadiusz Miśkiewicz @ 2011-11-18 8:44 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tuesday 15 of November 2011, Christoph Hellwig wrote:
> Both the prefetch threads and actual repair processing threads can have
> multiple buffers at a time locked, but they do no use a common locker
> order, which can lead to ABBA deadlocks while trying to lock the buffers.
There is still some issue with deadlocking.
The last printed messages:
błędna liczba magiczna 0x41425443 w bloku inobt 2/1438099
błędna liczba magiczna 0x41425443 w bloku inobt 2/1438196
błędna liczba magiczna 0x41425443 w bloku inobt 2/1438732
(invalid magic number ... in block inobt ...)
# gdb ./xfs_repair_tcmalloc `pidof xfs_repair_tcmalloc`
GNU gdb (GDB) 7.3.1-1 (PLD Linux)
Copyright (C) 2011 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-pld-linux".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /root/xfs_repair_tcmalloc...done.
Attaching to program: /root/xfs_repair_tcmalloc, process 21440
Reading symbols from /lib64/libuuid.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/libuuid.so.1
Reading symbols from /lib64/librt.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/librt.so.1
Reading symbols from /lib64/libtcmalloc_minimal.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib64/libtcmalloc_minimal.so.0
Reading symbols from /lib64/libpthread.so.0...(no debugging symbols found)...done.
[Thread debugging using libthread_db enabled]
[New Thread 0x7fdf93a73700 (LWP 21462)]
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /usr/lib64/libstdc++.so.6...(no debugging symbols found)...done.
Loaded symbols for /usr/lib64/libstdc++.so.6
Reading symbols from /lib64/libm.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libgcc_s.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/libgcc_s.so.1
0x00007fdf9c2c21bf in pthread_join () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fdf9c2c21bf in pthread_join () from /lib64/libpthread.so.0
#1 0x000000000042dd8f in destroy_work_queue (wq=0x7fff62659180) at threads.c:146
#2 0x000000000042d89f in scan_ags (mp=0x7fff62659300, scan_threads=<optimized out>) at scan.c:1353
#3 0x000000000041b68e in phase2 (mp=0x7fff62659300, scan_threads=32) at phase2.c:142
#4 0x0000000000402bd6 in main (argc=<optimized out>, argv=<optimized out>) at xfs_repair.c:747
(gdb) info threads
Id Target Id Frame
2 Thread 0x7fdf93a73700 (LWP 21462) "xfs_repair_tcma" 0x00007fdf9c2c78e4 in __lll_lock_wait () from /lib64/libpthread.so.0
* 1 Thread 0x7fdf9cbba760 (LWP 21440) "xfs_repair_tcma" 0x00007fdf9c2c21bf in pthread_join () from /lib64/libpthread.so.0
(gdb) thread 2
[Switching to thread 2 (Thread 0x7fdf93a73700 (LWP 21462))]
#0 0x00007fdf9c2c78e4 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fdf9c2c78e4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007fdf9c2c31b5 in _L_lock_883 () from /lib64/libpthread.so.0
#2 0x00007fdf9c2c300a in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00000000004334ba in libxfs_getbuf_flags (device=<optimized out>, blkno=<optimized out>, len=<optimized out>, flags=<optimized out>) at rdwr.c:423
#4 0x000000000043370e in libxfs_readbuf (dev=65024, blkno=4294967344, len=8, flags=0) at rdwr.c:530
#5 0x000000000042b44f in scan_sbtree (root=8, nlevels=25160588, agno=2, suspect=1, func=0x42c5d0 <scanfunc_ino>, isroot=<optimized out>, priv=0x7143f0)
at scan.c:90
#6 0x000000000042ccdd in scanfunc_ino (block=<optimized out>, level=25160588, bno=<optimized out>, agno=2, suspect=1, isroot=1, priv=0x7143f0)
at scan.c:1037
#7 0x000000000042b476 in scan_sbtree (root=8, nlevels=25160589, agno=2, suspect=0, func=0x42c5d0 <scanfunc_ino>, isroot=<optimized out>, priv=0x7143f0)
at scan.c:96
#8 0x000000000042c3a8 in validate_agi (agcnts=0x7143f0, agno=2, agi=0x783a00) at scan.c:1151
#9 scan_ag (wq=<optimized out>, agno=2, arg=0x7143f0) at scan.c:1293
#10 0x000000000042da4a in worker_thread (arg=0x7fff62659180) at threads.c:46
#11 0x00007fdf9c2c0ed5 in start_thread () from /lib64/libpthread.so.0
#12 0x00007fdf9ba7de5d in clone () from /lib64/libc.so.6
#13 0x0000000000000000 in ?? ()
(gdb)
--
Arkadiusz Miśkiewicz PLD/Linux Team
arekm / maven.pl http://ftp.pld-linux.org/
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
2011-11-18 8:44 ` Arkadiusz Miśkiewicz
@ 2011-11-22 22:46 ` Christoph Hellwig
2011-11-23 17:27 ` Arkadiusz Miśkiewicz
0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2011-11-22 22:46 UTC (permalink / raw)
To: Arkadiusz Miśkiewicz; +Cc: Christoph Hellwig, xfs
On Fri, Nov 18, 2011 at 09:44:09AM +0100, Arkadiusz Miśkiewicz wrote:
> On Tuesday 15 of November 2011, Christoph Hellwig wrote:
> > Both the prefetch threads and actual repair processing threads can have
> > multiple buffers at a time locked, but they do no use a common locker
> > order, which can lead to ABBA deadlocks while trying to lock the buffers.
>
> There is still some issue with deadlocking.
>
> The last printed messages:
> błędna liczba magiczna 0x41425443 w bloku inobt 2/1438099
> błędna liczba magiczna 0x41425443 w bloku inobt 2/1438196
> błędna liczba magiczna 0x41425443 w bloku inobt 2/1438732
> (invalid magic number ... in block inobt ...)
It looks like you have a circular loop in the inobt tree, and repair
deadlocks trying to read the same node again. Below is a patch working
around that by allowing recursive locking for the buffer lock and then
letting the normal two strikes and out policy apply. I'm not overly
proud of the patch, but in the short term I can't think of anything
better.
Index: xfsprogs-dev/include/libxfs.h
===================================================================
--- xfsprogs-dev.orig/include/libxfs.h 2011-11-22 22:28:23.000000000 +0000
+++ xfsprogs-dev/include/libxfs.h 2011-11-22 22:34:27.000000000 +0000
@@ -226,6 +226,8 @@ typedef struct xfs_buf {
unsigned b_bcount;
dev_t b_dev;
pthread_mutex_t b_lock;
+ pthread_t b_holder;
+ unsigned int b_recur;
void *b_fsprivate;
void *b_fsprivate2;
void *b_fsprivate3;
Index: xfsprogs-dev/libxfs/rdwr.c
===================================================================
--- xfsprogs-dev.orig/libxfs/rdwr.c 2011-11-22 22:28:23.000000000 +0000
+++ xfsprogs-dev/libxfs/rdwr.c 2011-11-22 22:40:01.000000000 +0000
@@ -342,6 +342,8 @@ libxfs_initbuf(xfs_buf_t *bp, dev_t devi
list_head_init(&bp->b_lock_list);
#endif
pthread_mutex_init(&bp->b_lock, NULL);
+ bp->b_holder = 0;
+ bp->b_recur = 0;
}
xfs_buf_t *
@@ -410,18 +412,24 @@ libxfs_getbuf_flags(dev_t device, xfs_da
return NULL;
if (use_xfs_buf_lock) {
- if (flags & LIBXFS_GETBUF_TRYLOCK) {
- int ret;
+ int ret;
- ret = pthread_mutex_trylock(&bp->b_lock);
- if (ret) {
- ASSERT(ret == EAGAIN);
- cache_node_put(libxfs_bcache, (struct cache_node *)bp);
- return NULL;
+ ret = pthread_mutex_trylock(&bp->b_lock);
+ if (ret) {
+ ASSERT(ret == EAGAIN);
+ if (flags & LIBXFS_GETBUF_TRYLOCK)
+ goto out_put;
+
+ if (pthread_equal(bp->b_holder, pthread_self())) {
+ fprintf(stderr,
+ _("recursive buffer locking detected\n"));
+ bp->b_recur++;
+ } else {
+ pthread_mutex_lock(&bp->b_lock);
}
- } else {
- pthread_mutex_lock(&bp->b_lock);
}
+
+ bp->b_holder = pthread_self();
}
cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
@@ -440,6 +448,9 @@ libxfs_getbuf_flags(dev_t device, xfs_da
#endif
return bp;
+out_put:
+ cache_node_put(libxfs_bcache, (struct cache_node *)bp);
+ return NULL;
}
struct xfs_buf *
@@ -458,8 +469,14 @@ libxfs_putbuf(xfs_buf_t *bp)
list_del_init(&bp->b_lock_list);
pthread_mutex_unlock(&libxfs_bcache->c_mutex);
#endif
- if (use_xfs_buf_lock)
- pthread_mutex_unlock(&bp->b_lock);
+ if (use_xfs_buf_lock) {
+ if (bp->b_recur) {
+ bp->b_recur--;
+ } else {
+ bp->b_holder = 0;
+ pthread_mutex_unlock(&bp->b_lock);
+ }
+ }
cache_node_put(libxfs_bcache, (struct cache_node *)bp);
}
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
2011-11-22 22:46 ` Christoph Hellwig
@ 2011-11-23 17:27 ` Arkadiusz Miśkiewicz
0 siblings, 0 replies; 6+ messages in thread
From: Arkadiusz Miśkiewicz @ 2011-11-23 17:27 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Tuesday 22 of November 2011, Christoph Hellwig wrote:
> On Fri, Nov 18, 2011 at 09:44:09AM +0100, Arkadiusz Miśkiewicz wrote:
> > On Tuesday 15 of November 2011, Christoph Hellwig wrote:
> > > Both the prefetch threads and actual repair processing threads can have
> > > multiple buffers at a time locked, but they do no use a common locker
> > > order, which can lead to ABBA deadlocks while trying to lock the
> > > buffers.
> >
> > There is still some issue with deadlocking.
> >
> > The last printed messages:
> > błędna liczba magiczna 0x41425443 w bloku inobt 2/1438099
> > błędna liczba magiczna 0x41425443 w bloku inobt 2/1438196
> > błędna liczba magiczna 0x41425443 w bloku inobt 2/1438732
> > (invalid magic number ... in block inobt ...)
>
> It looks like you have a circular loop in the inobt tree, and repair
> deadlocks trying to read the same node again. Below is a patch working
> around that by allowing recursive locking for the buffer lock and then
> letting the normal two strikes and out policy apply. I'm not overly
> proud of the patch, but in the short term I can't think of anything
> better.
It still seems to be deadlocking.
Last lines on console:
bad hash table for directory inode 13655493544 (brak wpisu danych): przebudowano
rebuilding directory inode 13655493544
bad hash table for directory inode 13655509455 (brak wpisu danych): przebudowano
rebuilding directory inode 13655509455
[root@berta ~]# gdb ./xfs_repair_tcmalloc 23701
GNU gdb (GDB) 7.3.1-1 (PLD Linux)
Copyright (C) 2011 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-pld-linux".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /root/xfs_repair_tcmalloc...done.
Attaching to program: /root/xfs_repair_tcmalloc, process 23701
Reading symbols from /lib64/libuuid.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/libuuid.so.1
Reading symbols from /lib64/librt.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/librt.so.1
Reading symbols from /lib64/libtcmalloc_minimal.so.0...(no debugging symbols found)...done.
Loaded symbols for /lib64/libtcmalloc_minimal.so.0
Reading symbols from /lib64/libpthread.so.0...(no debugging symbols found)...done.
[Thread debugging using libthread_db enabled]
[New Thread 0x7fab01798700 (LWP 5134)]
[New Thread 0x7fab00f97700 (LWP 5133)]
[New Thread 0x7fab0279a700 (LWP 5132)]
[New Thread 0x7fab01f99700 (LWP 5131)]
[New Thread 0x7fab02f9b700 (LWP 5130)]
Loaded symbols for /lib64/libpthread.so.0
Reading symbols from /usr/lib64/libstdc++.so.6...(no debugging symbols found)...done.
Loaded symbols for /usr/lib64/libstdc++.so.6
Reading symbols from /lib64/libm.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libm.so.6
Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done.
Loaded symbols for /lib64/libc.so.6
Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib64/ld-linux-x86-64.so.2
Reading symbols from /lib64/libgcc_s.so.1...(no debugging symbols found)...done.
Loaded symbols for /lib64/libgcc_s.so.1
0x00007fab0a7ed8e4 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fab0a7ed8e4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007fab0a7e91b5 in _L_lock_883 () from /lib64/libpthread.so.0
#2 0x00007fab0a7e900a in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00000000004334f8 in libxfs_getbuf_flags (device=<optimized out>, blkno=<optimized out>, len=<optimized out>, flags=<optimized out>) at rdwr.c:428
#4 0x00000000004337ce in libxfs_readbuf (dev=65024, blkno=6827773504, len=8, flags=0) at rdwr.c:547
#5 0x0000000000434369 in libxfs_trans_read_buf (mp=<optimized out>, tp=0x0, dev=65024, blkno=6827773504, len=8, flags=0, bpp=0x7fff7510edd8) at trans.c:485
#6 0x0000000000443147 in xfs_da_do_buf (trans=0x0, dp=<optimized out>, bno=<optimized out>, mappedbnop=0x7fff7510ee48, bpp=0x12922faa8,
whichfork=<optimized out>, caller=2, ra=0x422354) at xfs_da_btree.c:2016
#7 0x00000000004354f4 in libxfs_da_read_bufr (trans=<optimized out>, dp=<optimized out>, bno=<optimized out>, mappedbno=6827773504, bpp=<optimized out>,
whichfork=<optimized out>) at util.c:635
#8 0x0000000000422354 in longform_dir2_entry_check (mp=0x7fff7510f300, ino=13655547166, ip=0x9a1a7a20, num_illegal=0x7fff7510f258,
need_dot=0x7fff7510f24c, irec=0xacfefc0, ino_offset=30, hashtab=0xa0028900) at phase6.c:2517
#9 0x0000000000424358 in process_dir_inode (mp=0x7fff7510f300, agno=<optimized out>, irec=0xacfefc0, ino_offset=30) at phase6.c:3307
#10 0x0000000000426f64 in traverse_function (arg=0x12a06c360, agno=3, wq=<optimized out>) at phase6.c:3622
#11 traverse_ags (mp=0x7fff7510f300) at phase6.c:3664
#12 phase6 (mp=0x7fff7510f300) at phase6.c:3756
#13 0x0000000000402c69 in main (argc=<optimized out>, argv=<optimized out>) at xfs_repair.c:772
(gdb) info threads
Id Target Id Frame
6 Thread 0x7fab02f9b700 (LWP 5130) "xfs_repair_tcma" 0x00007fab0a7ed010 in sem_wait () from /lib64/libpthread.so.0
5 Thread 0x7fab01f99700 (LWP 5131) "xfs_repair_tcma" 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
4 Thread 0x7fab0279a700 (LWP 5132) "xfs_repair_tcma" 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
3 Thread 0x7fab00f97700 (LWP 5133) "xfs_repair_tcma" 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
2 Thread 0x7fab01798700 (LWP 5134) "xfs_repair_tcma" 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
* 1 Thread 0x7fab0b0e0760 (LWP 23701) "xfs_repair_tcma" 0x00007fab0a7ed8e4 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) thread 2
[Switching to thread 2 (Thread 0x7fab01798700 (LWP 5134))]
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00000000004297b3 in pf_io_worker (param=0x12a06c360) at prefetch.c:565
#2 0x00007fab0a7e6ed5 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fab09fa3e5d in clone () from /lib64/libc.so.6
#4 0x0000000000000000 in ?? ()
(gdb) thread 3
[Switching to thread 3 (Thread 0x7fab00f97700 (LWP 5133))]
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00000000004297b3 in pf_io_worker (param=0x12a06c360) at prefetch.c:565
#2 0x00007fab0a7e6ed5 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fab09fa3e5d in clone () from /lib64/libc.so.6
#4 0x0000000000000000 in ?? ()
(gdb) thread 4
[Switching to thread 4 (Thread 0x7fab0279a700 (LWP 5132))]
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00000000004297b3 in pf_io_worker (param=0x12a06c360) at prefetch.c:565
#2 0x00007fab0a7e6ed5 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fab09fa3e5d in clone () from /lib64/libc.so.6
#4 0x0000000000000000 in ?? ()
(gdb) thread 5
[Switching to thread 5 (Thread 0x7fab01f99700 (LWP 5131))]
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fab0a7eae6c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1 0x00000000004297b3 in pf_io_worker (param=0x12a06c360) at prefetch.c:565
#2 0x00007fab0a7e6ed5 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fab09fa3e5d in clone () from /lib64/libc.so.6
#4 0x0000000000000000 in ?? ()
(gdb) thread 6
[Switching to thread 6 (Thread 0x7fab02f9b700 (LWP 5130))]
#0 0x00007fab0a7ed010 in sem_wait () from /lib64/libpthread.so.0
(gdb) bt
#0 0x00007fab0a7ed010 in sem_wait () from /lib64/libpthread.so.0
#1 0x0000000000429c72 in pf_queuing_worker (param=0x12a06c360) at prefetch.c:644
#2 0x00007fab0a7e6ed5 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fab09fa3e5d in clone () from /lib64/libc.so.6
#4 0x0000000000000000 in ?? ()
(gdb)
>
>
> Index: xfsprogs-dev/include/libxfs.h
> ===================================================================
> --- xfsprogs-dev.orig/include/libxfs.h 2011-11-22 22:28:23.000000000 +0000
> +++ xfsprogs-dev/include/libxfs.h 2011-11-22 22:34:27.000000000 +0000
> @@ -226,6 +226,8 @@ typedef struct xfs_buf {
> unsigned b_bcount;
> dev_t b_dev;
> pthread_mutex_t b_lock;
> + pthread_t b_holder;
> + unsigned int b_recur;
> void *b_fsprivate;
> void *b_fsprivate2;
> void *b_fsprivate3;
> Index: xfsprogs-dev/libxfs/rdwr.c
> ===================================================================
> --- xfsprogs-dev.orig/libxfs/rdwr.c 2011-11-22 22:28:23.000000000 +0000
> +++ xfsprogs-dev/libxfs/rdwr.c 2011-11-22 22:40:01.000000000 +0000
> @@ -342,6 +342,8 @@ libxfs_initbuf(xfs_buf_t *bp, dev_t devi
> list_head_init(&bp->b_lock_list);
> #endif
> pthread_mutex_init(&bp->b_lock, NULL);
> + bp->b_holder = 0;
> + bp->b_recur = 0;
> }
>
> xfs_buf_t *
> @@ -410,18 +412,24 @@ libxfs_getbuf_flags(dev_t device, xfs_da
> return NULL;
>
> if (use_xfs_buf_lock) {
> - if (flags & LIBXFS_GETBUF_TRYLOCK) {
> - int ret;
> + int ret;
>
> - ret = pthread_mutex_trylock(&bp->b_lock);
> - if (ret) {
> - ASSERT(ret == EAGAIN);
> - cache_node_put(libxfs_bcache, (struct cache_node *)bp);
> - return NULL;
> + ret = pthread_mutex_trylock(&bp->b_lock);
> + if (ret) {
> + ASSERT(ret == EAGAIN);
> + if (flags & LIBXFS_GETBUF_TRYLOCK)
> + goto out_put;
> +
> + if (pthread_equal(bp->b_holder, pthread_self())) {
> + fprintf(stderr,
> + _("recursive buffer locking detected\n"));
> + bp->b_recur++;
> + } else {
> + pthread_mutex_lock(&bp->b_lock);
> }
> - } else {
> - pthread_mutex_lock(&bp->b_lock);
> }
> +
> + bp->b_holder = pthread_self();
> }
>
> cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
> @@ -440,6 +448,9 @@ libxfs_getbuf_flags(dev_t device, xfs_da
> #endif
>
> return bp;
> +out_put:
> + cache_node_put(libxfs_bcache, (struct cache_node *)bp);
> + return NULL;
> }
>
> struct xfs_buf *
> @@ -458,8 +469,14 @@ libxfs_putbuf(xfs_buf_t *bp)
> list_del_init(&bp->b_lock_list);
> pthread_mutex_unlock(&libxfs_bcache->c_mutex);
> #endif
> - if (use_xfs_buf_lock)
> - pthread_mutex_unlock(&bp->b_lock);
> + if (use_xfs_buf_lock) {
> + if (bp->b_recur) {
> + bp->b_recur--;
> + } else {
> + bp->b_holder = 0;
> + pthread_mutex_unlock(&bp->b_lock);
> + }
> + }
> cache_node_put(libxfs_bcache, (struct cache_node *)bp);
> }
--
Arkadiusz Miśkiewicz PLD/Linux Team
arekm / maven.pl http://ftp.pld-linux.org/
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
2011-11-15 21:09 [PATCH] repair: avoid ABBA deadlocks on prefetched buffers Christoph Hellwig
2011-11-17 4:25 ` Dave Chinner
2011-11-18 8:44 ` Arkadiusz Miśkiewicz
@ 2012-01-13 20:09 ` Mark Tinguely
2 siblings, 0 replies; 6+ messages in thread
From: Mark Tinguely @ 2012-01-13 20:09 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On 2011-11-15 21:09 UTC, Christoph Hellwig wrote:
> Both the prefetch threads and actual repair processing threads can have
> multiple buffers at a time locked, but they do no use a common locker
> order, which can lead to ABBA deadlocks while trying to lock the buffers.
>
> Switch the prefetch code to do a trylock and skip buffers that have
> already been locked to avoid this deadlock.
>
> Reported-by: Arkadiusz Miśkiewicz<arekm@maven.pl>
> Signed-off-by: Christoph Hellwig<hch@lst.de>
>
Part of a series of fixes. Needed for the
"[PATCH 06/12] repair: use recursive buffer locking" patch.
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-01-13 20:09 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-15 21:09 [PATCH] repair: avoid ABBA deadlocks on prefetched buffers Christoph Hellwig
2011-11-17 4:25 ` Dave Chinner
2011-11-18 8:44 ` Arkadiusz Miśkiewicz
2011-11-22 22:46 ` Christoph Hellwig
2011-11-23 17:27 ` Arkadiusz Miśkiewicz
2012-01-13 20:09 ` Mark Tinguely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox