* [Cluster-devel] GFS2: Add tracepoints
@ 2009-05-29 19:11 Steven Whitehouse
2009-05-29 19:23 ` [Cluster-devel] " Christoph Hellwig
2009-06-01 17:30 ` [Cluster-devel] [GFS2 PATCH] bz502531 - GFS2: smbd proccess hangs with flock() call Abhijith Das
0 siblings, 2 replies; 7+ messages in thread
From: Steven Whitehouse @ 2009-05-29 19:11 UTC (permalink / raw)
To: cluster-devel.redhat.com
This is the latest version of the patch vs. the latest -tip tree.
From a7e4ff5ebd845816feb90e65a1f07227ba3a17b3 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 29 May 2009 18:40:01 +0100
Subject: [PATCH] GFS2: Add tracepoints
This patch adds tracepoints to GFS2 in three categories: glock,
bmap and logging. That allows all the major functions to be
visible during the operation of the filesystem.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3a5d3f8..a17c2b1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -26,6 +26,7 @@
#include "dir.h"
#include "util.h"
#include "ops_address.h"
+#include "trace_gfs2.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
* block is 512, so __u16 is fine for that. It saves stack space to
@@ -585,6 +586,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
clear_buffer_boundary(bh_map);
+ trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
if (gfs2_is_dir(ip)) {
bsize = sdp->sd_jbsize;
arr = sdp->sd_jheightsize;
@@ -619,6 +621,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
ret = 0;
out:
release_metapath(&mp);
+ trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
bmap_unlock(ip, create);
return ret;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ff49810..5d3e112 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
#include "super.h"
#include "util.h"
#include "bmap.h"
+#define CREATE_TRACE_POINTS
+#include "trace_gfs2.h"
struct gfs2_gl_hash_bucket {
struct hlist_head hb_list;
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl)
if (aspace)
gfs2_aspace_put(aspace);
-
+ trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
}
@@ -317,14 +319,17 @@ restart:
return 2;
gh->gh_error = ret;
list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
gfs2_holder_wake(gh);
goto restart;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, 1);
gfs2_holder_wake(gh);
goto restart;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, 0);
gfs2_holder_wake(gh);
continue;
}
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret)
else
continue;
list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
gfs2_holder_wake(gh);
}
}
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
int rv;
spin_lock(&gl->gl_spin);
+ trace_gfs2_glock_state_change(gl, state);
state_change(gl, state);
gh = find_first_waiter(gl);
@@ -836,6 +843,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
gl->gl_demote_state != state) {
gl->gl_demote_state = LM_ST_UNLOCKED;
}
+ trace_gfs2_demote_rq(gl);
}
/**
@@ -921,6 +929,7 @@ fail:
goto do_cancel;
return;
}
+ trace_gfs2_glock_queue(gh, 1);
list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1017,6 +1026,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
+ trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
return;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a602a28..282b35b 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -260,4 +260,33 @@ void gfs2_unregister_debugfs(void);
extern const struct lm_lockops gfs2_dlm_ops;
+static inline u8 glock_trace_state(unsigned int state)
+{
+ switch(state) {
+ case LM_ST_SHARED:
+ return DLM_LOCK_PR;
+ case LM_ST_DEFERRED:
+ return DLM_LOCK_CW;
+ case LM_ST_EXCLUSIVE:
+ return DLM_LOCK_EX;
+ }
+ return DLM_LOCK_NL;
+}
+
+static inline const char *glock_trace_name(u8 state)
+{
+ switch(state) {
+ case DLM_LOCK_PR:
+ return "PR";
+ case DLM_LOCK_CW:
+ return "CW";
+ case DLM_LOCK_EX:
+ return "EX";
+ case DLM_LOCK_NL:
+ return "NL";
+ default:
+ return "--";
+ }
+}
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 399d1b9..4e37c6e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -503,6 +503,7 @@ struct gfs2_sbd {
struct kobject sd_kobj;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
+ unsigned char sd_devname[BDEVNAME_SIZE];
/* Constants computed on mount */
@@ -667,5 +668,20 @@ struct gfs2_sbd {
struct dentry *debugfs_dentry_glocks; /* for debugfs */
};
+static inline const char *gfs2_blkst_name(u8 state)
+{
+ switch(state) {
+ case GFS2_BLKST_FREE:
+ return "free";
+ case GFS2_BLKST_USED:
+ return "used";
+ case GFS2_BLKST_DINODE:
+ return "dinode";
+ case GFS2_BLKST_UNLINKED:
+ return "unlinked";
+ }
+ return "???";
+}
+
#endif /* __INCORE_DOT_H__ */
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 98918a7..f82d7be 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
#include "meta_io.h"
#include "util.h"
#include "dir.h"
+#include "trace_gfs2.h"
#define PULL 1
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
gfs2_log_lock(sdp);
}
atomic_sub(blks, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, -blks);
gfs2_log_unlock(sdp);
mutex_unlock(&sdp->sd_log_reserve_mutex);
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
gfs2_log_lock(sdp);
atomic_add(blks, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, blks);
gfs2_assert_withdraw(sdp,
atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
gfs2_log_lock(sdp);
atomic_add(dist, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, dist);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
up_write(&sdp->sd_log_flush_lock);
return;
}
+ trace_gfs2_log_flush(sdp, 1);
ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
gfs2_log_lock(sdp);
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
+ trace_gfs2_log_blocks(sdp, -1);
gfs2_log_unlock(sdp);
log_write_header(sdp, 0, PULL);
}
@@ -765,6 +771,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
gfs2_log_unlock(sdp);
sdp->sd_vfs->s_dirt = 0;
+ trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);
kfree(ai);
@@ -788,6 +795,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
atomic_add(unused, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, unused);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
sdp->sd_log_blks_reserved = reserved;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 80e4f5f..52e591b 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -25,6 +25,7 @@
#include "rgrp.h"
#include "trans.h"
#include "util.h"
+#include "trace_gfs2.h"
/**
* gfs2_pin - Pin a buffer in memory
@@ -51,6 +52,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
get_bh(bh);
+ trace_gfs2_pin(bd, 1);
}
/**
@@ -87,6 +89,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+ trace_gfs2_pin(bd, 0);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1ff9473..8174aaf 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -32,6 +32,7 @@
#include "log.h"
#include "quota.h"
#include "dir.h"
+#include "trace_gfs2.h"
#define DO 0
#define UNDO 1
@@ -776,6 +777,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
/* Map the extents for this journal's blocks */
map_journal_extents(sdp);
}
+ trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
@@ -1165,6 +1167,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
+ bdevname(sb->s_bdev, sdp->sd_devname);
error = gfs2_mount_args(sdp, &sdp->sd_args, data);
if (error) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5650382..76e49f4 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -30,6 +30,7 @@
#include "log.h"
#include "inode.h"
#include "ops_address.h"
+#include "trace_gfs2.h"
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
@@ -1483,7 +1484,7 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone -= *n;
spin_unlock(&sdp->sd_rindex_spin);
-
+ trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
return block;
}
@@ -1526,7 +1527,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone--;
spin_unlock(&sdp->sd_rindex_spin);
-
+ trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
return block;
}
@@ -1546,7 +1547,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
-
+ trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1574,6 +1575,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
+ trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
@@ -1597,6 +1599,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
+ trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
@@ -1628,6 +1631,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
+ trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
new file mode 100644
index 0000000..c0779db
--- /dev/null
+++ b/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,352 @@
+#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GFS2_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+#define TRACE_INCLUDE_FILE trace_gfs2
+
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/dlmconstants.h>
+#include <linux/gfs2_ondisk.h>
+#include "incore.h"
+#include "glock.h"
+
+/* Section 1 - Locking
+ *
+ * Objectives:
+ * Latency: Remote demote request to state change
+ * Latency: Local lock request to state change
+ * Latency: State change to lock grant
+ * Correctness: Ordering of local lock state vs. I/O requests
+ * Correctness: Responses to remote demote requests
+ */
+
+/* General glock state change (DLM lock request completes) */
+TRACE_EVENT(gfs2_glock_state_change,
+
+ TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
+
+ TP_ARGS(gl, new_state),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( u8, new_state )
+ __field( u8, dmt_state )
+ __field( u8, tgt_state )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, gl->gl_sbd->sd_devname);
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->new_state = glock_trace_state(new_state);
+ __entry->tgt_state = glock_trace_state(gl->gl_target);
+ __entry->dmt_state = DLM_LOCK_IV;
+ if (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
+ test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
+ __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
+ ),
+
+ TP_printk("%s glock %d:%lld state %s => %s tgt:%s dmt:%s",
+ __entry->devname, __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(__entry->new_state),
+ glock_trace_name(__entry->tgt_state),
+ glock_trace_name(__entry->dmt_state))
+);
+
+/* State change -> unlocked, glock is being deallocated */
+TRACE_EVENT(gfs2_glock_put,
+
+ TP_PROTO(const struct gfs2_glock *gl),
+
+ TP_ARGS(gl),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, gl->gl_sbd->sd_devname);
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ ),
+
+ TP_printk("%s glock %d:%lld state %s => %s",
+ __entry->devname,
+ __entry->gltype, (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(DLM_LOCK_IV))
+
+);
+
+/* Callback (local or remote) requesting lock demotion */
+TRACE_EVENT(gfs2_demote_rq,
+
+ TP_PROTO(const struct gfs2_glock *gl),
+
+ TP_ARGS(gl),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( u8, dmt_state )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, gl->gl_sbd->sd_devname);
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
+ ),
+
+ TP_printk("%s glock %d:%lld demote %s to %s",
+ __entry->devname, __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(__entry->dmt_state))
+
+);
+
+/* Promotion/grant of a glock */
+TRACE_EVENT(gfs2_promote,
+
+ TP_PROTO(const struct gfs2_holder *gh, int first),
+
+ TP_ARGS(gh, first),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( int, first )
+ __field( u8, state )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, gh->gh_gl->gl_sbd->sd_devname);
+ __entry->glnum = gh->gh_gl->gl_name.ln_number;
+ __entry->gltype = gh->gh_gl->gl_name.ln_type;
+ __entry->first = first;
+ __entry->state = glock_trace_state(gh->gh_state);
+ ),
+
+ TP_printk("%s glock %u:%llu promote %s %s",
+ __entry->devname, __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ __entry->first ? "first": "other",
+ glock_trace_name(__entry->state))
+);
+
+/* Queue/dequeue a lock request */
+TRACE_EVENT(gfs2_glock_queue,
+
+ TP_PROTO(const struct gfs2_holder *gh, int queue),
+
+ TP_ARGS(gh, queue),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( int, queue )
+ __field( u8, state )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, gh->gh_gl->gl_sbd->sd_devname);
+ __entry->glnum = gh->gh_gl->gl_name.ln_number;
+ __entry->gltype = gh->gh_gl->gl_name.ln_type;
+ __entry->queue = queue;
+ __entry->state = glock_trace_state(gh->gh_state);
+ ),
+
+ TP_printk("%s glock %u:%llu %squeue %s",
+ __entry->devname, __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ __entry->queue ? "" : "de",
+ glock_trace_name(__entry->state))
+);
+
+/* Section 2 - Log/journal
+ *
+ * Objectives:
+ * Latency: Log flush time
+ * Correctness: pin/unpin vs. disk I/O ordering
+ * Performance: Log usage stats
+ */
+
+/* Pin/unpin a block in the log */
+TRACE_EVENT(gfs2_pin,
+
+ TP_PROTO(const struct gfs2_bufdata *bd, int pin),
+
+ TP_ARGS(bd, pin),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( int, pin )
+ __field( u32, len )
+ __field( sector_t, block )
+ __field( u64, ino )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, bd->bd_gl->gl_sbd->sd_devname);
+ __entry->pin = pin;
+ __entry->len = bd->bd_bh->b_size;
+ __entry->block = bd->bd_bh->b_blocknr;
+ __entry->ino = bd->bd_gl->gl_name.ln_number;
+ ),
+
+ TP_printk("%s log %s %llu/%lu inode %llu",
+ __entry->devname, __entry->pin ? "pin" : "unpin",
+ (unsigned long long)__entry->block,
+ (unsigned long)__entry->len,
+ (unsigned long long)__entry->ino)
+);
+
+/* Flushing the log */
+TRACE_EVENT(gfs2_log_flush,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, int start),
+
+ TP_ARGS(sdp, start),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( int, start )
+ __field( u64, log_seq )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, sdp->sd_devname);
+ __entry->start = start;
+ __entry->log_seq = sdp->sd_log_sequence;
+ ),
+
+ TP_printk("%s log flush %s %llu",
+ __entry->devname, __entry->start ? "start" : "end",
+ (unsigned long long)__entry->log_seq)
+);
+
+/* Reserving/releasing blocks in the log */
+TRACE_EVENT(gfs2_log_blocks,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
+
+ TP_ARGS(sdp, blocks),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( int, blocks )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, sdp->sd_devname);
+ __entry->blocks = blocks;
+ ),
+
+ TP_printk("%s log reserve %d", __entry->devname, __entry->blocks)
+);
+
+/* Section 3 - bmap
+ *
+ * Objectives:
+ * Latency: Bmap request time
+ * Performance: Block allocator tracing
+ * Correctness: Test of discard generation vs. blocks allocated
+ */
+
+/* Map an extent of blocks, possibly a new allocation */
+TRACE_EVENT(gfs2_bmap,
+
+ TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
+ sector_t lblock, int create, int errno),
+
+ TP_ARGS(ip, bh, lblock, create, errno),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( sector_t, lblock )
+ __field( sector_t, pblock )
+ __field( u64, inum )
+ __field( unsigned long, state )
+ __field( u32, len )
+ __field( int, create )
+ __field( int, errno )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, GFS2_SB(&ip->i_inode)->sd_devname);
+ __entry->lblock = lblock;
+ __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0;
+ __entry->inum = ip->i_no_addr;
+ __entry->state = bh->b_state;
+ __entry->len = bh->b_size;
+ __entry->create = create;
+ __entry->errno = errno;
+ ),
+
+ TP_printk("%s bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
+ __entry->devname, (unsigned long long)__entry->inum,
+ (unsigned long long)__entry->lblock,
+ (unsigned long)__entry->len,
+ (unsigned long long)__entry->pblock,
+ __entry->state, __entry->create ? "create " : "nocreate",
+ __entry->errno)
+);
+
+/* Keep track of blocks as they are allocated/freed */
+TRACE_EVENT(gfs2_block_alloc,
+
+ TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
+ u8 block_state),
+
+ TP_ARGS(ip, block, len, block_state),
+
+ TP_STRUCT__entry(
+ __array( char, devname, BDEVNAME_SIZE )
+ __field( u64, start )
+ __field( u64, inum )
+ __field( u32, len )
+ __field( u8, block_state )
+ ),
+
+ TP_fast_assign(
+ strcpy(__entry->devname, GFS2_SB(&ip->i_inode)->sd_devname);
+ __entry->start = block;
+ __entry->inum = ip->i_no_addr;
+ __entry->len = len;
+ __entry->block_state = block_state;
+ ),
+
+ TP_printk("%s bmap %llu alloc %llu/%lu %s",
+ __entry->devname, (unsigned long long)__entry->inum,
+ (unsigned long long)__entry->start,
+ (unsigned long)__entry->len,
+ gfs2_blkst_name(__entry->block_state))
+);
+
+#endif /* _TRACE_GFS2_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../fs/gfs2/
+#include <trace/define_trace.h>
+
--
1.6.0.6
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Cluster-devel] Re: GFS2: Add tracepoints
2009-05-29 19:11 [Cluster-devel] GFS2: Add tracepoints Steven Whitehouse
@ 2009-05-29 19:23 ` Christoph Hellwig
2009-05-29 19:50 ` Steven Whitehouse
2009-06-01 17:30 ` [Cluster-devel] [GFS2 PATCH] bz502531 - GFS2: smbd proccess hangs with flock() call Abhijith Das
1 sibling, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2009-05-29 19:23 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Fri, May 29, 2009 at 08:11:00PM +0100, Steven Whitehouse wrote:
> +static inline u8 glock_trace_state(unsigned int state)
> +{
> + switch(state) {
> + case LM_ST_SHARED:
> + return DLM_LOCK_PR;
> + case LM_ST_DEFERRED:
> + return DLM_LOCK_CW;
> + case LM_ST_EXCLUSIVE:
> + return DLM_LOCK_EX;
> + }
> + return DLM_LOCK_NL;
> +}
I think this would be better done using __print_symbolic.
> +static inline const char *glock_trace_name(u8 state)
> +{
> + switch(state) {
> + case DLM_LOCK_PR:
> + return "PR";
> + case DLM_LOCK_CW:
> + return "CW";
> + case DLM_LOCK_EX:
> + return "EX";
> + case DLM_LOCK_NL:
> + return "NL";
> + default:
> + return "--";
> + }
> +}
Same here.
> +static inline const char *gfs2_blkst_name(u8 state)
> +{
> + switch(state) {
> + case GFS2_BLKST_FREE:
> + return "free";
> + case GFS2_BLKST_USED:
> + return "used";
> + case GFS2_BLKST_DINODE:
> + return "dinode";
> + case GFS2_BLKST_UNLINKED:
> + return "unlinked";
> + }
> + return "???";
> +}
Same here.
> + TP_STRUCT__entry(
> + __array( char, devname, BDEVNAME_SIZE )
This is extremely inefficient. We'd be much better off just storing
the dev_t and introducing a __trace_bdevname to expand a bdevname
into the tracer buffer. It's been on my todo list for a while and
I'll look into it next week unless you beat me to it.
> + __entry->gltype = gl->gl_name.ln_type;
> + __entry->cur_state = glock_trace_state(gl->gl_state);
> + __entry->new_state = glock_trace_state(new_state);
> + __entry->tgt_state = glock_trace_state(gl->gl_target);
> + __entry->dmt_state = DLM_LOCK_IV;
> + if (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
> + test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
> + __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
Wouldn't it be better to just trace gl_flags and gl_demote_state?
> +);
> +
> +#endif /* _TRACE_GFS2_H */
> +
> +/* This part must be outside protection */
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH ../../fs/gfs2/
Shouldn't an
#define TRACE_INCLUDE_PATH .
do it? (Although that would need -I$(src); not sure how good that is.)
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] Re: GFS2: Add tracepoints
2009-05-29 19:23 ` [Cluster-devel] " Christoph Hellwig
@ 2009-05-29 19:50 ` Steven Whitehouse
2009-05-30 10:25 ` Christoph Hellwig
0 siblings, 1 reply; 7+ messages in thread
From: Steven Whitehouse @ 2009-05-29 19:50 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
On Fri, 2009-05-29 at 15:23 -0400, Christoph Hellwig wrote:
> On Fri, May 29, 2009 at 08:11:00PM +0100, Steven Whitehouse wrote:
> > +static inline u8 glock_trace_state(unsigned int state)
> > +{
> > + switch(state) {
> > + case LM_ST_SHARED:
> > + return DLM_LOCK_PR;
> > + case LM_ST_DEFERRED:
> > + return DLM_LOCK_CW;
> > + case LM_ST_EXCLUSIVE:
> > + return DLM_LOCK_EX;
> > + }
> > + return DLM_LOCK_NL;
> > +}
>
> I think this would be better done using __print_symbolic.
>
This is just a conversion of constants, but the others below I'll fix up
in the next version.
> > +static inline const char *glock_trace_name(u8 state)
> > +{
> > + switch(state) {
> > + case DLM_LOCK_PR:
> > + return "PR";
> > + case DLM_LOCK_CW:
> > + return "CW";
> > + case DLM_LOCK_EX:
> > + return "EX";
> > + case DLM_LOCK_NL:
> > + return "NL";
> > + default:
> > + return "--";
> > + }
> > +}
>
> Same here.
>
> > +static inline const char *gfs2_blkst_name(u8 state)
> > +{
> > + switch(state) {
> > + case GFS2_BLKST_FREE:
> > + return "free";
> > + case GFS2_BLKST_USED:
> > + return "used";
> > + case GFS2_BLKST_DINODE:
> > + return "dinode";
> > + case GFS2_BLKST_UNLINKED:
> > + return "unlinked";
> > + }
> > + return "???";
> > +}
>
> Same here.
>
> > + TP_STRUCT__entry(
> > + __array( char, devname, BDEVNAME_SIZE )
>
> This is extremly inefficient. We'd be much better off just storing
> the dev_t and introducing a __trace_bdevname to expand a bdevname
> into the tracer buffer. It's been on my todo list for a while and
> I'll look into it next week unless you beat me to it.
>
Ok. I wasn't sure how efficient bdevname() was vs. copying the name
around, but that sounds like a good plan if it's not too expensive an
operation.
> > + __entry->gltype = gl->gl_name.ln_type;
> > + __entry->cur_state = glock_trace_state(gl->gl_state);
> > + __entry->new_state = glock_trace_state(new_state);
> > + __entry->tgt_state = glock_trace_state(gl->gl_target);
> > + __entry->dmt_state = DLM_LOCK_IV;
> > + if (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
> > + test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
> > + __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
>
> Wouldn't it be better to just trace gl_flags and gl_demote_state?
>
Yes, that sounds good. That is legacy code from the original patch to
blktrace which was rather more limited in scope and wasn't able to print
the flags.
> > +);
> > +
> > +#endif /* _TRACE_GFS2_H */
> > +
> > +/* This part must be outside protection */
> > +#undef TRACE_INCLUDE_PATH
> > +#define TRACE_INCLUDE_PATH ../../fs/gfs2/
>
> Shouldn't an
>
> #define TRACE_INCLUDE_PATH .
>
> do it? (although that would need -I$(src), not sure how good that is.
Ah, I thought this was an alternative after the comments in the earlier
review, but I can look into it if you think that's better,
Steve.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] Re: GFS2: Add tracepoints
2009-05-29 19:50 ` Steven Whitehouse
@ 2009-05-30 10:25 ` Christoph Hellwig
2009-06-01 14:19 ` Steven Whitehouse
0 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2009-05-30 10:25 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Fri, May 29, 2009 at 08:50:10PM +0100, Steven Whitehouse wrote:
> > This is extremly inefficient. We'd be much better off just storing
> > the dev_t and introducing a __trace_bdevname to expand a bdevname
> > into the tracer buffer. It's been on my todo list for a while and
> > I'll look into it next week unless you beat me to it.
> >
> Ok. I wasn't sure how efficient bdevname() was vs. copying the name
> around, but that sounds like a good plan if its not too expensive an
> operation.
Ok, I finally started looking into it and it's not that easy. bdevname
actually requires a struct block_device * which we can't safely store
and __bdevname is just an obfuscated way to print major and minor.
I'd still hate copying the whole device name as it's rather inefficient,
on the other hand it's quite a bit nicer than the raw major/minor
output.
At this point I think I would prefer the raw major/minor output as
done in the block trace event patches. But what I think is most
important is that we all (block trace events, all filesystem trace
events (current xfs + gfs2 + ext4), and possibly vfs/vm trace points)
agree on one single format so that we can do global filtering over
all of them.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] Re: GFS2: Add tracepoints
2009-05-30 10:25 ` Christoph Hellwig
@ 2009-06-01 14:19 ` Steven Whitehouse
0 siblings, 0 replies; 7+ messages in thread
From: Steven Whitehouse @ 2009-06-01 14:19 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
On Sat, 2009-05-30 at 06:25 -0400, Christoph Hellwig wrote:
> On Fri, May 29, 2009 at 08:50:10PM +0100, Steven Whitehouse wrote:
> > > This is extremly inefficient. We'd be much better off just storing
> > > the dev_t and introducing a __trace_bdevname to expand a bdevname
> > > into the tracer buffer. It's been on my todo list for a while and
> > > I'll look into it next week unless you beat me to it.
> > >
> > Ok. I wasn't sure how efficient bdevname() was vs. copying the name
> > around, but that sounds like a good plan if its not too expensive an
> > operation.
>
> Ok, I finally started looking into and it's not that easy. bdevname
> actually requires a struct block_device * which we can't safely store
> and __bdevname is just an obsfucated way to print major and minor.
>
> I'd still hate copying the whole device name as it's rather inefficient,
> on the other hand it's quite a bit nicer than the raw major/minor
> output.
>
> At this point I think I would prefer the raw manjor/minor output as
> done in the block trace even patches. But what I think is most
> important is that we all (block trace events, all filesystem trace
> events (current xfs + gfs2 + ext4, and possibly vfs/vm trace points)
> agree on one single format so that we can do global filtering over
> all of them.
>
Ok. I've been working on the other issues and I've come up against this:
Kernel: arch/x86/boot/bzImage is ready (#1)
Building modules, stage 2.
MODPOST 730 modules
ERROR: "ftrace_print_flags_seq" [fs/gfs2/gfs2.ko] undefined!
ERROR: "per_cpu__ftrace_event_seq" [fs/gfs2/gfs2.ko] undefined!
ERROR: "ftrace_print_symbols_seq" [fs/gfs2/gfs2.ko] undefined!
make[1]: *** [__modpost] Error 1
make: *** [modules] Error 2
which I think is due to missing exports, but aside from that and the dev
name issue, it's looking much better. I updated my base to the latest
-tip tree this morning,
Steve.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 PATCH] bz502531 - GFS2: smbd proccess hangs with flock() call.
2009-05-29 19:11 [Cluster-devel] GFS2: Add tracepoints Steven Whitehouse
2009-05-29 19:23 ` [Cluster-devel] " Christoph Hellwig
@ 2009-06-01 17:30 ` Abhijith Das
2009-06-02 8:08 ` Steven Whitehouse
1 sibling, 1 reply; 7+ messages in thread
From: Abhijith Das @ 2009-06-01 17:30 UTC (permalink / raw)
To: cluster-devel.redhat.com
GFS2 currently does not support mandatory flocks. An flock() call with
LOCK_MAND triggers unexpected behavior because gfs2 is not checking for
this lock type. This patch corrects that.
Signed-off-by: Abhi Das <adas@redhat.com>
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: bz502531-git.patch
URL: <http://listman.redhat.com/archives/cluster-devel/attachments/20090601/bbb46e11/attachment.ksh>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Cluster-devel] [GFS2 PATCH] bz502531 - GFS2: smbd proccess hangs with flock() call.
2009-06-01 17:30 ` [Cluster-devel] [GFS2 PATCH] bz502531 - GFS2: smbd proccess hangs with flock() call Abhijith Das
@ 2009-06-02 8:08 ` Steven Whitehouse
0 siblings, 0 replies; 7+ messages in thread
From: Steven Whitehouse @ 2009-06-02 8:08 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
Now applied to the -nmw git tree. Thanks,
Steve.
On Mon, 2009-06-01 at 12:30 -0500, Abhijith Das wrote:
> GFS2 currently does not support mandatory flocks. An flock() call with
> LOCK_MAND triggers unexpected behavior because gfs2 is not checking for
> this lock type. This patch corrects that.
>
> Signed-off-by: Abhi Das <adas@redhat.com>
> plain text document attachment (bz502531-git.patch)
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 73b6f55..841ddc9 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -698,8 +698,8 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
>
> if (!(fl->fl_flags & FL_FLOCK))
> return -ENOLCK;
> - if (__mandatory_lock(&ip->i_inode))
> - return -ENOLCK;
> + if (fl->fl_type & LOCK_MAND)
> + return -EOPNOTSUPP;
>
> if (fl->fl_type == F_UNLCK) {
> do_unflock(file, fl);
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2009-06-02 8:08 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-05-29 19:11 [Cluster-devel] GFS2: Add tracepoints Steven Whitehouse
2009-05-29 19:23 ` [Cluster-devel] " Christoph Hellwig
2009-05-29 19:50 ` Steven Whitehouse
2009-05-30 10:25 ` Christoph Hellwig
2009-06-01 14:19 ` Steven Whitehouse
2009-06-01 17:30 ` [Cluster-devel] [GFS2 PATCH] bz502531 - GFS2: smbd proccess hangs with flock() call Abhijith Das
2009-06-02 8:08 ` Steven Whitehouse
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.