[Cluster-devel] cluster/gfs2 fsck/fs_recovery.c fsck/fs

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

* [Cluster-devel] cluster/gfs2 fsck/fs_recovery.c fsck/fs_recove ...
@ 2007-10-19 15:06 rpeterso
  0 siblings, 0 replies; 2+ messages in thread
From: rpeterso @ 2007-10-19 15:06 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	rpeterso at sourceware.org	2007-10-19 15:06:56

Modified files:
	gfs2/fsck      : fs_recovery.c fs_recovery.h initialize.c 
	gfs2/libgfs2   : Makefile libgfs2.h 
Added files:
	gfs2/libgfs2   : recovery.c 

Log message:
	Resolves: bz 291551: gfs2_fsck clears journals without asking.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fs_recovery.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fs_recovery.h.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/initialize.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/recovery.c.diff?cvsroot=cluster&r1=NONE&r2=1.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/Makefile.diff?cvsroot=cluster&r1=1.11&r2=1.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/libgfs2.h.diff?cvsroot=cluster&r1=1.20&r2=1.21

--- cluster/gfs2/fsck/fs_recovery.c	2007/02/12 19:28:49	1.5
+++ cluster/gfs2/fsck/fs_recovery.c	2007/10/19 15:06:56	1.6
@@ -11,6 +11,7 @@
 *******************************************************************************
 ******************************************************************************/
 
+#include <errno.h>
 #include <inttypes.h>
 #include <linux_endian.h>
 #include <stdlib.h>
@@ -18,36 +19,421 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "fsck.h"
+#include "fs_recovery.h"
 #include "libgfs2.h"
 #include "util.h"
-#include "fs_recovery.h"
 
 #define RANDOM(values) ((values) * (random() / (RAND_MAX + 1.0)))
 
+unsigned int sd_found_jblocks = 0, sd_replayed_jblocks = 0;
+unsigned int sd_found_metablocks = 0, sd_replayed_metablocks = 0;
+unsigned int sd_found_revokes = 0;
+osi_list_t sd_revoke_list;
+unsigned int sd_replay_tail;
+
+struct gfs2_revoke_replay {
+	osi_list_t rr_list;
+	uint64_t rr_blkno;
+	unsigned int rr_where;
+};
+
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+	osi_list_t *tmp, *head = &sd_revoke_list;
+	struct gfs2_revoke_replay *rr;
+	int found = 0;
+
+	osi_list_foreach(tmp, head) {
+		rr = osi_list_entry(tmp, struct gfs2_revoke_replay, rr_list);
+		if (rr->rr_blkno == blkno) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found) {
+		rr->rr_where = where;
+		return 0;
+	}
+
+	rr = malloc(sizeof(struct gfs2_revoke_replay));
+	if (!rr)
+		return -ENOMEM;
+
+	rr->rr_blkno = blkno;
+	rr->rr_where = where;
+	osi_list_add(&rr->rr_list, head);
+	return 1;
+}
+
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+	osi_list_t *tmp;
+	struct gfs2_revoke_replay *rr;
+	int wrap, a, b, revoke;
+	int found = 0;
+
+	osi_list_foreach(tmp, &sd_revoke_list) {
+		rr = osi_list_entry(tmp, struct gfs2_revoke_replay, rr_list);
+		if (rr->rr_blkno == blkno) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	wrap = (rr->rr_where < sd_replay_tail);
+	a = (sd_replay_tail < where);
+	b = (where < rr->rr_where);
+	revoke = (wrap) ? (a || b) : (a && b);
+	return revoke;
+}
+
+void gfs2_revoke_clean(struct gfs2_sbd *sdp)
+{
+	osi_list_t *head = &sd_revoke_list;
+	struct gfs2_revoke_replay *rr;
+
+	while (!osi_list_empty(head)) {
+		rr = osi_list_entry(head->next, struct gfs2_revoke_replay, rr_list);
+		osi_list_del(&rr->rr_list);
+		free(rr);
+	}
+}
+
+static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+				struct gfs2_log_descriptor *ld, __be64 *ptr,
+				int pass)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int blks = be32_to_cpu(ld->ld_data1);
+	struct gfs2_buffer_head *bh_log, *bh_ip;
+	uint64_t blkno;
+	int error = 0;
+	enum update_flags if_modified;
+
+	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
+		return 0;
+
+	gfs2_replay_incr_blk(ip, &start);
+
+	for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+		uint32_t check_magic;
+
+		sd_found_metablocks++;
+
+		blkno = be64_to_cpu(*ptr++);
+		if (gfs2_revoke_check(sdp, blkno, start))
+			continue;
+
+		error = gfs2_replay_read_block(ip, start, &bh_log);
+		if (error)
+			return error;
+
+		bh_ip = bget(sdp, blkno);
+		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+		check_magic = ((struct gfs2_meta_header *)
+			       (bh_ip->b_data))->mh_magic;
+		check_magic = be32_to_cpu(check_magic);
+		if (check_magic != GFS2_MAGIC) {
+			if_modified = not_updated;
+			error = -EIO;
+		} else
+			if_modified = updated;
+
+		brelse(bh_log, not_updated);
+		brelse(bh_ip, if_modified);
+		if (error)
+			break;
+
+		sd_replayed_metablocks++;
+	}
+	return error;
+}
+
+static int revoke_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+				   struct gfs2_log_descriptor *ld, __be64 *ptr,
+				   int pass)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int blks = be32_to_cpu(ld->ld_length);
+	unsigned int revokes = be32_to_cpu(ld->ld_data1);
+	struct gfs2_buffer_head *bh;
+	unsigned int offset;
+	uint64_t blkno;
+	int first = 1;
+	int error;
+
+	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
+		return 0;
+
+	offset = sizeof(struct gfs2_log_descriptor);
+
+	for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+		error = gfs2_replay_read_block(ip, start, &bh);
+		if (error)
+			return error;
+
+		if (!first) {
+			if (gfs2_check_meta(bh, GFS2_METATYPE_LB))
+				continue;
+		}
+		while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
+			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
+			error = gfs2_revoke_add(sdp, blkno, start);
+			if (error < 0)
+				return error;
+			else if (error)
+				sd_found_revokes++;
+
+			if (!--revokes)
+				break;
+			offset += sizeof(uint64_t);
+		}
+
+		brelse(bh, updated);
+		offset = sizeof(struct gfs2_meta_header);
+		first = 0;
+	}
+	return 0;
+}
+
+static int databuf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+				    struct gfs2_log_descriptor *ld,
+				    __be64 *ptr, int pass)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int blks = be32_to_cpu(ld->ld_data1);
+	struct gfs2_buffer_head *bh_log, *bh_ip;
+	uint64_t blkno;
+	uint64_t esc;
+	int error = 0;
+
+	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+		return 0;
+
+	gfs2_replay_incr_blk(ip, &start);
+	for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+		blkno = be64_to_cpu(*ptr++);
+		esc = be64_to_cpu(*ptr++);
+
+		sd_found_jblocks++;
+
+		if (gfs2_revoke_check(sdp, blkno, start))
+			continue;
+
+		error = gfs2_replay_read_block(ip, start, &bh_log);
+		if (error)
+			return error;
+
+		bh_ip = bget(sdp, blkno);
+		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+		/* Unescape */
+		if (esc) {
+			__be32 *eptr = (__be32 *)bh_ip->b_data;
+			*eptr = cpu_to_be32(GFS2_MAGIC);
+		}
+
+		brelse(bh_log, not_updated);
+		brelse(bh_ip, updated);
+
+		sd_replayed_jblocks++;
+	}
+	return error;
+}
+
+/**
+ * foreach_descriptor - go through the active part of the log
+ * @ip: the journal incore inode
+ * @start: the first log header in the active region
+ * @end: the last log header (don't process the contents of this entry)
+ *
+ * Call a given function once for every log descriptor in the active
+ * portion of the log.
+ *
+ * Returns: errno
+ */
+
+int foreach_descriptor(struct gfs2_inode *ip, unsigned int start,
+		       unsigned int end, int pass)
+{
+	struct gfs2_buffer_head *bh;
+	struct gfs2_log_descriptor *ld;
+	int error = 0;
+	uint32_t length;
+	__be64 *ptr;
+	unsigned int offset = sizeof(struct gfs2_log_descriptor);
+	offset += sizeof(__be64) - 1;
+	offset &= ~(sizeof(__be64) - 1);
+
+	while (start != end) {
+		uint32_t check_magic;
+
+		error = gfs2_replay_read_block(ip, start, &bh);
+		if (error)
+			return error;
+		check_magic = ((struct gfs2_meta_header *)
+			       (bh->b_data))->mh_magic;
+		check_magic = be32_to_cpu(check_magic);
+		if (check_magic != GFS2_MAGIC) {
+			brelse(bh, updated);
+			return -EIO;
+		}
+		ld = (struct gfs2_log_descriptor *)bh->b_data;
+		length = be32_to_cpu(ld->ld_length);
+
+		if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
+			struct gfs2_log_header lh;
+
+			error = get_log_header(ip, start, &lh);
+			if (!error) {
+				gfs2_replay_incr_blk(ip, &start);
+				brelse(bh, updated);
+				continue;
+			}
+			if (error == 1)
+				error = -EIO;
+			brelse(bh, updated);
+			return error;
+		} else if (gfs2_check_meta(bh, GFS2_METATYPE_LD)) {
+			brelse(bh, updated);
+			return -EIO;
+		}
+		ptr = (__be64 *)(bh->b_data + offset);
+		error = databuf_lo_scan_elements(ip, start, ld, ptr, pass);
+		if (error) {
+			brelse(bh, updated);
+			return error;
+		}
+		error = buf_lo_scan_elements(ip, start, ld, ptr, pass);
+		if (error) {
+			brelse(bh, updated);
+			return error;
+		}
+		error = revoke_lo_scan_elements(ip, start, ld, ptr, pass);
+		if (error) {
+			brelse(bh, updated);
+			return error;
+		}
+
+		while (length--)
+			gfs2_replay_incr_blk(ip, &start);
+
+		brelse(bh, updated);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_recover_journal - recover a given journal
+ * @ip: the journal incore inode
+ *
+ * Check to see if the journal is clean; if it is dirty, ask the user
+ * whether to replay it or, if they decline, whether to clear it instead.
+ *
+ * Returns: errno
+ */
+
+int gfs2_recover_journal(struct gfs2_inode *ip, int j)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_log_header head;
+	unsigned int pass;
+	int error;
+
+	log_info("jid=%u: Looking at journal...\n", j);
+
+	osi_list_init(&sd_revoke_list);
+	error = gfs2_find_jhead(ip, &head);
+	if (error)
+		goto out;
+
+	if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) {
+		log_info("jid=%u: Journal is clean.\n", j);
+		return 0;
+	}
+	if (query(&opts, "\nJournal #%d (\"journal%d\") is dirty.  Okay to replay it? (y/n)",
+		    j+1, j)) {
+		log_info("jid=%u: Replaying journal...\n", j);
+
+		sd_found_jblocks = sd_replayed_jblocks = 0;
+		sd_found_metablocks = sd_replayed_metablocks = 0;
+		sd_found_revokes = 0;
+		sd_replay_tail = head.lh_tail;
+		for (pass = 0; pass < 2; pass++) {
+			error = foreach_descriptor(ip, head.lh_tail,
+						   head.lh_blkno, pass);
+			if (error)
+				goto out;
+		}
+		log_info("jid=%u: Found %u revoke tags\n", j,
+			 sd_found_revokes);
+		gfs2_revoke_clean(sdp);
+		error = clean_journal(ip, &head);
+		if (error)
+			goto out;
+		log_err("jid=%u: Replayed %u of %u journaled data blocks\n",
+			j, sd_replayed_jblocks, sd_found_jblocks);
+		log_err("jid=%u: Replayed %u of %u metadata blocks\n",
+			j, sd_replayed_metablocks, sd_found_metablocks);
+	} else {
+		if (query(&opts, "Do you want to clear the dirty journal instead? (y/n)")) {
+			write_journal(sdp, sdp->md.journal[j], j,
+				      sdp->md.journal[j]->i_di.di_size /
+				      sdp->sd_sb.sb_bsize);
+			
+		} else
+			log_err("jid=%u: Dirty journal not replayed or cleared.\n", j);
+	}
+
+out:
+	log_info("jid=%u: %s\n", j, (error) ? "Failed" : "Done");
+	return error;
+}
+
 /*
- * reconstruct_journals - write fresh journals
+ * replay_journals - replay the journals
  * sdp: the super block
  *
- * FIXME: it would be nice to get this to actually replay the journals
- * - there should be a flag to the fsck to enable/disable this
- * feature, and the fsck should probably fall back to clearing the
- * journal if an inconsitancy is found, but only for the bad journal
+ * There should be a flag to the fsck to enable/disable this
+ * feature.  For each dirty journal the user is asked whether to
+ * replay it and, if they decline, whether to clear it instead.
  *
  * Returns: 0 on success, -1 on failure
  */
-int reconstruct_journals(struct gfs2_sbd *sdp){
+int replay_journals(struct gfs2_sbd *sdp){
 	int i;
 
-	log_notice("Clearing journals (this may take a while)");
-	for(i=0; i < sdp->md.journals; i++) {
-		/* Journal replay seems to have slowed down quite a bit in
-		 * the gfs2_fsck */
+	log_notice("Recovering journals (this may take a while)");
+
+	/* Get master dinode */
+	sdp->master_dir = gfs2_load_inode(sdp,
+					  sdp->sd_sb.sb_master_dir.no_addr);
+	gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
+
+	/* read in the journal index data */
+	if (ji_update(sdp)){
+		log_err("Unable to read in jindex inode.\n");
+		return -1;
+	}
+
+	for(i = 0; i < sdp->md.journals; i++) {
 		if((i % 2) == 0)
 			log_at_notice(".");
-		write_journal(sdp, sdp->md.journal[i], i,
-					  sdp->md.journal[i]->i_di.di_size / sdp->sd_sb.sb_bsize);
-		/* Can't use d_di.di_blocks because that also includes metadata. */
+		gfs2_recover_journal(sdp->md.journal[i], i);
+		inode_put(sdp->md.journal[i],
+			  (opts.no ? not_updated : updated));
 	}
-	log_notice("\nJournals cleared.\n");
+	log_notice("\nJournal recovery complete.\n");
+	inode_put(sdp->master_dir, not_updated);
+	inode_put(sdp->md.jiinode, not_updated);
+	/* Sync the buffers to disk so we get a fresh start. */
+	bsync(sdp);
 	return 0;
 }
--- cluster/gfs2/fsck/fs_recovery.h	2006/06/06 14:49:31	1.2
+++ cluster/gfs2/fsck/fs_recovery.h	2007/10/19 15:06:56	1.3
@@ -16,7 +16,7 @@
 
 #include "libgfs2.h"
 
-int reconstruct_journals(struct gfs2_sbd *sdp);
+int replay_journals(struct gfs2_sbd *sdp);
 
 #endif /* __FS_RECOVERY_H__ */
 
--- cluster/gfs2/fsck/initialize.c	2007/06/26 01:43:17	1.9
+++ cluster/gfs2/fsck/initialize.c	2007/10/19 15:06:56	1.10
@@ -42,8 +42,7 @@
 int init_journals(struct gfs2_sbd *sbp)
 {
 	if(!opts.no) {
-		/* ATTENTION -- Journal replay is not supported */
-		if(reconstruct_journals(sbp))
+		if(replay_journals(sbp))
 			return 1;
 	}
 	return 0;
@@ -192,51 +191,18 @@
 }
 
 /**
- * fill_super_block
- * @sdp:
+ * init_system_inodes
  *
  * Returns: 0 on success, -1 on failure
  */
-static int fill_super_block(struct gfs2_sbd *sdp)
+static int init_system_inodes(struct gfs2_sbd *sdp)
 {
-	uint32_t i;
-	char *buf;
 	uint64_t inumbuf;
+	char *buf;
 	struct gfs2_statfs_change sc;
 	int rgcount;
-	uint64_t addl_mem_needed;
 	enum rgindex_trust_level trust_lvl;
-
-	sync();
-
-	/********************************************************************
-	 ***************** First, initialize all lists **********************
-	 ********************************************************************/
-	log_info("Initializing lists...\n");
-	osi_list_init(&sdp->rglist);
-	osi_list_init(&sdp->buf_list);
-	for(i = 0; i < BUF_HASH_SIZE; i++) {
-		osi_list_init(&dir_hash[i]);
-		osi_list_init(&inode_hash[i]);
-		osi_list_init(&sdp->buf_hash[i]);
-	}
-
-	/********************************************************************
-	 ************  next, read in on-disk SB and set constants  **********
-	 ********************************************************************/
-	sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
-	sdp->bsize = sdp->sd_sb.sb_bsize;
-
-	if(sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize){
-		log_crit("GFS superblock is larger than the blocksize!\n");
-		log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n");
-		return -1;
-	}
-
-	compute_constants(sdp);
-	if(read_sb(sdp) < 0){
-		return -1;
-	}
+	uint64_t addl_mem_needed;
 
 	/*******************************************************************
 	 ******************  Initialize important inodes  ******************
@@ -298,9 +264,10 @@
 	}
 	if (trust_lvl > distrust) {
 		log_err("RG recovery impossible; I can't fix this file system.\n");
-		goto fail;
+		return -1;
 	}
 	log_info("%u resource groups found.\n", rgcount);
+
 	/*******************************************************************
 	 *******  Now, set boundary fields in the super block  *************
 	 *******************************************************************/
@@ -318,7 +285,6 @@
 		goto fail;
 	}
 	return 0;
-
  fail:
 	empty_super_block(sdp);
 
@@ -326,6 +292,50 @@
 }
 
 /**
+ * fill_super_block
+ * @sdp:
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int fill_super_block(struct gfs2_sbd *sdp)
+{
+	uint32_t i;
+
+	sync();
+
+	/********************************************************************
+	 ***************** First, initialize all lists **********************
+	 ********************************************************************/
+	log_info("Initializing lists...\n");
+	osi_list_init(&sdp->rglist);
+	osi_list_init(&sdp->buf_list);
+	for(i = 0; i < BUF_HASH_SIZE; i++) {
+		osi_list_init(&dir_hash[i]);
+		osi_list_init(&inode_hash[i]);
+		osi_list_init(&sdp->buf_hash[i]);
+	}
+
+	/********************************************************************
+	 ************  next, read in on-disk SB and set constants  **********
+	 ********************************************************************/
+	sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
+	sdp->bsize = sdp->sd_sb.sb_bsize;
+
+	if(sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize){
+		log_crit("GFS superblock is larger than the blocksize!\n");
+		log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n");
+		return -1;
+	}
+
+	compute_constants(sdp);
+	if(read_sb(sdp) < 0){
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
  * init_sbp - initialize superblock pointer
  *
  */
@@ -365,6 +375,9 @@
 		return -1;
 	}
 
+	if (init_system_inodes(sbp))
+		return -1;
+
 	return 0;
 }
 
/cvs/cluster/cluster/gfs2/libgfs2/recovery.c,v  -->  standard output
revision 1.1
--- cluster/gfs2/libgfs2/recovery.c
+++ -	2007-10-19 15:06:57.244036000 +0000
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ *
+ */
+
+/*
+ * NOTE:
+ *
+ * This code was pilfered from the gfs2 kernel and adapted to userland.
+ * If you change this part, you should evaluate whether the upstream kernel
+ * version of recovery.c should be changed as well.  Likewise, if the
+ * upstream version changes, this part should be kept in sync.
+ * 
+ */
+
+#include <errno.h>
+#include <string.h>
+#include "libgfs2.h"
+
+void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk)
+{
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+
+        if (++*blk == jd_blocks)
+                *blk = 0;
+}
+
+int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
+			   struct gfs2_buffer_head **bh)
+{
+	int new = 0;
+	uint64_t dblock;
+	uint32_t extlen;
+
+	block_map(ip, blk, &new, &dblock, &extlen, FALSE, not_updated);
+	if (!dblock)
+		return -EIO;
+
+	*bh = bread(ip->i_sbd, dblock);
+	return 0;
+}
+
+/**
+ * get_log_header - read the log header for a given segment
+ * @ip: the journal incore inode
+ * @blk: the block to look at
+ * @head: the log header to return
+ *
+ * Read the log header for a given segment in a given journal.  Do a few
+ * sanity checks on it.
+ *
+ * Returns: 0 on success,
+ *          1 if the header was invalid or incomplete,
+ *          errno on error
+ */
+
+int get_log_header(struct gfs2_inode *ip, unsigned int blk,
+		   struct gfs2_log_header *head)
+{
+	struct gfs2_buffer_head *bh;
+	struct gfs2_log_header lh, *tmp;
+	uint32_t hash, saved_hash;
+	int error;
+
+	error = gfs2_replay_read_block(ip, blk, &bh);
+	if (error)
+		return error;
+
+	tmp = (struct gfs2_log_header *)bh->b_data;
+	saved_hash = tmp->lh_hash;
+	tmp->lh_hash = 0;
+	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+	tmp->lh_hash = saved_hash;
+	gfs2_log_header_in(&lh, bh->b_data);
+	brelse(bh, not_updated);
+
+	if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
+		return 1;
+
+	*head = lh;
+
+	return 0;
+}
+
+/**
+ * find_good_lh - find a good log header
+ * @ip: the journal incore inode
+ * @blk: the segment to start searching from (advanced as the search proceeds)
+ * @head: the log header to fill in
+ *
+ * Call get_log_header() to get a log header for a segment, but if the
+ * segment is bad, scan forward (wrapping at the end of the journal) until
+ * we find a good one.
+ *
+ * Returns: errno
+ */
+
+int find_good_lh(struct gfs2_inode *ip, unsigned int *blk,
+		 struct gfs2_log_header *head)
+{
+	unsigned int orig_blk = *blk;
+	int error;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+
+	for (;;) {
+		error = get_log_header(ip, *blk, head);
+		if (error <= 0)
+			return error;
+
+		if (++*blk == jd_blocks)
+			*blk = 0;
+
+		if (*blk == orig_blk)
+			return -EIO;
+	}
+}
+
+/**
+ * jhead_scan - make sure we've found the head of the log
+ * @ip: the journal incore inode
+ * @head: this is filled in with the log descriptor of the head
+ *
+ * At this point, seg and lh should be either the head of the log or just
+ * before.  Scan forward until we find the head.
+ *
+ * Returns: errno
+ */
+
+int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+	unsigned int blk = head->lh_blkno;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+	struct gfs2_log_header lh;
+	int error;
+
+	for (;;) {
+		if (++blk == jd_blocks)
+			blk = 0;
+
+		error = get_log_header(ip, blk, &lh);
+		if (error < 0)
+			return error;
+		if (error == 1)
+			continue;
+
+		if (lh.lh_sequence == head->lh_sequence)
+			return -EIO;
+		if (lh.lh_sequence < head->lh_sequence)
+			break;
+
+		*head = lh;
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_find_jhead - find the head of a log
+ * @ip: the journal incore inode
+ * @head: the log descriptor for the head of the log is returned here
+ *
+ * Do a binary search of a journal and find the valid log entry with the
+ * highest sequence number.  (i.e. the log head)
+ *
+ * Returns: errno
+ */
+
+int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+	struct gfs2_log_header lh_1, lh_m;
+	uint32_t blk_1, blk_2, blk_m;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+	int error;
+
+	blk_1 = 0;
+	blk_2 = jd_blocks - 1;
+
+	for (;;) {
+		blk_m = (blk_1 + blk_2) / 2;
+
+		error = find_good_lh(ip, &blk_1, &lh_1);
+		if (error)
+			return error;
+
+		error = find_good_lh(ip, &blk_m, &lh_m);
+		if (error)
+			return error;
+
+		if (blk_1 == blk_m || blk_m == blk_2)
+			break;
+
+		if (lh_1.lh_sequence <= lh_m.lh_sequence)
+			blk_1 = blk_m;
+		else
+			blk_2 = blk_m;
+	}
+
+	error = jhead_scan(ip, &lh_1);
+	if (error)
+		return error;
+
+	*head = lh_1;
+
+	return error;
+}
+
+/**
+ * clean_journal - mark a dirty journal as being clean
+ * @ip: the journal incore inode
+ * @head: the log header at the current head of the journal; the clean
+ *        (unmount) header is written to the block following it
+ *
+ * Returns: errno
+ */
+
+int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+	unsigned int lblock;
+	struct gfs2_log_header *lh;
+	uint32_t hash, extlen;
+	struct gfs2_buffer_head *bh;
+	int new = 0;
+	uint64_t dblock;
+
+	lblock = head->lh_blkno;
+	gfs2_replay_incr_blk(ip, &lblock);
+	block_map(ip, lblock, &new, &dblock, &extlen, 0, not_updated);
+	if (!dblock)
+		return -EIO;
+
+	bh = bread(ip->i_sbd, dblock);
+	memset(bh->b_data, 0, bh->b_size);
+
+	lh = (struct gfs2_log_header *)bh->b_data;
+	memset(lh, 0, sizeof(struct gfs2_log_header));
+	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
+	lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
+	lh->lh_blkno = cpu_to_be32(lblock);
+	hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
+	lh->lh_hash = cpu_to_be32(hash);
+
+	brelse(bh, updated);
+
+	return 0;
+}
+
--- cluster/gfs2/libgfs2/Makefile	2007/08/28 04:35:43	1.11
+++ cluster/gfs2/libgfs2/Makefile	2007/10/19 15:06:56	1.12
@@ -31,6 +31,7 @@
 	gfs2_log.o \
 	misc.o \
 	ondisk.o \
+	recovery.o \
 	size.o \
 	structures.o \
 	super.o \
--- cluster/gfs2/libgfs2/libgfs2.h	2007/10/17 20:16:35	1.20
+++ cluster/gfs2/libgfs2/libgfs2.h	2007/10/19 15:06:56	1.21
@@ -515,6 +515,22 @@
 void lock_for_admin(struct gfs2_sbd *sdp);
 void cleanup_metafs(struct gfs2_sbd *sdp);
 
+/* recovery.c */
+void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk);
+int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
+			   struct gfs2_buffer_head **bh);
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno,
+		      unsigned int where);
+void gfs2_revoke_clean(struct gfs2_sbd *sdp);
+int get_log_header(struct gfs2_inode *ip, unsigned int blk,
+		   struct gfs2_log_header *head);
+int find_good_lh(struct gfs2_inode *ip, unsigned int *blk,
+		 struct gfs2_log_header *head);
+int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head);
+int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head);
+int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head);
+
 /* rgrp.c */
 int gfs2_compute_bitstructs(struct gfs2_sbd *sdp, struct rgrp_list *rgd);
 struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);



^ permalink raw reply	[flat|nested] 2+ messages in thread

* [Cluster-devel] cluster/gfs2 fsck/fs_recovery.c fsck/fs_recove ...
@ 2007-10-19 15:07 rpeterso
  0 siblings, 0 replies; 2+ messages in thread
From: rpeterso @ 2007-10-19 15:07 UTC (permalink / raw)
  To: cluster-devel.redhat.com

CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	rpeterso at sourceware.org	2007-10-19 15:07:59

Modified files:
	gfs2/fsck      : fs_recovery.c fs_recovery.h initialize.c 
	gfs2/libgfs2   : Makefile libgfs2.h 
Added files:
	gfs2/libgfs2   : recovery.c 

Log message:
	Resolves: bz 291551: gfs2_fsck clears journals without asking.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fs_recovery.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4.2.1&r2=1.4.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fs_recovery.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.6.2.3&r2=1.6.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/recovery.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4&r2=1.4.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/libgfs2.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7.2.13&r2=1.7.2.14

--- cluster/gfs2/fsck/fs_recovery.c	2007/02/12 19:29:45	1.4.2.1
+++ cluster/gfs2/fsck/fs_recovery.c	2007/10/19 15:07:58	1.4.2.2
@@ -11,6 +11,7 @@
 *******************************************************************************
 ******************************************************************************/
 
+#include <errno.h>
 #include <inttypes.h>
 #include <linux_endian.h>
 #include <stdlib.h>
@@ -18,36 +19,421 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "fsck.h"
+#include "fs_recovery.h"
 #include "libgfs2.h"
 #include "util.h"
-#include "fs_recovery.h"
 
 #define RANDOM(values) ((values) * (random() / (RAND_MAX + 1.0)))
 
+unsigned int sd_found_jblocks = 0, sd_replayed_jblocks = 0;
+unsigned int sd_found_metablocks = 0, sd_replayed_metablocks = 0;
+unsigned int sd_found_revokes = 0;
+osi_list_t sd_revoke_list;
+unsigned int sd_replay_tail;
+
+struct gfs2_revoke_replay {
+	osi_list_t rr_list;
+	uint64_t rr_blkno;
+	unsigned int rr_where;
+};
+
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+	osi_list_t *tmp, *head = &sd_revoke_list;
+	struct gfs2_revoke_replay *rr;
+	int found = 0;
+
+	osi_list_foreach(tmp, head) {
+		rr = osi_list_entry(tmp, struct gfs2_revoke_replay, rr_list);
+		if (rr->rr_blkno == blkno) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found) {
+		rr->rr_where = where;
+		return 0;
+	}
+
+	rr = malloc(sizeof(struct gfs2_revoke_replay));
+	if (!rr)
+		return -ENOMEM;
+
+	rr->rr_blkno = blkno;
+	rr->rr_where = where;
+	osi_list_add(&rr->rr_list, head);
+	return 1;
+}
+
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+	osi_list_t *tmp;
+	struct gfs2_revoke_replay *rr;
+	int wrap, a, b, revoke;
+	int found = 0;
+
+	osi_list_foreach(tmp, &sd_revoke_list) {
+		rr = osi_list_entry(tmp, struct gfs2_revoke_replay, rr_list);
+		if (rr->rr_blkno == blkno) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		return 0;
+
+	wrap = (rr->rr_where < sd_replay_tail);
+	a = (sd_replay_tail < where);
+	b = (where < rr->rr_where);
+	revoke = (wrap) ? (a || b) : (a && b);
+	return revoke;
+}
+
+void gfs2_revoke_clean(struct gfs2_sbd *sdp)
+{
+	osi_list_t *head = &sd_revoke_list;
+	struct gfs2_revoke_replay *rr;
+
+	while (!osi_list_empty(head)) {
+		rr = osi_list_entry(head->next, struct gfs2_revoke_replay, rr_list);
+		osi_list_del(&rr->rr_list);
+		free(rr);
+	}
+}
+
+static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+				struct gfs2_log_descriptor *ld, __be64 *ptr,
+				int pass)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int blks = be32_to_cpu(ld->ld_data1);
+	struct gfs2_buffer_head *bh_log, *bh_ip;
+	uint64_t blkno;
+	int error = 0;
+	enum update_flags if_modified;
+
+	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
+		return 0;
+
+	gfs2_replay_incr_blk(ip, &start);
+
+	for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+		uint32_t check_magic;
+
+		sd_found_metablocks++;
+
+		blkno = be64_to_cpu(*ptr++);
+		if (gfs2_revoke_check(sdp, blkno, start))
+			continue;
+
+		error = gfs2_replay_read_block(ip, start, &bh_log);
+		if (error)
+			return error;
+
+		bh_ip = bget(sdp, blkno);
+		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+		check_magic = ((struct gfs2_meta_header *)
+			       (bh_ip->b_data))->mh_magic;
+		check_magic = be32_to_cpu(check_magic);
+		if (check_magic != GFS2_MAGIC) {
+			if_modified = not_updated;
+			error = -EIO;
+		} else
+			if_modified = updated;
+
+		brelse(bh_log, not_updated);
+		brelse(bh_ip, if_modified);
+		if (error)
+			break;
+
+		sd_replayed_metablocks++;
+	}
+	return error;
+}
+
+static int revoke_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+				   struct gfs2_log_descriptor *ld, __be64 *ptr,
+				   int pass)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int blks = be32_to_cpu(ld->ld_length);
+	unsigned int revokes = be32_to_cpu(ld->ld_data1);
+	struct gfs2_buffer_head *bh;
+	unsigned int offset;
+	uint64_t blkno;
+	int first = 1;
+	int error;
+
+	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
+		return 0;
+
+	offset = sizeof(struct gfs2_log_descriptor);
+
+	for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+		error = gfs2_replay_read_block(ip, start, &bh);
+		if (error)
+			return error;
+
+		if (!first) {
+			if (gfs2_check_meta(bh, GFS2_METATYPE_LB))
+				continue;
+		}
+		while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
+			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
+			error = gfs2_revoke_add(sdp, blkno, start);
+			if (error < 0)
+				return error;
+			else if (error)
+				sd_found_revokes++;
+
+			if (!--revokes)
+				break;
+			offset += sizeof(uint64_t);
+		}
+
+		brelse(bh, updated);
+		offset = sizeof(struct gfs2_meta_header);
+		first = 0;
+	}
+	return 0;
+}
+
+static int databuf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+				    struct gfs2_log_descriptor *ld,
+				    __be64 *ptr, int pass)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	unsigned int blks = be32_to_cpu(ld->ld_data1);
+	struct gfs2_buffer_head *bh_log, *bh_ip;
+	uint64_t blkno;
+	uint64_t esc;
+	int error = 0;
+
+	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+		return 0;
+
+	gfs2_replay_incr_blk(ip, &start);
+	for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+		blkno = be64_to_cpu(*ptr++);
+		esc = be64_to_cpu(*ptr++);
+
+		sd_found_jblocks++;
+
+		if (gfs2_revoke_check(sdp, blkno, start))
+			continue;
+
+		error = gfs2_replay_read_block(ip, start, &bh_log);
+		if (error)
+			return error;
+
+		bh_ip = bget(sdp, blkno);
+		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+		/* Unescape */
+		if (esc) {
+			__be32 *eptr = (__be32 *)bh_ip->b_data;
+			*eptr = cpu_to_be32(GFS2_MAGIC);
+		}
+
+		brelse(bh_log, not_updated);
+		brelse(bh_ip, updated);
+
+		sd_replayed_jblocks++;
+	}
+	return error;
+}
+
+/**
+ * foreach_descriptor - go through the active part of the log
+ * @ip: the journal incore inode
+ * @start: the first log header in the active region
+ * @end: the last log header (don't process the contents of this entry)
+ *
+ * Call a given function once for every log descriptor in the active
+ * portion of the log.
+ *
+ * Returns: errno
+ */
+
+int foreach_descriptor(struct gfs2_inode *ip, unsigned int start,
+		       unsigned int end, int pass)
+{
+	struct gfs2_buffer_head *bh;
+	struct gfs2_log_descriptor *ld;
+	int error = 0;
+	uint32_t length;
+	__be64 *ptr;
+	unsigned int offset = sizeof(struct gfs2_log_descriptor);
+	offset += sizeof(__be64) - 1;
+	offset &= ~(sizeof(__be64) - 1);
+
+	while (start != end) {
+		uint32_t check_magic;
+
+		error = gfs2_replay_read_block(ip, start, &bh);
+		if (error)
+			return error;
+		check_magic = ((struct gfs2_meta_header *)
+			       (bh->b_data))->mh_magic;
+		check_magic = be32_to_cpu(check_magic);
+		if (check_magic != GFS2_MAGIC) {
+			brelse(bh, updated);
+			return -EIO;
+		}
+		ld = (struct gfs2_log_descriptor *)bh->b_data;
+		length = be32_to_cpu(ld->ld_length);
+
+		if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
+			struct gfs2_log_header lh;
+
+			error = get_log_header(ip, start, &lh);
+			if (!error) {
+				gfs2_replay_incr_blk(ip, &start);
+				brelse(bh, updated);
+				continue;
+			}
+			if (error == 1)
+				error = -EIO;
+			brelse(bh, updated);
+			return error;
+		} else if (gfs2_check_meta(bh, GFS2_METATYPE_LD)) {
+			brelse(bh, updated);
+			return -EIO;
+		}
+		ptr = (__be64 *)(bh->b_data + offset);
+		error = databuf_lo_scan_elements(ip, start, ld, ptr, pass);
+		if (error) {
+			brelse(bh, updated);
+			return error;
+		}
+		error = buf_lo_scan_elements(ip, start, ld, ptr, pass);
+		if (error) {
+			brelse(bh, updated);
+			return error;
+		}
+		error = revoke_lo_scan_elements(ip, start, ld, ptr, pass);
+		if (error) {
+			brelse(bh, updated);
+			return error;
+		}
+
+		while (length--)
+			gfs2_replay_incr_blk(ip, &start);
+
+		brelse(bh, updated);
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_recover_journal - recover a given journal
+ * @ip: the journal incore inode
+ *
+ * Acquire the journal's lock, check to see if the journal is clean, and
+ * do recovery if necessary.
+ *
+ * Returns: errno
+ */
+
+int gfs2_recover_journal(struct gfs2_inode *ip, int j)
+{
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct gfs2_log_header head;
+	unsigned int pass;
+	int error;
+
+	log_info("jid=%u: Looking at journal...\n", j);
+
+	osi_list_init(&sd_revoke_list);
+	error = gfs2_find_jhead(ip, &head);
+	if (error)
+		goto out;
+
+	if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) {
+		log_info("jid=%u: Journal is clean.\n", j);
+		return 0;
+	}
+	if (query(&opts, "\nJournal #%d (\"journal%d\") is dirty.  Okay to replay it? (y/n)",
+		    j+1, j)) {
+		log_info("jid=%u: Replaying journal...\n", j);
+
+		sd_found_jblocks = sd_replayed_jblocks = 0;
+		sd_found_metablocks = sd_replayed_metablocks = 0;
+		sd_found_revokes = 0;
+		sd_replay_tail = head.lh_tail;
+		for (pass = 0; pass < 2; pass++) {
+			error = foreach_descriptor(ip, head.lh_tail,
+						   head.lh_blkno, pass);
+			if (error)
+				goto out;
+		}
+		log_info("jid=%u: Found %u revoke tags\n", j,
+			 sd_found_revokes);
+		gfs2_revoke_clean(sdp);
+		error = clean_journal(ip, &head);
+		if (error)
+			goto out;
+		log_err("jid=%u: Replayed %u of %u journaled data blocks\n",
+			j, sd_replayed_jblocks, sd_found_jblocks);
+		log_err("jid=%u: Replayed %u of %u metadata blocks\n",
+			j, sd_replayed_metablocks, sd_found_metablocks);
+	} else {
+		if (query(&opts, "Do you want to clear the dirty journal instead? (y/n)")) {
+			write_journal(sdp, sdp->md.journal[j], j,
+				      sdp->md.journal[j]->i_di.di_size /
+				      sdp->sd_sb.sb_bsize);
+			
+		} else
+			log_err("jid=%u: Dirty journal not replayed or cleared.\n", j);
+	}
+
+out:
+	log_info("jid=%u: %s\n", j, (error) ? "Failed" : "Done");
+	return error;
+}
+
 /*
- * reconstruct_journals - write fresh journals
+ * replay_journals - replay the journals
  * sdp: the super block
  *
- * FIXME: it would be nice to get this to actually replay the journals
- * - there should be a flag to the fsck to enable/disable this
- * feature, and the fsck should probably fall back to clearing the
- * journal if an inconsitancy is found, but only for the bad journal
+ * There should be a flag to the fsck to enable/disable this
+ * feature.  The fsck falls back to clearing the journal if an 
+ * inconsistency is found, but only for the bad journal.
  *
  * Returns: 0 on success, -1 on failure
  */
-int reconstruct_journals(struct gfs2_sbd *sdp){
+int replay_journals(struct gfs2_sbd *sdp){
 	int i;
 
-	log_notice("Clearing journals (this may take a while)");
-	for(i=0; i < sdp->md.journals; i++) {
-		/* Journal replay seems to have slowed down quite a bit in
-		 * the gfs2_fsck */
+	log_notice("Recovering journals (this may take a while)");
+
+	/* Get master dinode */
+	sdp->master_dir = gfs2_load_inode(sdp,
+					  sdp->sd_sb.sb_master_dir.no_addr);
+	gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
+
+	/* read in the journal index data */
+	if (ji_update(sdp)){
+		log_err("Unable to read in jindex inode.\n");
+		return -1;
+	}
+
+	for(i = 0; i < sdp->md.journals; i++) {
 		if((i % 2) == 0)
 			log_at_notice(".");
-		write_journal(sdp, sdp->md.journal[i], i,
-					  sdp->md.journal[i]->i_di.di_size / sdp->sd_sb.sb_bsize);
-		/* Can't use d_di.di_blocks because that also includes metadata. */
+		gfs2_recover_journal(sdp->md.journal[i], i);
+		inode_put(sdp->md.journal[i],
+			  (opts.no ? not_updated : updated));
 	}
-	log_notice("\nJournals cleared.\n");
+	log_notice("\nJournal recovery complete.\n");
+	inode_put(sdp->master_dir, not_updated);
+	inode_put(sdp->md.jiinode, not_updated);
+	/* Sync the buffers to disk so we get a fresh start. */
+	bsync(sdp);
 	return 0;
 }
--- cluster/gfs2/fsck/fs_recovery.h	2006/06/06 14:49:31	1.2
+++ cluster/gfs2/fsck/fs_recovery.h	2007/10/19 15:07:58	1.2.2.1
@@ -16,7 +16,7 @@
 
 #include "libgfs2.h"
 
-int reconstruct_journals(struct gfs2_sbd *sdp);
+int replay_journals(struct gfs2_sbd *sdp);
 
 #endif /* __FS_RECOVERY_H__ */
 
--- cluster/gfs2/fsck/initialize.c	2007/06/26 01:51:44	1.6.2.3
+++ cluster/gfs2/fsck/initialize.c	2007/10/19 15:07:58	1.6.2.4
@@ -42,8 +42,7 @@
 int init_journals(struct gfs2_sbd *sbp)
 {
 	if(!opts.no) {
-		/* ATTENTION -- Journal replay is not supported */
-		if(reconstruct_journals(sbp))
+		if(replay_journals(sbp))
 			return 1;
 	}
 	return 0;
@@ -192,51 +191,18 @@
 }
 
 /**
- * fill_super_block
- * @sdp:
+ * init_system_inodes
  *
  * Returns: 0 on success, -1 on failure
  */
-static int fill_super_block(struct gfs2_sbd *sdp)
+static int init_system_inodes(struct gfs2_sbd *sdp)
 {
-	uint32_t i;
-	char *buf;
 	uint64_t inumbuf;
+	char *buf;
 	struct gfs2_statfs_change sc;
 	int rgcount;
-	uint64_t addl_mem_needed;
 	enum rgindex_trust_level trust_lvl;
-
-	sync();
-
-	/********************************************************************
-	 ***************** First, initialize all lists **********************
-	 ********************************************************************/
-	log_info("Initializing lists...\n");
-	osi_list_init(&sdp->rglist);
-	osi_list_init(&sdp->buf_list);
-	for(i = 0; i < BUF_HASH_SIZE; i++) {
-		osi_list_init(&dir_hash[i]);
-		osi_list_init(&inode_hash[i]);
-		osi_list_init(&sdp->buf_hash[i]);
-	}
-
-	/********************************************************************
-	 ************  next, read in on-disk SB and set constants  **********
-	 ********************************************************************/
-	sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
-	sdp->bsize = sdp->sd_sb.sb_bsize;
-
-	if(sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize){
-		log_crit("GFS superblock is larger than the blocksize!\n");
-		log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n");
-		return -1;
-	}
-
-	compute_constants(sdp);
-	if(read_sb(sdp) < 0){
-		return -1;
-	}
+	uint64_t addl_mem_needed;
 
 	/*******************************************************************
 	 ******************  Initialize important inodes  ******************
@@ -298,9 +264,10 @@
 	}
 	if (trust_lvl > distrust) {
 		log_err("RG recovery impossible; I can't fix this file system.\n");
-		goto fail;
+		return -1;
 	}
 	log_info("%u resource groups found.\n", rgcount);
+
 	/*******************************************************************
 	 *******  Now, set boundary fields in the super block  *************
 	 *******************************************************************/
@@ -318,7 +285,6 @@
 		goto fail;
 	}
 	return 0;
-
  fail:
 	empty_super_block(sdp);
 
@@ -326,6 +292,50 @@
 }
 
 /**
+ * fill_super_block
+ * @sdp:
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int fill_super_block(struct gfs2_sbd *sdp)
+{
+	uint32_t i;
+
+	sync();
+
+	/********************************************************************
+	 ***************** First, initialize all lists **********************
+	 ********************************************************************/
+	log_info("Initializing lists...\n");
+	osi_list_init(&sdp->rglist);
+	osi_list_init(&sdp->buf_list);
+	for(i = 0; i < BUF_HASH_SIZE; i++) {
+		osi_list_init(&dir_hash[i]);
+		osi_list_init(&inode_hash[i]);
+		osi_list_init(&sdp->buf_hash[i]);
+	}
+
+	/********************************************************************
+	 ************  next, read in on-disk SB and set constants  **********
+	 ********************************************************************/
+	sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
+	sdp->bsize = sdp->sd_sb.sb_bsize;
+
+	if(sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize){
+		log_crit("GFS superblock is larger than the blocksize!\n");
+		log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n");
+		return -1;
+	}
+
+	compute_constants(sdp);
+	if(read_sb(sdp) < 0){
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
  * init_sbp - initialize superblock pointer
  *
  */
@@ -365,6 +375,9 @@
 		return -1;
 	}
 
+	if (init_system_inodes(sbp))
+		return -1;
+
 	return 0;
 }
 
/cvs/cluster/cluster/gfs2/libgfs2/recovery.c,v  -->  standard output
revision 1.1.2.1
--- cluster/gfs2/libgfs2/recovery.c
+++ -	2007-10-19 15:07:59.613200000 +0000
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ *
+ */
+
+/*
+ * NOTE:
+ *
+ * This code was pilfered from the gfs2 kernel and adapted to userland.
+ * If you change this part, you should evaluate whether the upstream kernel
+ * version of recovery.c should be changed as well.  Likewise, if the
+ * upstream version changes, this part should be kept in sync.
+ * 
+ */
+
+#include <errno.h>
+#include <string.h>
+#include "libgfs2.h"
+
+void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk)
+{
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+
+        if (++*blk == jd_blocks)
+                *blk = 0;
+}
+
+int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
+			   struct gfs2_buffer_head **bh)
+{
+	int new = 0;
+	uint64_t dblock;
+	uint32_t extlen;
+
+	block_map(ip, blk, &new, &dblock, &extlen, FALSE, not_updated);
+	if (!dblock)
+		return -EIO;
+
+	*bh = bread(ip->i_sbd, dblock);
+	return 0;
+}
+
+/**
+ * get_log_header - read the log header for a given segment
+ * @ip: the journal incore inode
+ * @blk: the block to look at
+ * @lh: the log header to return
+ *
+ * Read the log header for a given segment in a given journal.  Do a few
+ * sanity checks on it.
+ *
+ * Returns: 0 on success,
+ *          1 if the header was invalid or incomplete,
+ *          errno on error
+ */
+
+int get_log_header(struct gfs2_inode *ip, unsigned int blk,
+		   struct gfs2_log_header *head)
+{
+	struct gfs2_buffer_head *bh;
+	struct gfs2_log_header lh, *tmp;
+	uint32_t hash, saved_hash;
+	int error;
+
+	error = gfs2_replay_read_block(ip, blk, &bh);
+	if (error)
+		return error;
+
+	tmp = (struct gfs2_log_header *)bh->b_data;
+	saved_hash = tmp->lh_hash;
+	tmp->lh_hash = 0;
+	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+	tmp->lh_hash = saved_hash;
+	gfs2_log_header_in(&lh, bh->b_data);
+	brelse(bh, not_updated);
+
+	if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
+		return 1;
+
+	*head = lh;
+
+	return 0;
+}
+
+/**
+ * find_good_lh - find a good log header
+ * @ip: the journal incore inode
+ * @blk: the segment to start searching from
+ * @lh: the log header to fill in
+ * @forward: if true search forward in the log, else search backward
+ *
+ * Call get_log_header() to get a log header for a segment, but if the
+ * segment is bad, either scan forward or backward until we find a good one.
+ *
+ * Returns: errno
+ */
+
+int find_good_lh(struct gfs2_inode *ip, unsigned int *blk,
+		 struct gfs2_log_header *head)
+{
+	unsigned int orig_blk = *blk;
+	int error;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+
+	for (;;) {
+		error = get_log_header(ip, *blk, head);
+		if (error <= 0)
+			return error;
+
+		if (++*blk == jd_blocks)
+			*blk = 0;
+
+		if (*blk == orig_blk)
+			return -EIO;
+	}
+}
+
+/**
+ * jhead_scan - make sure we've found the head of the log
+ * @jd: the journal
+ * @head: this is filled in with the log descriptor of the head
+ *
+ * At this point, seg and lh should be either the head of the log or just
+ * before.  Scan forward until we find the head.
+ *
+ * Returns: errno
+ */
+
+int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+	unsigned int blk = head->lh_blkno;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+	struct gfs2_log_header lh;
+	int error;
+
+	for (;;) {
+		if (++blk == jd_blocks)
+			blk = 0;
+
+		error = get_log_header(ip, blk, &lh);
+		if (error < 0)
+			return error;
+		if (error == 1)
+			continue;
+
+		if (lh.lh_sequence == head->lh_sequence)
+			return -EIO;
+		if (lh.lh_sequence < head->lh_sequence)
+			break;
+
+		*head = lh;
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_find_jhead - find the head of a log
+ * @jd: the journal
+ * @head: the log descriptor for the head of the log is returned here
+ *
+ * Do a binary search of a journal and find the valid log entry with the
+ * highest sequence number.  (i.e. the log head)
+ *
+ * Returns: errno
+ */
+
+int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+	struct gfs2_log_header lh_1, lh_m;
+	uint32_t blk_1, blk_2, blk_m;
+	uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+	int error;
+
+	blk_1 = 0;
+	blk_2 = jd_blocks - 1;
+
+	for (;;) {
+		blk_m = (blk_1 + blk_2) / 2;
+
+		error = find_good_lh(ip, &blk_1, &lh_1);
+		if (error)
+			return error;
+
+		error = find_good_lh(ip, &blk_m, &lh_m);
+		if (error)
+			return error;
+
+		if (blk_1 == blk_m || blk_m == blk_2)
+			break;
+
+		if (lh_1.lh_sequence <= lh_m.lh_sequence)
+			blk_1 = blk_m;
+		else
+			blk_2 = blk_m;
+	}
+
+	error = jhead_scan(ip, &lh_1);
+	if (error)
+		return error;
+
+	*head = lh_1;
+
+	return error;
+}
+
+/**
+ * clean_journal - mark a dirty journal as being clean
+ * @sdp: the filesystem
+ * @jd: the journal
+ * @head: the head journal to start from
+ *
+ * Returns: errno
+ */
+
+int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+	unsigned int lblock;
+	struct gfs2_log_header *lh;
+	uint32_t hash, extlen;
+	struct gfs2_buffer_head *bh;
+	int new = 0;
+	uint64_t dblock;
+
+	lblock = head->lh_blkno;
+	gfs2_replay_incr_blk(ip, &lblock);
+	block_map(ip, lblock, &new, &dblock, &extlen, 0, not_updated);
+	if (!dblock)
+		return -EIO;
+
+	bh = bread(ip->i_sbd, dblock);
+	memset(bh->b_data, 0, bh->b_size);
+
+	lh = (struct gfs2_log_header *)bh->b_data;
+	memset(lh, 0, sizeof(struct gfs2_log_header));
+	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+	lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
+	lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
+	lh->lh_blkno = cpu_to_be32(lblock);
+	hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
+	lh->lh_hash = cpu_to_be32(hash);
+
+	brelse(bh, updated);
+
+	return 0;
+}
+
--- cluster/gfs2/libgfs2/Makefile	2006/08/11 15:18:13	1.4
+++ cluster/gfs2/libgfs2/Makefile	2007/10/19 15:07:59	1.4.2.1
@@ -27,7 +27,7 @@
        -D_GNU_SOURCE -DGFS2_RELEASE_NAME=\"2\" ${INCLUDEPATH}
 
 H=gfs2_disk_hash.h libgfs2.h linux_endian.h ondisk.h osi_list.h
-C=bitmap.c block_list.c buf.c device_geometry.c fs_bits.c fs_geometry.c fs_ops.c locking.c gfs2_log.c misc.c ondisk.c size.c structures.c super.c rgrp.c
+C=bitmap.c block_list.c buf.c device_geometry.c fs_bits.c fs_geometry.c fs_ops.c locking.c gfs2_log.c misc.c ondisk.c recovery.c size.c structures.c super.c rgrp.c
 O=$(subst .c,.o,${C})
 
 all: ${LIBGFS2}
--- cluster/gfs2/libgfs2/libgfs2.h	2007/10/17 20:16:44	1.7.2.13
+++ cluster/gfs2/libgfs2/libgfs2.h	2007/10/19 15:07:59	1.7.2.14
@@ -515,6 +515,22 @@
 void lock_for_admin(struct gfs2_sbd *sdp);
 void cleanup_metafs(struct gfs2_sbd *sdp);
 
+/* recovery.c */
+void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk);
+int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
+			   struct gfs2_buffer_head **bh);
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno,
+		      unsigned int where);
+void gfs2_revoke_clean(struct gfs2_sbd *sdp);
+int get_log_header(struct gfs2_inode *ip, unsigned int blk,
+		   struct gfs2_log_header *head);
+int find_good_lh(struct gfs2_inode *ip, unsigned int *blk,
+		 struct gfs2_log_header *head);
+int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head);
+int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head);
+int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head);
+
 /* rgrp.c */
 int gfs2_compute_bitstructs(struct gfs2_sbd *sdp, struct rgrp_list *rgd);
 struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-10-19 15:07 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2007-10-19 15:07 [Cluster-devel] cluster/gfs2 fsck/fs_recovery.c fsck/fs_recove rpeterso
  -- strict thread matches above, loose matches on Subject: below --
2007-10-19 15:06 rpeterso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).