reiserfs-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch] reiser4progs: Handle unprepped items by fsck
@ 2010-05-28 11:10 Edward Shishkin
       [not found] ` <4C7BD349.5070005@tls-tautenburg.de>
  0 siblings, 1 reply; 2+ messages in thread
From: Edward Shishkin @ 2010-05-28 11:10 UTC (permalink / raw)
  To: Reiserfs mailing list; +Cc: Bringfried Stecklum

[-- Attachment #1: Type: text/plain, Size: 314 bytes --]

Hello everyone. This patch fixes the problem (segmentation fault)
in fsck reported by Bringfried Stecklum and Jonáš Vidra:

http://marc.info/?l=reiserfs-devel&m=126936575206572&w=2

However, the source of the problem in the kernel (data loss by
cryptcompress file plugin) is not yet eliminated.

Thanks,
Edward.

[-- Attachment #2: reiser4progs-fsck-handle-unprepped-cluster.patch --]
[-- Type: text/x-patch, Size: 12273 bytes --]

. Handle orphan "unprepped" clusters;
. Don't repair the value of cluster shift in ctail item
  in all circumstances. If cluster shift is wrong, then
  remove the whole cluster.

Signed-off-by: Edward Shishkin <edward.shishkin@gmail.com>
---
 plugin/object/ccreg40/ccreg40.c        |   12 +-
 plugin/object/ccreg40/ccreg40.h        |    9 +
 plugin/object/ccreg40/ccreg40_repair.c |  196 +++++++++++++++++----------------
 3 files changed, 121 insertions(+), 96 deletions(-)

--- reiser4progs-1.0.7.orig/plugin/object/ccreg40/ccreg40.h
+++ reiser4progs-1.0.7/plugin/object/ccreg40/ccreg40.h
@@ -11,6 +11,12 @@
 #include "reiser4/plugin.h"
 #include "plugin/object/obj40/obj40.h"
 
+#define MIN_VALID_CLUSTER_SHIFT (0xc)
+#define MAX_VALID_CLUSTER_SHIFT (0x10)
+#define UNPREPPED_CLUSTER_SHIFT (0xff)
+
+#define ccreg40_cloff(off, size) ((off) & ((size) - 1))
+
 #define ccreg40_clstart(off, size) ((off) & ~((size) - 1))
 
 #define ccreg40_clnext(off, size) (ccreg40_clstart(off, size) + (size))
@@ -22,7 +28,8 @@ extern errno_t ccreg40_check_struct(reis
 				    place_func_t func,
 				    void *data, uint8_t mode);
 
-extern uint32_t ccreg40_get_cluster_size(reiser4_place_t *place);
+extern errno_t ccreg40_get_cluster_shift(reiser4_place_t *place,
+					 uint8_t *shift);
 
 extern errno_t ccreg40_set_cluster_size(reiser4_place_t *place, 
 					uint32_t cluster);
--- reiser4progs-1.0.7.orig/plugin/object/ccreg40/ccreg40_repair.c
+++ reiser4progs-1.0.7/plugin/object/ccreg40/ccreg40_repair.c
@@ -25,80 +25,81 @@ typedef struct ccreg40_hint {
 	uint64_t seek;       /* expected offset for lookup */
 	uint64_t found;      /* what has been really found */
 	uint64_t maxreal;    /* maximal (key) offset in the found item */
-
-	/* Bytes all clusters takes on disk. */
-	uint32_t bytes;
 	uint64_t sdsize;
+	/* The following two fields are to delete wrecks of a disk cluster */
+	uint64_t cut_from;   /* offset to cut from */
+	uint32_t cut_size;   /* how many bytes to cut */
+	uint32_t clsize;
+	/* Total number of units in a disk cluster */
+	uint32_t bytes;
 	uint32_t adler;
 	uint8_t mode;
-
-	/* If a hole is detected. */
+	/* Indicates if cluster has a hole in key space */
 	uint8_t hole;
-	
 	/* The cluster size & the buffer for the data. */
 	uint8_t data[64 * 1024];
-	uint64_t clstart;
-	uint32_t clsize;
 } ccreg40_hint_t;
 
 static errno_t ccreg40_check_item(reiser4_object_t *cc, void *data) {
 	ccreg40_hint_t *hint = (ccreg40_hint_t *)data;
-	uint32_t clsize;
-	errno_t res = 0;
-		
+	uint8_t shift;
+	errno_t result;
+
 	hint->found = objcall(&cc->body.key, get_offset);
 	hint->maxreal = obj40_place_maxreal(&cc->body);
 	
 	aal_assert("vpf-1871", hint->maxreal >= hint->found);
 	aal_assert("vpf-1872", hint->seek <= hint->found);
 	
-	/* Check the item plugin. */
+	/* check item plugin */
 	if (cc->body.plug != reiser4_psctail(cc)) {
-		fsck_mess("The file [%s] (%s), node [%llu], item "
-			  "[%u]: item of the illegal plugin (%s) "
-			  "with the key of this object found.%s",
-			  print_inode(obj40_core, &cc->info.object),
-			  reiser4_psobj(cc)->p.label, place_blknr(&cc->body),
-			  cc->body.pos.item, cc->body.plug->p.label, 
-			  hint->mode == RM_BUILD ? " Removed." : "");
-		
-		return hint->mode == RM_BUILD ? -ESTRUCT : RE_FATAL;
+		fsck_mess("Found item of illegal plugin (%s) "
+			  "with the key of this object ",
+			  cc->body.plug->p.label);
+		goto fatal;
 	}
-	
-	/* Check the shift. */
-	clsize = ccreg40_get_cluster_size(&cc->body);
-	
-	if (hint->clsize != clsize) {
-		fsck_mess("The file [%s] (%s), node [%llu], item [%u]: item "
-			  "of the wrong cluster size (%d) found, Should be "
-			  "(%d).%s", print_inode(obj40_core, &cc->info.object),
-			  reiser4_psobj(cc)->p.label, place_blknr(&cc->body),
-			  cc->body.pos.item, clsize, hint->clsize, 
-			  hint->mode != RM_CHECK ? " Fixed." : "");
-
-		/* Just fix the shift if wrong. */
-		if (hint->mode == RM_CHECK) {
-			res |= RE_FIXABLE;
-		} else {
-			ccreg40_set_cluster_size(&cc->body, hint->clsize);
-		}
+	/* check cluster shift. */
+	result = ccreg40_get_cluster_shift(&cc->body, &shift);
+	if (result < 0)
+		return result;
+	if (shift == UNPREPPED_CLUSTER_SHIFT) {
+		fsck_mess("Found unprepped disk cluster ");
+		goto fatal;
+	}
+	if (shift < MIN_VALID_CLUSTER_SHIFT ||
+	    shift > MAX_VALID_CLUSTER_SHIFT ||
+	    shift != aal_log2(hint->clsize)) {
+		fsck_mess("Found item with wrong cluster shift %d, "
+			  "should be %d", shift, aal_log2(hint->clsize));
+		goto fatal;
+	}
+	if (hint->seek &&
+	    !ccreg40_clsame(hint->prev_found, hint->found, hint->clsize) &&
+	    ccreg40_cloff(hint->found, hint->clsize) != 0){
+		fsck_mess("Found item of lenght (%llu) which has wrong "
+			  "offset %llu, should be a multiple of logical "
+			  "cluster size ",
+			  hint->maxreal - hint->found + 1, hint->found);
+		goto fatal;
 	}
-	
 	if (!ccreg40_clsame(hint->found, hint->maxreal, hint->clsize)) {
-		/* The item covers the cluster border. Delete it. */
-		fsck_mess("The file [%s] (%s), node [%llu], item [%u]: "
-			  "item of the lenght (%llu) found, it cannot "
-			  "contain data of 2 clusters.%s", 
-			  print_inode(obj40_core, &cc->info.object),
-			  reiser4_psobj(cc)->p.label, 
-			  place_blknr(&cc->body), cc->body.pos.item,
-			  hint->maxreal - hint->found + 1, 
-			  hint->mode == RM_BUILD ? " Removed." : "");
-		
-		return hint->mode == RM_BUILD ? -ESTRUCT : RE_FATAL;
+		fsck_mess("Found item of length %llu and offset %llu, "
+			  "which contains logical cluster boundary ",
+			  hint->maxreal - hint->found + 1, hint->found);
+		goto fatal;
 	}
-	
-	return res;
+	return 0;
+ fatal:
+	fsck_mess("(file [%s] (%s), node [%llu], item [%u]). %s",
+		  print_inode(obj40_core, &cc->info.object),
+		  reiser4_psobj(cc)->p.label,
+		  place_blknr(&cc->body),
+		  cc->body.pos.item,
+		  hint->mode == RM_BUILD ? " Removed." : "");
+	hint->cut_from = hint->found;
+	hint->cut_size = hint->maxreal - hint->found + 1;
+
+	return hint->mode == RM_BUILD ? -ESTRUCT : RE_FATAL;
 }
 
 static int64_t ccreg40_read_item(reiser4_place_t *place, ccreg40_hint_t *hint) {
@@ -133,23 +134,21 @@ static errno_t ccreg40_check_crc(ccreg40
 	return adler == disk ? 0 : RE_FATAL;
 }
 
+/*
+ * Read a found item to the stream.
+ * Check a checksum, if the previous iteration completed a disk cluster.
+ */
 static errno_t ccreg40_check_cluster(reiser4_object_t *cc, 
 				     ccreg40_hint_t *hint,
 				     uint8_t mode) 
 {
-	errno_t result;
+	errno_t result = 0;
 	errno_t res;
-	int start;
-	uint32_t lcl_size;
-
-	result = 0;
-	/* true, if the found item is the
-	   first one in the disk cluster */
-	start = (ccreg40_clstart(hint->found, hint->clsize) == hint->found);
+	uint32_t lcl_bytes;
 
 	if ((cc->body.plug == NULL) ||
-	    (hint->seek && start) ||
-	    !ccreg40_clsame(hint->prev_found, hint->found, hint->clsize)) {
+	    (hint->seek != 0 &&
+	     !ccreg40_clsame(hint->prev_found, hint->found, hint->clsize))) {
 		/* Cluster is over */
 
 		if (hint->prev_found > hint->sdsize) {
@@ -158,31 +157,32 @@ static errno_t ccreg40_check_cluster(rei
 			result = RE_FATAL;
 
 			/* set offset of the cluster to be deleted. */
-			hint->clstart = ccreg40_clstart(hint->prev_found,
-							hint->clsize);
+			hint->cut_from = ccreg40_clstart(hint->prev_found,
+							 hint->clsize);
 			fsck_mess("The file [%s] (%s): the cluster at [%llu] "
 				  "offset %u bytes long is orphan.%s",
 				  print_inode(obj40_core, &cc->info.object),
-				  reiser4_psobj(cc)->p.label, hint->clstart,
+				  reiser4_psobj(cc)->p.label, hint->cut_from,
 				  hint->clsize, hint->mode != RM_CHECK ?
 				  " Removed." : "");
 		}
 		/**
-		 * If there still is a hole in the logical cluster,
-		 * then check a checksum (no hole means no checksum)
+		 * If there still is a hole in the keyspace, then
+		 * check a checksum (no such hole means that no
+		 * checksum was appended)
 		 */
-		else if (hint->hole && ccreg40_check_crc(hint)) {
+		else if (hint->bytes && hint->hole && ccreg40_check_crc(hint)) {
 			/* wrong checksum */
 			hint->bytes = 0;
 			result = RE_FATAL;
 
 			/* set offset of the cluster to be deleted. */
-			hint->clstart = ccreg40_clstart(hint->prev_found,
-							hint->clsize);
+			hint->cut_from = ccreg40_clstart(hint->prev_found,
+							 hint->clsize);
 			fsck_mess("The file [%s] (%s): the cluster at [%llu] "
 				  "offset %u bytes long is corrupted.%s",
 				  print_inode(obj40_core, &cc->info.object),
-				  reiser4_psobj(cc)->p.label, hint->clstart,
+				  reiser4_psobj(cc)->p.label, hint->cut_from,
 				  hint->clsize, hint->mode != RM_CHECK ? 
 				  " Removed." : "");
 		}
@@ -192,16 +192,13 @@ static errno_t ccreg40_check_cluster(rei
 		hint->adler = 0;
 
 		if (!cc->body.plug)
+			/* finish with this object */
 			return result;
 
 		/* Update the cluster data. */
 		aal_memset(hint->data, 0, hint->clsize);
 	}
-	
 	/* An item found. */
-	aal_assert("edward-1",
-		   ccreg40_clstart(hint->found, hint->clsize) ==
-		   ccreg40_clstart(hint->maxreal, hint->clsize));
 
 	if ((res = ccreg40_read_item(&cc->body, hint)))
 		return res;
@@ -209,16 +206,19 @@ static errno_t ccreg40_check_cluster(rei
 	hint->prev_found = hint->found;
 	hint->bytes += objcall(&cc->body, object->bytes);
 	/**
-	 * Calculate a size of logical cluster
-	 * and figure out, if there is a hole
-	 * for the found items in the logical cluster.
+	 * Calculate actual number of file's bytes in the
+	 * logical cluster and figure out, if corresponding
+	 * disk cluster has a hole in key space.
+	 *
+	 * We need this to figure out if disk cluster contains
+	 * appended checksum.
 	 */
-	lcl_size = 0;
+	lcl_bytes = 0;
 	if (ccreg40_clsame(hint->found, hint->sdsize - 1, hint->clsize))
-		lcl_size = hint->sdsize % hint->clsize;
-	if (lcl_size == 0)
-		lcl_size = hint->clsize;
-	hint->hole = (hint->bytes != lcl_size);
+		lcl_bytes = hint->sdsize % hint->clsize;
+	if (lcl_bytes == 0)
+		lcl_bytes = hint->clsize;
+	hint->hole = (hint->bytes != lcl_bytes);
 
 	return result;
 }
@@ -256,8 +256,23 @@ errno_t ccreg40_check_struct(reiser4_obj
 		/* Get next item. */
 		lookup = obj40_check_item(cc, ccreg40_check_item, NULL, &hint);
 		
-		if (repair_error_fatal(lookup))
-			return lookup;
+		if (repair_error_fatal(lookup)) {
+			    if ((lookup & RE_FATAL) && mode == RM_BUILD) {
+				    /*
+				     * Delete item found in this iteration
+				     */
+				    res &= ~RE_FATAL;
+				    res |= obj40_cut(cc, &trans, hint.cut_from,
+						     hint.cut_size, NULL, NULL);
+				    if (res < 0)
+					    return res;
+				    hint.seek = hint.maxreal + 1;
+				    obj40_seek(cc, hint.seek);
+				    continue;
+			    }
+			    else
+				    return lookup;
+		}
 		else if (lookup == ABSENT)
 			cc->body.plug = NULL;
 		
@@ -267,16 +282,17 @@ errno_t ccreg40_check_struct(reiser4_obj
 				"registered yet.", print_key(obj40_core, 
 							     &info->object));
 		}
-		
+		/* check cluster found in previous iteration */
 		if ((res |= ccreg40_check_cluster(cc, &hint, mode)) < 0)
 			return res;
 
 		if (res & RE_FATAL) {
-			/* Delete the whole cluster. */
-			
+			/*
+			 * Delete cluster found in previous iteration
+			 */
 			if (mode == RM_BUILD) {
 				res &= ~RE_FATAL;
-				res |= obj40_cut(cc, &trans, hint.clstart,
+				res |= obj40_cut(cc, &trans, hint.cut_from,
 						 hint.clsize, NULL, NULL);
 				if (res < 0)
 					return res;
--- reiser4progs-1.0.7.orig/plugin/object/ccreg40/ccreg40.c
+++ reiser4progs-1.0.7/plugin/object/ccreg40/ccreg40.c
@@ -8,7 +8,7 @@
 #include "ccreg40.h"
 #include "plugin/object/obj40/obj40_repair.h"
 
-uint32_t ccreg40_get_cluster_size(reiser4_place_t *place) {
+errno_t ccreg40_get_cluster_shift(reiser4_place_t *place, uint8_t *shift) {
 	trans_hint_t hint;
 	ctail_hint_t chint;
 	
@@ -17,10 +17,12 @@ uint32_t ccreg40_get_cluster_size(reiser
 	hint.specific = &chint;
 	hint.count = 1;
 
-	if (objcall(place, object->fetch_units, &hint) != 1)
-		return MAX_UINT32;
-	
-	return 1 << chint.shift;
+	if (objcall(place, object->fetch_units, &hint) != 1) {
+	        aal_error("Can not extract cluster shift.");
+		return -EINVAL;
+	}
+	*shift = chint.shift;
+	return 0;
 }
 
 errno_t ccreg40_set_cluster_size(reiser4_place_t *place, uint32_t cluster) {

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: search_one_bitmap_forward oops
       [not found]                         ` <4D8B5646.8020609@tls-tautenburg.de>
@ 2011-03-24 21:42                           ` Edward Shishkin
  0 siblings, 0 replies; 2+ messages in thread
From: Edward Shishkin @ 2011-03-24 21:42 UTC (permalink / raw)
  To: Bringfried Stecklum, Serkan Kaba, ReiserFS Development List

[-- Attachment #1: Type: text/plain, Size: 1656 bytes --]

On 03/24/2011 03:33 PM, Bringfried Stecklum wrote:
> Edward Shishkin wrote:
>> On 03/24/2011 12:56 AM, Bringfried Stecklum wrote:
>>> Edward Shishkin wrote:
>>>> On 03/24/2011 12:34 AM, Bringfried Stecklum wrote:
>>>>> Edward Shishkin wrote:
>>>>>> On 03/23/2011 11:41 PM, Bringfried Stecklum wrote:
>>>>>>> Dear Edward,
>>>>>>>
>>>>>>> thanks for the reply. Since I contacted you again concerning this
>>>>>>> bug,
>>>>>>> my system is going mad because of it. I didn't change anything
>>>>>>> concerning the kernel but even just after login and idling, the bug
>>>>>>> happens. I'd like to stay with Reiser4. But right now it is getting
>>>>>>> really difficult.
>>>>>>
>>>>>> Do you use the mount option "dont_load_bitmap"?
>>>>>
>>>>> Yes, I used it so far for faster booting but switched it off now.
>>>>
>>>>
>>>> Please, don't use this mount option for now: it doesn't
>>>> work properly. I'll try to reproduce and fix it at leisure.
>>>
>>> Looks like, up'n running now for half an hour. Normally, I prefer to
>>> hibernate between sessions and thus boot rarely, the longer boot time is
>>> no obstacle. Good luck with fixing the issue.
>>
>>
>> It looks like missing bitmap is not uploaded somewhere..
>
> But it used to work quite some time...

It seems I have found the bug.

The problem arises when we get to the /* race: ...*/ point (see
the attached patch). This is not an error path, nevertheless we
release all bitmaps and set zeroes to bnode fields. Welcome to
the Oops...

Everyone, who experienced problems with on-demand bitmap loading
in reiser4 (the mount option "dont_load_bitmap"), please, try the 
attached patch.

Thanks,
Edward.

[-- Attachment #2: reiser4-fix-on-demand-bitmap-load.patch --]
[-- Type: text/plain, Size: 2378 bytes --]

Index: linux-2.6.38/fs/reiser4/plugin/space/bitmap.c
===================================================================
--- linux-2.6.38.orig/fs/reiser4/plugin/space/bitmap.c
+++ linux-2.6.38/fs/reiser4/plugin/space/bitmap.c
@@ -830,45 +830,43 @@ static int load_and_lock_bnode(struct bi
 	}
 
 	ret = prepare_bnode(bnode, &cjnode, &wjnode);
-	if (ret == 0) {
-		mutex_lock(&bnode->mutex);
+	if (ret)
+		return ret;
 
-		if (!atomic_read(&bnode->loaded)) {
-			assert("nikita-2822", cjnode != NULL);
-			assert("nikita-2823", wjnode != NULL);
-			assert("nikita-2824", jnode_is_loaded(cjnode));
-			assert("nikita-2825", jnode_is_loaded(wjnode));
-
-			bnode->wjnode = wjnode;
-			bnode->cjnode = cjnode;
-
-			ret = check_struct_bnode(bnode, current_blocksize);
-			if (!ret) {
-				cjnode = wjnode = NULL;
-				atomic_set(&bnode->loaded, 1);
-				/* working bitmap is initialized by on-disk
-				 * commit bitmap. This should be performed
-				 * under mutex. */
-				memcpy(bnode_working_data(bnode),
-				       bnode_commit_data(bnode),
-				       bmap_size(current_blocksize));
-			} else
-				mutex_unlock(&bnode->mutex);
-		} else
-			/* race: someone already loaded bitmap while we were
-			 * busy initializing data. */
-			check_bnode_loaded(bnode);
-	}
-
-	if (wjnode != NULL) {
-		release(wjnode);
-		bnode->wjnode = NULL;
-	}
-	if (cjnode != NULL) {
-		release(cjnode);
-		bnode->cjnode = NULL;
-	}
+	mutex_lock(&bnode->mutex);
 
+	if (!atomic_read(&bnode->loaded)) {
+		assert("nikita-2822", cjnode != NULL);
+		assert("nikita-2823", wjnode != NULL);
+		assert("nikita-2824", jnode_is_loaded(cjnode));
+		assert("nikita-2825", jnode_is_loaded(wjnode));
+
+		bnode->wjnode = wjnode;
+		bnode->cjnode = cjnode;
+
+		ret = check_struct_bnode(bnode, current_blocksize);
+		if (unlikely(ret != 0))
+			goto error;
+
+		atomic_set(&bnode->loaded, 1);
+		/* working bitmap is initialized by on-disk
+		 * commit bitmap. This should be performed
+		 * under mutex. */
+		memcpy(bnode_working_data(bnode),
+		       bnode_commit_data(bnode),
+		       bmap_size(current_blocksize));
+	} else
+		/* race: someone already loaded bitmap
+		 * while we were busy initializing data. */
+		check_bnode_loaded(bnode);
+	return 0;
+
+ error:
+	release(wjnode);
+	release(cjnode);
+	bnode->wjnode = NULL;
+	bnode->cjnode = NULL;
+	mutex_unlock(&bnode->mutex);
 	return ret;
 }
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2011-03-24 21:42 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-28 11:10 [patch] reiser4progs: Handle unprepped items by fsck Edward Shishkin
     [not found] ` <4C7BD349.5070005@tls-tautenburg.de>
     [not found]   ` <4C7BD64B.9070005@gmail.com>
     [not found]     ` <4C7BF309.8070906@tls-tautenburg.de>
     [not found]       ` <4C7C04FB.9070901@gmail.com>
     [not found]         ` <4D7CAE07.2090907@tls-tautenburg.de>
     [not found]           ` <4D7FECB3.4090804@gmail.com>
     [not found]             ` <4D8A771B.7080903@tls-tautenburg.de>
     [not found]               ` <4D8A7E79.90407@gmail.com>
     [not found]                 ` <4D8A838C.5040702@tls-tautenburg.de>
     [not found]                   ` <4D8A85D5.2010201@gmail.com>
     [not found]                     ` <4D8A8893.9000802@tls-tautenburg.de>
     [not found]                       ` <4D8A8C6D.7070906@gmail.com>
     [not found]                         ` <4D8B5646.8020609@tls-tautenburg.de>
2011-03-24 21:42                           ` search_one_bitmap_forward oops Edward Shishkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).