All of lore.kernel.org
 help / color / mirror / Atom feed
* [Drbd-dev] drbd 2.6.19 crypto changes
@ 2007-01-10 12:31 Ard van Breemen
  2007-01-10 13:48 ` Lars Ellenberg
  2007-01-10 16:23 ` Philipp Reisner
  0 siblings, 2 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-10 12:31 UTC (permalink / raw)
  To: drbd-dev

This is a preliminary patch as in: as far as I can see it
*should* work.
Biggest change in the crypto api is that calls are more
encapsulated.
Instead of a hmac, we talk about hash only. We allocate and free
hash structures, independent of what kind of hash.
To calculate the digest there are now 2 calls necessary: a call
to setkey (if you want to use a key), and a call to generate the
digest itself.
This patch tries to keep the changes contained at a single point.
This means we set the hash_key 2 times instead of being clever
and setting it once in a more central point, and use that later
on a few times.
Anyway: it compiles without warning, it loads, what more do we
want.

Index: drbd-latest/drbd/drbd_receiver.c
===================================================================
--- drbd-latest/drbd/drbd_receiver.c	(revision 2678)
+++ drbd-latest/drbd/drbd_receiver.c	(working copy)
@@ -2754,7 +2754,11 @@ STATIC void drbd_disconnect(drbd_dev *md
 			mdev->tl_hash_s = 0;
 		}
 		if(mdev->cram_hmac_tfm) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 			crypto_free_tfm(mdev->cram_hmac_tfm);
+#else
+			crypto_free_hash(mdev->cram_hmac_tfm);
+#endif
 			mdev->cram_hmac_tfm = NULL;
 		}
 		kfree(mdev->net_conf);
@@ -2951,7 +2955,11 @@ STATIC int drbd_do_auth(drbd_dev *mdev)
 		goto fail;
 	}
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 	resp_size = crypto_tfm_alg_digestsize(mdev->cram_hmac_tfm);
+#else
+	resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
+#endif
 	response = kmalloc(resp_size,GFP_KERNEL);
 	if(response == NULL) {
 		ERR("kmalloc of response failed\n");
@@ -2962,8 +2970,22 @@ STATIC int drbd_do_auth(drbd_dev *mdev)
 	sg.page   = virt_to_page(peers_ch);
 	sg.offset = offset_in_page(peers_ch);
 	sg.length = p.length;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 	crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
 		    &key_len, &sg, 1, response);
+#else
+	{
+		struct hash_desc desc;
+		int ret;
+		desc.tfm=mdev->cram_hmac_tfm;
+		desc.flags=0;
+		ret=crypto_hash_setkey(mdev->cram_hmac_tfm,
+			(u8*)mdev->net_conf->shared_secret, key_len);
+		if(ret) printk("crypto_has_setkey()@" __FILE__":%d failed ret=%d\n",__LINE__,ret);
+		ret=crypto_hash_digest(&desc, &sg, sg.length, response);
+		if(ret) printk("crypto_has_digest()@" __FILE__":%d failed ret=%d\n",__LINE__,ret);
+	}
+#endif
 
 	rv = drbd_send_cmd2(mdev,AuthResponse,response,resp_size);
 	if (!rv) goto fail;
@@ -3002,8 +3024,22 @@ STATIC int drbd_do_auth(drbd_dev *mdev)
 	sg.page   = virt_to_page(my_challenge);
 	sg.offset = offset_in_page(my_challenge);
 	sg.length = CHALLENGE_LEN;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 	crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
 		    &key_len, &sg, 1, right_response);
+#else
+	{
+		struct hash_desc desc;
+		int ret;
+		desc.tfm=mdev->cram_hmac_tfm;
+		desc.flags=0;
+		ret=crypto_hash_setkey(mdev->cram_hmac_tfm,
+			(u8*)mdev->net_conf->shared_secret, key_len);
+		if(ret) printk("crypto_has_setkey()@" __FILE__":%d failed ret=%d\n",__LINE__,ret);
+		ret=crypto_hash_digest(&desc, &sg, sg.length, right_response);
+		if(ret) printk("crypto_has_digest()@" __FILE__":%d failed ret=%d\n",__LINE__,ret);
+	}
+#endif
 
 	rv = ! memcmp(response,right_response,resp_size);
 
Index: drbd-latest/drbd/drbd_nl.c
===================================================================
--- drbd-latest/drbd/drbd_nl.c	(revision 2678)
+++ drbd-latest/drbd/drbd_nl.c	(working copy)
@@ -966,7 +966,11 @@ STATIC int drbd_nl_net_conf(drbd_dev *md
 	int i,ns;
 	enum ret_codes retcode;
 	struct net_conf *new_conf = NULL;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 	struct crypto_tfm* tfm = NULL;
+#else
+	struct crypto_hash *tfm = NULL;
+#endif
 	struct hlist_head *new_tl_hash = NULL;
 	struct hlist_head *new_ee_hash = NULL;
 	drbd_dev *odev;
@@ -1047,13 +1051,17 @@ STATIC int drbd_nl_net_conf(drbd_dev *md
 #undef O_PORT
 
 	if( new_conf->cram_hmac_alg[0] != 0) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 		tfm = crypto_alloc_tfm(new_conf->cram_hmac_alg, 0);
+#else
+		tfm = crypto_alloc_hash(new_conf->cram_hmac_alg, 0, CRYPTO_ALG_ASYNC);
+#endif
 		if (tfm == NULL) {
 			retcode=CRAMAlgNotAvail;
 			goto fail;
 		}
 
-		if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST) {
+		if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
 			retcode=CRAMAlgNotDigest;
 			goto fail;
 		}
@@ -1126,7 +1134,11 @@ FIXME LGE
 	}
 
 	if ( mdev->cram_hmac_tfm ) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 		crypto_free_tfm(mdev->cram_hmac_tfm);
+#else
+		crypto_free_hash(mdev->cram_hmac_tfm);
+#endif
 	}
 	mdev->cram_hmac_tfm = tfm;
 
@@ -1136,7 +1148,11 @@ FIXME LGE
 	return 0;
 
   fail:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 	if (tfm) crypto_free_tfm(tfm);
+#else
+	if (tfm) crypto_free_hash(tfm);
+#endif
 	if (new_tl_hash) kfree(new_tl_hash);
 	if (new_ee_hash) kfree(new_ee_hash);
 	if (new_conf) kfree(new_conf);
Index: drbd-latest/drbd/drbd_main.c
===================================================================
--- drbd-latest/drbd/drbd_main.c	(revision 2678)
+++ drbd-latest/drbd/drbd_main.c	(working copy)
@@ -2490,7 +2490,11 @@ void drbd_free_sock(drbd_dev *mdev)
 void drbd_free_resources(drbd_dev *mdev)
 {
 	if ( mdev->cram_hmac_tfm ) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 		crypto_free_tfm(mdev->cram_hmac_tfm);
+#else
+		crypto_free_hash(mdev->cram_hmac_tfm);
+#endif
 		mdev->cram_hmac_tfm = NULL;
 	}
 	drbd_free_sock(mdev);
Index: drbd-latest/drbd/drbd_int.h
===================================================================
--- drbd-latest/drbd/drbd_int.h	(revision 2678)
+++ drbd-latest/drbd/drbd_int.h	(working copy)
@@ -851,7 +851,11 @@ struct Drbd_Conf {
 	unsigned int al_tr_number;
 	int al_tr_cycle;
 	int al_tr_pos;     // position of the next transaction in the journal
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 	struct crypto_tfm* cram_hmac_tfm;
+#else
+	struct crypto_hash* cram_hmac_tfm;
+#endif
 	wait_queue_head_t seq_wait;
 	atomic_t packet_seq;
 	unsigned int peer_seq;

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-10 12:31 [Drbd-dev] drbd 2.6.19 crypto changes Ard van Breemen
@ 2007-01-10 13:48 ` Lars Ellenberg
  2007-01-10 16:09   ` Ard van Breemen
  2007-01-10 16:23 ` Philipp Reisner
  1 sibling, 1 reply; 20+ messages in thread
From: Lars Ellenberg @ 2007-01-10 13:48 UTC (permalink / raw)
  To: drbd-dev

/ 2007-01-10 13:31:16 +0100
\ Ard van Breemen:
> This is a preliminary patch as in: as far as I can see it
> *should* work.
> Biggest change in the crypto api is that calls are more
> encapsulated.
> Instead of a hmac, we talk about hash only. We allocate and free
> hash structures, independent what kind of hash.
> To calculate the digest there are now 2 calls necessary: a call
> to setkey (if you want to use a key), and a call to generate the
> digest itself.
> This patch tries to keep the changes contained at a single point.
> This means we set the hash_key 2 times instead of being clever
> and setting it once in a more central point, and use that later
> on a few times.
> Anyway: it compiles without warning, it loads, what more do we
> want.

thank you very much...
and just in time!
we started working on that api change just now :)
this probably helps a lot.

-- 
: Lars Ellenberg                            Tel +43-1-8178292-55 :
: LINBIT Information Technologies GmbH      Fax +43-1-8178292-82 :
: Vivenotgasse 48, A-1120 Vienna/Europe    http://www.linbit.com :

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-10 13:48 ` Lars Ellenberg
@ 2007-01-10 16:09   ` Ard van Breemen
  2007-01-10 19:33     ` Ard van Breemen
  0 siblings, 1 reply; 20+ messages in thread
From: Ard van Breemen @ 2007-01-10 16:09 UTC (permalink / raw)
  To: drbd-dev

On Wed, Jan 10, 2007 at 02:48:04PM +0100, Lars Ellenberg wrote:
> thank you very much...
> and just in time!
> we started working on that api change just now :)
> this probably helps a lot.

Hmmm, it does load, but somehow there are no minor devices :-(.
(/proc/drbd shows nothing more than the banner)
So it is not a definite fix.

-- 
begin  LOVE-LETTER-FOR-YOU.txt.vbs
I am a signature virus. Distribute me until the bitter
end

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-10 12:31 [Drbd-dev] drbd 2.6.19 crypto changes Ard van Breemen
  2007-01-10 13:48 ` Lars Ellenberg
@ 2007-01-10 16:23 ` Philipp Reisner
  2007-01-10 20:17   ` Ard van Breemen
  2007-01-11 14:38   ` Ard van Breemen
  1 sibling, 2 replies; 20+ messages in thread
From: Philipp Reisner @ 2007-01-10 16:23 UTC (permalink / raw)
  To: drbd-dev

[-- Attachment #1: Type: text/plain, Size: 541 bytes --]

Hi Ard,

Here is the result of my work, maybe you can test it with a new
kernel ? -- I only tested it on an old kernel with the old
API.

My idea was to have wrappers that look like the new API for the
old kernels, and make DRBD use the new API in the DRBD code.

PS: In case you test it, please post the outcome.

-Phil
-- 
: Dipl-Ing Philipp Reisner                      Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH          Fax +43-1-8178292-82 :
: Vivenotgasse 48, 1120 Vienna, Austria        http://www.linbit.com :

[-- Attachment #2: new_crypto_api.diff --]
[-- Type: text/x-diff, Size: 6711 bytes --]

Index: drbd/drbd_receiver.c
===================================================================
--- drbd/drbd_receiver.c	(revision 2678)
+++ drbd/drbd_receiver.c	(working copy)
@@ -2754,7 +2754,7 @@
 			mdev->tl_hash_s = 0;
 		}
 		if(mdev->cram_hmac_tfm) {
-			crypto_free_tfm(mdev->cram_hmac_tfm);
+			crypto_free_hash(mdev->cram_hmac_tfm);
 			mdev->cram_hmac_tfm = NULL;
 		}
 		kfree(mdev->net_conf);
@@ -2913,8 +2913,20 @@
 	Drbd_Header p;
 	unsigned int key_len = strlen(mdev->net_conf->shared_secret);
 	unsigned int resp_size;
+	struct hash_desc desc;
 	int rv;
 
+	desc.tfm=mdev->cram_hmac_tfm;
+	desc.flags=0;
+
+	rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
+				(u8*)mdev->net_conf->shared_secret, key_len);
+	if(rv) {
+		ERR("crypto_hash_setkey() failed with %d\n",rv);
+		rv = 0;
+		goto fail;
+	}
+
 	get_random_bytes(my_challenge, CHALLENGE_LEN);
 
 	rv = drbd_send_cmd2(mdev,AuthChallenge,my_challenge,CHALLENGE_LEN);
@@ -2951,7 +2963,7 @@
 		goto fail;
 	}
 
-	resp_size = crypto_tfm_alg_digestsize(mdev->cram_hmac_tfm);
+	resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
 	response = kmalloc(resp_size,GFP_KERNEL);
 	if(response == NULL) {
 		ERR("kmalloc of response failed\n");
@@ -2962,9 +2974,14 @@
 	sg.page   = virt_to_page(peers_ch);
 	sg.offset = offset_in_page(peers_ch);
 	sg.length = p.length;
-	crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
-		    &key_len, &sg, 1, response);
 
+	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
+	if(rv) {
+		ERR( "crypto_hash_digest() failed with %d\n",rv);
+		rv = 0;
+		goto fail;		
+	}
+
 	rv = drbd_send_cmd2(mdev,AuthResponse,response,resp_size);
 	if (!rv) goto fail;
 
@@ -3002,8 +3019,13 @@
 	sg.page   = virt_to_page(my_challenge);
 	sg.offset = offset_in_page(my_challenge);
 	sg.length = CHALLENGE_LEN;
-	crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
-		    &key_len, &sg, 1, right_response);
+	
+	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
+	if(rv) {
+		ERR( "crypto_hash_digest() failed with %d\n",rv);
+		rv = 0;
+		goto fail;		
+	}
 
 	rv = ! memcmp(response,right_response,resp_size);
 
Index: drbd/drbd_nl.c
===================================================================
--- drbd/drbd_nl.c	(revision 2678)
+++ drbd/drbd_nl.c	(working copy)
@@ -960,16 +960,19 @@
 	return 0;
 }
 
+#define HMAC_NAME_L 20
+
 STATIC int drbd_nl_net_conf(drbd_dev *mdev, struct drbd_nl_cfg_req *nlp,
 			    struct drbd_nl_cfg_reply *reply)
 {
 	int i,ns;
 	enum ret_codes retcode;
 	struct net_conf *new_conf = NULL;
-	struct crypto_tfm* tfm = NULL;
+	struct crypto_hash *tfm = NULL;
 	struct hlist_head *new_tl_hash = NULL;
 	struct hlist_head *new_ee_hash = NULL;
 	drbd_dev *odev;
+	char hmac_name[HMAC_NAME_L];
 
 	if (mdev->state.conn > StandAlone) {
 		retcode=HaveNetConfig;
@@ -1047,13 +1050,14 @@
 #undef O_PORT
 
 	if( new_conf->cram_hmac_alg[0] != 0) {
-		tfm = crypto_alloc_tfm(new_conf->cram_hmac_alg, 0);
+		snprintf(hmac_name,HMAC_NAME_L,"hmac(%s)",new_conf->cram_hmac_alg);
+		tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
 		if (tfm == NULL) {
 			retcode=CRAMAlgNotAvail;
 			goto fail;
 		}
 
-		if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST) {
+		if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
 			retcode=CRAMAlgNotDigest;
 			goto fail;
 		}
@@ -1126,7 +1130,7 @@
 	}
 
 	if ( mdev->cram_hmac_tfm ) {
-		crypto_free_tfm(mdev->cram_hmac_tfm);
+		crypto_free_hash(mdev->cram_hmac_tfm);
 	}
 	mdev->cram_hmac_tfm = tfm;
 
@@ -1136,7 +1140,7 @@
 	return 0;
 
   fail:
-	if (tfm) crypto_free_tfm(tfm);
+	if (tfm) crypto_free_hash(tfm);
 	if (new_tl_hash) kfree(new_tl_hash);
 	if (new_ee_hash) kfree(new_ee_hash);
 	if (new_conf) kfree(new_conf);
Index: drbd/drbd_compat_wrappers.h
===================================================================
--- drbd/drbd_compat_wrappers.h	(revision 2678)
+++ drbd/drbd_compat_wrappers.h	(working copy)
@@ -242,3 +242,86 @@
 #endif
 
 #endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+/* With Linux-2.6.19 the crypto API changed! */
+/* This is not a generic backport of the new api, it just implements
+   the corner case of "hmac(xxx)".  */
+
+#define CRYPTO_ALG_ASYNC 4711
+
+struct crypto_hash {
+        struct crypto_tfm *base;
+	const u8 *key;
+	int keylen;
+};
+
+struct hash_desc {
+        struct crypto_hash *tfm;
+        u32 flags;
+};
+
+static inline struct crypto_hash *
+crypto_alloc_hash(char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_hash *ch;
+	char *closing_bracket;
+
+	// "hmac(xxx)" is in alg_name we need that xxx. 
+	closing_bracket = strchr(alg_name,')');
+	if(!closing_bracket) return NULL;
+	if(closing_bracket-alg_name < 6) return NULL;
+
+	ch = kmalloc(sizeof(struct crypto_hash),GFP_KERNEL);
+	if(!ch) return NULL;
+
+	*closing_bracket = 0;
+	ch->base = crypto_alloc_tfm(alg_name + 5, 0);
+	*closing_bracket = ')';
+
+	if (ch->base == NULL) {
+		kfree(ch);
+		return NULL;
+	}
+
+	return ch;
+}
+
+static inline int 
+crypto_hash_setkey(struct crypto_hash *hash,const u8 *key,unsigned int keylen)
+{
+	hash->key = key;
+	hash->keylen = keylen;
+
+	return 0;
+}
+
+static inline int 
+crypto_hash_digest(struct hash_desc *desc, struct scatterlist *sg,
+		   unsigned int nbytes, u8 *out)
+{
+	
+	crypto_hmac(desc->tfm->base, (u8*)desc->tfm->key,
+		    &desc->tfm->keylen, sg, 1 /* ! */ , out);
+	/* ! this is not generic. Would need to convert nbytes -> nsg */
+
+	return 0;
+}
+
+static inline void crypto_free_hash(struct crypto_hash *tfm)
+{
+	crypto_free_tfm(tfm->base);
+	kfree(tfm);
+}
+
+static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm)
+{
+	return crypto_tfm_alg_digestsize(tfm->base);
+}
+
+static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm)
+{
+        return tfm->base;
+}
+
+#endif
Index: drbd/drbd_main.c
===================================================================
--- drbd/drbd_main.c	(revision 2678)
+++ drbd/drbd_main.c	(working copy)
@@ -2490,7 +2490,7 @@
 void drbd_free_resources(drbd_dev *mdev)
 {
 	if ( mdev->cram_hmac_tfm ) {
-		crypto_free_tfm(mdev->cram_hmac_tfm);
+		crypto_free_hash(mdev->cram_hmac_tfm);
 		mdev->cram_hmac_tfm = NULL;
 	}
 	drbd_free_sock(mdev);
Index: drbd/drbd_int.h
===================================================================
--- drbd/drbd_int.h	(revision 2678)
+++ drbd/drbd_int.h	(working copy)
@@ -851,7 +851,7 @@
 	unsigned int al_tr_number;
 	int al_tr_cycle;
 	int al_tr_pos;     // position of the next transaction in the journal
-	struct crypto_tfm* cram_hmac_tfm;
+	struct crypto_hash* cram_hmac_tfm;
 	wait_queue_head_t seq_wait;
 	atomic_t packet_seq;
 	unsigned int peer_seq;

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-10 16:09   ` Ard van Breemen
@ 2007-01-10 19:33     ` Ard van Breemen
  0 siblings, 0 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-10 19:33 UTC (permalink / raw)
  To: drbd-dev

On Wed, Jan 10, 2007 at 05:09:58PM +0100, Ard van Breemen wrote:
> Hmmm, it does load, but somehow there are no minor devices :-(.
> (/proc/drbd shows nothing more than the banner)
> So it is not a definite fix.

My fault :-(

I thought I installed the up to date userspace utils, while
actually I did not install anything but the kernel module...

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-10 16:23 ` Philipp Reisner
@ 2007-01-10 20:17   ` Ard van Breemen
  2007-01-11 14:38   ` Ard van Breemen
  1 sibling, 0 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-10 20:17 UTC (permalink / raw)
  To: Philipp Reisner; +Cc: drbd-dev

Hi,
On Wed, Jan 10, 2007 at 05:23:46PM +0100, Philipp Reisner wrote:
> Here is the result of my work, maybe you can test it with a new
> kernel ? -- I only tested it on an old kernel with the old
> API.

I will test it, since my patch did nothing with hmac(%s), and I
bumped my nose just now :-). (without authentication it works at
least ;-) ).

> My idea was to have wrappers that look like the new API for the
> old kernels, and make DRBD to use the new API in the DRBD code.

Good idea. I hoped my patch just would give you an idea like
that. But you were busy anyway ;-)

> PS: In case you test it, please post the outcome.

Yes. Tonight maybe...

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-10 16:23 ` Philipp Reisner
  2007-01-10 20:17   ` Ard van Breemen
@ 2007-01-11 14:38   ` Ard van Breemen
  2007-01-11 17:12     ` Ard van Breemen
  1 sibling, 1 reply; 20+ messages in thread
From: Ard van Breemen @ 2007-01-11 14:38 UTC (permalink / raw)
  To: Philipp Reisner; +Cc: drbd-dev

On Wed, Jan 10, 2007 at 05:23:46PM +0100, Philipp Reisner wrote:
> Here is the result of my work, maybe you can test it with a new
> kernel ? -- I only tested it on an old kernel with the old
> API.
> 
> My idea was to have wrappers that look like the new API for the
> old kernels, and make DRBD to use the new API in the DRBD code.
> 
> PS: In case you test it, please post the outcome.
drbd0: Writing meta data super block now.

crypto type=3
drbd0: conn( StandAlone -> Unconnected ) 

I got the message "not a digest", which is correct.
We are using CRYPTO_ALG_TYPE_HASH, which in turn uses
CRYPTO_ALG_TYPE_DIGEST.

Eh, never mind my babbling, just this:

> -		if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST) {
> +		if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
>  			retcode=CRAMAlgNotDigest;
>  			goto fail;
>  		}

s/CRYPTO_ALG_TYPE_DIGEST/CRYPTO_ALG_TYPE_HASH/g

then it probably works (I've removed the goto fail and just
printed the type, which was a HASH :-) ).


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-11 14:38   ` Ard van Breemen
@ 2007-01-11 17:12     ` Ard van Breemen
  2007-01-11 18:03       ` [Drbd-dev] oopses in 2.6.19.1 Ard van Breemen
  2007-01-12 13:50       ` [Drbd-dev] drbd 2.6.19 crypto changes Philipp Reisner
  0 siblings, 2 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-11 17:12 UTC (permalink / raw)
  To: Philipp Reisner; +Cc: drbd-dev

[-- Attachment #1: Type: text/plain, Size: 192 bytes --]

On Thu, Jan 11, 2007 at 03:38:45PM +0100, Ard van Breemen wrote:
> s/CRYPTO_ALG_TYPE_DIGEST/CRYPTO_ALG_TYPE_HASH/g

Your patch, but fixed with ^^^^ and working. (I do have unrelated
oopses).


[-- Attachment #2: new_crypto_api-fixed.diff --]
[-- Type: text/plain, Size: 6709 bytes --]

Index: drbd/drbd_receiver.c
===================================================================
--- drbd/drbd_receiver.c	(revision 2679)
+++ drbd/drbd_receiver.c	(working copy)
@@ -2754,7 +2754,7 @@
 			mdev->tl_hash_s = 0;
 		}
 		if(mdev->cram_hmac_tfm) {
-			crypto_free_tfm(mdev->cram_hmac_tfm);
+			crypto_free_hash(mdev->cram_hmac_tfm);
 			mdev->cram_hmac_tfm = NULL;
 		}
 		kfree(mdev->net_conf);
@@ -2913,8 +2913,20 @@
 	Drbd_Header p;
 	unsigned int key_len = strlen(mdev->net_conf->shared_secret);
 	unsigned int resp_size;
+	struct hash_desc desc;
 	int rv;
 
+	desc.tfm=mdev->cram_hmac_tfm;
+	desc.flags=0;
+
+	rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
+				(u8*)mdev->net_conf->shared_secret, key_len);
+	if(rv) {
+		ERR("crypto_hash_setkey() failed with %d\n",rv);
+		rv = 0;
+		goto fail;
+	}
+
 	get_random_bytes(my_challenge, CHALLENGE_LEN);
 
 	rv = drbd_send_cmd2(mdev,AuthChallenge,my_challenge,CHALLENGE_LEN);
@@ -2951,7 +2963,7 @@
 		goto fail;
 	}
 
-	resp_size = crypto_tfm_alg_digestsize(mdev->cram_hmac_tfm);
+	resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
 	response = kmalloc(resp_size,GFP_KERNEL);
 	if(response == NULL) {
 		ERR("kmalloc of response failed\n");
@@ -2962,9 +2974,14 @@
 	sg.page   = virt_to_page(peers_ch);
 	sg.offset = offset_in_page(peers_ch);
 	sg.length = p.length;
-	crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
-		    &key_len, &sg, 1, response);
 
+	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
+	if(rv) {
+		ERR( "crypto_hash_digest() failed with %d\n",rv);
+		rv = 0;
+		goto fail;		
+	}
+
 	rv = drbd_send_cmd2(mdev,AuthResponse,response,resp_size);
 	if (!rv) goto fail;
 
@@ -3002,8 +3019,13 @@
 	sg.page   = virt_to_page(my_challenge);
 	sg.offset = offset_in_page(my_challenge);
 	sg.length = CHALLENGE_LEN;
-	crypto_hmac(mdev->cram_hmac_tfm, (u8*)mdev->net_conf->shared_secret,
-		    &key_len, &sg, 1, right_response);
+	
+	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
+	if(rv) {
+		ERR( "crypto_hash_digest() failed with %d\n",rv);
+		rv = 0;
+		goto fail;		
+	}
 
 	rv = ! memcmp(response,right_response,resp_size);
 
Index: drbd/drbd_nl.c
===================================================================
--- drbd/drbd_nl.c	(revision 2679)
+++ drbd/drbd_nl.c	(working copy)
@@ -960,16 +960,19 @@
 	return 0;
 }
 
+#define HMAC_NAME_L 20
+
 STATIC int drbd_nl_net_conf(drbd_dev *mdev, struct drbd_nl_cfg_req *nlp,
 			    struct drbd_nl_cfg_reply *reply)
 {
 	int i,ns;
 	enum ret_codes retcode;
 	struct net_conf *new_conf = NULL;
-	struct crypto_tfm* tfm = NULL;
+	struct crypto_hash *tfm = NULL;
 	struct hlist_head *new_tl_hash = NULL;
 	struct hlist_head *new_ee_hash = NULL;
 	drbd_dev *odev;
+	char hmac_name[HMAC_NAME_L];
 
 	if (mdev->state.conn > StandAlone) {
 		retcode=HaveNetConfig;
@@ -1047,13 +1050,14 @@
 #undef O_PORT
 
 	if( new_conf->cram_hmac_alg[0] != 0) {
-		tfm = crypto_alloc_tfm(new_conf->cram_hmac_alg, 0);
+		snprintf(hmac_name,HMAC_NAME_L,"hmac(%s)",new_conf->cram_hmac_alg);
+		tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
 		if (tfm == NULL) {
 			retcode=CRAMAlgNotAvail;
 			goto fail;
 		}
 
-		if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST) {
+		if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_HASH) {
 			retcode=CRAMAlgNotDigest;
 			goto fail;
 		}
@@ -1126,7 +1130,7 @@
 	}
 
 	if ( mdev->cram_hmac_tfm ) {
-		crypto_free_tfm(mdev->cram_hmac_tfm);
+		crypto_free_hash(mdev->cram_hmac_tfm);
 	}
 	mdev->cram_hmac_tfm = tfm;
 
@@ -1136,7 +1140,7 @@
 	return 0;
 
   fail:
-	if (tfm) crypto_free_tfm(tfm);
+	if (tfm) crypto_free_hash(tfm);
 	if (new_tl_hash) kfree(new_tl_hash);
 	if (new_ee_hash) kfree(new_ee_hash);
 	if (new_conf) kfree(new_conf);
Index: drbd/drbd_compat_wrappers.h
===================================================================
--- drbd/drbd_compat_wrappers.h	(revision 2679)
+++ drbd/drbd_compat_wrappers.h	(working copy)
@@ -242,3 +242,86 @@
 #endif
 
 #endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+/* With Linux-2.6.19 the crypto API changed! */
+/* This is not a generic backport of the new api, it just implements
+   the corner case of "hmac(xxx)".  */
+
+#define CRYPTO_ALG_ASYNC 4711
+
+struct crypto_hash {
+        struct crypto_tfm *base;
+	const u8 *key;
+	int keylen;
+};
+
+struct hash_desc {
+        struct crypto_hash *tfm;
+        u32 flags;
+};
+
+static inline struct crypto_hash *
+crypto_alloc_hash(char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_hash *ch;
+	char *closing_bracket;
+
+	// "hmac(xxx)" is in alg_name we need that xxx. 
+	closing_bracket = strchr(alg_name,')');
+	if(!closing_bracket) return NULL;
+	if(closing_bracket-alg_name < 6) return NULL;
+
+	ch = kmalloc(sizeof(struct crypto_hash),GFP_KERNEL);
+	if(!ch) return NULL;
+
+	*closing_bracket = 0;
+	ch->base = crypto_alloc_tfm(alg_name + 5, 0);
+	*closing_bracket = ')';
+
+	if (ch->base == NULL) {
+		kfree(ch);
+		return NULL;
+	}
+
+	return ch;
+}
+
+static inline int 
+crypto_hash_setkey(struct crypto_hash *hash,const u8 *key,unsigned int keylen)
+{
+	hash->key = key;
+	hash->keylen = keylen;
+
+	return 0;
+}
+
+static inline int 
+crypto_hash_digest(struct hash_desc *desc, struct scatterlist *sg,
+		   unsigned int nbytes, u8 *out)
+{
+	
+	crypto_hmac(desc->tfm->base, (u8*)desc->tfm->key,
+		    &desc->tfm->keylen, sg, 1 /* ! */ , out);
+	/* ! this is not generic. Would need to convert nbytes -> nsg */
+
+	return 0;
+}
+
+static inline void crypto_free_hash(struct crypto_hash *tfm)
+{
+	crypto_free_tfm(tfm->base);
+	kfree(tfm);
+}
+
+static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm)
+{
+	return crypto_tfm_alg_digestsize(tfm->base);
+}
+
+static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm)
+{
+        return tfm->base;
+}
+
+#endif
Index: drbd/drbd_main.c
===================================================================
--- drbd/drbd_main.c	(revision 2679)
+++ drbd/drbd_main.c	(working copy)
@@ -2490,7 +2490,7 @@
 void drbd_free_resources(drbd_dev *mdev)
 {
 	if ( mdev->cram_hmac_tfm ) {
-		crypto_free_tfm(mdev->cram_hmac_tfm);
+		crypto_free_hash(mdev->cram_hmac_tfm);
 		mdev->cram_hmac_tfm = NULL;
 	}
 	drbd_free_sock(mdev);
Index: drbd/drbd_int.h
===================================================================
--- drbd/drbd_int.h	(revision 2679)
+++ drbd/drbd_int.h	(working copy)
@@ -851,7 +851,7 @@
 	unsigned int al_tr_number;
 	int al_tr_cycle;
 	int al_tr_pos;     // position of the next transaction in the journal
-	struct crypto_tfm* cram_hmac_tfm;
+	struct crypto_hash* cram_hmac_tfm;
 	wait_queue_head_t seq_wait;
 	atomic_t packet_seq;
 	unsigned int peer_seq;

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Drbd-dev] oopses in 2.6.19.1
  2007-01-11 17:12     ` Ard van Breemen
@ 2007-01-11 18:03       ` Ard van Breemen
  2007-01-12 13:53         ` Philipp Reisner
  2007-01-15 17:06         ` Philipp Reisner
  2007-01-12 13:50       ` [Drbd-dev] drbd 2.6.19 crypto changes Philipp Reisner
  1 sibling, 2 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-11 18:03 UTC (permalink / raw)
  To: drbd-dev

On Thu, Jan 11, 2007 at 06:12:05PM +0100, Ard van Breemen wrote:
> Your patch, but fixed with ^^^^ and working. (I do have unrelated
> oopses).

On the Inconsistent side, it starts to oops. During the first
sync I do a disconnect, and then...
I will pay more attention on the next oops to what I was doing.

drbd: initialised. Version: 8.0rc1 (api:86/proto:85)
drbd: SVN Revision: 2679M build by ard@siddev, 2007-01-11 15:51:43
drbd: registered as block device major 147
drbd: minor_table @ 0xffff81017e2ce0c0
drbd0: disk( Diskless -> Attaching ) 
drbd0: No usable activity log found.
drbd0: max_segment_size ( = BIO size ) = 32768
drbd0: Adjusting my ra_pages to backing device's (32 -> 96)
drbd0: drbd_bm_resize called with capacity == 2318589904
drbd0: resync bitmap: bits=289823738 words=4528496
drbd0: size = 1105 GB (1159294952 KB)

drbd0: reading of bitmap took 86 jiffies
drbd0: recounting of set bits took additional 7 jiffies
drbd0: 892 GB marked out-of-sync by on disk bit-map.
drbd0: disk( Attaching -> Inconsistent ) 
drbd0: Writing meta data super block now.
drbd0: conn( StandAlone -> Unconnected ) 
drbd0: receiver (re)started
drbd0: conn( Unconnected -> WFConnection ) 
drbd0: conn( WFConnection -> WFReportParams ) 
drbd0: Handshake successful: DRBD Network Protocol version 85
drbd0: Peer authenticated usind 20 bytes of 'sha1' HMAC
drbd0: peer( Unknown -> Secondary ) conn( WFReportParams -> WFBitMapT ) pdsk( DUnknown -> UpToDate ) 
drbd0: Writing meta data super block now.
drbd0: conn( WFBitMapT -> WFSyncUUID ) 
drbd0: conn( WFSyncUUID -> SyncTarget ) 
drbd0: Began resync as SyncTarget (will sync 935358440 KB [233839610 bits set]).
drbd0: Writing meta data super block now.
----------- [cut here ] --------- [please bite here ] ---------
Kernel BUG at ...ed/kernel/tyan-s2891/modules/drbd/drbd/lru_cache.c:312
invalid opcode: 0000 [1] SMP 
CPU 1 
Modules linked in: drbd sha1 cn ipv6 tg3

Pid: 1593:#0, comm: md6_raid5 Not tainted 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1
RIP: 0010:[<ffffffff8807967f>]  [<ffffffff8807967f>] :drbd:lc_put+0x4f/0xc0
RSP: 0018:ffff81017ce87c38  EFLAGS: 00010046
RAX: 0000000000000000 RBX: ffffc20000b7c2d8 RCX: ffffc20000b7c2d8
RDX: ffffc20000b7c2d8 RSI: ffffc20000b7c2d8 RDI: ffffc20000b7c000
RBP: ffff81007ddab000 R08: 000000000000001f R09: 0000000000000001
R10: ffffffff806bd740 R11: ffffffff8027bb60 R12: ffff81007ddab5a8
R13: 0000000000000293 R14: ffff81007ddab368 R15: 0000000000000000
FS:  00002aaefeae54a0(0000) GS:ffff8101000c64c0(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00000000005bd000 CR3: 00000001786d0000 CR4: 00000000000006e0
Process md6_raid5 (pid: 1593[#0], threadinfo ffff81017ce86000, task ffff81017d3e47b0)
Stack:  ffffffff88077c1f 0000000000000010 ffff81007ddab000 ffff81007ca5f1e8
 0000000000000000 0000000000000001 ffffffff8806b79d 0000000000000246
 0000000000000000 0000000000000000 ffff81017c4113c8 00000000ffffffff
Call Trace:
 [<ffffffff88077c1f>] :drbd:drbd_rs_complete_io+0xcf/0x130
 [<ffffffff8806b79d>] :drbd:drbd_endio_write_sec+0x1bd/0x2d0
 [<ffffffff80453dfb>] handle_stripe+0x248b/0x2780
 [<ffffffff804091ac>] ata_qc_issue_prot+0x12c/0x2b0
 [<ffffffff8040677a>] ata_qc_issue+0x40a/0x4a0
 [<ffffffff8040c7bc>] ata_scsi_rw_xlat+0x29c/0x400

 [<ffffffff8040dc40>] ata_exec_command+0x0/0x50
 [<ffffffff8026958b>] thread_return+0x0/0x105
 [<ffffffff803f6078>] scsi_dispatch_cmd+0x258/0x2e0
 [<ffffffff8045424d>] raid5d+0x15d/0x1a0
 [<ffffffff8029e4e0>] keventd_create_kthread+0x0/0x80
 [<ffffffff8045cd4d>] md_thread+0x11d/0x140
 [<ffffffff8029e720>] autoremove_wake_function+0x0/0x30
 [<ffffffff8045cc30>] md_thread+0x0/0x140
 [<ffffffff80235de9>] kthread+0xd9/0x120
 [<ffffffff80266dc8>] child_rip+0xa/0x12
 [<ffffffff8029e4e0>] keventd_create_kthread+0x0/0x80
 [<ffffffff80235d10>] kthread+0x0/0x120
 [<ffffffff80266dbe>] child_rip+0x0/0x12


Code: 0f 0b 68 40 a9 08 88 c2 38 01 66 66 66 90 66 66 90 48 3b 77 
RIP  [<ffffffff8807967f>] :drbd:lc_put+0x4f/0xc0
 RSP <ffff81017ce87c38>
 NMI Watchdog detected LOCKUP on CPU 0
CPU 0 
Modules linked in: drbd sha1 cn ipv6 tg3
Pid: 31157:#0, comm: drbd0_asender Not tainted 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1

RIP: 0010:[<ffffffff8026b4ba>]  [<ffffffff8026b4ba>] _spin_lock_irqsave+0xa/0x20
RSP: 0018:ffff81007ca07e18  EFLAGS: 00000086
RAX: 0000000000000246 RBX: 000000000370fe40 RCX: ffffffff88087498
RDX: 000000008a32dfcf RSI: 000000001b87f200 RDI: ffff81007ddab5a8
RBP: 0000000000000000 R08: 0000000000000402 R09: 0000000000000000
R10: 00000000000005a8 R11: 00000000ffffffff R12: ffff81007ddab000
R13: 000000000370fe47 R14: 000000001b87f200 R15: ffff81007ddab5a8
FS:  00002b2b5b00e700(0000) GS:ffffffff8064b000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00002aed443a2640 CR3: 000000007eb5d000 CR4: 00000000000006e0
Process drbd0_asender (pid: 31157[#0], threadinfo ffff81007ca06000, task ffff81007fb267f0)
Stack:  ffffffff8807747b 0000000000000282 ffff81007ddb6c38 ffff81007ddab000
 000000001b87f200 0000000000000001 ffff81007ca07e80 0000000000000200
 ffffffff88070e98 ffff81007ddb6c38 ffff81007ddb6ef8 ffff81007ddab000
Call Trace:
 [<ffffffff8807747b>] :drbd:__drbd_set_in_sync+0x1bb/0x2e0
 [<ffffffff88070e98>] :drbd:e_end_resync_block+0x68/0x100
 [<ffffffff8806f35b>] :drbd:drbd_process_done_ee+0xdb/0x140
 [<ffffffff880714d8>] :drbd:drbd_asender+0xe8/0x580
 [<ffffffff8807f729>] :drbd:drbd_thread_setup+0x99/0xe0
 [<ffffffff80266dc8>] child_rip+0xa/0x12
 [<ffffffff8027bb60>] flat_send_IPI_mask+0x0/0x50

[more]\r      \r [<ffffffff8807f690>] :drbd:drbd_thread_setup+0x0/0xe0
 [<ffffffff80266dbe>] child_rip+0x0/0x12


Code: 83 3f 00 7e f9 eb f2 c3 66 66 66 90 66 66 66 90 66 66 90 66 
 
File erased !

telnet> sened\b \b\b \bd break


Debian GNU/Linux ttyS0 115200 (janneke)

janneke login: <6>SysRq : Keyboard mode set to XLATE
oot
Password: 

Login incorrect

janneke login: root
Password: 
Last login: Thu Jan 11 14:56:22 2007 from 10.41.1.173 on pts/0
Linux janneke 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1 SMP Wed Jan 3 15:07:17 CET 2007 x86_64 GNU/Linux

The programs included with the Debian GNU/Linux system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.

Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
permitted by applicable law.
rjanneke:~# reboot
\rINIT: \rINIT: Sending processes the TERM signal
janneke:~# \rIStopping all DRBdrbd0: sock_sendmsg returned -104
D resourcesdrbd0: peer( Secondary -> Unknown ) conn( SyncTarget -> BrokenPipe ) pdsk( UpToDate -> DUnknown ) 
drbd0: short sent StateChgRequest size=16 sent=0
drbd0: conn( BrokenPipe -> Disconnecting ) disk( Inconsistent -> Outdated ) 
Child process does not terminate!
Exiting.
ERROR: Module drbd is in use
.
Stopping periodic command scheduler: cron.
Stopping internet superserver: inetd.
Stopping munin-node: done.
Stopping rsync daemon: rsync.
Stopping network management services: snmpd snmptrapd.
Stopping OpenBSD Secure Shell server: sshd.
Stopping NTP server: ntpd.
Saving the System Clock time to the Hardware Clock...
Hardware Clock updated to Thu Jan 11 16:05:42 CET 2007.
Stopping RAID monitor daemon: mdadm -F.
Stopping deferred execution scheduler: atd.
Stopping kernel log daemon: klogd.
Stopping system log daemon: syslogd.
Sending all processes the TERM signal...BUG: soft lockup detected on CPU#1!

Call Trace:
 <IRQ>  [<ffffffff802b5e0a>] softlockup_tick+0xfa/0x120
 [<ffffffff80294487>] update_process_times+0x57/0x90
 [<ffffffff80278d24>] smp_local_timer_interrupt+0x34/0x60
 [<ffffffff80279259>] smp_apic_timer_interrupt+0x59/0x80
 [<ffffffff80266be6>] apic_timer_interrupt+0x66/0x70
 <EOI>  [<ffffffff802257b7>] flush_tlb_others+0x87/0xd0
 [<ffffffff802257af>] flush_tlb_others+0x7f/0xd0
 [<ffffffff80278a80>] flush_tlb_mm+0xb0/0xc0
 [<ffffffff80213407>] unmap_region+0x117/0x160
 [<ffffffff80212398>] do_munmap+0x238/0x330
 [<ffffffff8026ae62>] __down_write_nested+0x12/0xb0
 [<ffffffff80216de8>] sys_munmap+0x48/0x80
 [<ffffffff8026600e>] system_call+0x7e/0x83


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] drbd 2.6.19 crypto changes
  2007-01-11 17:12     ` Ard van Breemen
  2007-01-11 18:03       ` [Drbd-dev] oopses in 2.6.19.1 Ard van Breemen
@ 2007-01-12 13:50       ` Philipp Reisner
  1 sibling, 0 replies; 20+ messages in thread
From: Philipp Reisner @ 2007-01-12 13:50 UTC (permalink / raw)
  To: drbd-dev

Am Donnerstag, 11. Januar 2007 18:12 schrieb Ard van Breemen:
> On Thu, Jan 11, 2007 at 03:38:45PM +0100, Ard van Breemen wrote:
> > s/CRYPTO_ALG_TYPE_DIGEST/CRYPTO_ALG_TYPE_HASH/g
>
> Your patch, but fixed with ^^^^ and working. (I do have unrelated
> oopses).

Hi Ard,

This went into SVN yesterday.
http://lists.linbit.com/pipermail/drbd-cvs/2007-January/001426.html

Today I finally convinced myself that it really works. It computes
the same values, using the new and the old API. It seems to work.

-Phil
-- 
: Dipl-Ing Philipp Reisner                      Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH          Fax +43-1-8178292-82 :
: Vivenotgasse 48, 1120 Vienna, Austria        http://www.linbit.com :

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-11 18:03       ` [Drbd-dev] oopses in 2.6.19.1 Ard van Breemen
@ 2007-01-12 13:53         ` Philipp Reisner
  2007-01-15 17:06         ` Philipp Reisner
  1 sibling, 0 replies; 20+ messages in thread
From: Philipp Reisner @ 2007-01-12 13:53 UTC (permalink / raw)
  To: drbd-dev

Am Donnerstag, 11. Januar 2007 19:03 schrieb Ard van Breemen:
> On Thu, Jan 11, 2007 at 06:12:05PM +0100, Ard van Breemen wrote:
> > Your patch, but fixed with ^^^^ and working. (I do have unrelated
> > oopses).
>
> On the Inconsistent side, it starts to oops. During the first
> sink I do a disconnect, and then...
> I will pay more attention on the next oops on what was I doing.
>

Hi Ard,

I am running out of time now. I will look into this issue
on Monday. Just the usual questions:

* Was there Application-IO during the resync ?
* Did it happen when the resync was supposed to be finished ?
* Can you (easily) reproduce it ?

Thanks,
 Phil
-- 
: Dipl-Ing Philipp Reisner                      Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH          Fax +43-1-8178292-82 :
: Vivenotgasse 48, 1120 Vienna, Austria        http://www.linbit.com :

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-11 18:03       ` [Drbd-dev] oopses in 2.6.19.1 Ard van Breemen
  2007-01-12 13:53         ` Philipp Reisner
@ 2007-01-15 17:06         ` Philipp Reisner
  2007-01-16 10:37           ` Ard van Breemen
  1 sibling, 1 reply; 20+ messages in thread
From: Philipp Reisner @ 2007-01-15 17:06 UTC (permalink / raw)
  To: drbd-dev

> Kernel BUG at ...ed/kernel/tyan-s2891/modules/drbd/drbd/lru_cache.c:312
> invalid opcode: 0000 [1] SMP
> CPU 1
> Modules linked in: drbd sha1 cn ipv6 tg3
>
[...]

Hi Ard,

Although I can not reproduce this crash here, I was able to trigger
"LOGIC BUG" Messages, by finishing the resync by application-IO...

This commit fixes (at least parts of) the problem.

http://lists.linbit.com/pipermail/drbd-cvs/2007-January/001434.html

Could you verify if this patch changes the situation for you ?

-Phil
-- 
: Dipl-Ing Philipp Reisner                      Tel +43-1-8178292-50 :
: LINBIT Information Technologies GmbH          Fax +43-1-8178292-82 :
: Vivenotgasse 48, 1120 Vienna, Austria        http://www.linbit.com :

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-15 17:06         ` Philipp Reisner
@ 2007-01-16 10:37           ` Ard van Breemen
  2007-01-25 17:45             ` Ard van Breemen
  0 siblings, 1 reply; 20+ messages in thread
From: Ard van Breemen @ 2007-01-16 10:37 UTC (permalink / raw)
  To: Philipp Reisner; +Cc: drbd-dev

Hi,
On Mon, Jan 15, 2007 at 06:06:20PM +0100, Philipp Reisner wrote:
> Although I can not reproduce this crash here, I was able to trigger
> "LOGIC BUG" Messages, by finishing the resync by by application-IO...

I will try it.
It seems to happen if I do this:
- create a raidset at the primary, create a raidset at the
secondary.
- create the drbd device on top of that
- designate the primary primary.

After a while the secondary oopses.

-- 
begin  LOVE-LETTER-FOR-YOU.txt.vbs
I am a signature virus. Distribute me until the bitter
end

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-16 10:37           ` Ard van Breemen
@ 2007-01-25 17:45             ` Ard van Breemen
  2007-01-25 21:32               ` Lars Ellenberg
  0 siblings, 1 reply; 20+ messages in thread
From: Ard van Breemen @ 2007-01-25 17:45 UTC (permalink / raw)
  To: Philipp Reisner; +Cc: drbd-dev

[-- Attachment #1: Type: text/plain, Size: 1619 bytes --]

Hi,
On Tue, Jan 16, 2007 at 11:37:49AM +0100, Ard van Breemen wrote:
> I will try it.
> It seems to happen if I do this:
> - create a raidset at the primary, create a raidset at the
> secondary.
> - create the drbd device on top of that
> - designate the primary primary.
I've done this to create the following two kernel logs:
Create a raidset at A, Create a raidset at B
Wait for raidsync to complete
Create the drbd device
Start the connection (both inconsistent)
make raidset A primary
The stuff starts to sync.
The sync speed at that moment is about 10MB/s.
I reboot both systems (I just wanted to get a higher speed, and
rebooting is an easy way to restore settings to something known).

The primary comes up ok, the secondary also.
The secondary then trips.
(At that moment both raidsets are OK).
That's when I made the logs.

I reboot the secondary.
The sync gains speed to >70MB/s

Just to let you know why I want to use drbd:
I've got servers with >100M files. If one of the servers goes
haywire, I've got to resync the 100M files. There is no normal
application that can sync 1T/>100M files in a timely manner. Next
to that, I have to bring a second server down, to be able to do
that. Using the raw power of drbd, I can do that with drbd in
about 7 hours, and also be up to date after 7 hours of syncing.
So: I only use it to sync. After that I disconnect the devices,
and let them go on independently.
(Resyncing from the backup takes 48 hours or so)
Of course I can take the easy road and switch to 0.7 ;-)
-- 
begin  LOVE-LETTER-FOR-YOU.txt.vbs
I am a signature virus. Distribute me until the bitter
end

[-- Attachment #2: drbd.master.log --]
[-- Type: text/plain, Size: 2272 bytes --]

Linux version 2.6.19.1-vs2.2.0-rc6-tyan-s2891test (root@siddev) (gcc version 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)) #1 SMP Wed Jan 3 15:07:17 CET 2007
Command line: console=tty0 console=ttyS0,115200 hdb=noprobe hdc=noprobe hdd=noprobe root=/dev/md0 ro 

[lge: 800+ lines of bootup messages snipped, because they seemed irrelevant ]

drbd: initialised. Version: 8.0rc1 (api:86/proto:85)
drbd: SVN Revision: 2695 build by ard@siddev, 2007-01-16 15:33:47
drbd: registered as block device major 147
drbd: minor_table @ 0xffff81017e37ebc0
drbd0: disk( Diskless -> Attaching ) 
drbd0: No usable activity log found.
drbd0: max_segment_size ( = BIO size ) = 32768
drbd0: Adjusting my ra_pages to backing device's (32 -> 96)
drbd0: drbd_bm_resize called with capacity == 2318589904
drbd0: resync bitmap: bits=289823738 words=4528496
drbd0: size = 1105 GB (1159294952 KB)
drbd0: reading of bitmap took 138 jiffies
drbd0: recounting of set bits took additional 8 jiffies
drbd0: 1105 GB marked out-of-sync by on disk bit-map.
drbd0: disk( Attaching -> UpToDate ) pdsk( DUnknown -> Outdated ) 
drbd0: Writing meta data super block now.
drbd0: Writing meta data super block now.
drbd0: conn( StandAlone -> Unconnected ) 
drbd0: receiver (re)started
drbd0: conn( Unconnected -> WFConnection ) 
drbd0: conn( WFConnection -> WFReportParams ) 
drbd0: Handshake successful: DRBD Network Protocol version 85
drbd0: Peer authenticated using 20 bytes of 'sha1' HMAC
drbd0: peer( Unknown -> Secondary ) conn( WFReportParams -> WFBitMapS ) pdsk( Outdated -> Inconsistent ) 
drbd0: Writing meta data super block now.
drbd0: conn( WFBitMapS -> SyncSource ) 
drbd0: Began resync as SyncSource (will sync 1158770664 KB [289692666 bits set]).
drbd0: Writing meta data super block now.
eth0: no IPv6 routers present
eth1: no IPv6 routers present
drbd0: PingAck did not arrive in time.
drbd0: peer( Secondary -> Unknown ) conn( SyncSource -> NetworkFailure ) 
drbd0: asender terminated
drbd0: drbd_pp_alloc interrupted!
drbd0: error receiving RSDataRequest, l: 24!
drbd0: tl_clear()
drbd0: Connection closed
drbd0: Writing meta data super block now.
drbd0: conn( NetworkFailure -> Unconnected ) 
drbd0: receiver terminated
drbd0: receiver (re)started
drbd0: conn( Unconnected -> WFConnection ) 

[-- Attachment #3: drbd.slave.log --]
[-- Type: text/plain, Size: 9492 bytes --]

Linux version 2.6.19.1-vs2.2.0-rc6-tyan-s2891test (root@siddev) (gcc version 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)) #1 SMP Wed Jan 3 15:07:17 CET 2007
Command line: console=tty0 console=ttyS0,115200 hdb=noprobe hdc=noprobe hdd=noprobe root=/dev/md0 ro 

[lge: 800+ lines of bootup messages snipped, because they seemed irrelevant ]

drbd: initialised. Version: 8.0rc1 (api:86/proto:85)
drbd: SVN Revision: 2695 build by ard@siddev, 2007-01-16 15:33:47
drbd: registered as block device major 147
drbd: minor_table @ 0xffff81007f017e80
drbd0: disk( Diskless -> Attaching ) 
drbd0: No usable activity log found.
drbd0: max_segment_size ( = BIO size ) = 32768
drbd0: Adjusting my ra_pages to backing device's (32 -> 96)
drbd0: drbd_bm_resize called with capacity == 2318589904
drbd0: resync bitmap: bits=289823738 words=4528496
drbd0: size = 1105 GB (1159294952 KB)
drbd0: reading of bitmap took 124 jiffies
drbd0: recounting of set bits took additional 7 jiffies
drbd0: 1105 GB marked out-of-sync by on disk bit-map.
drbd0: disk( Attaching -> Inconsistent ) 
drbd0: Writing meta data super block now.
drbd0: conn( StandAlone -> Unconnected ) 
drbd0: receiver (re)started
drbd0: conn( Unconnected -> WFConnection ) 
drbd0: conn( WFConnection -> WFReportParams ) 
drbd0: Handshake successful: DRBD Network Protocol version 85
drbd0: Peer authenticated using 20 bytes of 'sha1' HMAC
drbd0: peer( Unknown -> Secondary ) conn( WFReportParams -> WFBitMapT ) pdsk( DUnknown -> UpToDate ) 
drbd0: Writing meta data super block now.
drbd0: conn( WFBitMapT -> WFSyncUUID ) 
drbd0: conn( WFSyncUUID -> SyncTarget ) 
drbd0: Began resync as SyncTarget (will sync 1158770664 KB [289692666 bits set]).
drbd0: Writing meta data super block now.
eth1: no IPv6 routers present
eth0: no IPv6 routers present
----------- [cut here ] --------- [please bite here ] ---------
Kernel BUG at ...ed/kernel/tyan-s2891/modules/drbd/drbd/lru_cache.c:312
invalid opcode: 0000 [1] SMP 
CPU 1 
Modules linked in: sha1 drbd cn ipv6 tg3
Pid: 925:#0, comm: md6_raid5 Not tainted 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1
RIP: 0010:[<ffffffff8807997f>]  [<ffffffff8807997f>] :drbd:lc_put+0x4f/0xc0
RSP: 0018:ffff81017e3fbc38  EFLAGS: 00010046
RAX: 0000000000000000 RBX: ffffc20000b7c268 RCX: ffffc20000b7c268
RDX: ffffc20000b7c268 RSI: ffffc20000b7c268 RDI: ffffc20000b7c000
RBP: ffff81017e0b2800 R08: 000000000000001f R09: 0000000000000001
R10: ffffffff806bd740 R11: ffffffff8027bb60 R12: ffff81017e0b2da8
R13: 0000000000000293 R14: ffff81017e0b2b68 R15: 0000000000000000
FS:  00002b8eb4ef3700(0000) GS:ffff8101000c64c0(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000526788 CR3: 000000007e784000 CR4: 00000000000006e0
Process md6_raid5 (pid: 925[#0], threadinfo ffff81017e3fa000, task ffff81017e097140)
Stack:  ffffffff88077ecf 0000000000000008 ffff81017e0b2800 ffff81007e9c3978
 0000000000000000 0000000000000001 ffffffff8806b94d 0000000000000246
 0000000000000000 0000000000000000 ffff81007f90a9e8 00000000ffffffff
Call Trace:
 [<ffffffff88077ecf>] :drbd:drbd_rs_complete_io+0xcf/0x130
 [<ffffffff8806b94d>] :drbd:drbd_endio_write_sec+0x1bd/0x2d0
 [<ffffffff80453dfb>] handle_stripe+0x248b/0x2780
 [<ffffffff804091ac>] ata_qc_issue_prot+0x12c/0x2b0
 [<ffffffff8040677a>] ata_qc_issue+0x40a/0x4a0
 [<ffffffff8040c7bc>] ata_scsi_rw_xlat+0x29c/0x400
 [<ffffffff8026958b>] thread_return+0x0/0x105
 [<ffffffff803f6078>] scsi_dispatch_cmd+0x258/0x2e0
 [<ffffffff8045424d>] raid5d+0x15d/0x1a0
 [<ffffffff8029e4e0>] keventd_create_kthread+0x0/0x80
 [<ffffffff8045cd4d>] md_thread+0x11d/0x140
 [<ffffffff8029e720>] autoremove_wake_function+0x0/0x30
 [<ffffffff8045cc30>] md_thread+0x0/0x140
 [<ffffffff80235de9>] kthread+0xd9/0x120
 [<ffffffff80266dc8>] child_rip+0xa/0x12
 [<ffffffff8029e4e0>] keventd_create_kthread+0x0/0x80
 [<ffffffff80235d10>] kthread+0x0/0x120
 [<ffffffff80266dbe>] child_rip+0x0/0x12


Code: 0f 0b 68 68 ad 08 88 c2 38 01 66 66 66 90 66 66 90 48 3b 77 
RIP  [<ffffffff8807997f>] :drbd:lc_put+0x4f/0xc0
 RSP <ffff81017e3fbc38>
 NMI Watchdog detected LOCKUP on CPU 0
CPU 0 
Modules linked in: sha1 drbd cn ipv6 tg3
Pid: 1925:#0, comm: drbd0_asender Not tainted 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1
RIP: 0010:[<ffffffff8026b4ba>]  [<ffffffff8026b4ba>] _spin_lock_irqsave+0xa/0x20
RSP: 0018:ffff81007e2a1e18  EFLAGS: 00000086
RAX: 0000000000000246 RBX: 0000000000097e00 RCX: ffffffff88087850
RDX: 000000008a32dfcf RSI: 00000000004bf000 RDI: ffff81017e0b2da8
RBP: 0000000000000000 R08: 0000000000000402 R09: 0000000000000000
R10: 00000000000002f8 R11: 00000000ffffffff R12: ffff81017e0b2800
R13: 0000000000097e07 R14: 00000000004bf000 R15: ffff81017e0b2da8
FS:  00002aefc97b9640(0000) GS:ffffffff8064b000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00002b434a8a1fa8 CR3: 000000017c071000 CR4: 00000000000006e0
Process drbd0_asender (pid: 1925[#0], threadinfo ffff81007e2a0000, task ffff81007fb25140)
Stack:  ffffffff8807772b 0000000000000282 ffff81007e9c3b88 ffff81017e0b2800
 00000000004bf000 0000000000000001 ffff81007e2a1e80 0000000000000000
 ffffffff88071048 ffff81007e9c3b88 ffff81007e3c73a0 ffff81017e0b2800
Call Trace:
 [<ffffffff8807772b>] :drbd:__drbd_set_in_sync+0x1bb/0x2e0
 [<ffffffff88071048>] :drbd:e_end_resync_block+0x68/0x100
 [<ffffffff8806f50b>] :drbd:drbd_process_done_ee+0xdb/0x140
 [<ffffffff88071688>] :drbd:drbd_asender+0xe8/0x580
 [<ffffffff8807fa29>] :drbd:drbd_thread_setup+0x99/0xe0
 [<ffffffff80266dc8>] child_rip+0xa/0x12
 [<ffffffff803a3440>] vgacon_cursor+0x0/0x1c7
 [<ffffffff8807f990>] :drbd:drbd_thread_setup+0x0/0xe0
 [<ffffffff80266dbe>] child_rip+0x0/0x12


Code: 83 3f 00 7e f9 eb f2 c3 66 66 66 90 66 66 66 90 66 66 90 66 
 NMI Watchdog detected LOCKUP on CPU 1
CPU 1 
Modules linked in: sha1 drbd cn ipv6 tg3
Pid: 1900:#0, comm: drbd0_worker Not tainted 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1
RIP: 0010:[<ffffffff8026b4d8>]  [<ffffffff8026b4d8>] _spin_lock_irq+0x8/0x10
RSP: 0000:ffff81017b99be28  EFLAGS: 00000082
RAX: 000000000009e448 RBX: 0000000000000000 RCX: 0000000000000008
RDX: 000000000009e448 RSI: 00000000004f2240 RDI: ffff81017e0b2da8
RBP: ffff81017e0b2800 R08: 000000000009e448 R09: ffffc20000b81000
R10: 0000000000000038 R11: ffffffff88009d60 R12: ffff81017e0b2858
R13: 000000000000009e R14: 0000000000000278 R15: ffff81017e0b2800
FS:  00002b0d9fa7a640(0000) GS:ffff8101000c64c0(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00002aefc979cfa8 CR3: 000000007eff2000 CR4: 00000000000006e0
Process drbd0_worker (pid: 1900[#0], threadinfo ffff81017b99a000, task ffff81017cdbc7b0)
Stack:  ffffffff88077a3d 0000000000000000 ffff81017e0b2800 ffff81017e0b2858
 000000000009e448 00000000004f2240 ffffffff8806bb88 000000008a32dfd0
 0000000000008000 000007d100000001 0000000000000000 ffff81017e0b2800
Call Trace:
 [<ffffffff88077a3d>] :drbd:drbd_try_rs_begin_io+0x5d/0x420
 [<ffffffff8806bb88>] :drbd:w_make_resync_request+0x128/0x330
 [<ffffffff8806b629>] :drbd:drbd_worker+0x2a9/0x410
 [<ffffffff8807fa29>] :drbd:drbd_thread_setup+0x99/0xe0
 [<ffffffff80266dc8>] child_rip+0xa/0x12
 [<ffffffff803a3440>] vgacon_cursor+0x0/0x1c7
 [<ffffffff8807f990>] :drbd:drbd_thread_setup+0x0/0xe0
 [<ffffffff80266dbe>] child_rip+0x0/0x12


Code: 83 3f 00 7e f9 eb f2 c3 53 48 89 fb e8 f7 4f 02 00 f0 ff 0b 
 <6>drbd0: peer( Secondary -> Unknown ) conn( SyncTarget -> BrokenPipe ) pdsk( UpToDate -> DUnknown ) 
drbd0: short read receiving data: read 1272 expected 4096
drbd0: error receiving RSDataReply, l: 32792!
Unable to handle kernel NULL pointer dereference at 0000000000000808 RIP: 
 [<ffffffff8026b4b3>] _spin_lock_irqsave+0x3/0x20
PGD 17c066067 PUD 17c067067 PMD 0 
Oops: 0002 [2] SMP 
CPU 0 
Modules linked in: sha1 drbd cn ipv6 tg3
Pid: 1920:#0, comm: drbd0_receiver Not tainted 2.6.19.1-vs2.2.0-rc6-tyan-s2891test #1
RIP: 0010:[<ffffffff8026b4b3>]  [<ffffffff8026b4b3>] _spin_lock_irqsave+0x3/0x20
RSP: 0018:ffff81007e2d1e08  EFLAGS: 00010002
RAX: 0000000000000202 RBX: 0000000000000001 RCX: 0000000000000000
RDX: ffff81007fb25140 RSI: 0000000000000001 RDI: 0000000000000808
RBP: ffff81007fb25140 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff806bd740 R11: ffffffff8027bb60 R12: 0000000000000000
R13: 0000000000000001 R14: ffff81017e0b2800 R15: 0000000000000020
FS:  00002aefc97b9640(0000) GS:ffffffff8064b000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000808 CR3: 000000017c071000 CR4: 00000000000006e0
Process drbd0_receiver (pid: 1920[#0], threadinfo ffff81007e2d0000, task ffff81007fb1c140)
Stack:  ffffffff80296823 0000000000000020 0000000000000002 ffff81017e0b2c58
 0000000000000000 0000000000000001 ffffffff8807c50e ffff81017e0b2800
 ffff81017e0b2800 ffff81017e0b2bd8 000000000000c822 0000000000000004
Call Trace:
 [<ffffffff80296823>] force_sig_info+0x33/0xd0
 [<ffffffff8807c50e>] :drbd:_drbd_thread_stop+0xbe/0x1c0
 [<ffffffff8806ee66>] :drbd:drbd_disconnect+0x56/0x620
 [<ffffffff802312d3>] __wake_up+0x43/0x70
 [<ffffffff880714db>] :drbd:drbdd_init+0xdb/0x1a0
 [<ffffffff8807fa29>] :drbd:drbd_thread_setup+0x99/0xe0
 [<ffffffff80266dc8>] child_rip+0xa/0x12
 [<ffffffff803a3440>] vgacon_cursor+0x0/0x1c7
 [<ffffffff8807f990>] :drbd:drbd_thread_setup+0x0/0xe0
 [<ffffffff80266dbe>] child_rip+0x0/0x12


Code: f0 ff 0f 79 09 f3 90 83 3f 00 7e f9 eb f2 c3 66 66 66 90 66 
RIP  [<ffffffff8026b4b3>] _spin_lock_irqsave+0x3/0x20
 RSP <ffff81007e2d1e08>
CR2: 0000000000000808
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-25 17:45             ` Ard van Breemen
@ 2007-01-25 21:32               ` Lars Ellenberg
  2007-01-25 22:26                 ` Lars Ellenberg
                                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Lars Ellenberg @ 2007-01-25 21:32 UTC (permalink / raw)
  To: drbd-dev


first, there is 2.6.19.2 already.
second, there is drbd 8.0.0 already.
though, there have not been any interesting changes in this area since revision 2695,
which you apparently use.

third, see below :)

/ 2007-01-25 18:45:23 +0100
\ Ard van Breemen:
> Hi,
> On Tue, Jan 16, 2007 at 11:37:49AM +0100, Ard van Breemen wrote:
> > I will try it.
> > It seems to happen if I do this:
> > - create a raidset at the primary, create a raidset at the
> > secondary.
> > - create the drbd device on top of that
> > - designate the primary primary.
> I've done this to create the following two kernel logs:
> Create a raidset at A, Create a raidset at B
> Wait for raidsync to complete
> Create the drbd device
> Start the connection (both inconsistent)
> make raidset A primary
> The stuff starts to sync.
> The sync speed at that moment is about 10MB/s.
> I reboot both systems (I just wanted to get a higher speed, and
> rebooting is an easy way to restore settings to something known).
> 
> The primary comes up ok, the secondary also.
> The secondary then trips.
> (At that moment both raidsets are OK).
> That's when I made the logs.
> 
> I reboot the secondary.
> The sync gains speed to >70MB/s
> 
> Just to let you know why I want to use drbd:
> I've got servers with >100M files. If one of the servers goes
> haywired, I've got to resync the 100M files. There is no normal
> application that can sync 1T/>100M files in a timely matter. Next
> to that, I have to bring a second server down, to be abel to do
> that. Using the raw power of drbd, I can do that with drbd in
> about 7 hours, and als be up to date after 7 hours of syncing.
> So: I only use it to sync. After that I disconnect the devices,
> and let them go on independently.
> (Resyncing from the backup takes 48 hours or so)
> Of course I can take the easy road and switch to 0.7 ;-)

thanks for reporting this.

> drbd: initialised. Version: 8.0rc1 (api:86/proto:85)
...

> drbd: initialised. Version: 8.0rc1 (api:86/proto:85)
> drbd: SVN Revision: 2695 build by ard@siddev, 2007-01-16 15:33:47

> drbd: registered as block device major 147
> drbd: minor_table @ 0xffff81007f017e80
> drbd0: disk( Diskless -> Attaching ) 
> drbd0: No usable activity log found.
> drbd0: max_segment_size ( = BIO size ) = 32768
> drbd0: Adjusting my ra_pages to backing device's (32 -> 96)
> drbd0: drbd_bm_resize called with capacity == 2318589904
> drbd0: resync bitmap: bits=289823738 words=4528496
> drbd0: size = 1105 GB (1159294952 KB)
> drbd0: reading of bitmap took 124 jiffies
> drbd0: recounting of set bits took additional 7 jiffies
> drbd0: 1105 GB marked out-of-sync by on disk bit-map.
> drbd0: disk( Attaching -> Inconsistent ) 
> drbd0: Writing meta data super block now.
> drbd0: conn( StandAlone -> Unconnected ) 
> drbd0: receiver (re)started
> drbd0: conn( Unconnected -> WFConnection ) 
> drbd0: conn( WFConnection -> WFReportParams ) 
> drbd0: Handshake successful: DRBD Network Protocol version 85
> drbd0: Peer authenticated using 20 bytes of 'sha1' HMAC
> drbd0: peer( Unknown -> Secondary ) conn( WFReportParams -> WFBitMapT ) pdsk( DUnknown -> UpToDate ) 
> drbd0: Writing meta data super block now.
> drbd0: conn( WFBitMapT -> WFSyncUUID ) 
> drbd0: conn( WFSyncUUID -> SyncTarget ) 
> drbd0: Began resync as SyncTarget (will sync 1158770664 KB [289692666 bits set]).
> drbd0: Writing meta data super block now.
> eth1: no IPv6 routers present
> eth0: no IPv6 routers present
> ----------- [cut here ] --------- [please bite here ] ---------
> Kernel BUG at ...ed/kernel/tyan-s2891/modules/drbd/drbd/lru_cache.c:312
> invalid opcode: 0000 [1] SMP 
> Call Trace:
>  [<ffffffff88077ecf>] :drbd:drbd_rs_complete_io+0xcf/0x130
>  [<ffffffff8806b94d>] :drbd:drbd_endio_write_sec+0x1bd/0x2d0

> RIP  [<ffffffff8807997f>] :drbd:lc_put+0x4f/0xc0
>  NMI Watchdog detected LOCKUP on CPU 0
> RIP: 0010:[<ffffffff8026b4ba>]  [<ffffffff8026b4ba>] _spin_lock_irqsave+0xa/0x20
> Call Trace:
>  [<ffffffff8807772b>] :drbd:__drbd_set_in_sync+0x1bb/0x2e0
>  [<ffffffff88071048>] :drbd:e_end_resync_block+0x68/0x100
>  [<ffffffff8806f50b>] :drbd:drbd_process_done_ee+0xdb/0x140
>  [<ffffffff88071688>] :drbd:drbd_asender+0xe8/0x580

we had some issues in that area lately,
but they were believed to be fixed.

we have to go through these call traces once again.

please upgrade to 8.0.0 anyways, so we won't turn your bug report down
accidentally just because you "only" use rc1...

-- 
: Lars Ellenberg                            Tel +43-1-8178292-55 :
: LINBIT Information Technologies GmbH      Fax +43-1-8178292-82 :
: Vivenotgasse 48, A-1120 Vienna/Europe    http://www.linbit.com :

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-25 21:32               ` Lars Ellenberg
@ 2007-01-25 22:26                 ` Lars Ellenberg
  2007-01-28 10:59                   ` Ard van Breemen
       [not found]                 ` <20070126142857.GE9639@kwaak.net>
  2007-02-11 21:55                 ` Ard van Breemen
  2 siblings, 1 reply; 20+ messages in thread
From: Lars Ellenberg @ 2007-01-25 22:26 UTC (permalink / raw)
  To: drbd-dev

/ 2007-01-25 22:32:10 +0100
\ Lars Ellenberg:
> 
> first, there is 2.6.19.2 already.
> second, there is drbd 8.0.0 already.
> though, there have not been any interesting changes in this area since revision 2695,
> which you apparently use.

> > drbd0: conn( WFSyncUUID -> SyncTarget ) 
> > drbd0: Began resync as SyncTarget (will sync 1158770664 KB [289692666 bits set]).
> > drbd0: Writing meta data super block now.
> > eth1: no IPv6 routers present
> > eth0: no IPv6 routers present
> > ----------- [cut here ] --------- [please bite here ] ---------
> > Kernel BUG at ...ed/kernel/tyan-s2891/modules/drbd/drbd/lru_cache.c:312
> > invalid opcode: 0000 [1] SMP 
> > Call Trace:
> >  [<ffffffff88077ecf>] :drbd:drbd_rs_complete_io+0xcf/0x130
> >  [<ffffffff8806b94d>] :drbd:drbd_endio_write_sec+0x1bd/0x2d0
> 
> > RIP  [<ffffffff8807997f>] :drbd:lc_put+0x4f/0xc0
> >  NMI Watchdog detected LOCKUP on CPU 0
> > RIP: 0010:[<ffffffff8026b4ba>]  [<ffffffff8026b4ba>] _spin_lock_irqsave+0xa/0x20
> > Call Trace:
> >  [<ffffffff8807772b>] :drbd:__drbd_set_in_sync+0x1bb/0x2e0
> >  [<ffffffff88071048>] :drbd:e_end_resync_block+0x68/0x100
> >  [<ffffffff8806f50b>] :drbd:drbd_process_done_ee+0xdb/0x140
> >  [<ffffffff88071688>] :drbd:drbd_asender+0xe8/0x580

I'd love it if it were not a logic bug but rather drbd being not robust
and paranoid enough...

one possibility for this to happen would be:

being SyncTarget
requesting some resync blocks. this also does the drbd_rs_begin_io.
the SyncSource sends us some RSDataReply (with an ID of -1ULL, and
some sector offset).

we currently do not verify whether we expected this sector offset.
we just read in the data and submit them. [there is a FIXME paranoia
comment in place in receive_RSDataReply, though]

later, the drbd_endio_write_sec callback does the drbd_rs_complete_io
for the corresponding resync extent.

now, if that extent was in the resync lru because we used it before,
but the RSDataReply would be for a sector we had not requested [*],
the refcnt is likely to be imbalanced, and we might BUG_ON it being zero,
in lc_put...

[*] how that could happen, I don't know yet...

in any case, regardless of this being a logic bug, (smp) race condition
or anything else, we need to become more robust there:

Index: drbd_actlog.c
===================================================================
--- drbd_actlog.c	(revision 2715)
+++ drbd_actlog.c	(working copy)
@@ -1098,6 +1098,13 @@
 		return;
 	}
 
+	if(bm_ext->lce.refcnt == 0) {
+		spin_unlock_irqrestore(&mdev->al_lock,flags);
+		ERR("drbd_rs_complete_io(,%llu [=%u]) called, but refcnt is 0!?\n",
+				(unsigned long long)sector, enr);
+		return;
+	}
+
 	if( lc_put(mdev->resync,(struct lc_element *)bm_ext) == 0 ) {
 		clear_bit(BME_LOCKED,&bm_ext->flags);
 		clear_bit(BME_NO_WRITES,&bm_ext->flags);

(not dared to commit this, in case this all was nonsense...
I feel too tired now)

-- 
: Lars Ellenberg                            Tel +43-1-8178292-55 :
: LINBIT Information Technologies GmbH      Fax +43-1-8178292-82 :
: Vivenotgasse 48, A-1120 Vienna/Europe    http://www.linbit.com :

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
       [not found]                 ` <20070126142857.GE9639@kwaak.net>
@ 2007-01-26 14:34                   ` Ard van Breemen
  0 siblings, 0 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-26 14:34 UTC (permalink / raw)
  To: drbd-dev

On Fri, Jan 26, 2007 at 03:28:57PM +0100,  wrote:
> > second, there is drbd 8.0.0 already.
> > though, there have not been any interesting changes in this area since revision 2695,
> > which you apparently use.
> Allright, I will try it again :-)
> now At revision 2715.
Just to make sure: Although the banner says 2695, it really is
2715 according to svn.
I always forget to make drbd/drbd_buildtag.c
(Which currently insists on being 2713 )

Anyway: on to the next test ;-)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-25 22:26                 ` Lars Ellenberg
@ 2007-01-28 10:59                   ` Ard van Breemen
  2007-01-28 11:38                     ` Ard van Breemen
  0 siblings, 1 reply; 20+ messages in thread
From: Ard van Breemen @ 2007-01-28 10:59 UTC (permalink / raw)
  To: drbd-dev

Hi,
On Thu, Jan 25, 2007 at 11:26:30PM +0100, Lars Ellenberg wrote:
> (not dared to commit this, in case this all was nonsense...
> I feel too tired now)
Thanks to this patch the machines don't crash anymore.
But:
Primary:
SVN Revision: 2713 build by ard@siddev, 2007-01-26 15:41:35
 0: cs:Connected st:Primary/Secondary ds:UpToDate/UpToDate C r---
    ns:1159294952 nr:0 dw:0 dr:1159294952 al:0 bm:70758 lo:0 pe:0 ua:0 ap:0
        resync: used:0/31 hits:72385178 misses:70758 starving:0 dirty:0 changed:70758
        act_log: used:0/127 hits:0 misses:0 starving:0 dirty:0 changed:0

Secondary:
SVN Revision: 2713 build by ard@siddev, 2007-01-26 15:41:35
 0: cs:SyncTarget st:Secondary/Primary ds:Inconsistent/UpToDate C r---
    ns:0 nr:1159294952 dw:1159294952 dr:0 al:0 bm:70758 lo:0 pe:0 ua:0 ap:0
        [===================>] sync'ed:100.0% (0/1132123)M
        stalled
        resync: used:28/31 hits:72385178 misses:70758 starving:0 dirty:0 changed:70758
        act_log: used:0/127 hits:0 misses:0 starving:0 dirty:0 changed:0

The dmesg on the secondary:
Jan 27 19:19:52 localhost kernel: drbd0: Began resync as SyncTarget (will sync 1159294952 KB [289823738 bits s
Jan 27 19:19:52 localhost kernel: drbd0: Writing meta data super block now.
Jan 27 19:30:37 localhost kernel: drbd0: drbd_rs_complete_io(,85553088 [=2610]) called, but refcnt is 0!?
Jan 27 19:35:11 localhost kernel: drbd0: drbd_rs_complete_io(,122285888 [=3731]) called, but refcnt is 0!?
<snip>
Jan 27 23:26:15 localhost kernel: drbd0: drbd_rs_complete_io(,1931640128 [=58948]) called, but refcnt is 0!?
Jan 27 23:33:21 localhost kernel: drbd0: drbd_rs_complete_io(,1982954432 [=60514]) called, but refcnt is 0!?
Jan 27 23:33:52 localhost kernel: drbd0: drbd_rs_complete_io(,1986625472 [=60626]) called, but refcnt is 0!?
Jan 27 23:37:53 localhost kernel: drbd0: drbd_rs_complete_io(,2014511040 [=61477]) called, but refcnt is 0!?
Jan 28 00:29:53 localhost kernel: drbd0: Retrying drbd_rs_del_all() later. refcnt=1
Jan 28 00:30:24 localhost last message repeated 294 times
Jan 28 00:31:25 localhost last message repeated 584 times

From that point on that message is repeated over and over...

Regards,
Ard van Breemen

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-28 10:59                   ` Ard van Breemen
@ 2007-01-28 11:38                     ` Ard van Breemen
  0 siblings, 0 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-01-28 11:38 UTC (permalink / raw)
  To: drbd-dev

Hi,
On Sun, Jan 28, 2007 at 11:59:38AM +0100, Ard van Breemen wrote:
> From that point on that message is repeated over and over...
What makes it even more interesting:
siep:~# drbdadm down all
Child process does not terminate!
Exiting.
siep:~# No response from the DRBD driver! Is the module loaded?
No response from the DRBD driver! Is the module loaded?
State change failed: (0)unknown error.
cat /proc/drbd 
version: 8.0.0 (api:86/proto:86)
SVN Revision: 2713 build by ard@siddev, 2007-01-26 15:41:35
 0: cs:SyncTarget st:Secondary/Primary ds:Inconsistent/UpToDate C r---
    ns:0 nr:1297904392 dw:1297901288 dr:0 al:0 bm:79217 lo:97 pe:3444 ua:97 ap:0
        [==>.................] sync'ed: 12.0% (996766/1132123)M
        finish: 3:34:10 speed: 79,384 (71,408) K/sec
        resync: used:11/31 hits:81043148 misses:79225 starving:0 dirty:0 changed:79225
        act_log: used:0/127 hits:0 misses:0 starving:0 dirty:0 changed:0

(siep in this case is the slave)

So: on drbdadm down all, the system starts to sync again...

We've got a dmesg for that:
Jan 28 06:47:02 localhost kernel: drbd0: Retrying drbd_rs_del_all() later. refcnt=1
Jan 28 06:47:33 localhost last message repeated 295 times
<snip>
Jan 28 11:59:54 localhost last message repeated 501 times
Jan 28 11:59:54 localhost kernel: drbd0: peer( Primary -> Unknown ) conn( SyncTarget -> Disconnecting ) pdsk( 
Jan 28 11:59:54 localhost kernel: drbd0: short read expecting header on sock: r=-512
Jan 28 11:59:54 localhost kernel: drbd0: asender terminated
Jan 28 11:59:54 localhost kernel: drbd0: Retrying drbd_rs_del_all() later. refcnt=1
Jan 28 11:59:54 localhost kernel: drbd0: Resync done (total 60002 sec; paused 0 sec; 0 K/sec)
Jan 28 11:59:54 localhost kernel: drbd0: conn( Disconnecting -> Connected ) disk( Inconsistent -> UpToDate ) p
Jan 28 11:59:54 localhost kernel: drbd0: Writing meta data super block now.
Jan 28 11:59:54 localhost kernel: drbd0: tl_clear()
Jan 28 11:59:54 localhost kernel: drbd0: Connection closed
Jan 28 11:59:54 localhost kernel: drbd0: conn( Connected -> Unconnected ) pdsk( UpToDate -> DUnknown ) 
Jan 28 11:59:54 localhost kernel: drbd0: drbdd_init: (mdev->receiver.t_state != Restarting) in /usr/src/shared
Jan 28 11:59:54 localhost kernel: drbd0: receiver terminated
Jan 28 11:59:54 localhost kernel: drbd0: receiver (re)started
Jan 28 11:59:54 localhost kernel: drbd0: conn( Unconnected -> WFConnection ) 
Jan 28 11:59:54 localhost kernel: drbd0: conn( WFConnection -> WFReportParams ) 
Jan 28 11:59:54 localhost kernel: drbd0: Handshake successful: DRBD Network Protocol version 86
Jan 28 11:59:54 localhost kernel: drbd0: Peer authenticated using 20 bytes of 'sha1' HMAC
Jan 28 11:59:54 localhost kernel: drbd0: Writing meta data super block now.
Jan 28 11:59:55 localhost kernel: drbd0: writing of bitmap took 93 jiffies
Jan 28 11:59:55 localhost kernel: drbd0: 1105 GB marked out-of-sync by on disk bit-map.
Jan 28 11:59:55 localhost kernel: drbd0: 1159294952 KB now marked out-of-sync by on disk bit-map.
Jan 28 11:59:55 localhost kernel: drbd0: Writing meta data super block now.
Jan 28 11:59:55 localhost kernel: drbd0: peer( Unknown -> Primary ) conn( WFReportParams -> WFBitMapT ) pdsk( 
Jan 28 11:59:55 localhost kernel: drbd0: Writing meta data super block now.
Jan 28 11:59:56 localhost kernel: drbd0: conn( WFBitMapT -> WFSyncUUID ) 
Jan 28 11:59:56 localhost kernel: drbd0: conn( WFSyncUUID -> SyncTarget ) disk( UpToDate -> Inconsistent ) 
Jan 28 11:59:56 localhost kernel: drbd0: Began resync as SyncTarget (will sync 1159294952 KB [289823738 bits s
Jan 28 11:59:56 localhost kernel: drbd0: Writing meta data super block now.
Jan 28 12:07:46 localhost kernel: drbd0: drbd_rs_complete_io(,67857280 [=2070]) called, but refcnt is 0!?
Jan 28 12:12:24 localhost kernel: drbd0: drbd_rs_complete_io(,107445120 [=3278]) called, but refcnt is 0!?

So the patch of Lars makes the system not crash :-).
If anyone can hint where I have to infest the code with some printk's and
hopefully some explanation of what to look for, I might be able to track it
down. Unfortunately I only have about 10% resources left in my head to look at
it :-(.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Drbd-dev] oopses in 2.6.19.1
  2007-01-25 21:32               ` Lars Ellenberg
  2007-01-25 22:26                 ` Lars Ellenberg
       [not found]                 ` <20070126142857.GE9639@kwaak.net>
@ 2007-02-11 21:55                 ` Ard van Breemen
  2 siblings, 0 replies; 20+ messages in thread
From: Ard van Breemen @ 2007-02-11 21:55 UTC (permalink / raw)
  To: drbd-dev

Hello,

On Thu, Jan 25, 2007 at 10:32:10PM +0100, Lars Ellenberg wrote:
> first, there is 2.6.19.2 already.

2.6.19.3 included the correct patches.
And since I am running 2.6.19.3 I didn't have any troubles
anymore. (same compiler and such).
Latest svn version doesn't work on 2.6.19.1 (but thanks to Lars's
patches also didn't oops), and that same version compiled for
2.6.19.3 works.

I suspect some NUMA fixes or other NUMA/SMP problems got fixed.

So, I've achieved my goal: syncing 1.2T within 5 hours without
taking the master down. Thanks for all the good work! :-).

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2007-02-11 22:00 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-10 12:31 [Drbd-dev] drbd 2.6.19 crypto changes Ard van Breemen
2007-01-10 13:48 ` Lars Ellenberg
2007-01-10 16:09   ` Ard van Breemen
2007-01-10 19:33     ` Ard van Breemen
2007-01-10 16:23 ` Philipp Reisner
2007-01-10 20:17   ` Ard van Breemen
2007-01-11 14:38   ` Ard van Breemen
2007-01-11 17:12     ` Ard van Breemen
2007-01-11 18:03       ` [Drbd-dev] oopses in 2.6.19.1 Ard van Breemen
2007-01-12 13:53         ` Philipp Reisner
2007-01-15 17:06         ` Philipp Reisner
2007-01-16 10:37           ` Ard van Breemen
2007-01-25 17:45             ` Ard van Breemen
2007-01-25 21:32               ` Lars Ellenberg
2007-01-25 22:26                 ` Lars Ellenberg
2007-01-28 10:59                   ` Ard van Breemen
2007-01-28 11:38                     ` Ard van Breemen
     [not found]                 ` <20070126142857.GE9639@kwaak.net>
2007-01-26 14:34                   ` Ard van Breemen
2007-02-11 21:55                 ` Ard van Breemen
2007-01-12 13:50       ` [Drbd-dev] drbd 2.6.19 crypto changes Philipp Reisner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.