Distributed Replicated Block Device (DRBD) development
 help / color / mirror / Atom feed
From: "Christoph Böhmwalder" <christoph.boehmwalder@linbit.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: Philipp Reisner <philipp.reisner@linbit.com>,
	linux-kernel@vger.kernel.org, linux-block@vger.kernel.org,
	Lars Ellenberg <lars.ellenberg@linbit.com>,
	drbd-dev@lists.linbit.com
Subject: [PATCH 03/20] drbd: introduce DRBD 9 on-disk metadata format
Date: Fri, 27 Mar 2026 23:38:03 +0100	[thread overview]
Message-ID: <20260327223820.2244227-4-christoph.boehmwalder@linbit.com> (raw)
In-Reply-To: <20260327223820.2244227-1-christoph.boehmwalder@linbit.com>

Add a new header that captures the DRBD 9 on-disk metadata layout,
enabling state tracking for multiple peers.
It includes the per-device superblock and per-peer slot structures
needed to track bitmap UUIDs and sync state for each peer.

Co-developed-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Co-developed-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Co-developed-by: Joel Colledge <joel.colledge@linbit.com>
Signed-off-by: Joel Colledge <joel.colledge@linbit.com>
Co-developed-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Signed-off-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
---
 drivers/block/drbd/drbd_meta_data.h | 126 ++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 drivers/block/drbd/drbd_meta_data.h

diff --git a/drivers/block/drbd/drbd_meta_data.h b/drivers/block/drbd/drbd_meta_data.h
new file mode 100644
index 000000000000..af77e8d53f02
--- /dev/null
+++ b/drivers/block/drbd/drbd_meta_data.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef DRBD_META_DATA_H
+#define DRBD_META_DATA_H
+
+/* How this magic number was chosen:
+ * base64-decode "actlog==" ;) */
+#define DRBD_AL_MAGIC 0x69cb65a2
+
+#define BM_BLOCK_SHIFT_4k	12			 /* 4k per bit */
+#define BM_BLOCK_SHIFT_MIN	BM_BLOCK_SHIFT_4k
+#define BM_BLOCK_SHIFT_MAX	20			 /* 1 MiB per bit */
+#define BM_BLOCK_SIZE_4k	4096
+#define BM_BLOCK_SIZE_MIN	(1<<BM_BLOCK_SHIFT_MIN)
+#define BM_BLOCK_SIZE_MAX	(1<<BM_BLOCK_SHIFT_MAX)
+
+struct peer_dev_md_on_disk_9 { /* per-peer slot; 32 bytes, all fields big endian */
+	__be64 bitmap_uuid;
+	__be64 bitmap_dagtag;
+	__be32 flags;
+	__be32 bitmap_index;
+	__be32 reserved_u32[2];
+} __packed;
+
+struct meta_data_on_disk_9 { /* per-device superblock; all fields big endian */
+	__be64 effective_size;    /* last agreed size */
+	__be64 current_uuid;
+	__be64 members;		  /* only valid if MDF_HAVE_MEMBERS_MASK is set in the flags */
+	__be64 reserved_u64[3];   /* keeps the magic at the same offset as in v07 and v08 */
+	__be64 device_uuid;
+	__be32 flags;             /* MDF_* flags */
+	__be32 magic;
+	__be32 md_size_sect;
+	__be32 al_offset;         /* offset to this block */
+	__be32 al_nr_extents;     /* important for restoring the AL */
+	__be32 bm_offset;         /* offset to the bitmap, from here */
+	__be32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
+	__be32 la_peer_max_bio_size;   /* last peer max_bio_size */
+	__be32 bm_max_peers;
+	__be32 node_id;
+
+	/* see al_tr_number_to_on_disk_sector() */
+	__be32 al_stripes;
+	__be32 al_stripe_size_4k;
+
+	__be32 reserved_u32[2];
+
+	struct peer_dev_md_on_disk_9 peers[DRBD_PEERS_MAX];
+	__be64 history_uuids[HISTORY_UUIDS];
+
+	unsigned char padding_start[0]; /* zero-length marker: end of the used fields */
+	unsigned char padding_end[0] __aligned(4096); /* rounds the struct size up to a multiple of 4096 */
+} __packed;
+
+/* Attention: these two are also defined in drbd_int.h; the values must match! */
+#define AL_UPDATES_PER_TRANSACTION 64
+#define AL_CONTEXT_PER_TRANSACTION 919 /* (4096 - 420) / 4, see struct al_transaction_on_disk */
+
+enum al_transaction_types { /* values for al_transaction_on_disk.transaction_type */
+	AL_TR_UPDATE = 0,
+	AL_TR_INITIALIZED = 0xffff
+};
+/* One 4k activity log transaction block; all fields on disk are big endian. */
+struct __packed al_transaction_on_disk {
+	/* don't we all like magic */
+	__be32	magic;
+
+	/* to identify the most recent transaction block
+	 * in the on-disk ring buffer */
+	__be32	tr_number;
+
+	/* checksum over the full 4k block, with this field set to 0. */
+	__be32	crc32c;
+
+	/* type of transaction, special transaction types like:
+	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
+	 * see also enum al_transaction_types */
+	__be16	transaction_type;
+
+	/* we currently allow only a few thousand extents,
+	 * so 16 bits will be enough for the slot number. */
+
+	/* how many updates in this transaction */
+	__be16	n_updates;
+
+	/* maximum slot number, "al-extents" in drbd.conf speak.
+	 * Having this in each transaction should make reconfiguration
+	 * of that parameter easier. */
+	__be16	context_size;
+
+	/* slot number the context starts with */
+	__be16	context_start_slot_nr;
+
+	/* Some reserved bytes.  Expected usage is a 64bit counter of
+	 * sectors-written since device creation, and other data generation tag
+	 * supporting usage */
+	__be32	__reserved[4];
+
+	/* --- 36 bytes used --- */
+
+	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
+	 * in one transaction, then use the remaining bytes in the 4k block for
+	 * context information.  A "flexible" number of updates per transaction
+	 * does not help, as we have to account for the case when all update
+	 * slots are used anyway, so it would only complicate the code without
+	 * additional benefit.
+	 */
+	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* but the extent number is 32 bits, which at an extent size of 4 MiB
+	 * allows covering device sizes of up to 2**54 bytes (16 PiB) */
+	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* --- 420 bytes used (36 + 64*6) --- */
+
+	/* 4096 - 420 = 3676 = 919 * 4 */
+	__be32	context[AL_CONTEXT_PER_TRANSACTION];
+};
+
+#define DRBD_AL_PMEM_MAGIC 0x6aa667a6 /* "al==pmem" */
+
+struct __packed al_on_pmem { /* magic, then a flexible array of big-endian 32-bit slots */
+	__be32 magic;
+	__be32 slots[];
+};
+
+#endif
-- 
2.53.0


  parent reply	other threads:[~2026-03-27 22:38 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-27 22:38 [PATCH 00/20] DRBD 9 rework Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 01/20] drbd: mark as BROKEN during " Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 02/20] drbd: extend wire protocol definitions for DRBD 9 Christoph Böhmwalder
2026-03-28 14:13   ` kernel test robot
2026-03-27 22:38 ` Christoph Böhmwalder [this message]
2026-03-27 22:38 ` [PATCH 04/20] drbd: add transport layer abstraction Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 05/20] drbd: add TCP transport implementation Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 06/20] drbd: add RDMA " Christoph Böhmwalder
2026-04-08  5:42   ` Christoph Hellwig
2026-04-08 12:01     ` Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 07/20] drbd: add load-balancing TCP transport Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 08/20] drbd: add DAX/PMEM support for metadata access Christoph Böhmwalder
2026-04-08  5:46   ` Christoph Hellwig
2026-03-27 22:38 ` [PATCH 09/20] drbd: add optional compatibility layer for DRBD 8.4 Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 10/20] drbd: rename drbd_worker.c to drbd_sender.c Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 11/20] drbd: rework sender for DRBD 9 multi-peer Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 12/20] drbd: replace per-device state model with multi-peer data structures Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 13/20] drbd: rewrite state machine for DRBD 9 multi-peer clusters Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 14/20] drbd: rework activity log and bitmap for multi-peer replication Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 15/20] drbd: rework request processing for DRBD 9 multi-peer IO Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 16/20] drbd: rework module core for DRBD 9 transport and multi-peer Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 17/20] drbd: rework receiver for DRBD 9 transport and multi-peer protocol Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 18/20] drbd: rework netlink management interface for DRBD 9 Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 19/20] drbd: update monitoring interfaces for multi-peer topology Christoph Böhmwalder
2026-03-27 22:38 ` [PATCH 20/20] drbd: remove BROKEN for DRBD Christoph Böhmwalder
2026-03-28 12:21   ` kernel test robot
2026-03-28 14:20   ` kernel test robot
2026-04-03  1:30 ` [PATCH 00/20] DRBD 9 rework Jens Axboe
2026-04-03 13:24   ` Christoph Böhmwalder
2026-04-08  5:17   ` Christoph Hellwig
2026-04-08 12:58     ` Jens Axboe
2026-04-09  6:40       ` Christoph Hellwig
2026-04-10  1:14         ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260327223820.2244227-4-christoph.boehmwalder@linbit.com \
    --to=christoph.boehmwalder@linbit.com \
    --cc=axboe@kernel.dk \
    --cc=drbd-dev@lists.linbit.com \
    --cc=lars.ellenberg@linbit.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=philipp.reisner@linbit.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox