linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1] libxfs: support reproducible filesystems using deterministic time/seed
@ 2025-11-07 16:12 Luca Di Maio
  2025-11-07 16:37 ` Darrick J. Wong
  0 siblings, 1 reply; 3+ messages in thread
From: Luca Di Maio @ 2025-11-07 16:12 UTC (permalink / raw)
  To: linux-xfs; +Cc: Luca Di Maio, dimitri.ledkov, smoser, djwong

Add support for reproducible filesystem creation through two environment
variables that enable deterministic behavior when building XFS filesystems.

SOURCE_DATE_EPOCH support:
When SOURCE_DATE_EPOCH is set, use its value for all filesystem timestamps
instead of the current time. This follows the reproducible builds
specification (https://reproducible-builds.org/specs/source-date-epoch/)
and ensures consistent inode timestamps across builds.

DETERMINISTIC_SEED support:
When DETERMINISTIC_SEED is set, use it to generate deterministic values
from get_random_u32() instead of reading from /dev/urandom. This ensures
that UUIDs and other randomly selected values are consistent across builds.

The implementation introduces two helper functions to minimize changes
to existing code:

- current_fixed_time(): Helper that parses and caches SOURCE_DATE_EPOCH.
  Returns fixed timestamp when set, with fallback on parse errors.
- get_msws_prng_32(): Helper implementing Middle Square Weyl Sequence PRNG.
  Uses DETERMINISTIC_SEED to generate deterministic pseudo-random sequence.
  Accepts decimal/hex/octal values via base-0 parsing.
- Both helpers use one-time initialization to avoid repeated getenv() calls.
- Both return early as a no-op if the environment variable is unset or
  invalid, falling back to the original behaviour.

Example usage:
  SOURCE_DATE_EPOCH=1234567890 \
  DETERMINISTIC_SEED=0xDEADBEEF \
  mkfs.xfs \
	-m uuid=$EXAMPLE_UUID \
	-p file=./rootfs \
	disk1.img

This enables distributions and build systems to create bit-for-bit
identical XFS filesystems when needed for verification and debugging.

Signed-off-by: Luca Di Maio <luca.dimaio1@gmail.com>
---
 libxfs/util.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/libxfs/util.c b/libxfs/util.c
index 3597850d..676da81b 100644
--- a/libxfs/util.c
+++ b/libxfs/util.c
@@ -137,12 +137,69 @@ xfs_log_calc_unit_res(
 	return unit_bytes;
 }
 
+/*
+ * current_fixed_time() checks whether SOURCE_DATE_EPOCH is set in the
+ * environment and, if so, fills the caller's timespec with that value.
+ *
+ * The variable is parsed once and cached.  It must be a non-negative base-10
+ * integer with no trailing characters; anything else is ignored.
+ *
+ * Returns true if tv was set, false if the caller must use the real clock.
+ */
+bool
+current_fixed_time(
+	struct			timespec64 *tv)
+{
+	static bool		read_env = false;
+	static time64_t		epoch = -1;	/* -1: unset or invalid */
+	char			*source_date_epoch;
+	char			*end;
+	long long		val;
+
+	if (!read_env) {
+		/* Parse only once to avoid repeated getenv() calls. */
+		read_env = true;
+		source_date_epoch = getenv("SOURCE_DATE_EPOCH");
+		if (source_date_epoch && source_date_epoch[0] != '\0') {
+			/*
+			 * strtoll() with an end pointer so that garbage such
+			 * as "abc" or "12x" is rejected instead of being
+			 * silently treated as 0 or 12, and so that 64-bit
+			 * values parse on 32-bit platforms.
+			 */
+			errno = 0;
+			val = strtoll(source_date_epoch, &end, 10);
+			if (errno == 0 && *end == '\0' && val >= 0)
+				epoch = val;
+		}
+	}
+
+	/*
+	 * No valid SOURCE_DATE_EPOCH: fall back to regular behaviour.
+	 */
+	if (epoch < 0)
+		return false;
+
+	tv->tv_sec = epoch;
+	tv->tv_nsec = 0;
+	return true;
+}
+
 struct timespec64
 current_time(struct inode *inode)
 {
 	struct timespec64	tv;
 	struct timeval		stv;
 
+	/*
+	 * Check if we're creating a reproducible filesystem.
+	 * In this case we try to parse our SOURCE_DATE_EPOCH from environment.
+	 * If it fails, fall back to returning gettimeofday()
+	 * like we used to do.
+	 */
+	if (current_fixed_time(&tv))
+		return tv;
+
 	gettimeofday(&stv, (struct timezone *)0);
 	tv.tv_sec = stv.tv_sec;
 	tv.tv_nsec = stv.tv_usec * 1000;
@@ -515,6 +572,72 @@ void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork) { }
 void xfs_da_mark_sick(struct xfs_da_args *args) { }
 void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask) { }
 
+/*
+ * get_msws_prng_32() checks whether DETERMINISTIC_SEED is set in the
+ * environment and, if so, stores a pseudo-random number in *result instead
+ * of having the caller extract one from getrandom().
+ *
+ * The seed is parsed once, in base 0, and must be a number with no trailing
+ * characters; anything else is ignored.
+ *
+ * Returns true if *result was set, false if the caller must fall back.
+ *
+ * This function uses Middle Square Weyl Sequence to create pseudo-random
+ * numbers based on our DETERMINISTIC_SEED.
+ *    Ref: https://arxiv.org/pdf/1704.00358
+ */
+bool
+get_msws_prng_32(
+	uint32_t	*result)
+{
+	static bool	read_env = false;
+	/* MSWS state variables */
+	static uint64_t msws_c = 0;  /* increment (user seed), 0 if disabled */
+	static uint64_t msws_n = 0;  /* current value */
+	static uint64_t msws_s = 0;  /* accumulator */
+	char		*seed;
+	char		*end;
+	unsigned long long deterministic_seed;
+
+	if (!read_env) {
+		/* Parse only once to avoid repeated getenv() calls. */
+		read_env = true;
+		seed = getenv("DETERMINISTIC_SEED");
+		if (seed && seed[0] != '\0') {
+			/*
+			 * strtoull() so the full 64-bit range parses the same
+			 * on 32-bit and 64-bit platforms; the end pointer
+			 * check rejects garbage such as "abc", which would
+			 * otherwise parse as 0 and silently enable the PRNG.
+			 */
+			errno = 0;
+			deterministic_seed = strtoull(seed, &end, 0);
+			if (errno != 0 || *end != '\0')
+				return false;
+
+			/*
+			 * In this variation of MSWS we will use
+			 * DETERMINISTIC_SEED as our odd number in the formula,
+			 * so we will need to ensure it is odd.
+			 *
+			 * NOTE(review): a seed that fits in 32 bits makes the
+			 * first value returned 0; consider mixing the seed.
+			 */
+			msws_c = deterministic_seed | 1;
+		}
+	}
+
+	if (msws_c == 0)
+		return false;
+
+	msws_n *= msws_n;
+	msws_s += msws_c;
+	msws_n += msws_s;
+	msws_n = (msws_n >> 32) | (msws_n << 32);
+	*result = (uint32_t)msws_n;
+	return true;
+}
+
 #ifdef HAVE_GETRANDOM_NONBLOCK
 uint32_t
 get_random_u32(void)
@@ -522,6 +645,15 @@ get_random_u32(void)
 	uint32_t	ret;
 	ssize_t		sz;
 
+	/*
+	 * Check if we're creating a reproducible filesystem.
+	 * In this case we try to parse our DETERMINISTIC_SEED from environment
+	 * and use a pseudorandom number generator.
+	 * If it fails, fall back to returning getrandom()
+	 * like we used to do.
+	 */
+	if (get_msws_prng_32(&ret))
+		return ret;
 	/*
 	 * Try to extract a u32 of randomness from /dev/urandom.  If that
 	 * fails, fall back to returning zero like we used to do.
-- 
2.51.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-11-07 17:38 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-07 16:12 [PATCH v1] libxfs: support reproducible filesystems using deterministic time/seed Luca Di Maio
2025-11-07 16:37 ` Darrick J. Wong
2025-11-07 17:38   ` Luca Di Maio

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).