public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] libxfs: support reproducible filesystems using deterministic time/seed
@ 2025-11-08 14:39 Luca Di Maio
  2025-11-10 18:01 ` Darrick J. Wong
  2025-11-24  6:13 ` Christoph Hellwig
  0 siblings, 2 replies; 4+ messages in thread
From: Luca Di Maio @ 2025-11-08 14:39 UTC (permalink / raw)
  To: linux-xfs; +Cc: Luca Di Maio, dimitri.ledkov, smoser, djwong

Add support for reproducible filesystem creation through two environment
variables that enable deterministic behavior when building XFS filesystems.

SOURCE_DATE_EPOCH support:
When SOURCE_DATE_EPOCH is set, use its value for all filesystem timestamps
instead of the current time. This follows the reproducible builds
specification (https://reproducible-builds.org/specs/source-date-epoch/)
and ensures consistent inode timestamps across builds.

DETERMINISTIC_SEED support:
When DETERMINISTIC_SEED=1 is set, return a fixed seed value (0x53454544 =
"SEED") from get_random_u32() instead of reading from /dev/urandom.

get_random_u32() seems to be used mostly to set the inode generation number,
so returning a fixed value should not create collision issues at mkfs time.

The implementation introduces two helper functions to minimize changes
to existing code:

- current_fixed_time(): Parses and caches SOURCE_DATE_EPOCH on first
  call. Returns fixed timestamp when set, falls back to gettimeofday() on
  parse errors or when unset.
- get_deterministic_seed(): Checks for DETERMINISTIC_SEED=1 environment
  variable on first call, and returns a fixed seed value (0x53454544).
  Falls back to getrandom() when unset.
- Both helpers use one-time initialization to avoid repeated getenv() calls.
- Both quickly exit and noop if environment is not set or has invalid
  variables, falling back to original behaviour.

Example usage:
  SOURCE_DATE_EPOCH=1234567890 \
  DETERMINISTIC_SEED=1 \
  mkfs.xfs \
	-m uuid=$EXAMPLE_UUID \
	-p file=./rootfs \
	disk1.img

This enables distributions and build systems to create bit-for-bit
identical XFS filesystems when needed for verification and debugging.

v1 -> v2:
- simplify deterministic seed by returning a fixed value instead
  of using Middle Square Weyl Sequence PRNG
- fix timestamp type time_t -> time64_t
- fix timestamp initialization flag to allow negative epochs
- fix timestamp conversion type using strtoll
- fix timestamp conversion check to be sure the whole string was parsed
- print warning message when SOURCE_DATE_EPOCH is invalid

Signed-off-by: Luca Di Maio <luca.dimaio1@gmail.com>
---
 libxfs/util.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)

diff --git a/libxfs/util.c b/libxfs/util.c
index 3597850d..f6af4531 100644
--- a/libxfs/util.c
+++ b/libxfs/util.c
@@ -137,12 +137,76 @@ xfs_log_calc_unit_res(
 	return unit_bytes;
 }
 
+/*
+ * current_fixed_time() checks whether SOURCE_DATE_EPOCH is set in the
+ * environment and parses completely as a base-10 integer; if so, *tv is set.
+ *
+ * Returns true if *tv was filled in, false otherwise (*tv is untouched).
+ */
+bool
+current_fixed_time(
+	struct			timespec64 *tv)
+{
+	/*
+	 * Static state caches the outcome, so getenv() and the parse happen
+	 * only on the first call.
+	 */
+	static bool		enabled = false;
+	static bool		read_env = false;
+	static time64_t		epoch;
+	char			*endp;
+	char			*source_date_epoch;
+
+	if (!read_env) {
+		read_env = true;	/* a bad value is not re-parsed later */
+		source_date_epoch = getenv("SOURCE_DATE_EPOCH");
+		if (source_date_epoch && source_date_epoch[0] != '\0') {
+			errno = 0;
+			epoch = strtoll(source_date_epoch, &endp, 10);
+			if (errno != 0 || *endp != '\0') {
+				fprintf(stderr,
+			"%s: SOURCE_DATE_EPOCH '%s' invalid timestamp, ignoring.\n",
+				progname, source_date_epoch);
+
+				return false;
+			}
+
+			enabled = true;
+		}
+	}
+
+	/*
+	 * enabled is set only after a valid SOURCE_DATE_EPOCH was parsed and
+	 * epoch was initialized; the fixed time has whole-second resolution.
+	 */
+	if (read_env && enabled) {
+		tv->tv_sec = epoch;
+		tv->tv_nsec = 0;
+		return true;
+	}
+
+	/*
+	 * SOURCE_DATE_EPOCH is unset, empty or invalid, so tell the caller
+	 * to fall back to its regular behaviour.
+	 */
+	return false;
+}
+
 struct timespec64
 current_time(struct inode *inode)
 {
 	struct timespec64	tv;
 	struct timeval		stv;
 
+	/*
+	 * Check if we're creating a reproducible filesystem.
+	 * In this case we try to parse our SOURCE_DATE_EPOCH from environment.
+	 * If it fails, fall back to returning gettimeofday()
+	 * like we used to do.
+	 */
+	if (current_fixed_time(&tv))
+		return tv;
+
 	gettimeofday(&stv, (struct timezone *)0);
 	tv.tv_sec = stv.tv_sec;
 	tv.tv_nsec = stv.tv_usec * 1000;
@@ -515,6 +579,49 @@ void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork) { }
 void xfs_da_mark_sick(struct xfs_da_args *args) { }
 void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask) { }
 
+/*
+ * get_deterministic_seed() checks whether the environment variable
+ * DETERMINISTIC_SEED is exactly "1" and, if so, stores the fixed value
+ * 0x53454544 ("SEED" in ASCII) in *result instead of a random number.
+ *
+ * Returns true if *result was filled in, false otherwise.
+ */
+bool
+get_deterministic_seed(
+	uint32_t	*result)
+{
+	/*
+	 * Static state caches the outcome, so getenv() is called only on the
+	 * first call.
+	 */
+	static bool	enabled = false;
+	static bool	read_env = false;
+	static uint32_t	deterministic_seed = 0x53454544; /* SEED */
+	char		*seed_env;
+
+	if (!read_env) {
+		read_env = true;
+		seed_env = getenv("DETERMINISTIC_SEED");
+		if (seed_env && strcmp(seed_env, "1") == 0)
+			enabled = true;
+	}
+
+	/*
+	 * enabled is set only if the environment had DETERMINISTIC_SEED=1.
+	 */
+	if (read_env && enabled) {
+		*result = deterministic_seed;
+
+		return true;
+	}
+
+	/*
+	 * DETERMINISTIC_SEED is unset or not "1", so tell the caller to fall
+	 * back to its regular behaviour.
+	 */
+	return false;
+}
+
 #ifdef HAVE_GETRANDOM_NONBLOCK
 uint32_t
 get_random_u32(void)
@@ -522,6 +629,13 @@ get_random_u32(void)
 	uint32_t	ret;
 	ssize_t		sz;
 
+	/*
+	 * Check for DETERMINISTIC_SEED in environment, it means we're
+	 * creating a reproducible filesystem.
+	 * If it fails, fall back to returning getrandom() like we used to do.
+	 */
+	if (get_deterministic_seed(&ret))
+		return ret;
 	/*
 	 * Try to extract a u32 of randomness from /dev/urandom.  If that
 	 * fails, fall back to returning zero like we used to do.
-- 
2.51.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-11-24  9:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-08 14:39 [PATCH v2] libxfs: support reproducible filesystems using deterministic time/seed Luca Di Maio
2025-11-10 18:01 ` Darrick J. Wong
2025-11-24  6:13 ` Christoph Hellwig
2025-11-24  9:38   ` Luca Di Maio

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox