linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/7] libfrog: hoist free space histogram code
  2023-05-26  0:39 [PATCHSET v25.0 0/7] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
@ 2023-05-26  1:50 ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2023-05-26  1:50 UTC (permalink / raw)
  To: djwong, cem; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Combine the two free space histograms in xfs_db and xfs_spaceman into a
single implementation.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 db/freesp.c         |   83 +++++--------------------------
 libfrog/Makefile    |    2 +
 libfrog/histogram.c |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |   50 +++++++++++++++++++
 spaceman/freesp.c   |   93 +++++++++--------------------------
 5 files changed, 225 insertions(+), 138 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h


diff --git a/db/freesp.c b/db/freesp.c
index 6f234666584..7e71bf47a16 100644
--- a/db/freesp.c
+++ b/db/freesp.c
@@ -12,14 +12,7 @@
 #include "output.h"
 #include "init.h"
 #include "malloc.h"
-
-typedef struct histent
-{
-	int		low;
-	int		high;
-	long long	count;
-	long long	blocks;
-} histent_t;
+#include "libfrog/histogram.h"
 
 static void	addhistent(int h);
 static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
@@ -46,13 +39,10 @@ static int		alignment;
 static int		countflag;
 static int		dumpflag;
 static int		equalsize;
-static histent_t	*hist;
-static int		histcount;
+static struct histogram	freesp_hist;
 static int		multsize;
 static int		seen1;
 static int		summaryflag;
-static long long	totblocks;
-static long long	totexts;
 
 static const cmdinfo_t	freesp_cmd =
 	{ "freesp", NULL, freesp_f, 0, -1, 0,
@@ -93,18 +83,13 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount)
+	if (hist_buckets(&freesp_hist))
 		printhist();
-	if (summaryflag) {
-		dbprintf(_("total free extents %lld\n"), totexts);
-		dbprintf(_("total free blocks %lld\n"), totblocks);
-		dbprintf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
-	}
+	if (summaryflag)
+		hist_summarize(&freesp_hist);
 	if (aglist)
 		xfree(aglist);
-	if (hist)
-		xfree(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 
@@ -132,10 +117,9 @@ init(
 	int		speced = 0;
 
 	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
+
 	while ((c = getopt(argc, argv, "A:a:bcde:h:m:s")) != EOF) {
 		switch (c) {
 		case 'A':
@@ -163,7 +147,7 @@ init(
 			speced = 1;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				return usage();
 			addhistent(atoi(optarg));
 			speced = 1;
@@ -339,14 +323,7 @@ static void
 addhistent(
 	int	h)
 {
-	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
-	if (h == 0)
-		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
-	if (h == 1)
-		seen1 = 1;
+	hist_add_bucket(&freesp_hist, h);
 }
 
 static void
@@ -355,30 +332,12 @@ addtohist(
 	xfs_agblock_t	agbno,
 	xfs_extlen_t	len)
 {
-	int		i;
-
 	if (alignment && (XFS_AGB_TO_FSB(mp,agno,agbno) % alignment))
 		return;
 
 	if (dumpflag)
 		dbprintf("%8d %8d %8d\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((histent_t *)a)->low - ((histent_t *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -387,6 +346,7 @@ histinit(
 {
 	int	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -396,27 +356,12 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
 static void
 printhist(void)
 {
-	int	i;
-
-	dbprintf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist);
 }
diff --git a/libfrog/Makefile b/libfrog/Makefile
index e14e62e4d1b..ad32b0674b2 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -20,6 +20,7 @@ convert.c \
 crc32.c \
 file_exchange.c \
 fsgeom.c \
+histogram.c \
 list_sort.c \
 linux.c \
 logging.c \
@@ -44,6 +45,7 @@ crc32table.h \
 dahashselftest.h \
 file_exchange.h \
 fsgeom.h \
+histogram.h \
 logging.h \
 paths.h \
 projects.h \
diff --git a/libfrog/histogram.c b/libfrog/histogram.c
new file mode 100644
index 00000000000..e854ac5e467
--- /dev/null
+++ b/libfrog/histogram.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2023 Oracle.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include <stdlib.h>
+#include <string.h>
+#include "platform_defs.h"
+#include "libfrog/histogram.h"
+
+/* Create a new bucket with the given low value. */
+int
+hist_add_bucket(
+	struct histogram	*hs,
+	long long		bucket_low)
+{
+	struct histent		*buckets;
+
+	if (hs->nr_buckets == INT_MAX)
+		return EFBIG;
+
+	buckets = realloc(hs->buckets,
+			(hs->nr_buckets + 1) * sizeof(struct histent));
+	if (!buckets)
+		return errno;
+
+	hs->buckets = buckets;
+	hs->buckets[hs->nr_buckets].low = bucket_low;
+	hs->buckets[hs->nr_buckets].count = buckets[hs->nr_buckets].blocks = 0;
+	hs->nr_buckets++;
+	return 0;
+}
+
+/* Add an observation to the histogram. */
+void
+hist_add(
+	struct histogram	*hs,
+	long long		len)
+{
+	unsigned int		i;
+
+	hs->totexts++;
+	hs->totblocks += len;
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].high >= len) {
+			hs->buckets[i].count++;
+			hs->buckets[i].blocks += len;
+			break;
+		}
+	}
+}
+
+static int
+histent_cmp(
+	const void		*a,
+	const void		*b)
+{
+	const struct histent	*ha = a;
+	const struct histent	*hb = b;
+
+	if (ha->low < hb->low)
+		return -1;
+	if (ha->low > hb->low)
+		return 1;
+	return 0;
+}
+
+/* Prepare a histogram for bucket configuration. */
+void
+hist_init(
+	struct histogram	*hs)
+{
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Prepare a histogram to receive data observations. */
+void
+hist_prepare(
+	struct histogram	*hs,
+	long long		maxlen)
+{
+	unsigned int		i;
+
+	qsort(hs->buckets, hs->nr_buckets, sizeof(struct histent), histent_cmp);
+
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (i < hs->nr_buckets - 1)
+			hs->buckets[i].high = hs->buckets[i + 1].low - 1;
+		else
+			hs->buckets[i].high = maxlen;
+	}
+}
+
+/* Free all data associated with a histogram. */
+void
+hist_free(
+	struct histogram	*hs)
+{
+	free(hs->buckets);
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Dump a histogram to stdout. */
+void
+hist_print(
+	const struct histogram	*hs)
+{
+	unsigned int		i;
+
+	printf("%7s %7s %7s %7s %6s\n",
+		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].count == 0)
+			continue;
+
+		printf("%7lld %7lld %7lld %7lld %6.2f\n",
+				hs->buckets[i].low, hs->buckets[i].high,
+				hs->buckets[i].count, hs->buckets[i].blocks,
+				hs->buckets[i].blocks * 100.0 / hs->totblocks);
+	}
+}
+
+/* Summarize the contents of the histogram. */
+void
+hist_summarize(
+	const struct histogram	*hs)
+{
+	printf(_("total free extents %lld\n"), hs->totexts);
+	printf(_("total free blocks %lld\n"), hs->totblocks);
+	printf(_("average free extent size %g\n"),
+			(double)hs->totblocks / (double)hs->totexts);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
new file mode 100644
index 00000000000..c215bdaa98c
--- /dev/null
+++ b/libfrog/histogram.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2023 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_HISTOGRAM_H__
+#define __LIBFROG_HISTOGRAM_H__
+
+struct histent
+{
+	/* Low and high size of this bucket */
+	long long	low;
+	long long	high;
+
+	/* Count of observations recorded */
+	long long	count;
+
+	/* Sum of blocks recorded */
+	long long	blocks;
+};
+
+struct histogram {
+	/* Sum of all blocks recorded */
+	long long	totblocks;
+
+	/* Count of all observations recorded */
+	long long	totexts;
+
+	struct histent	*buckets;
+
+	/* Number of buckets */
+	unsigned int	nr_buckets;
+};
+
+int hist_add_bucket(struct histogram *hs, long long bucket_low);
+void hist_add(struct histogram *hs, long long len);
+void hist_init(struct histogram *hs);
+void hist_prepare(struct histogram *hs, long long maxlen);
+void hist_free(struct histogram *hs);
+void hist_print(const struct histogram *hs);
+void hist_summarize(const struct histogram *hs);
+
+static inline unsigned int hist_buckets(const struct histogram *hs)
+{
+	return hs->nr_buckets;
+}
+
+#endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/spaceman/freesp.c b/spaceman/freesp.c
index 70dcdb5c923..996cbb7e2c0 100644
--- a/spaceman/freesp.c
+++ b/spaceman/freesp.c
@@ -15,76 +15,52 @@
 #include "libfrog/paths.h"
 #include "space.h"
 #include "input.h"
-
-struct histent
-{
-	long long	low;
-	long long	high;
-	long long	count;
-	long long	blocks;
-};
+#include "libfrog/histogram.h"
 
 static int		agcount;
 static xfs_agnumber_t	*aglist;
-static struct histent	*hist;
+static struct histogram	freesp_hist;
 static int		dumpflag;
 static long long	equalsize;
 static long long	multsize;
-static int		histcount;
 static int		seen1;
 static int		summaryflag;
 static int		gflag;
 static bool		rtflag;
-static long long	totblocks;
-static long long	totexts;
 
 static cmdinfo_t freesp_cmd;
 
-static void
+static inline void
 addhistent(
 	long long	h)
 {
-	if (histcount == INT_MAX) {
+	int		error;
+
+	error = hist_add_bucket(&freesp_hist, h);
+	if (error == EFBIG) {
 		printf(_("Too many histogram buckets.\n"));
 		return;
 	}
-	hist = realloc(hist, (histcount + 1) * sizeof(*hist));
+	if (error) {
+		printf("%s\n", strerror(error));
+		return;
+	}
+
 	if (h == 0)
 		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
 	if (h == 1)
 		seen1 = 1;
 }
 
-static void
+static inline void
 addtohist(
 	xfs_agnumber_t	agno,
 	xfs_agblock_t	agbno,
 	off64_t		len)
 {
-	long		i;
-
 	if (dumpflag)
 		printf("%8d %8d %8"PRId64"\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((struct histent *)a)->low - ((struct histent *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -93,6 +69,7 @@ histinit(
 {
 	long long	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -102,29 +79,14 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
-static void
+static inline void
 printhist(void)
 {
-	int	i;
-
-	printf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			printf("%7lld %7lld %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist);
 }
 
 static int
@@ -255,10 +217,8 @@ init(
 	int			speced = 0;	/* only one of -b -e -h or -m */
 
 	agcount = dumpflag = equalsize = multsize = optind = gflag = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
 	rtflag = false;
 
 	while ((c = getopt(argc, argv, "a:bde:gh:m:rs")) != EOF) {
@@ -287,7 +247,7 @@ init(
 			gflag++;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				goto many_spec;
 			/* addhistent increments histcount */
 			x = cvt_s64(optarg, 0);
@@ -345,18 +305,13 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount && !gflag)
+	if (hist_buckets(&freesp_hist) > 0 && !gflag)
 		printhist();
-	if (summaryflag) {
-		printf(_("total free extents %lld\n"), totexts);
-		printf(_("total free blocks %lld\n"), totblocks);
-		printf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
-	}
+	if (summaryflag)
+		hist_summarize(&freesp_hist);
 	if (aglist)
 		free(aglist);
-	if (hist)
-		free(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 1/7] libfrog: hoist free space histogram code
  2023-12-31 19:48 [PATCHSET v29.0 33/40] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
@ 2023-12-31 22:50 ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2023-12-31 22:50 UTC (permalink / raw)
  To: djwong, cem; +Cc: linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Combine the two free space histograms in xfs_db and xfs_spaceman into a
single implementation.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 db/freesp.c         |   83 +++++--------------------------
 libfrog/Makefile    |    2 +
 libfrog/histogram.c |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |   50 +++++++++++++++++++
 spaceman/freesp.c   |   93 +++++++++--------------------------
 5 files changed, 225 insertions(+), 138 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h


diff --git a/db/freesp.c b/db/freesp.c
index 6f234666584..7e71bf47a16 100644
--- a/db/freesp.c
+++ b/db/freesp.c
@@ -12,14 +12,7 @@
 #include "output.h"
 #include "init.h"
 #include "malloc.h"
-
-typedef struct histent
-{
-	int		low;
-	int		high;
-	long long	count;
-	long long	blocks;
-} histent_t;
+#include "libfrog/histogram.h"
 
 static void	addhistent(int h);
 static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
@@ -46,13 +39,10 @@ static int		alignment;
 static int		countflag;
 static int		dumpflag;
 static int		equalsize;
-static histent_t	*hist;
-static int		histcount;
+static struct histogram	freesp_hist;
 static int		multsize;
 static int		seen1;
 static int		summaryflag;
-static long long	totblocks;
-static long long	totexts;
 
 static const cmdinfo_t	freesp_cmd =
 	{ "freesp", NULL, freesp_f, 0, -1, 0,
@@ -93,18 +83,13 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount)
+	if (hist_buckets(&freesp_hist))
 		printhist();
-	if (summaryflag) {
-		dbprintf(_("total free extents %lld\n"), totexts);
-		dbprintf(_("total free blocks %lld\n"), totblocks);
-		dbprintf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
-	}
+	if (summaryflag)
+		hist_summarize(&freesp_hist);
 	if (aglist)
 		xfree(aglist);
-	if (hist)
-		xfree(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 
@@ -132,10 +117,9 @@ init(
 	int		speced = 0;
 
 	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
+
 	while ((c = getopt(argc, argv, "A:a:bcde:h:m:s")) != EOF) {
 		switch (c) {
 		case 'A':
@@ -163,7 +147,7 @@ init(
 			speced = 1;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				return usage();
 			addhistent(atoi(optarg));
 			speced = 1;
@@ -339,14 +323,7 @@ static void
 addhistent(
 	int	h)
 {
-	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
-	if (h == 0)
-		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
-	if (h == 1)
-		seen1 = 1;
+	hist_add_bucket(&freesp_hist, h);
 }
 
 static void
@@ -355,30 +332,12 @@ addtohist(
 	xfs_agblock_t	agbno,
 	xfs_extlen_t	len)
 {
-	int		i;
-
 	if (alignment && (XFS_AGB_TO_FSB(mp,agno,agbno) % alignment))
 		return;
 
 	if (dumpflag)
 		dbprintf("%8d %8d %8d\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((histent_t *)a)->low - ((histent_t *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -387,6 +346,7 @@ histinit(
 {
 	int	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -396,27 +356,12 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
 static void
 printhist(void)
 {
-	int	i;
-
-	dbprintf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist);
 }
diff --git a/libfrog/Makefile b/libfrog/Makefile
index f8bb39f2712..bbc5b887cd3 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -20,6 +20,7 @@ convert.c \
 crc32.c \
 file_exchange.c \
 fsgeom.c \
+histogram.c \
 list_sort.c \
 linux.c \
 logging.c \
@@ -45,6 +46,7 @@ dahashselftest.h \
 div64.h \
 file_exchange.h \
 fsgeom.h \
+histogram.h \
 logging.h \
 paths.h \
 projects.h \
diff --git a/libfrog/histogram.c b/libfrog/histogram.c
new file mode 100644
index 00000000000..553ba3d7c6e
--- /dev/null
+++ b/libfrog/histogram.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include <stdlib.h>
+#include <string.h>
+#include "platform_defs.h"
+#include "libfrog/histogram.h"
+
+/* Create a new bucket with the given low value. */
+int
+hist_add_bucket(
+	struct histogram	*hs,
+	long long		bucket_low)
+{
+	struct histent		*buckets;
+
+	if (hs->nr_buckets == INT_MAX)
+		return EFBIG;
+
+	buckets = realloc(hs->buckets,
+			(hs->nr_buckets + 1) * sizeof(struct histent));
+	if (!buckets)
+		return errno;
+
+	hs->buckets = buckets;
+	hs->buckets[hs->nr_buckets].low = bucket_low;
+	hs->buckets[hs->nr_buckets].count = buckets[hs->nr_buckets].blocks = 0;
+	hs->nr_buckets++;
+	return 0;
+}
+
+/* Add an observation to the histogram. */
+void
+hist_add(
+	struct histogram	*hs,
+	long long		len)
+{
+	unsigned int		i;
+
+	hs->totexts++;
+	hs->totblocks += len;
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].high >= len) {
+			hs->buckets[i].count++;
+			hs->buckets[i].blocks += len;
+			break;
+		}
+	}
+}
+
+static int
+histent_cmp(
+	const void		*a,
+	const void		*b)
+{
+	const struct histent	*ha = a;
+	const struct histent	*hb = b;
+
+	if (ha->low < hb->low)
+		return -1;
+	if (ha->low > hb->low)
+		return 1;
+	return 0;
+}
+
+/* Prepare a histogram for bucket configuration. */
+void
+hist_init(
+	struct histogram	*hs)
+{
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Prepare a histogram to receive data observations. */
+void
+hist_prepare(
+	struct histogram	*hs,
+	long long		maxlen)
+{
+	unsigned int		i;
+
+	qsort(hs->buckets, hs->nr_buckets, sizeof(struct histent), histent_cmp);
+
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (i < hs->nr_buckets - 1)
+			hs->buckets[i].high = hs->buckets[i + 1].low - 1;
+		else
+			hs->buckets[i].high = maxlen;
+	}
+}
+
+/* Free all data associated with a histogram. */
+void
+hist_free(
+	struct histogram	*hs)
+{
+	free(hs->buckets);
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Dump a histogram to stdout. */
+void
+hist_print(
+	const struct histogram	*hs)
+{
+	unsigned int		i;
+
+	printf("%7s %7s %7s %7s %6s\n",
+		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].count == 0)
+			continue;
+
+		printf("%7lld %7lld %7lld %7lld %6.2f\n",
+				hs->buckets[i].low, hs->buckets[i].high,
+				hs->buckets[i].count, hs->buckets[i].blocks,
+				hs->buckets[i].blocks * 100.0 / hs->totblocks);
+	}
+}
+
+/* Summarize the contents of the histogram. */
+void
+hist_summarize(
+	const struct histogram	*hs)
+{
+	printf(_("total free extents %lld\n"), hs->totexts);
+	printf(_("total free blocks %lld\n"), hs->totblocks);
+	printf(_("average free extent size %g\n"),
+			(double)hs->totblocks / (double)hs->totexts);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
new file mode 100644
index 00000000000..2e2b169a79b
--- /dev/null
+++ b/libfrog/histogram.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_HISTOGRAM_H__
+#define __LIBFROG_HISTOGRAM_H__
+
+struct histent
+{
+	/* Low and high size of this bucket */
+	long long	low;
+	long long	high;
+
+	/* Count of observations recorded */
+	long long	count;
+
+	/* Sum of blocks recorded */
+	long long	blocks;
+};
+
+struct histogram {
+	/* Sum of all blocks recorded */
+	long long	totblocks;
+
+	/* Count of all observations recorded */
+	long long	totexts;
+
+	struct histent	*buckets;
+
+	/* Number of buckets */
+	unsigned int	nr_buckets;
+};
+
+int hist_add_bucket(struct histogram *hs, long long bucket_low);
+void hist_add(struct histogram *hs, long long len);
+void hist_init(struct histogram *hs);
+void hist_prepare(struct histogram *hs, long long maxlen);
+void hist_free(struct histogram *hs);
+void hist_print(const struct histogram *hs);
+void hist_summarize(const struct histogram *hs);
+
+static inline unsigned int hist_buckets(const struct histogram *hs)
+{
+	return hs->nr_buckets;
+}
+
+#endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/spaceman/freesp.c b/spaceman/freesp.c
index 70dcdb5c923..996cbb7e2c0 100644
--- a/spaceman/freesp.c
+++ b/spaceman/freesp.c
@@ -15,76 +15,52 @@
 #include "libfrog/paths.h"
 #include "space.h"
 #include "input.h"
-
-struct histent
-{
-	long long	low;
-	long long	high;
-	long long	count;
-	long long	blocks;
-};
+#include "libfrog/histogram.h"
 
 static int		agcount;
 static xfs_agnumber_t	*aglist;
-static struct histent	*hist;
+static struct histogram	freesp_hist;
 static int		dumpflag;
 static long long	equalsize;
 static long long	multsize;
-static int		histcount;
 static int		seen1;
 static int		summaryflag;
 static int		gflag;
 static bool		rtflag;
-static long long	totblocks;
-static long long	totexts;
 
 static cmdinfo_t freesp_cmd;
 
-static void
+static inline void
 addhistent(
 	long long	h)
 {
-	if (histcount == INT_MAX) {
+	int		error;
+
+	error = hist_add_bucket(&freesp_hist, h);
+	if (error == EFBIG) {
 		printf(_("Too many histogram buckets.\n"));
 		return;
 	}
-	hist = realloc(hist, (histcount + 1) * sizeof(*hist));
+	if (error) {
+		printf("%s\n", strerror(error));
+		return;
+	}
+
 	if (h == 0)
 		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
 	if (h == 1)
 		seen1 = 1;
 }
 
-static void
+static inline void
 addtohist(
 	xfs_agnumber_t	agno,
 	xfs_agblock_t	agbno,
 	off64_t		len)
 {
-	long		i;
-
 	if (dumpflag)
 		printf("%8d %8d %8"PRId64"\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((struct histent *)a)->low - ((struct histent *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -93,6 +69,7 @@ histinit(
 {
 	long long	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -102,29 +79,14 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
-static void
+static inline void
 printhist(void)
 {
-	int	i;
-
-	printf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			printf("%7lld %7lld %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist);
 }
 
 static int
@@ -255,10 +217,8 @@ init(
 	int			speced = 0;	/* only one of -b -e -h or -m */
 
 	agcount = dumpflag = equalsize = multsize = optind = gflag = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
 	rtflag = false;
 
 	while ((c = getopt(argc, argv, "a:bde:gh:m:rs")) != EOF) {
@@ -287,7 +247,7 @@ init(
 			gflag++;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				goto many_spec;
 			/* addhistent increments histcount */
 			x = cvt_s64(optarg, 0);
@@ -345,18 +305,13 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount && !gflag)
+	if (hist_buckets(&freesp_hist) > 0 && !gflag)
 		printhist();
-	if (summaryflag) {
-		printf(_("total free extents %lld\n"), totexts);
-		printf(_("total free blocks %lld\n"), totblocks);
-		printf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
-	}
+	if (summaryflag)
+		hist_summarize(&freesp_hist);
 	if (aglist)
 		free(aglist);
-	if (hist)
-		free(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-02  0:50 [PATCHSET v30.7 05/16] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
@ 2024-07-02  1:03 ` Darrick J. Wong
  2024-07-02  5:32   ` Christoph Hellwig
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-02  1:03 UTC (permalink / raw)
  To: djwong, cem; +Cc: linux-xfs, hch

From: Darrick J. Wong <djwong@kernel.org>

Combine the two free space histograms in xfs_db and xfs_spaceman into a
single implementation.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 db/freesp.c         |   83 +++++--------------------------
 libfrog/Makefile    |    2 +
 libfrog/histogram.c |  135 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |   50 +++++++++++++++++++
 spaceman/freesp.c   |   93 +++++++++--------------------------
 5 files changed, 225 insertions(+), 138 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h


diff --git a/db/freesp.c b/db/freesp.c
index 883741e66fee..7a2da002a138 100644
--- a/db/freesp.c
+++ b/db/freesp.c
@@ -12,14 +12,7 @@
 #include "output.h"
 #include "init.h"
 #include "malloc.h"
-
-typedef struct histent
-{
-	int		low;
-	int		high;
-	long long	count;
-	long long	blocks;
-} histent_t;
+#include "libfrog/histogram.h"
 
 static void	addhistent(int h);
 static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
@@ -46,13 +39,10 @@ static int		alignment;
 static int		countflag;
 static int		dumpflag;
 static int		equalsize;
-static histent_t	*hist;
-static int		histcount;
+static struct histogram	freesp_hist;
 static int		multsize;
 static int		seen1;
 static int		summaryflag;
-static long long	totblocks;
-static long long	totexts;
 
 static const cmdinfo_t	freesp_cmd =
 	{ "freesp", NULL, freesp_f, 0, -1, 0,
@@ -93,18 +83,13 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount)
+	if (hist_buckets(&freesp_hist))
 		printhist();
-	if (summaryflag) {
-		dbprintf(_("total free extents %lld\n"), totexts);
-		dbprintf(_("total free blocks %lld\n"), totblocks);
-		dbprintf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
-	}
+	if (summaryflag)
+		hist_summarize(&freesp_hist);
 	if (aglist)
 		xfree(aglist);
-	if (hist)
-		xfree(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 
@@ -132,10 +117,9 @@ init(
 	int		speced = 0;
 
 	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
+
 	while ((c = getopt(argc, argv, "A:a:bcde:h:m:s")) != EOF) {
 		switch (c) {
 		case 'A':
@@ -163,7 +147,7 @@ init(
 			speced = 1;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				return usage();
 			addhistent(atoi(optarg));
 			speced = 1;
@@ -339,14 +323,7 @@ static void
 addhistent(
 	int	h)
 {
-	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
-	if (h == 0)
-		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
-	if (h == 1)
-		seen1 = 1;
+	hist_add_bucket(&freesp_hist, h);
 }
 
 static void
@@ -355,30 +332,12 @@ addtohist(
 	xfs_agblock_t	agbno,
 	xfs_extlen_t	len)
 {
-	int		i;
-
 	if (alignment && (XFS_AGB_TO_FSB(mp,agno,agbno) % alignment))
 		return;
 
 	if (dumpflag)
 		dbprintf("%8d %8d %8d\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((histent_t *)a)->low - ((histent_t *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -387,6 +346,7 @@ histinit(
 {
 	int	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -396,27 +356,12 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
 static void
 printhist(void)
 {
-	int	i;
-
-	dbprintf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist);
 }
diff --git a/libfrog/Makefile b/libfrog/Makefile
index 53e3c3492377..acfa228bc8ec 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -20,6 +20,7 @@ convert.c \
 crc32.c \
 file_exchange.c \
 fsgeom.c \
+histogram.c \
 list_sort.c \
 linux.c \
 logging.c \
@@ -45,6 +46,7 @@ dahashselftest.h \
 div64.h \
 file_exchange.h \
 fsgeom.h \
+histogram.h \
 logging.h \
 paths.h \
 projects.h \
diff --git a/libfrog/histogram.c b/libfrog/histogram.c
new file mode 100644
index 000000000000..553ba3d7c6e8
--- /dev/null
+++ b/libfrog/histogram.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include <stdlib.h>
+#include <string.h>
+#include "platform_defs.h"
+#include "libfrog/histogram.h"
+
+/* Create a new bucket with the given low value. */
+int
+hist_add_bucket(
+	struct histogram	*hs,
+	long long		bucket_low)
+{
+	struct histent		*buckets;
+
+	if (hs->nr_buckets == INT_MAX)
+		return EFBIG;
+
+	buckets = realloc(hs->buckets,
+			(hs->nr_buckets + 1) * sizeof(struct histent));
+	if (!buckets)
+		return errno;
+
+	hs->buckets = buckets;
+	hs->buckets[hs->nr_buckets].low = bucket_low;
+	hs->buckets[hs->nr_buckets].count = buckets[hs->nr_buckets].blocks = 0;
+	hs->nr_buckets++;
+	return 0;
+}
+
+/* Add an observation to the histogram. */
+void
+hist_add(
+	struct histogram	*hs,
+	long long		len)
+{
+	unsigned int		i;
+
+	hs->totexts++;
+	hs->totblocks += len;
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].high >= len) {
+			hs->buckets[i].count++;
+			hs->buckets[i].blocks += len;
+			break;
+		}
+	}
+}
+
+static int
+histent_cmp(
+	const void		*a,
+	const void		*b)
+{
+	const struct histent	*ha = a;
+	const struct histent	*hb = b;
+
+	if (ha->low < hb->low)
+		return -1;
+	if (ha->low > hb->low)
+		return 1;
+	return 0;
+}
+
+/* Prepare a histogram for bucket configuration. */
+void
+hist_init(
+	struct histogram	*hs)
+{
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Prepare a histogram to receive data observations. */
+void
+hist_prepare(
+	struct histogram	*hs,
+	long long		maxlen)
+{
+	unsigned int		i;
+
+	qsort(hs->buckets, hs->nr_buckets, sizeof(struct histent), histent_cmp);
+
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (i < hs->nr_buckets - 1)
+			hs->buckets[i].high = hs->buckets[i + 1].low - 1;
+		else
+			hs->buckets[i].high = maxlen;
+	}
+}
+
+/* Free all data associated with a histogram. */
+void
+hist_free(
+	struct histogram	*hs)
+{
+	free(hs->buckets);
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Dump a histogram to stdout. */
+void
+hist_print(
+	const struct histogram	*hs)
+{
+	unsigned int		i;
+
+	printf("%7s %7s %7s %7s %6s\n",
+		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].count == 0)
+			continue;
+
+		printf("%7lld %7lld %7lld %7lld %6.2f\n",
+				hs->buckets[i].low, hs->buckets[i].high,
+				hs->buckets[i].count, hs->buckets[i].blocks,
+				hs->buckets[i].blocks * 100.0 / hs->totblocks);
+	}
+}
+
+/* Summarize the contents of the histogram. */
+void
+hist_summarize(
+	const struct histogram	*hs)
+{
+	printf(_("total free extents %lld\n"), hs->totexts);
+	printf(_("total free blocks %lld\n"), hs->totblocks);
+	printf(_("average free extent size %g\n"),
+			(double)hs->totblocks / (double)hs->totexts);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
new file mode 100644
index 000000000000..2e2b169a79b0
--- /dev/null
+++ b/libfrog/histogram.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_HISTOGRAM_H__
+#define __LIBFROG_HISTOGRAM_H__
+
+struct histent
+{
+	/* Low and high size of this bucket */
+	long long	low;
+	long long	high;
+
+	/* Count of observations recorded */
+	long long	count;
+
+	/* Sum of blocks recorded */
+	long long	blocks;
+};
+
+struct histogram {
+	/* Sum of all blocks recorded */
+	long long	totblocks;
+
+	/* Count of all observations recorded */
+	long long	totexts;
+
+	struct histent	*buckets;
+
+	/* Number of buckets */
+	unsigned int	nr_buckets;
+};
+
+int hist_add_bucket(struct histogram *hs, long long bucket_low);
+void hist_add(struct histogram *hs, long long len);
+void hist_init(struct histogram *hs);
+void hist_prepare(struct histogram *hs, long long maxlen);
+void hist_free(struct histogram *hs);
+void hist_print(const struct histogram *hs);
+void hist_summarize(const struct histogram *hs);
+
+static inline unsigned int hist_buckets(const struct histogram *hs)
+{
+	return hs->nr_buckets;
+}
+
+#endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/spaceman/freesp.c b/spaceman/freesp.c
index f5177cb4ee5d..97c92b131a89 100644
--- a/spaceman/freesp.c
+++ b/spaceman/freesp.c
@@ -15,76 +15,52 @@
 #include "libfrog/paths.h"
 #include "space.h"
 #include "input.h"
-
-struct histent
-{
-	long long	low;
-	long long	high;
-	long long	count;
-	long long	blocks;
-};
+#include "libfrog/histogram.h"
 
 static int		agcount;
 static xfs_agnumber_t	*aglist;
-static struct histent	*hist;
+static struct histogram	freesp_hist;
 static int		dumpflag;
 static long long	equalsize;
 static long long	multsize;
-static int		histcount;
 static int		seen1;
 static int		summaryflag;
 static int		gflag;
 static bool		rtflag;
-static long long	totblocks;
-static long long	totexts;
 
 static cmdinfo_t freesp_cmd;
 
-static void
+static inline void
 addhistent(
 	long long	h)
 {
-	if (histcount == INT_MAX) {
+	int		error;
+
+	error = hist_add_bucket(&freesp_hist, h);
+	if (error == EFBIG) {
 		printf(_("Too many histogram buckets.\n"));
 		return;
 	}
-	hist = realloc(hist, (histcount + 1) * sizeof(*hist));
+	if (error) {
+		printf("%s\n", strerror(error));
+		return;
+	}
+
 	if (h == 0)
 		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
 	if (h == 1)
 		seen1 = 1;
 }
 
-static void
+static inline void
 addtohist(
 	xfs_agnumber_t	agno,
 	xfs_agblock_t	agbno,
 	off_t		len)
 {
-	long		i;
-
 	if (dumpflag)
 		printf("%8d %8d %8"PRId64"\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((struct histent *)a)->low - ((struct histent *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -93,6 +69,7 @@ histinit(
 {
 	long long	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -102,29 +79,14 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
-static void
+static inline void
 printhist(void)
 {
-	int	i;
-
-	printf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			printf("%7lld %7lld %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist);
 }
 
 static int
@@ -255,10 +217,8 @@ init(
 	int			speced = 0;	/* only one of -b -e -h or -m */
 
 	agcount = dumpflag = equalsize = multsize = optind = gflag = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
 	rtflag = false;
 
 	while ((c = getopt(argc, argv, "a:bde:gh:m:rs")) != EOF) {
@@ -287,7 +247,7 @@ init(
 			gflag++;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				goto many_spec;
 			/* addhistent increments histcount */
 			x = cvt_s64(optarg, 0);
@@ -345,18 +305,13 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount && !gflag)
+	if (hist_buckets(&freesp_hist) > 0 && !gflag)
 		printhist();
-	if (summaryflag) {
-		printf(_("total free extents %lld\n"), totexts);
-		printf(_("total free blocks %lld\n"), totblocks);
-		printf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
-	}
+	if (summaryflag)
+		hist_summarize(&freesp_hist);
 	if (aglist)
 		free(aglist);
-	if (hist)
-		free(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-02  1:03 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
@ 2024-07-02  5:32   ` Christoph Hellwig
  2024-07-03  2:47     ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-07-02  5:32 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: cem, linux-xfs, hch

> +struct histent
> +{

The braces goes to the previous line for our normal coding style.

Also the naming with histend/histogram the naming seems a bit too
generic for something very block specific.  Should the naming be
adjusted a bit?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-02  5:32   ` Christoph Hellwig
@ 2024-07-03  2:47     ` Darrick J. Wong
  2024-07-03  4:30       ` Christoph Hellwig
  0 siblings, 1 reply; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03  2:47 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: cem, linux-xfs

On Tue, Jul 02, 2024 at 07:32:38AM +0200, Christoph Hellwig wrote:
> > +struct histent
> > +{
> 
> The braces goes to the previous line for our normal coding style.
> 
> Also the naming with histend/histogram the naming seems a bit too
> generic for something very block specific.  Should the naming be
> adjusted a bit?

Hmm.  Either we prefix everything with space, e.g. "struct
space_histogram" and "int spacehist_add(...)"...

Or change "blocks" to "value" and "extents" to "observations":

struct histent
{
	/* Low and high size of this bucket */
	long long	low;
	long long	high;

	/* Count of observations recorded */
	long long	nr_observations;

	/* Sum of values recorded */
	long long	sum_values;
};

struct histogram {
	/* Sum of all values recorded */
	long long	tot_values;

	/* Count of all observations recorded */
	long long	tot_observations;

	struct histent	*buckets;

	/* Number of buckets */
	unsigned int	nr_buckets;
};

int hist_add_bucket(struct histogram *hs, long long bucket_low);
void hist_add(struct histogram *hs, long long value);
void hist_prepare(struct histogram *hs, long long maxvalue);

and then hist_print/hist_summarize would have to be told the units of
the values ("blocks") and the units of the observations ("extents") to
print to stdout.

The first option is a bit lazy since there's nothing really diskspace
specific about the histogram other than the printf labels; and the
second option is more generic but maybe we should let the first
non-space histogram user figure out how to do that?

<shrug> Your thoughts?

--D

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-03  2:47     ` Darrick J. Wong
@ 2024-07-03  4:30       ` Christoph Hellwig
  2024-07-03  4:55         ` Darrick J. Wong
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-07-03  4:30 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, cem, linux-xfs

On Tue, Jul 02, 2024 at 07:47:38PM -0700, Darrick J. Wong wrote:
> Or change "blocks" to "value" and "extents" to "observations":

> The first option is a bit lazy since there's nothing really diskspace
> specific about the histogram other than the printf labels; and the
> second option is more generic but maybe we should let the first
> non-space histogram user figure out how to do that?

Actually the second sounds easy enough even if we never end up using
it for anything but space tracking.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-03  4:30       ` Christoph Hellwig
@ 2024-07-03  4:55         ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03  4:55 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: cem, linux-xfs

On Wed, Jul 03, 2024 at 06:30:09AM +0200, Christoph Hellwig wrote:
> On Tue, Jul 02, 2024 at 07:47:38PM -0700, Darrick J. Wong wrote:
> > Or change "blocks" to "value" and "extents" to "observations":
> 
> > The first option is a bit lazy since there's nothing really diskspace
> > specific about the histogram other than the printf labels; and the
> > second option is more generic but maybe we should let the first
> > non-space histogram user figure out how to do that?
> 
> Actually the second sounds easy enough even if we never end up using
> it for anything but space tracking.

Ok, will do that tomorrow.

--D

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime
@ 2024-07-03 21:47 Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:47 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

Hi all,

This patchset dramatically reduces the runtime of the FITRIM calls made
during phase 8 of xfs_scrub.  It turns out that phase 8 can really get
bogged down if the free space contains a large number of very small
extents.  In these cases, the runtime can increase by an order of
magnitude to free less than 1% of the free space.  This is not worth the
time, since we're spending a lot of time to do very little work.  The
FITRIM ioctl allows us to specify a minimum extent length, so we can use
statistical methods to compute a minlen parameter.

It turns out xfs_db/spaceman already have the code needed to create
histograms of free space extent lengths.  We add the ability to compute
a CDF of the extent lengths, which make it easy to pick a minimum length
corresponding to 99% of the free space.  In most cases, this results in
dramatic reductions in phase 8 runtime.  Hence, move the histogram code
to libfrog, and wire up xfs_scrub, since phase 7 already walks the
fsmap.

We also add a new -o suboption to xfs_scrub so that people who /do/ want
to examine every free extent can do so.

If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.

This has been running on the djcloud for months with no problems.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=scrub-fstrim-minlen-freesp-histogram

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=scrub-fstrim-minlen-freesp-histogram
---
Commits in this patchset:
 * libfrog: hoist free space histogram code
 * libfrog: print wider columns for free space histogram
 * libfrog: print cdf of free space buckets
 * xfs_scrub: don't close stdout when closing the progress bar
 * xfs_scrub: remove pointless spacemap.c arguments
 * xfs_scrub: collect free space histograms during phase 7
 * xfs_scrub: tune fstrim minlen parameter based on free space histograms
---
 db/freesp.c          |   89 ++++------------
 libfrog/Makefile     |    2 
 libfrog/histogram.c  |  270 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h  |   78 ++++++++++++++
 man/man8/xfs_scrub.8 |   16 +++
 scrub/phase7.c       |   47 ++++++++-
 scrub/phase8.c       |   91 ++++++++++++++++-
 scrub/spacemap.c     |   11 +-
 scrub/vfs.c          |    4 +
 scrub/vfs.h          |    2 
 scrub/xfs_scrub.c    |   45 ++++++++
 scrub/xfs_scrub.h    |   16 +++
 spaceman/freesp.c    |   99 ++++++------------
 13 files changed, 620 insertions(+), 150 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
@ 2024-07-03 21:47 ` Darrick J. Wong
  2024-07-04  5:22   ` Christoph Hellwig
  2024-07-08 17:27   ` [PATCH v30.8.1 " Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 2/7] libfrog: print wider columns for free space histogram Darrick J. Wong
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:47 UTC (permalink / raw)
  To: cem, djwong; +Cc: hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Combine the two free space histograms in xfs_db and xfs_spaceman into a
single implementation.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 db/freesp.c         |   89 ++++++++------------------------
 libfrog/Makefile    |    2 +
 libfrog/histogram.c |  143 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |   64 +++++++++++++++++++++++
 spaceman/freesp.c   |   99 ++++++++++++-----------------------
 5 files changed, 265 insertions(+), 132 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h


diff --git a/db/freesp.c b/db/freesp.c
index 883741e66fee..43520481d5e0 100644
--- a/db/freesp.c
+++ b/db/freesp.c
@@ -12,14 +12,7 @@
 #include "output.h"
 #include "init.h"
 #include "malloc.h"
-
-typedef struct histent
-{
-	int		low;
-	int		high;
-	long long	count;
-	long long	blocks;
-} histent_t;
+#include "libfrog/histogram.h"
 
 static void	addhistent(int h);
 static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
@@ -46,13 +39,10 @@ static int		alignment;
 static int		countflag;
 static int		dumpflag;
 static int		equalsize;
-static histent_t	*hist;
-static int		histcount;
+static struct histogram	freesp_hist;
 static int		multsize;
 static int		seen1;
 static int		summaryflag;
-static long long	totblocks;
-static long long	totexts;
 
 static const cmdinfo_t	freesp_cmd =
 	{ "freesp", NULL, freesp_f, 0, -1, 0,
@@ -93,18 +83,20 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount)
+	if (hist_buckets(&freesp_hist))
 		printhist();
 	if (summaryflag) {
-		dbprintf(_("total free extents %lld\n"), totexts);
-		dbprintf(_("total free blocks %lld\n"), totblocks);
-		dbprintf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
+		struct histogram_strings hstr = {
+			.sum		= _("total free blocks"),
+			.observations	= _("total free extents"),
+			.averages	= _("average free extent size"),
+		};
+
+		hist_summarize(&freesp_hist, &hstr);
 	}
 	if (aglist)
 		xfree(aglist);
-	if (hist)
-		xfree(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 
@@ -132,10 +124,9 @@ init(
 	int		speced = 0;
 
 	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
+
 	while ((c = getopt(argc, argv, "A:a:bcde:h:m:s")) != EOF) {
 		switch (c) {
 		case 'A':
@@ -163,7 +154,7 @@ init(
 			speced = 1;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				return usage();
 			addhistent(atoi(optarg));
 			speced = 1;
@@ -339,14 +330,7 @@ static void
 addhistent(
 	int	h)
 {
-	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
-	if (h == 0)
-		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
-	if (h == 1)
-		seen1 = 1;
+	hist_add_bucket(&freesp_hist, h);
 }
 
 static void
@@ -355,30 +339,12 @@ addtohist(
 	xfs_agblock_t	agbno,
 	xfs_extlen_t	len)
 {
-	int		i;
-
 	if (alignment && (XFS_AGB_TO_FSB(mp,agno,agbno) % alignment))
 		return;
 
 	if (dumpflag)
 		dbprintf("%8d %8d %8d\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((histent_t *)a)->low - ((histent_t *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -387,6 +353,7 @@ histinit(
 {
 	int	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -396,27 +363,17 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
 static void
 printhist(void)
 {
-	int	i;
+	struct histogram_strings hstr = {
+		.sum		= _("blocks"),
+		.observations	= _("extents"),
+	};
 
-	dbprintf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist, &hstr);
 }
diff --git a/libfrog/Makefile b/libfrog/Makefile
index 53e3c3492377..acfa228bc8ec 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -20,6 +20,7 @@ convert.c \
 crc32.c \
 file_exchange.c \
 fsgeom.c \
+histogram.c \
 list_sort.c \
 linux.c \
 logging.c \
@@ -45,6 +46,7 @@ dahashselftest.h \
 div64.h \
 file_exchange.h \
 fsgeom.h \
+histogram.h \
 logging.h \
 paths.h \
 projects.h \
diff --git a/libfrog/histogram.c b/libfrog/histogram.c
new file mode 100644
index 000000000000..c2f344a88eb6
--- /dev/null
+++ b/libfrog/histogram.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include <stdlib.h>
+#include <string.h>
+#include "platform_defs.h"
+#include "libfrog/histogram.h"
+
+/* Create a new bucket with the given low value. */
+int
+hist_add_bucket(
+	struct histogram	*hs,
+	long long		bucket_low)
+{
+	struct histbucket	*buckets;
+
+	if (hs->nr_buckets == INT_MAX)
+		return EFBIG;
+
+	buckets = realloc(hs->buckets,
+			(hs->nr_buckets + 1) * sizeof(struct histbucket));
+	if (!buckets)
+		return errno;
+
+	hs->buckets = buckets;
+	hs->buckets[hs->nr_buckets].low = bucket_low;
+	hs->buckets[hs->nr_buckets].nr_obs = 0;
+	hs->buckets[hs->nr_buckets].sum = 0;
+	hs->nr_buckets++;
+	return 0;
+}
+
+/* Add an observation to the histogram. */
+void
+hist_add(
+	struct histogram	*hs,
+	long long		len)
+{
+	unsigned int		i;
+
+	hs->tot_obs++;
+	hs->tot_sum += len;
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].high >= len) {
+			hs->buckets[i].nr_obs++;
+			hs->buckets[i].sum += len;
+			break;
+		}
+	}
+}
+
+static int
+histbucket_cmp(
+	const void		*a,
+	const void		*b)
+{
+	const struct histbucket	*ha = a;
+	const struct histbucket	*hb = b;
+
+	if (ha->low < hb->low)
+		return -1;
+	if (ha->low > hb->low)
+		return 1;
+	return 0;
+}
+
+/* Prepare a histogram for bucket configuration. */
+void
+hist_init(
+	struct histogram	*hs)
+{
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Prepare a histogram to receive data observations. */
+void
+hist_prepare(
+	struct histogram	*hs,
+	long long		maxlen)
+{
+	unsigned int		i;
+
+	qsort(hs->buckets, hs->nr_buckets, sizeof(struct histbucket),
+			histbucket_cmp);
+
+	for (i = 0; i < hs->nr_buckets - 1; i++)
+		hs->buckets[i].high = hs->buckets[i + 1].low - 1;
+	hs->buckets[hs->nr_buckets - 1].high = maxlen;
+}
+
+/* Free all data associated with a histogram. */
+void
+hist_free(
+	struct histogram	*hs)
+{
+	free(hs->buckets);
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Dump a histogram to stdout. */
+void
+hist_print(
+	const struct histogram		*hs,
+	const struct histogram_strings	*hstr)
+{
+	unsigned int			obs_w = strlen(hstr->observations);
+	unsigned int			sum_w = strlen(hstr->sum);
+	unsigned int			i;
+
+	printf("%7s %7s %*s %*s %6s\n",
+			_("from"), _("to"),
+			obs_w, hstr->observations,
+			sum_w, hstr->sum,
+			_("pct"));
+
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].nr_obs == 0)
+			continue;
+
+		printf("%7lld %7lld %*lld %*lld %6.2f\n",
+				hs->buckets[i].low, hs->buckets[i].high,
+				obs_w, hs->buckets[i].nr_obs,
+				sum_w, hs->buckets[i].sum,
+				hs->buckets[i].sum * 100.0 / hs->tot_sum);
+	}
+}
+
+/* Summarize the contents of the histogram. */
+void
+hist_summarize(
+	const struct histogram		*hs,
+	const struct histogram_strings	*hstr)
+{
+	printf("%s %lld\n", hstr->observations, hs->tot_obs);
+	printf("%s %lld\n", hstr->sum, hs->tot_sum);
+	printf("%s %g\n", hstr->averages,
+			(double)hs->tot_sum / (double)hs->tot_obs);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
new file mode 100644
index 000000000000..d85f8edb8752
--- /dev/null
+++ b/libfrog/histogram.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_HISTOGRAM_H__
+#define __LIBFROG_HISTOGRAM_H__
+
+struct histbucket
+{
+	/* Low and high size of this bucket */
+	long long		low;
+	long long		high;
+
+	/* Count of observations recorded */
+	long long		nr_obs;
+
+	/* Sum of values recorded */
+	long long		sum;
+};
+
+struct histogram {
+	/* Sum of all values recorded */
+	long long		tot_sum;
+
+	/* Count of all observations recorded */
+	long long		tot_obs;
+
+	struct histbucket	*buckets;
+
+	/* Number of buckets */
+	unsigned int		nr_buckets;
+};
+
+int hist_add_bucket(struct histogram *hs, long long bucket_low);
+void hist_add(struct histogram *hs, long long value);
+void hist_init(struct histogram *hs);
+void hist_prepare(struct histogram *hs, long long maxvalue);
+void hist_free(struct histogram *hs);
+
+struct histogram_strings {
+	/* What does each sum represent? ("free blocks") */
+	const char		*sum;
+
+	/* What does each observation represent? ("free extents") */
+	const char		*observations;
+
+	/* What does sum / observation represent? ("average extent length") */
+	const char		*averages;
+};
+
+void hist_print(const struct histogram *hs,
+		const struct histogram_strings *hstr);
+void hist_summarize(const struct histogram *hs,
+		const struct histogram_strings *hstr);
+
+static inline unsigned int hist_buckets(const struct histogram *hs)
+{
+	return hs->nr_buckets;
+}
+
+#endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/spaceman/freesp.c b/spaceman/freesp.c
index f5177cb4ee5d..dfbec52a7160 100644
--- a/spaceman/freesp.c
+++ b/spaceman/freesp.c
@@ -15,76 +15,52 @@
 #include "libfrog/paths.h"
 #include "space.h"
 #include "input.h"
-
-struct histent
-{
-	long long	low;
-	long long	high;
-	long long	count;
-	long long	blocks;
-};
+#include "libfrog/histogram.h"
 
 static int		agcount;
 static xfs_agnumber_t	*aglist;
-static struct histent	*hist;
+static struct histogram	freesp_hist;
 static int		dumpflag;
 static long long	equalsize;
 static long long	multsize;
-static int		histcount;
 static int		seen1;
 static int		summaryflag;
 static int		gflag;
 static bool		rtflag;
-static long long	totblocks;
-static long long	totexts;
 
 static cmdinfo_t freesp_cmd;
 
-static void
+static inline void
 addhistent(
 	long long	h)
 {
-	if (histcount == INT_MAX) {
+	int		error;
+
+	error = hist_add_bucket(&freesp_hist, h);
+	if (error == EFBIG) {
 		printf(_("Too many histogram buckets.\n"));
 		return;
 	}
-	hist = realloc(hist, (histcount + 1) * sizeof(*hist));
+	if (error) {
+		printf("%s\n", strerror(error));
+		return;
+	}
+
 	if (h == 0)
 		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
 	if (h == 1)
 		seen1 = 1;
 }
 
-static void
+static inline void
 addtohist(
 	xfs_agnumber_t	agno,
 	xfs_agblock_t	agbno,
 	off_t		len)
 {
-	long		i;
-
 	if (dumpflag)
 		printf("%8d %8d %8"PRId64"\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((struct histent *)a)->low - ((struct histent *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -93,6 +69,7 @@ histinit(
 {
 	long long	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -102,29 +79,19 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
-static void
+static inline void
 printhist(void)
 {
-	int	i;
+	struct histogram_strings hstr = {
+		.sum		= _("blocks"),
+		.observations	= _("extents"),
+	};
 
-	printf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			printf("%7lld %7lld %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist, &hstr);
 }
 
 static int
@@ -255,10 +222,8 @@ init(
 	int			speced = 0;	/* only one of -b -e -h or -m */
 
 	agcount = dumpflag = equalsize = multsize = optind = gflag = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
 	rtflag = false;
 
 	while ((c = getopt(argc, argv, "a:bde:gh:m:rs")) != EOF) {
@@ -287,7 +252,7 @@ init(
 			gflag++;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				goto many_spec;
 			/* addhistent increments histcount */
 			x = cvt_s64(optarg, 0);
@@ -345,18 +310,20 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount && !gflag)
+	if (hist_buckets(&freesp_hist) > 0 && !gflag)
 		printhist();
 	if (summaryflag) {
-		printf(_("total free extents %lld\n"), totexts);
-		printf(_("total free blocks %lld\n"), totblocks);
-		printf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
+		struct histogram_strings hstr = {
+			.sum		= _("total free blocks"),
+			.observations	= _("total free extents"),
+			.averages	= _("average free extent size"),
+		};
+
+		hist_summarize(&freesp_hist, &hstr);
 	}
 	if (aglist)
 		free(aglist);
-	if (hist)
-		free(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/7] libfrog: print wider columns for free space histogram
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
@ 2024-07-03 21:47 ` Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 3/7] libfrog: print cdf of free space buckets Darrick J. Wong
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:47 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

The values reported here can reach very large values, so compute the
column width dynamically.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 libfrog/histogram.c |   29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)


diff --git a/libfrog/histogram.c b/libfrog/histogram.c
index c2f344a88eb6..7cee6b350046 100644
--- a/libfrog/histogram.c
+++ b/libfrog/histogram.c
@@ -110,10 +110,30 @@ hist_print(
 {
 	unsigned int			obs_w = strlen(hstr->observations);
 	unsigned int			sum_w = strlen(hstr->sum);
+	unsigned int			from_w = 7, to_w = 7;
 	unsigned int			i;
 
-	printf("%7s %7s %*s %*s %6s\n",
-			_("from"), _("to"),
+	for (i = 0; i < hs->nr_buckets; i++) {
+		char buf[256];
+
+		if (hs->buckets[i].nr_obs == 0)
+			continue;
+
+		snprintf(buf, sizeof(buf) - 1, "%lld", hs->buckets[i].low);
+		from_w = max(from_w, strlen(buf));
+
+		snprintf(buf, sizeof(buf) - 1, "%lld", hs->buckets[i].high);
+		to_w = max(to_w, strlen(buf));
+
+		snprintf(buf, sizeof(buf) - 1, "%lld", hs->buckets[i].nr_obs);
+		obs_w = max(obs_w, strlen(buf));
+
+		snprintf(buf, sizeof(buf) - 1, "%lld", hs->buckets[i].sum);
+		sum_w = max(sum_w, strlen(buf));
+	}
+
+	printf("%*s %*s %*s %*s %6s\n",
+			from_w, _("from"), to_w, _("to"),
 			obs_w, hstr->observations,
 			sum_w, hstr->sum,
 			_("pct"));
@@ -122,8 +142,9 @@ hist_print(
 		if (hs->buckets[i].nr_obs == 0)
 			continue;
 
-		printf("%7lld %7lld %*lld %*lld %6.2f\n",
-				hs->buckets[i].low, hs->buckets[i].high,
+		printf("%*lld %*lld %*lld %*lld %6.2f\n",
+				from_w, hs->buckets[i].low,
+				to_w, hs->buckets[i].high,
 				obs_w, hs->buckets[i].nr_obs,
 				sum_w, hs->buckets[i].sum,
 				hs->buckets[i].sum * 100.0 / hs->tot_sum);


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/7] libfrog: print cdf of free space buckets
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 2/7] libfrog: print wider columns for free space histogram Darrick J. Wong
@ 2024-07-03 21:47 ` Darrick J. Wong
  2024-07-03 21:47 ` [PATCH 4/7] xfs_scrub: don't close stdout when closing the progress bar Darrick J. Wong
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:47 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Print the cumulative distribution function of the free space buckets in
reverse order.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 libfrog/histogram.c |   76 ++++++++++++++++++++++++++++++++++++++++++++++++---
 libfrog/histogram.h |    8 +++++
 2 files changed, 80 insertions(+), 4 deletions(-)


diff --git a/libfrog/histogram.c b/libfrog/histogram.c
index 7cee6b350046..59d46363c1da 100644
--- a/libfrog/histogram.c
+++ b/libfrog/histogram.c
@@ -102,17 +102,81 @@ hist_free(
 	memset(hs, 0, sizeof(struct histogram));
 }
 
+/*
+ * Compute the CDF of the histogram in decreasing order of value.
+ *
+ * For a free space histogram, callers can determine how much free space is not
+ * in the long tail of small extents, e.g. 98% of the free space extents are
+ * larger than 31 blocks.
+ */
+static struct histogram_cdf *
+hist_cdf(
+	const struct histogram	*hs)
+{
+	struct histogram_cdf	*cdf;
+	int			i = hs->nr_buckets - 1;
+
+	ASSERT(hs->nr_buckets < INT_MAX);
+
+	cdf = malloc(sizeof(struct histogram_cdf));
+	if (!cdf)
+		return NULL;
+	cdf->histogram = hs;
+
+	if (hs->nr_buckets == 0) {
+		cdf->buckets = NULL;
+		return cdf;
+	}
+
+	cdf->buckets = calloc(hs->nr_buckets, sizeof(struct histbucket));
+	if (!cdf->buckets) {
+		free(cdf);
+		return NULL;
+	}
+
+	cdf->buckets[i].nr_obs = hs->buckets[i].nr_obs;
+	cdf->buckets[i].sum = hs->buckets[i].sum;
+	i--;
+
+	while (i >= 0) {
+		cdf->buckets[i].nr_obs = hs->buckets[i].nr_obs +
+					cdf->buckets[i + 1].nr_obs;
+
+		cdf->buckets[i].sum =    hs->buckets[i].sum +
+					cdf->buckets[i + 1].sum;
+		i--;
+	}
+
+	return cdf;
+}
+
+/* Free all data associated with a histogram cdf. */
+static void
+histcdf_free(
+	struct histogram_cdf	*cdf)
+{
+	free(cdf->buckets);
+	free(cdf);
+}
+
 /* Dump a histogram to stdout. */
 void
 hist_print(
 	const struct histogram		*hs,
 	const struct histogram_strings	*hstr)
 {
+	struct histogram_cdf		*cdf;
 	unsigned int			obs_w = strlen(hstr->observations);
 	unsigned int			sum_w = strlen(hstr->sum);
 	unsigned int			from_w = 7, to_w = 7;
 	unsigned int			i;
 
+	cdf = hist_cdf(hs);
+	if (!cdf) {
+		perror(_("histogram cdf"));
+		return;
+	}
+
 	for (i = 0; i < hs->nr_buckets; i++) {
 		char buf[256];
 
@@ -132,23 +196,27 @@ hist_print(
 		sum_w = max(sum_w, strlen(buf));
 	}
 
-	printf("%*s %*s %*s %*s %6s\n",
+	printf("%*s %*s %*s %*s %6s %6s %6s\n",
 			from_w, _("from"), to_w, _("to"),
 			obs_w, hstr->observations,
 			sum_w, hstr->sum,
-			_("pct"));
+			_("pct"), _("blkcdf"), _("extcdf"));
 
 	for (i = 0; i < hs->nr_buckets; i++) {
 		if (hs->buckets[i].nr_obs == 0)
 			continue;
 
-		printf("%*lld %*lld %*lld %*lld %6.2f\n",
+		printf("%*lld %*lld %*lld %*lld %6.2f %6.2f %6.2f\n",
 				from_w, hs->buckets[i].low,
 				to_w, hs->buckets[i].high,
 				obs_w, hs->buckets[i].nr_obs,
 				sum_w, hs->buckets[i].sum,
-				hs->buckets[i].sum * 100.0 / hs->tot_sum);
+				hs->buckets[i].sum * 100.0 / hs->tot_sum,
+				cdf->buckets[i].sum * 100.0 / hs->tot_sum,
+				cdf->buckets[i].nr_obs * 100.0 / hs->tot_obs);
 	}
+
+	histcdf_free(cdf);
 }
 
 /* Summarize the contents of the histogram. */
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
index d85f8edb8752..967698774b0c 100644
--- a/libfrog/histogram.h
+++ b/libfrog/histogram.h
@@ -34,6 +34,14 @@ struct histogram {
 	unsigned int		nr_buckets;
 };
 
+struct histogram_cdf {
+	/* histogram from which this cdf was computed */
+	const struct histogram	*histogram;
+
+	/* distribution information */
+	struct histbucket	*buckets;
+};
+
 int hist_add_bucket(struct histogram *hs, long long bucket_low);
 void hist_add(struct histogram *hs, long long value);
 void hist_init(struct histogram *hs);


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/7] xfs_scrub: don't close stdout when closing the progress bar
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
                   ` (2 preceding siblings ...)
  2024-07-03 21:47 ` [PATCH 3/7] libfrog: print cdf of free space buckets Darrick J. Wong
@ 2024-07-03 21:47 ` Darrick J. Wong
  2024-07-03 21:48 ` [PATCH 5/7] xfs_scrub: remove pointless spacemap.c arguments Darrick J. Wong
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:47 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

When we're tearing down the progress bar file stream, check that it's
not an alias of stdout before closing it.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 scrub/xfs_scrub.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index edf58d07bde2..adf9d13e5090 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -878,7 +878,7 @@ main(
 	if (ctx.runtime_errors)
 		ret |= SCRUB_RET_OPERROR;
 	phase_end(&all_pi, 0);
-	if (progress_fp)
+	if (progress_fp && fileno(progress_fp) != 1)
 		fclose(progress_fp);
 out_unicrash:
 	unicrash_unload();


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/7] xfs_scrub: remove pointless spacemap.c arguments
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
                   ` (3 preceding siblings ...)
  2024-07-03 21:47 ` [PATCH 4/7] xfs_scrub: don't close stdout when closing the progress bar Darrick J. Wong
@ 2024-07-03 21:48 ` Darrick J. Wong
  2024-07-03 21:48 ` [PATCH 6/7] xfs_scrub: collect free space histograms during phase 7 Darrick J. Wong
  2024-07-03 21:48 ` [PATCH 7/7] xfs_scrub: tune fstrim minlen parameter based on free space histograms Darrick J. Wong
  6 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:48 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Remove unused parameters from the full-device spacemap scan functions.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 scrub/spacemap.c |   11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)


diff --git a/scrub/spacemap.c b/scrub/spacemap.c
index 9cefe074c6f6..e35756db2eed 100644
--- a/scrub/spacemap.c
+++ b/scrub/spacemap.c
@@ -132,7 +132,6 @@ scan_ag_rmaps(
 static void
 scan_dev_rmaps(
 	struct scrub_ctx	*ctx,
-	int			idx,
 	dev_t			dev,
 	struct scan_blocks	*sbx)
 {
@@ -170,7 +169,7 @@ scan_rt_rmaps(
 {
 	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
 
-	scan_dev_rmaps(ctx, agno, ctx->fsinfo.fs_rtdev, arg);
+	scan_dev_rmaps(ctx, ctx->fsinfo.fs_rtdev, arg);
 }
 
 /* Iterate all the reverse mappings of the log device. */
@@ -182,7 +181,7 @@ scan_log_rmaps(
 {
 	struct scrub_ctx	*ctx = (struct scrub_ctx *)wq->wq_ctx;
 
-	scan_dev_rmaps(ctx, agno, ctx->fsinfo.fs_logdev, arg);
+	scan_dev_rmaps(ctx, ctx->fsinfo.fs_logdev, arg);
 }
 
 /*
@@ -210,8 +209,7 @@ scrub_scan_all_spacemaps(
 		return ret;
 	}
 	if (ctx->fsinfo.fs_rt) {
-		ret = -workqueue_add(&wq, scan_rt_rmaps,
-				ctx->mnt.fsgeom.agcount + 1, &sbx);
+		ret = -workqueue_add(&wq, scan_rt_rmaps, 0, &sbx);
 		if (ret) {
 			sbx.aborted = true;
 			str_liberror(ctx, ret, _("queueing rtdev fsmap work"));
@@ -219,8 +217,7 @@ scrub_scan_all_spacemaps(
 		}
 	}
 	if (ctx->fsinfo.fs_log) {
-		ret = -workqueue_add(&wq, scan_log_rmaps,
-				ctx->mnt.fsgeom.agcount + 2, &sbx);
+		ret = -workqueue_add(&wq, scan_log_rmaps, 0, &sbx);
 		if (ret) {
 			sbx.aborted = true;
 			str_liberror(ctx, ret, _("queueing logdev fsmap work"));


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/7] xfs_scrub: collect free space histograms during phase 7
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
                   ` (4 preceding siblings ...)
  2024-07-03 21:48 ` [PATCH 5/7] xfs_scrub: remove pointless spacemap.c arguments Darrick J. Wong
@ 2024-07-03 21:48 ` Darrick J. Wong
  2024-07-03 21:48 ` [PATCH 7/7] xfs_scrub: tune fstrim minlen parameter based on free space histograms Darrick J. Wong
  6 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:48 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Collect a histogram of free space observed during phase 7.  We'll put
this information to use in the next patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 libfrog/histogram.c |   38 ++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |    3 +++
 scrub/phase7.c      |   47 +++++++++++++++++++++++++++++++++++++++++++++--
 scrub/xfs_scrub.c   |    5 +++++
 scrub/xfs_scrub.h   |    4 ++++
 5 files changed, 95 insertions(+), 2 deletions(-)


diff --git a/libfrog/histogram.c b/libfrog/histogram.c
index 59d46363c1da..543c8a636462 100644
--- a/libfrog/histogram.c
+++ b/libfrog/histogram.c
@@ -230,3 +230,41 @@ hist_summarize(
 	printf("%s %g\n", hstr->averages,
 			(double)hs->tot_sum / (double)hs->tot_obs);
 }
+
+/* Copy the contents of src to dest. */
+void
+hist_import(
+	struct histogram	*dest,
+	const struct histogram	*src)
+{
+	unsigned int		i;
+
+	ASSERT(dest->nr_buckets == src->nr_buckets);
+
+	dest->tot_sum += src->tot_sum;
+	dest->tot_obs += src->tot_obs;
+
+	for (i = 0; i < dest->nr_buckets; i++) {
+		ASSERT(dest->buckets[i].low == src->buckets[i].low);
+		ASSERT(dest->buckets[i].high == src->buckets[i].high);
+
+		dest->buckets[i].nr_obs += src->buckets[i].nr_obs;
+		dest->buckets[i].sum += src->buckets[i].sum;
+	}
+}
+
+/*
+ * Move the contents of src to dest and reinitialize src.  dst must not
+ * contain any observations or buckets.
+ */
+void
+hist_move(
+	struct histogram	*dest,
+	struct histogram	*src)
+{
+	ASSERT(dest->nr_buckets == 0);
+	ASSERT(dest->tot_obs == 0);
+
+	memcpy(dest, src, sizeof(struct histogram));
+	hist_init(src);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
index 967698774b0c..654de86b96e6 100644
--- a/libfrog/histogram.h
+++ b/libfrog/histogram.h
@@ -69,4 +69,7 @@ static inline unsigned int hist_buckets(const struct histogram *hs)
 	return hs->nr_buckets;
 }
 
+void hist_import(struct histogram *dest, const struct histogram *src);
+void hist_move(struct histogram *dest, struct histogram *src);
+
 #endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/scrub/phase7.c b/scrub/phase7.c
index cce5ede00122..475d8f157eec 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -12,6 +12,7 @@
 #include "libfrog/ptvar.h"
 #include "libfrog/fsgeom.h"
 #include "libfrog/scrub.h"
+#include "libfrog/histogram.h"
 #include "list.h"
 #include "xfs_scrub.h"
 #include "common.h"
@@ -27,8 +28,36 @@ struct summary_counts {
 	unsigned long long	rbytes;		/* rt dev bytes */
 	unsigned long long	next_phys;	/* next phys bytes we see? */
 	unsigned long long	agbytes;	/* freespace bytes */
+
+	/* Free space histogram, in fsb */
+	struct histogram	datadev_hist;
 };
 
+/*
+ * Initialize a free space histogram.  Unsharded realtime volumes can be up to
+ * 2^52 blocks long, so we allocate enough buckets to handle that.
+ */
+static inline void
+init_freesp_hist(
+	struct histogram	*hs)
+{
+	unsigned int		i;
+
+	hist_init(hs);
+	for (i = 0; i < 53; i++)
+		hist_add_bucket(hs, 1ULL << i);
+	hist_prepare(hs, 1ULL << 53);
+}
+
+static void
+summary_count_init(
+	void			*data)
+{
+	struct summary_counts	*counts = data;
+
+	init_freesp_hist(&counts->datadev_hist);
+}
+
 /* Record block usage. */
 static int
 count_block_summary(
@@ -48,8 +77,14 @@ count_block_summary(
 	if (fsmap->fmr_device == ctx->fsinfo.fs_logdev)
 		return 0;
 	if ((fsmap->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
-	    fsmap->fmr_owner == XFS_FMR_OWN_FREE)
+	    fsmap->fmr_owner == XFS_FMR_OWN_FREE) {
+		uint64_t	blocks;
+
+		blocks = cvt_b_to_off_fsbt(&ctx->mnt, fsmap->fmr_length);
+		if (fsmap->fmr_device == ctx->fsinfo.fs_datadev)
+			hist_add(&counts->datadev_hist, blocks);
 		return 0;
+	}
 
 	len = fsmap->fmr_length;
 
@@ -87,6 +122,9 @@ add_summaries(
 	total->dbytes += item->dbytes;
 	total->rbytes += item->rbytes;
 	total->agbytes += item->agbytes;
+
+	hist_import(&total->datadev_hist, &item->datadev_hist);
+	hist_free(&item->datadev_hist);
 	return 0;
 }
 
@@ -118,6 +156,8 @@ phase7_func(
 	int			ip;
 	int			error;
 
+	summary_count_init(&totalcount);
+
 	/* Check and fix the summary metadata. */
 	scrub_item_init_fs(&sri);
 	scrub_item_schedule_group(&sri, XFROG_SCRUB_GROUP_SUMMARY);
@@ -136,7 +176,7 @@ phase7_func(
 	}
 
 	error = -ptvar_alloc(scrub_nproc(ctx), sizeof(struct summary_counts),
-			NULL, &ptvar);
+			summary_count_init, &ptvar);
 	if (error) {
 		str_liberror(ctx, error, _("setting up block counter"));
 		return error;
@@ -153,6 +193,9 @@ phase7_func(
 	}
 	ptvar_free(ptvar);
 
+	/* Preserve free space histograms for phase 8. */
+	hist_move(&ctx->datadev_hist, &totalcount.datadev_hist);
+
 	/* Scan the whole fs. */
 	error = scrub_count_all_inodes(ctx, &counted_inodes);
 	if (error) {
diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index adf9d13e5090..2894f6148e10 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -18,6 +18,7 @@
 #include "descr.h"
 #include "unicrash.h"
 #include "progress.h"
+#include "libfrog/histogram.h"
 
 /*
  * XFS Online Metadata Scrub (and Repair)
@@ -669,6 +670,8 @@ main(
 	int			ret = SCRUB_RET_SUCCESS;
 	int			error;
 
+	hist_init(&ctx.datadev_hist);
+
 	fprintf(stdout, "EXPERIMENTAL xfs_scrub program in use! Use at your own risk!\n");
 	fflush(stdout);
 
@@ -883,6 +886,8 @@ main(
 out_unicrash:
 	unicrash_unload();
 
+	hist_free(&ctx.datadev_hist);
+
 	/*
 	 * If we're being run as a service, the return code must fit the LSB
 	 * init script action error guidelines, which is to say that we
diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h
index 6272a36879e7..1a28f0cc847e 100644
--- a/scrub/xfs_scrub.h
+++ b/scrub/xfs_scrub.h
@@ -7,6 +7,7 @@
 #define XFS_SCRUB_XFS_SCRUB_H_
 
 #include "libfrog/fsgeom.h"
+#include "libfrog/histogram.h"
 
 extern char *progname;
 
@@ -86,6 +87,9 @@ struct scrub_ctx {
 	unsigned long long	preens;
 	bool			scrub_setup_succeeded;
 	bool			preen_triggers[XFS_SCRUB_TYPE_NR];
+
+	/* Free space histograms, in fsb */
+	struct histogram	datadev_hist;
 };
 
 /* Phase helper functions */


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 7/7] xfs_scrub: tune fstrim minlen parameter based on free space histograms
  2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
                   ` (5 preceding siblings ...)
  2024-07-03 21:48 ` [PATCH 6/7] xfs_scrub: collect free space histograms during phase 7 Darrick J. Wong
@ 2024-07-03 21:48 ` Darrick J. Wong
  6 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-03 21:48 UTC (permalink / raw)
  To: cem, djwong; +Cc: Christoph Hellwig, hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Currently, phase 8 runs very slowly on filesystems with a lot of small
free space extents.  To reduce the amount of time spent on fstrim
activities during phase 8, we want to balance estimated runtime against
completeness of the trim.  In short, the goal is to reduce runtime by
avoiding small trim requests.

At the start of phase 8, a CDF is computed in decreasing order of extent
length from the histogram buckets created during the fsmap scan in phase
7.  A point corresponding to the fstrim percentage target is chosen from
the CDF and mapped back to a histogram bucket, and free space extents
smaller than that amount are ommitted from fstrim.

On my aging /home filesystem, the free space histogram reported by
xfs_spaceman looks like this:

   from      to extents    blocks    pct blkcdf extcdf
      1       1  121953    121953   0.04 100.00 100.00
      2       3  124741    299694   0.09  99.96  81.16
      4       7  113492    593763   0.18  99.87  61.89
      8      15  109215   1179524   0.36  99.69  44.36
     16      31   76972   1695455   0.52  99.33  27.48
     32      63   48655   2219667   0.68  98.82  15.59
     64     127   31398   2876898   0.88  98.14   8.08
    128     255    8014   1447920   0.44  97.27   3.23
    256     511    4142   1501758   0.46  96.82   1.99
    512    1023    2433   1768732   0.54  96.37   1.35
   1024    2047    1795   2648460   0.81  95.83   0.97
   2048    4095    1429   4206103   1.28  95.02   0.69
   4096    8191    1045   6162111   1.88  93.74   0.47
   8192   16383     791   9242745   2.81  91.87   0.31
  16384   32767     473  10883977   3.31  89.06   0.19
  32768   65535     272  12385566   3.77  85.74   0.12
  65536  131071     192  18098739   5.51  81.98   0.07
 131072  262143     108  20675199   6.29  76.47   0.04
 262144  524287      80  29061285   8.84  70.18   0.03
 524288 1048575      39  29002829   8.83  61.33   0.02
1048576 2097151      25  36824985  11.21  52.51   0.01
2097152 4194303      32 101727192  30.95  41.30   0.01
4194304 8388607       7  34007410  10.35  10.35   0.00

From this table, we see that free space extents that are 16 blocks or
longer constitute 99.3% of the free space in the filesystem but only
27.5% of the extents.  If we set the fstrim minlen parameter to 16
blocks, that means that we can trim over 99% of the space in one third
of the time it would take to trim everything.

Add a new -o fstrim_pct= option to xfs_scrub just in case there are
users out there who want a different percentage.  For example, accepting
a 95% trim would net us a speed increase of nearly two orders of
magnitude, ignoring system call overhead.  Setting it to 100% will trim
everything, just like fstrim(8).

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 libfrog/histogram.c  |    4 +-
 libfrog/histogram.h  |    3 ++
 man/man8/xfs_scrub.8 |   16 +++++++++
 scrub/phase8.c       |   91 +++++++++++++++++++++++++++++++++++++++++++++++---
 scrub/vfs.c          |    4 ++
 scrub/vfs.h          |    2 +
 scrub/xfs_scrub.c    |   38 ++++++++++++++++++++-
 scrub/xfs_scrub.h    |   12 +++++++
 8 files changed, 160 insertions(+), 10 deletions(-)


diff --git a/libfrog/histogram.c b/libfrog/histogram.c
index 543c8a636462..f6a749e3cc81 100644
--- a/libfrog/histogram.c
+++ b/libfrog/histogram.c
@@ -109,7 +109,7 @@ hist_free(
  * in the long tail of small extents, e.g. 98% of the free space extents are
  * larger than 31 blocks.
  */
-static struct histogram_cdf *
+struct histogram_cdf *
 hist_cdf(
 	const struct histogram	*hs)
 {
@@ -151,7 +151,7 @@ hist_cdf(
 }
 
 /* Free all data associated with a histogram cdf. */
-static void
+void
 histcdf_free(
 	struct histogram_cdf	*cdf)
 {
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
index 654de86b96e6..e4395870182d 100644
--- a/libfrog/histogram.h
+++ b/libfrog/histogram.h
@@ -69,6 +69,9 @@ static inline unsigned int hist_buckets(const struct histogram *hs)
 	return hs->nr_buckets;
 }
 
+struct histogram_cdf *hist_cdf(const struct histogram *hs);
+void histcdf_free(struct histogram_cdf *cdf);
+
 void hist_import(struct histogram *dest, const struct histogram *src);
 void hist_move(struct histogram *dest, struct histogram *src);
 
diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8
index 404baba696e1..b9f253e1b079 100644
--- a/man/man8/xfs_scrub.8
+++ b/man/man8/xfs_scrub.8
@@ -100,6 +100,22 @@ The
 supported are:
 .RS 1.0i
 .TP
+.BI fstrim_pct= percentage
+To constrain the amount of time spent on fstrim activities during phase 8,
+this program tries to balance estimated runtime against completeness of the
+trim.
+In short, the program avoids small trim requests to save time.
+
+During phase 7, a log-scale histogram of free space extents is constructed.
+At the start of phase 8, a CDF is computed in decreasing order of extent
+length from the histogram buckets.
+A point corresponding to the fstrim percentage target is chosen from the CDF
+and mapped back to a histogram bucket.
+Free space extents at least as long as the bucket size are trimmed.
+Smaller extents are ignored.
+
+By default, the percentage threshold is 99%.
+.TP
 .BI iwarn
 Treat informational messages as warnings.
 This will result in a nonzero return code, and a higher logging level.
diff --git a/scrub/phase8.c b/scrub/phase8.c
index e35bf11bf329..1c88460c3396 100644
--- a/scrub/phase8.c
+++ b/scrub/phase8.c
@@ -11,6 +11,7 @@
 #include "list.h"
 #include "libfrog/paths.h"
 #include "libfrog/workqueue.h"
+#include "libfrog/histogram.h"
 #include "xfs_scrub.h"
 #include "common.h"
 #include "progress.h"
@@ -57,10 +58,12 @@ static int
 fstrim_fsblocks(
 	struct scrub_ctx	*ctx,
 	uint64_t		start_fsb,
-	uint64_t		fsbcount)
+	uint64_t		fsbcount,
+	uint64_t		minlen_fsb)
 {
 	uint64_t		start = cvt_off_fsb_to_b(&ctx->mnt, start_fsb);
 	uint64_t		len = cvt_off_fsb_to_b(&ctx->mnt, fsbcount);
+	uint64_t		minlen = cvt_off_fsb_to_b(&ctx->mnt, minlen_fsb);
 	int			error;
 
 	while (len > 0) {
@@ -68,7 +71,7 @@ fstrim_fsblocks(
 
 		run = min(len, FSTRIM_MAX_BYTES);
 
-		error = fstrim(ctx, start, run);
+		error = fstrim(ctx, start, run, minlen);
 		if (error == EOPNOTSUPP) {
 			/* Pretend we finished all the work. */
 			progress_add(len);
@@ -78,9 +81,10 @@ fstrim_fsblocks(
 			char		descr[DESCR_BUFSZ];
 
 			snprintf(descr, sizeof(descr) - 1,
-					_("fstrim start 0x%llx run 0x%llx"),
+					_("fstrim start 0x%llx run 0x%llx minlen 0x%llx"),
 					(unsigned long long)start,
-					(unsigned long long)run);
+					(unsigned long long)run,
+					(unsigned long long)minlen);
 			str_liberror(ctx, error, descr);
 			return error;
 		}
@@ -93,6 +97,80 @@ fstrim_fsblocks(
 	return 0;
 }
 
+/*
+ * Return the smallest minlen that still enables us to discard the specified
+ * number of free blocks.  Returns 0 if something goes wrong, which means no
+ * minlen threshold for discard.
+ */
+static uint64_t
+minlen_for_threshold(
+	const struct histogram	*hs,
+	uint64_t		blk_threshold)
+{
+	struct histogram_cdf	*cdf;
+	unsigned int		i;
+	uint64_t		ret = 0;
+
+	/* Insufficient samples to make a meaningful histogram */
+	if (hs->tot_obs < hs->nr_buckets * 10)
+		return 0;
+
+	cdf = hist_cdf(hs);
+	if (!cdf)
+		return 0;
+
+	for (i = 1; i < hs->nr_buckets; i++) {
+		if (cdf->buckets[i].sum < blk_threshold) {
+			ret = hs->buckets[i - 1].low;
+			break;
+		}
+	}
+
+	histcdf_free(cdf);
+	return ret;
+}
+
+/* Compute a suitable minlen parameter for fstrim. */
+static uint64_t
+fstrim_compute_minlen(
+	const struct scrub_ctx	*ctx,
+	const struct histogram	*freesp_hist)
+{
+	uint64_t		ret;
+	double			blk_threshold = 0;
+	unsigned int		ag_max_usable;
+
+	/*
+	 * The kernel will reject a minlen that's larger than m_ag_max_usable.
+	 * We can't calculate or query that value directly, so we guesstimate
+	 * that it's 95% of the AG size.
+	 */
+	ag_max_usable = ctx->mnt.fsgeom.agblocks * 95 / 100;
+
+	if (debug > 1) {
+		struct histogram_strings hstr = {
+			.sum		= _("free space blocks"),
+			.observations	= _("free space extents"),
+		};
+
+		hist_print(freesp_hist, &hstr);
+	}
+
+	ret = minlen_for_threshold(freesp_hist,
+			freesp_hist->tot_sum * ctx->fstrim_block_pct);
+
+	if (debug > 1)
+		printf(_("fstrim minlen %lld threshold %lld ag_max_usable %u\n"),
+				(unsigned long long)ret,
+				(unsigned long long)blk_threshold,
+				ag_max_usable);
+	if (ret > ag_max_usable)
+		ret = ag_max_usable;
+	if (ret == 1)
+		ret = 0;
+	return ret;
+}
+
 /* Trim each AG on the data device. */
 static int
 fstrim_datadev(
@@ -100,8 +178,11 @@ fstrim_datadev(
 {
 	struct xfs_fsop_geom	*geo = &ctx->mnt.fsgeom;
 	uint64_t		fsbno;
+	uint64_t		minlen_fsb;
 	int			error;
 
+	minlen_fsb = fstrim_compute_minlen(ctx, &ctx->datadev_hist);
+
 	for (fsbno = 0; fsbno < geo->datablocks; fsbno += geo->agblocks) {
 		uint64_t	fsbcount;
 
@@ -112,7 +193,7 @@ fstrim_datadev(
 		 */
 		progress_add(geo->blocksize);
 		fsbcount = min(geo->datablocks - fsbno, geo->agblocks);
-		error = fstrim_fsblocks(ctx, fsbno, fsbcount);
+		error = fstrim_fsblocks(ctx, fsbno, fsbcount, minlen_fsb);
 		if (error)
 			return error;
 	}
diff --git a/scrub/vfs.c b/scrub/vfs.c
index cc958ba9438e..22c19485a2da 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -300,11 +300,13 @@ int
 fstrim(
 	struct scrub_ctx	*ctx,
 	uint64_t		start,
-	uint64_t		len)
+	uint64_t		len,
+	uint64_t		minlen)
 {
 	struct fstrim_range	range = {
 		.start		= start,
 		.len		= len,
+		.minlen		= minlen,
 	};
 
 	if (ioctl(ctx->mnt.fd, FITRIM, &range) == 0)
diff --git a/scrub/vfs.h b/scrub/vfs.h
index 1af8d80d1de6..f0cfd53c27be 100644
--- a/scrub/vfs.h
+++ b/scrub/vfs.h
@@ -24,6 +24,6 @@ typedef int (*scan_fs_tree_dirent_fn)(struct scrub_ctx *, const char *,
 int scan_fs_tree(struct scrub_ctx *ctx, scan_fs_tree_dir_fn dir_fn,
 		scan_fs_tree_dirent_fn dirent_fn, void *arg);
 
-int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len);
+int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len, uint64_t minlen);
 
 #endif /* XFS_SCRUB_VFS_H_ */
diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c
index 2894f6148e10..296d814eceeb 100644
--- a/scrub/xfs_scrub.c
+++ b/scrub/xfs_scrub.c
@@ -622,11 +622,13 @@ report_outcome(
  */
 enum o_opt_nums {
 	IWARN = 0,
+	FSTRIM_PCT,
 	O_MAX_OPTS,
 };
 
 static char *o_opts[] = {
 	[IWARN]			= "iwarn",
+	[FSTRIM_PCT]		= "fstrim_pct",
 	[O_MAX_OPTS]		= NULL,
 };
 
@@ -635,8 +637,11 @@ parse_o_opts(
 	struct scrub_ctx	*ctx,
 	char			*p)
 {
+	double			dval;
+
 	while (*p != '\0')  {
 		char		*val;
+		char		*endp;
 
 		switch (getsubopt(&p, o_opts, &val))  {
 		case IWARN:
@@ -647,6 +652,35 @@ parse_o_opts(
 			}
 			info_is_warning = true;
 			break;
+		case FSTRIM_PCT:
+			if (!val) {
+				fprintf(stderr,
+ _("-o fstrim_pct requires a parameter\n"));
+				usage();
+			}
+
+			errno = 0;
+			dval = strtod(val, &endp);
+
+			if (*endp) {
+				fprintf(stderr,
+ _("-o fstrim_pct must be a floating point number\n"));
+				usage();
+			}
+			if (errno) {
+				fprintf(stderr,
+ _("-o fstrim_pct: %s\n"),
+						strerror(errno));
+				usage();
+			}
+			if (dval <= 0 || dval > 100) {
+				fprintf(stderr,
+ _("-o fstrim_pct must be larger than 0 and less than 100\n"));
+				usage();
+			}
+
+			ctx->fstrim_block_pct = dval / 100.0;
+			break;
 		default:
 			usage();
 			break;
@@ -659,7 +693,9 @@ main(
 	int			argc,
 	char			**argv)
 {
-	struct scrub_ctx	ctx = {0};
+	struct scrub_ctx	ctx = {
+		.fstrim_block_pct = FSTRIM_BLOCK_PCT_DEFAULT,
+	};
 	struct phase_rusage	all_pi;
 	char			*mtab = NULL;
 	FILE			*progress_fp = NULL;
diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h
index 1a28f0cc847e..7d48f4bad9ce 100644
--- a/scrub/xfs_scrub.h
+++ b/scrub/xfs_scrub.h
@@ -90,8 +90,20 @@ struct scrub_ctx {
 
 	/* Free space histograms, in fsb */
 	struct histogram	datadev_hist;
+
+	/*
+	 * Pick the largest value for fstrim minlen such that we trim at least
+	 * this much space per volume.
+	 */
+	double			fstrim_block_pct;
 };
 
+/*
+ * Trim only enough free space extents (in order of decreasing length) to
+ * ensure that this percentage of the free space is trimmed.
+ */
+#define FSTRIM_BLOCK_PCT_DEFAULT	(99.0 / 100.0)
+
 /* Phase helper functions */
 void xfs_shutdown_fs(struct scrub_ctx *ctx);
 int scrub_cleanup(struct scrub_ctx *ctx);


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-03 21:47 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
@ 2024-07-04  5:22   ` Christoph Hellwig
  2024-07-08 17:27   ` [PATCH v30.8.1 " Darrick J. Wong
  1 sibling, 0 replies; 18+ messages in thread
From: Christoph Hellwig @ 2024-07-04  5:22 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: cem, hch, linux-xfs

> +struct histbucket
> +{

This still use the non-standard brace placement.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v30.8.1 1/7] libfrog: hoist free space histogram code
  2024-07-03 21:47 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
  2024-07-04  5:22   ` Christoph Hellwig
@ 2024-07-08 17:27   ` Darrick J. Wong
  1 sibling, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-08 17:27 UTC (permalink / raw)
  To: cem; +Cc: hch, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Combine the two free space histograms in xfs_db and xfs_spaceman into a
single implementation.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
v30.8.1: move struct histbucket brace up by one line
---
 db/freesp.c         |   89 ++++++++------------------------
 libfrog/Makefile    |    2 +
 libfrog/histogram.c |  143 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |   63 ++++++++++++++++++++++
 spaceman/freesp.c   |   99 ++++++++++++-----------------------
 5 files changed, 264 insertions(+), 132 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h

diff --git a/db/freesp.c b/db/freesp.c
index 883741e66fee..43520481d5e0 100644
--- a/db/freesp.c
+++ b/db/freesp.c
@@ -12,14 +12,7 @@
 #include "output.h"
 #include "init.h"
 #include "malloc.h"
-
-typedef struct histent
-{
-	int		low;
-	int		high;
-	long long	count;
-	long long	blocks;
-} histent_t;
+#include "libfrog/histogram.h"
 
 static void	addhistent(int h);
 static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
@@ -46,13 +39,10 @@ static int		alignment;
 static int		countflag;
 static int		dumpflag;
 static int		equalsize;
-static histent_t	*hist;
-static int		histcount;
+static struct histogram	freesp_hist;
 static int		multsize;
 static int		seen1;
 static int		summaryflag;
-static long long	totblocks;
-static long long	totexts;
 
 static const cmdinfo_t	freesp_cmd =
 	{ "freesp", NULL, freesp_f, 0, -1, 0,
@@ -93,18 +83,20 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount)
+	if (hist_buckets(&freesp_hist))
 		printhist();
 	if (summaryflag) {
-		dbprintf(_("total free extents %lld\n"), totexts);
-		dbprintf(_("total free blocks %lld\n"), totblocks);
-		dbprintf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
+		struct histogram_strings hstr = {
+			.sum		= _("total free blocks"),
+			.observations	= _("total free extents"),
+			.averages	= _("average free extent size"),
+		};
+
+		hist_summarize(&freesp_hist, &hstr);
 	}
 	if (aglist)
 		xfree(aglist);
-	if (hist)
-		xfree(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 
@@ -132,10 +124,9 @@ init(
 	int		speced = 0;
 
 	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
+
 	while ((c = getopt(argc, argv, "A:a:bcde:h:m:s")) != EOF) {
 		switch (c) {
 		case 'A':
@@ -163,7 +154,7 @@ init(
 			speced = 1;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				return usage();
 			addhistent(atoi(optarg));
 			speced = 1;
@@ -339,14 +330,7 @@ static void
 addhistent(
 	int	h)
 {
-	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
-	if (h == 0)
-		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
-	if (h == 1)
-		seen1 = 1;
+	hist_add_bucket(&freesp_hist, h);
 }
 
 static void
@@ -355,30 +339,12 @@ addtohist(
 	xfs_agblock_t	agbno,
 	xfs_extlen_t	len)
 {
-	int		i;
-
 	if (alignment && (XFS_AGB_TO_FSB(mp,agno,agbno) % alignment))
 		return;
 
 	if (dumpflag)
 		dbprintf("%8d %8d %8d\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((histent_t *)a)->low - ((histent_t *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -387,6 +353,7 @@ histinit(
 {
 	int	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -396,27 +363,17 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
 static void
 printhist(void)
 {
-	int	i;
+	struct histogram_strings hstr = {
+		.sum		= _("blocks"),
+		.observations	= _("extents"),
+	};
 
-	dbprintf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist, &hstr);
 }
diff --git a/libfrog/Makefile b/libfrog/Makefile
index 53e3c3492377..acfa228bc8ec 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -20,6 +20,7 @@ convert.c \
 crc32.c \
 file_exchange.c \
 fsgeom.c \
+histogram.c \
 list_sort.c \
 linux.c \
 logging.c \
@@ -45,6 +46,7 @@ dahashselftest.h \
 div64.h \
 file_exchange.h \
 fsgeom.h \
+histogram.h \
 logging.h \
 paths.h \
 projects.h \
diff --git a/libfrog/histogram.c b/libfrog/histogram.c
new file mode 100644
index 000000000000..c2f344a88eb6
--- /dev/null
+++ b/libfrog/histogram.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include <stdlib.h>
+#include <string.h>
+#include "platform_defs.h"
+#include "libfrog/histogram.h"
+
+/* Create a new bucket with the given low value. */
+int
+hist_add_bucket(
+	struct histogram	*hs,
+	long long		bucket_low)
+{
+	struct histbucket	*buckets;
+
+	if (hs->nr_buckets == INT_MAX)
+		return EFBIG;
+
+	buckets = realloc(hs->buckets,
+			(hs->nr_buckets + 1) * sizeof(struct histbucket));
+	if (!buckets)
+		return errno;
+
+	hs->buckets = buckets;
+	hs->buckets[hs->nr_buckets].low = bucket_low;
+	hs->buckets[hs->nr_buckets].nr_obs = 0;
+	hs->buckets[hs->nr_buckets].sum = 0;
+	hs->nr_buckets++;
+	return 0;
+}
+
+/* Add an observation to the histogram. */
+void
+hist_add(
+	struct histogram	*hs,
+	long long		len)
+{
+	unsigned int		i;
+
+	hs->tot_obs++;
+	hs->tot_sum += len;
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].high >= len) {
+			hs->buckets[i].nr_obs++;
+			hs->buckets[i].sum += len;
+			break;
+		}
+	}
+}
+
+static int
+histbucket_cmp(
+	const void		*a,
+	const void		*b)
+{
+	const struct histbucket	*ha = a;
+	const struct histbucket	*hb = b;
+
+	if (ha->low < hb->low)
+		return -1;
+	if (ha->low > hb->low)
+		return 1;
+	return 0;
+}
+
+/* Prepare a histogram for bucket configuration. */
+void
+hist_init(
+	struct histogram	*hs)
+{
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Prepare a histogram to receive data observations. */
+void
+hist_prepare(
+	struct histogram	*hs,
+	long long		maxlen)
+{
+	unsigned int		i;
+
+	qsort(hs->buckets, hs->nr_buckets, sizeof(struct histbucket),
+			histbucket_cmp);
+
+	for (i = 0; i < hs->nr_buckets - 1; i++)
+		hs->buckets[i].high = hs->buckets[i + 1].low - 1;
+	hs->buckets[hs->nr_buckets - 1].high = maxlen;
+}
+
+/* Free all data associated with a histogram. */
+void
+hist_free(
+	struct histogram	*hs)
+{
+	free(hs->buckets);
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Dump a histogram to stdout. */
+void
+hist_print(
+	const struct histogram		*hs,
+	const struct histogram_strings	*hstr)
+{
+	unsigned int			obs_w = strlen(hstr->observations);
+	unsigned int			sum_w = strlen(hstr->sum);
+	unsigned int			i;
+
+	printf("%7s %7s %*s %*s %6s\n",
+			_("from"), _("to"),
+			obs_w, hstr->observations,
+			sum_w, hstr->sum,
+			_("pct"));
+
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].nr_obs == 0)
+			continue;
+
+		printf("%7lld %7lld %*lld %*lld %6.2f\n",
+				hs->buckets[i].low, hs->buckets[i].high,
+				obs_w, hs->buckets[i].nr_obs,
+				sum_w, hs->buckets[i].sum,
+				hs->buckets[i].sum * 100.0 / hs->tot_sum);
+	}
+}
+
+/* Summarize the contents of the histogram. */
+void
+hist_summarize(
+	const struct histogram		*hs,
+	const struct histogram_strings	*hstr)
+{
+	printf("%s %lld\n", hstr->observations, hs->tot_obs);
+	printf("%s %lld\n", hstr->sum, hs->tot_sum);
+	printf("%s %g\n", hstr->averages,
+			(double)hs->tot_sum / (double)hs->tot_obs);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
new file mode 100644
index 000000000000..68afdeb29415
--- /dev/null
+++ b/libfrog/histogram.h
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_HISTOGRAM_H__
+#define __LIBFROG_HISTOGRAM_H__
+
+struct histbucket {
+	/* Low and high size of this bucket */
+	long long		low;
+	long long		high;
+
+	/* Count of observations recorded */
+	long long		nr_obs;
+
+	/* Sum of values recorded */
+	long long		sum;
+};
+
+struct histogram {
+	/* Sum of all values recorded */
+	long long		tot_sum;
+
+	/* Count of all observations recorded */
+	long long		tot_obs;
+
+	struct histbucket	*buckets;
+
+	/* Number of buckets */
+	unsigned int		nr_buckets;
+};
+
+int hist_add_bucket(struct histogram *hs, long long bucket_low);
+void hist_add(struct histogram *hs, long long value);
+void hist_init(struct histogram *hs);
+void hist_prepare(struct histogram *hs, long long maxvalue);
+void hist_free(struct histogram *hs);
+
+struct histogram_strings {
+	/* What does each sum represent? ("free blocks") */
+	const char		*sum;
+
+	/* What does each observation represent? ("free extents") */
+	const char		*observations;
+
+	/* What does sum / observation represent? ("average extent length") */
+	const char		*averages;
+};
+
+void hist_print(const struct histogram *hs,
+		const struct histogram_strings *hstr);
+void hist_summarize(const struct histogram *hs,
+		const struct histogram_strings *hstr);
+
+static inline unsigned int hist_buckets(const struct histogram *hs)
+{
+	return hs->nr_buckets;
+}
+
+#endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/spaceman/freesp.c b/spaceman/freesp.c
index f5177cb4ee5d..dfbec52a7160 100644
--- a/spaceman/freesp.c
+++ b/spaceman/freesp.c
@@ -15,76 +15,52 @@
 #include "libfrog/paths.h"
 #include "space.h"
 #include "input.h"
-
-struct histent
-{
-	long long	low;
-	long long	high;
-	long long	count;
-	long long	blocks;
-};
+#include "libfrog/histogram.h"
 
 static int		agcount;
 static xfs_agnumber_t	*aglist;
-static struct histent	*hist;
+static struct histogram	freesp_hist;
 static int		dumpflag;
 static long long	equalsize;
 static long long	multsize;
-static int		histcount;
 static int		seen1;
 static int		summaryflag;
 static int		gflag;
 static bool		rtflag;
-static long long	totblocks;
-static long long	totexts;
 
 static cmdinfo_t freesp_cmd;
 
-static void
+static inline void
 addhistent(
 	long long	h)
 {
-	if (histcount == INT_MAX) {
+	int		error;
+
+	error = hist_add_bucket(&freesp_hist, h);
+	if (error == EFBIG) {
 		printf(_("Too many histogram buckets.\n"));
 		return;
 	}
-	hist = realloc(hist, (histcount + 1) * sizeof(*hist));
+	if (error) {
+		printf("%s\n", strerror(error));
+		return;
+	}
+
 	if (h == 0)
 		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
 	if (h == 1)
 		seen1 = 1;
 }
 
-static void
+static inline void
 addtohist(
 	xfs_agnumber_t	agno,
 	xfs_agblock_t	agbno,
 	off_t		len)
 {
-	long		i;
-
 	if (dumpflag)
 		printf("%8d %8d %8"PRId64"\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((struct histent *)a)->low - ((struct histent *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -93,6 +69,7 @@ histinit(
 {
 	long long	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -102,29 +79,19 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
-static void
+static inline void
 printhist(void)
 {
-	int	i;
+	struct histogram_strings hstr = {
+		.sum		= _("blocks"),
+		.observations	= _("extents"),
+	};
 
-	printf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			printf("%7lld %7lld %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist, &hstr);
 }
 
 static int
@@ -255,10 +222,8 @@ init(
 	int			speced = 0;	/* only one of -b -e -h or -m */
 
 	agcount = dumpflag = equalsize = multsize = optind = gflag = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
 	rtflag = false;
 
 	while ((c = getopt(argc, argv, "a:bde:gh:m:rs")) != EOF) {
@@ -287,7 +252,7 @@ init(
 			gflag++;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				goto many_spec;
 			/* addhistent increments histcount */
 			x = cvt_s64(optarg, 0);
@@ -345,18 +310,20 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount && !gflag)
+	if (hist_buckets(&freesp_hist) > 0 && !gflag)
 		printhist();
 	if (summaryflag) {
-		printf(_("total free extents %lld\n"), totexts);
-		printf(_("total free blocks %lld\n"), totblocks);
-		printf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
+		struct histogram_strings hstr = {
+			.sum		= _("total free blocks"),
+			.observations	= _("total free extents"),
+			.averages	= _("average free extent size"),
+		};
+
+		hist_summarize(&freesp_hist, &hstr);
 	}
 	if (aglist)
 		free(aglist);
-	if (hist)
-		free(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 1/7] libfrog: hoist free space histogram code
  2024-07-30  0:19 [PATCHSET v30.9 13/23] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
@ 2024-07-30  1:11 ` Darrick J. Wong
  0 siblings, 0 replies; 18+ messages in thread
From: Darrick J. Wong @ 2024-07-30  1:11 UTC (permalink / raw)
  To: djwong, cem; +Cc: Christoph Hellwig, linux-xfs

From: Darrick J. Wong <djwong@kernel.org>

Combine the two free space histograms in xfs_db and xfs_spaceman into a
single implementation.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 db/freesp.c         |   89 ++++++++------------------------
 libfrog/Makefile    |    2 +
 libfrog/histogram.c |  143 +++++++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/histogram.h |   63 ++++++++++++++++++++++
 spaceman/freesp.c   |   99 ++++++++++++-----------------------
 5 files changed, 264 insertions(+), 132 deletions(-)
 create mode 100644 libfrog/histogram.c
 create mode 100644 libfrog/histogram.h


diff --git a/db/freesp.c b/db/freesp.c
index 883741e66..43520481d 100644
--- a/db/freesp.c
+++ b/db/freesp.c
@@ -12,14 +12,7 @@
 #include "output.h"
 #include "init.h"
 #include "malloc.h"
-
-typedef struct histent
-{
-	int		low;
-	int		high;
-	long long	count;
-	long long	blocks;
-} histent_t;
+#include "libfrog/histogram.h"
 
 static void	addhistent(int h);
 static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
@@ -46,13 +39,10 @@ static int		alignment;
 static int		countflag;
 static int		dumpflag;
 static int		equalsize;
-static histent_t	*hist;
-static int		histcount;
+static struct histogram	freesp_hist;
 static int		multsize;
 static int		seen1;
 static int		summaryflag;
-static long long	totblocks;
-static long long	totexts;
 
 static const cmdinfo_t	freesp_cmd =
 	{ "freesp", NULL, freesp_f, 0, -1, 0,
@@ -93,18 +83,20 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount)
+	if (hist_buckets(&freesp_hist))
 		printhist();
 	if (summaryflag) {
-		dbprintf(_("total free extents %lld\n"), totexts);
-		dbprintf(_("total free blocks %lld\n"), totblocks);
-		dbprintf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
+		struct histogram_strings hstr = {
+			.sum		= _("total free blocks"),
+			.observations	= _("total free extents"),
+			.averages	= _("average free extent size"),
+		};
+
+		hist_summarize(&freesp_hist, &hstr);
 	}
 	if (aglist)
 		xfree(aglist);
-	if (hist)
-		xfree(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 
@@ -132,10 +124,9 @@ init(
 	int		speced = 0;
 
 	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
+
 	while ((c = getopt(argc, argv, "A:a:bcde:h:m:s")) != EOF) {
 		switch (c) {
 		case 'A':
@@ -163,7 +154,7 @@ init(
 			speced = 1;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				return usage();
 			addhistent(atoi(optarg));
 			speced = 1;
@@ -339,14 +330,7 @@ static void
 addhistent(
 	int	h)
 {
-	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
-	if (h == 0)
-		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
-	if (h == 1)
-		seen1 = 1;
+	hist_add_bucket(&freesp_hist, h);
 }
 
 static void
@@ -355,30 +339,12 @@ addtohist(
 	xfs_agblock_t	agbno,
 	xfs_extlen_t	len)
 {
-	int		i;
-
 	if (alignment && (XFS_AGB_TO_FSB(mp,agno,agbno) % alignment))
 		return;
 
 	if (dumpflag)
 		dbprintf("%8d %8d %8d\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((histent_t *)a)->low - ((histent_t *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -387,6 +353,7 @@ histinit(
 {
 	int	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -396,27 +363,17 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
 static void
 printhist(void)
 {
-	int	i;
+	struct histogram_strings hstr = {
+		.sum		= _("blocks"),
+		.observations	= _("extents"),
+	};
 
-	dbprintf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist, &hstr);
 }
diff --git a/libfrog/Makefile b/libfrog/Makefile
index 53e3c3492..acfa228bc 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -20,6 +20,7 @@ convert.c \
 crc32.c \
 file_exchange.c \
 fsgeom.c \
+histogram.c \
 list_sort.c \
 linux.c \
 logging.c \
@@ -45,6 +46,7 @@ dahashselftest.h \
 div64.h \
 file_exchange.h \
 fsgeom.h \
+histogram.h \
 logging.h \
 paths.h \
 projects.h \
diff --git a/libfrog/histogram.c b/libfrog/histogram.c
new file mode 100644
index 000000000..c2f344a88
--- /dev/null
+++ b/libfrog/histogram.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include <stdlib.h>
+#include <string.h>
+#include "platform_defs.h"
+#include "libfrog/histogram.h"
+
+/* Create a new bucket with the given low value. */
+int
+hist_add_bucket(
+	struct histogram	*hs,
+	long long		bucket_low)
+{
+	struct histbucket	*buckets;
+
+	if (hs->nr_buckets == INT_MAX)
+		return EFBIG;
+
+	buckets = realloc(hs->buckets,
+			(hs->nr_buckets + 1) * sizeof(struct histbucket));
+	if (!buckets)
+		return errno;
+
+	hs->buckets = buckets;
+	hs->buckets[hs->nr_buckets].low = bucket_low;
+	hs->buckets[hs->nr_buckets].nr_obs = 0;
+	hs->buckets[hs->nr_buckets].sum = 0;
+	hs->nr_buckets++;
+	return 0;
+}
+
+/* Add an observation to the histogram. */
+void
+hist_add(
+	struct histogram	*hs,
+	long long		len)
+{
+	unsigned int		i;
+
+	hs->tot_obs++;
+	hs->tot_sum += len;
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].high >= len) {
+			hs->buckets[i].nr_obs++;
+			hs->buckets[i].sum += len;
+			break;
+		}
+	}
+}
+
+static int
+histbucket_cmp(
+	const void		*a,
+	const void		*b)
+{
+	const struct histbucket	*ha = a;
+	const struct histbucket	*hb = b;
+
+	if (ha->low < hb->low)
+		return -1;
+	if (ha->low > hb->low)
+		return 1;
+	return 0;
+}
+
+/* Prepare a histogram for bucket configuration. */
+void
+hist_init(
+	struct histogram	*hs)
+{
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Prepare a histogram to receive data observations. */
+void
+hist_prepare(
+	struct histogram	*hs,
+	long long		maxlen)
+{
+	unsigned int		i;
+
+	qsort(hs->buckets, hs->nr_buckets, sizeof(struct histbucket),
+			histbucket_cmp);
+
+	for (i = 0; i < hs->nr_buckets - 1; i++)
+		hs->buckets[i].high = hs->buckets[i + 1].low - 1;
+	hs->buckets[hs->nr_buckets - 1].high = maxlen;
+}
+
+/* Free all data associated with a histogram. */
+void
+hist_free(
+	struct histogram	*hs)
+{
+	free(hs->buckets);
+	memset(hs, 0, sizeof(struct histogram));
+}
+
+/* Dump a histogram to stdout. */
+void
+hist_print(
+	const struct histogram		*hs,
+	const struct histogram_strings	*hstr)
+{
+	unsigned int			obs_w = strlen(hstr->observations);
+	unsigned int			sum_w = strlen(hstr->sum);
+	unsigned int			i;
+
+	printf("%7s %7s %*s %*s %6s\n",
+			_("from"), _("to"),
+			obs_w, hstr->observations,
+			sum_w, hstr->sum,
+			_("pct"));
+
+	for (i = 0; i < hs->nr_buckets; i++) {
+		if (hs->buckets[i].nr_obs == 0)
+			continue;
+
+		printf("%7lld %7lld %*lld %*lld %6.2f\n",
+				hs->buckets[i].low, hs->buckets[i].high,
+				obs_w, hs->buckets[i].nr_obs,
+				sum_w, hs->buckets[i].sum,
+				hs->buckets[i].sum * 100.0 / hs->tot_sum);
+	}
+}
+
+/* Summarize the contents of the histogram. */
+void
+hist_summarize(
+	const struct histogram		*hs,
+	const struct histogram_strings	*hstr)
+{
+	printf("%s %lld\n", hstr->observations, hs->tot_obs);
+	printf("%s %lld\n", hstr->sum, hs->tot_sum);
+	printf("%s %g\n", hstr->averages,
+			(double)hs->tot_sum / (double)hs->tot_obs);
+}
diff --git a/libfrog/histogram.h b/libfrog/histogram.h
new file mode 100644
index 000000000..68afdeb29
--- /dev/null
+++ b/libfrog/histogram.h
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * Copyright (c) 2017-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_HISTOGRAM_H__
+#define __LIBFROG_HISTOGRAM_H__
+
+struct histbucket {
+	/* Low and high size of this bucket */
+	long long		low;
+	long long		high;
+
+	/* Count of observations recorded */
+	long long		nr_obs;
+
+	/* Sum of values recorded */
+	long long		sum;
+};
+
+struct histogram {
+	/* Sum of all values recorded */
+	long long		tot_sum;
+
+	/* Count of all observations recorded */
+	long long		tot_obs;
+
+	struct histbucket	*buckets;
+
+	/* Number of buckets */
+	unsigned int		nr_buckets;
+};
+
+int hist_add_bucket(struct histogram *hs, long long bucket_low);
+void hist_add(struct histogram *hs, long long value);
+void hist_init(struct histogram *hs);
+void hist_prepare(struct histogram *hs, long long maxvalue);
+void hist_free(struct histogram *hs);
+
+struct histogram_strings {
+	/* What does each sum represent? ("free blocks") */
+	const char		*sum;
+
+	/* What does each observation represent? ("free extents") */
+	const char		*observations;
+
+	/* What does sum / observation represent? ("average extent length") */
+	const char		*averages;
+};
+
+void hist_print(const struct histogram *hs,
+		const struct histogram_strings *hstr);
+void hist_summarize(const struct histogram *hs,
+		const struct histogram_strings *hstr);
+
+static inline unsigned int hist_buckets(const struct histogram *hs)
+{
+	return hs->nr_buckets;
+}
+
+#endif /* __LIBFROG_HISTOGRAM_H__ */
diff --git a/spaceman/freesp.c b/spaceman/freesp.c
index f5177cb4e..dfbec52a7 100644
--- a/spaceman/freesp.c
+++ b/spaceman/freesp.c
@@ -15,76 +15,52 @@
 #include "libfrog/paths.h"
 #include "space.h"
 #include "input.h"
-
-struct histent
-{
-	long long	low;
-	long long	high;
-	long long	count;
-	long long	blocks;
-};
+#include "libfrog/histogram.h"
 
 static int		agcount;
 static xfs_agnumber_t	*aglist;
-static struct histent	*hist;
+static struct histogram	freesp_hist;
 static int		dumpflag;
 static long long	equalsize;
 static long long	multsize;
-static int		histcount;
 static int		seen1;
 static int		summaryflag;
 static int		gflag;
 static bool		rtflag;
-static long long	totblocks;
-static long long	totexts;
 
 static cmdinfo_t freesp_cmd;
 
-static void
+static inline void
 addhistent(
 	long long	h)
 {
-	if (histcount == INT_MAX) {
+	int		error;
+
+	error = hist_add_bucket(&freesp_hist, h);
+	if (error == EFBIG) {
 		printf(_("Too many histogram buckets.\n"));
 		return;
 	}
-	hist = realloc(hist, (histcount + 1) * sizeof(*hist));
+	if (error) {
+		printf("%s\n", strerror(error));
+		return;
+	}
+
 	if (h == 0)
 		h = 1;
-	hist[histcount].low = h;
-	hist[histcount].count = hist[histcount].blocks = 0;
-	histcount++;
 	if (h == 1)
 		seen1 = 1;
 }
 
-static void
+static inline void
 addtohist(
 	xfs_agnumber_t	agno,
 	xfs_agblock_t	agbno,
 	off_t		len)
 {
-	long		i;
-
 	if (dumpflag)
 		printf("%8d %8d %8"PRId64"\n", agno, agbno, len);
-	totexts++;
-	totblocks += len;
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].high >= len) {
-			hist[i].count++;
-			hist[i].blocks += len;
-			break;
-		}
-	}
-}
-
-static int
-hcmp(
-	const void	*a,
-	const void	*b)
-{
-	return ((struct histent *)a)->low - ((struct histent *)b)->low;
+	hist_add(&freesp_hist, len);
 }
 
 static void
@@ -93,6 +69,7 @@ histinit(
 {
 	long long	i;
 
+	hist_init(&freesp_hist);
 	if (equalsize) {
 		for (i = 1; i < maxlen; i += equalsize)
 			addhistent(i);
@@ -102,29 +79,19 @@ histinit(
 	} else {
 		if (!seen1)
 			addhistent(1);
-		qsort(hist, histcount, sizeof(*hist), hcmp);
-	}
-	for (i = 0; i < histcount; i++) {
-		if (i < histcount - 1)
-			hist[i].high = hist[i + 1].low - 1;
-		else
-			hist[i].high = maxlen;
 	}
+	hist_prepare(&freesp_hist, maxlen);
 }
 
-static void
+static inline void
 printhist(void)
 {
-	int	i;
+	struct histogram_strings hstr = {
+		.sum		= _("blocks"),
+		.observations	= _("extents"),
+	};
 
-	printf("%7s %7s %7s %7s %6s\n",
-		_("from"), _("to"), _("extents"), _("blocks"), _("pct"));
-	for (i = 0; i < histcount; i++) {
-		if (hist[i].count)
-			printf("%7lld %7lld %7lld %7lld %6.2f\n", hist[i].low,
-				hist[i].high, hist[i].count, hist[i].blocks,
-				hist[i].blocks * 100.0 / totblocks);
-	}
+	hist_print(&freesp_hist, &hstr);
 }
 
 static int
@@ -255,10 +222,8 @@ init(
 	int			speced = 0;	/* only one of -b -e -h or -m */
 
 	agcount = dumpflag = equalsize = multsize = optind = gflag = 0;
-	histcount = seen1 = summaryflag = 0;
-	totblocks = totexts = 0;
+	seen1 = summaryflag = 0;
 	aglist = NULL;
-	hist = NULL;
 	rtflag = false;
 
 	while ((c = getopt(argc, argv, "a:bde:gh:m:rs")) != EOF) {
@@ -287,7 +252,7 @@ init(
 			gflag++;
 			break;
 		case 'h':
-			if (speced && !histcount)
+			if (speced && hist_buckets(&freesp_hist) == 0)
 				goto many_spec;
 			/* addhistent increments histcount */
 			x = cvt_s64(optarg, 0);
@@ -345,18 +310,20 @@ freesp_f(
 		if (inaglist(agno))
 			scan_ag(agno);
 	}
-	if (histcount && !gflag)
+	if (hist_buckets(&freesp_hist) > 0 && !gflag)
 		printhist();
 	if (summaryflag) {
-		printf(_("total free extents %lld\n"), totexts);
-		printf(_("total free blocks %lld\n"), totblocks);
-		printf(_("average free extent size %g\n"),
-			(double)totblocks / (double)totexts);
+		struct histogram_strings hstr = {
+			.sum		= _("total free blocks"),
+			.observations	= _("total free extents"),
+			.averages	= _("average free extent size"),
+		};
+
+		hist_summarize(&freesp_hist, &hstr);
 	}
 	if (aglist)
 		free(aglist);
-	if (hist)
-		free(hist);
+	hist_free(&freesp_hist);
 	return 0;
 }
 


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2024-07-30  1:11 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-03 21:47 [PATCHSET v30.8] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
2024-07-03 21:47 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
2024-07-04  5:22   ` Christoph Hellwig
2024-07-08 17:27   ` [PATCH v30.8.1 " Darrick J. Wong
2024-07-03 21:47 ` [PATCH 2/7] libfrog: print wider columns for free space histogram Darrick J. Wong
2024-07-03 21:47 ` [PATCH 3/7] libfrog: print cdf of free space buckets Darrick J. Wong
2024-07-03 21:47 ` [PATCH 4/7] xfs_scrub: don't close stdout when closing the progress bar Darrick J. Wong
2024-07-03 21:48 ` [PATCH 5/7] xfs_scrub: remove pointless spacemap.c arguments Darrick J. Wong
2024-07-03 21:48 ` [PATCH 6/7] xfs_scrub: collect free space histograms during phase 7 Darrick J. Wong
2024-07-03 21:48 ` [PATCH 7/7] xfs_scrub: tune fstrim minlen parameter based on free space histograms Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2024-07-30  0:19 [PATCHSET v30.9 13/23] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
2024-07-30  1:11 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
2024-07-02  0:50 [PATCHSET v30.7 05/16] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
2024-07-02  1:03 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
2024-07-02  5:32   ` Christoph Hellwig
2024-07-03  2:47     ` Darrick J. Wong
2024-07-03  4:30       ` Christoph Hellwig
2024-07-03  4:55         ` Darrick J. Wong
2023-12-31 19:48 [PATCHSET v29.0 33/40] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
2023-12-31 22:50 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong
2023-05-26  0:39 [PATCHSET v25.0 0/7] xfs_scrub: use free space histograms to reduce fstrim runtime Darrick J. Wong
2023-05-26  1:50 ` [PATCH 1/7] libfrog: hoist free space histogram code Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).